Merge pull request #166 from github/type_tracking

Minimal implementation of shared type-tracking library
2026-02-20 00:43:44 +01:00 · 2021-05-06 10:59:45 +02:00
parent 73b5699f32 3a3586f14b
commit 07c059cb2e
9 changed files with 641 additions and 40 deletions
--- a/2
+++ b/2
--- a/ql/src/codeql_ruby/controlflow/CfgNodes.qll
+++ b/ql/src/codeql_ruby/controlflow/CfgNodes.qll
@@ -247,6 +247,11 @@ module ExprNodes {
    override predicate relevantChild(Expr e) { e = this.getValue() or e = this.getBranch(_) }
  }

+  /** A control-flow node that wraps a `MethodCall` AST expression. */
+  class MethodCallCfgNode extends CallCfgNode {
+    MethodCallCfgNode() { this.getExpr() instanceof MethodCall }
+  }
+
  /** A control-flow node that wraps a `CaseExpr` AST expression. */
  class CaseExprCfgNode extends ExprCfgNode {
    override CaseExprChildMapping e;
--- a/ql/src/codeql_ruby/dataflow/internal/DataFlowDispatch.qll
+++ b/ql/src/codeql_ruby/dataflow/internal/DataFlowDispatch.qll
@@ -40,6 +40,12 @@ class DataFlowCallable = CfgScope;

 class DataFlowCall extends CfgNodes::ExprNodes::CallCfgNode {
  DataFlowCallable getEnclosingCallable() { result = this.getScope() }
+
+  DataFlowCallable getTarget() {
+    // TODO: this is a placeholder that finds a method with the same name, iff it's uniquely named.
+    result =
+      unique(DataFlowCallable c | c.(Method).getName() = this.getNode().(MethodCall).getMethodName())
+  }
 }

 /** Gets a viable run-time target for the call `call`. */
--- a/ql/src/codeql_ruby/dataflow/internal/DataFlowImpl.qll
+++ b/ql/src/codeql_ruby/dataflow/internal/DataFlowImpl.qll
@@ -2133,11 +2133,8 @@ private module Stage4 {

  bindingset[node, cc, config]
  private LocalCc getLocalCc(Node node, Cc cc, Configuration config) {
-    exists(Cc cc0 |
-      cc = pragma[only_bind_into](cc0) and
-      localFlowEntry(node, config) and
-      result = getLocalCallContext(cc0, getNodeEnclosingCallable(node))
-    )
+    localFlowEntry(node, config) and
+    result = getLocalCallContext(pragma[only_bind_out](cc), getNodeEnclosingCallable(node))
  }

  private predicate localStep(
@@ -3132,7 +3129,7 @@ private predicate pathStep(PathNodeMid mid, Node node, CallContext cc, SummaryCt
    conf = mid.getConfiguration() and
    cc = mid.getCallContext() and
    sc = mid.getSummaryCtx() and
-    localCC = getLocalCallContext(cc, getNodeEnclosingCallable(midnode)) and
+    localCC = getLocalCallContext(pragma[only_bind_out](cc), getNodeEnclosingCallable(midnode)) and
    ap0 = mid.getAp()
  |
    localFlowBigStep(midnode, node, true, _, conf, localCC) and
--- a/ql/src/codeql_ruby/dataflow/internal/DataFlowPublic.qll
+++ b/ql/src/codeql_ruby/dataflow/internal/DataFlowPublic.qll
@@ -2,6 +2,7 @@ private import ruby
 private import DataFlowDispatch
 private import DataFlowPrivate
 private import codeql_ruby.CFG
+private import codeql_ruby.typetracking.TypeTracker

 /**
 * An element, viewed as a node in a data flow graph. Either an expression
@@ -73,6 +74,37 @@ class ParameterNode extends Node, TParameterNode {
  predicate isParameterOf(Callable c, int i) { p = c.getParameter(i) }
 }

+/**
+ * A data-flow node that is a source of local flow, that is, a node that no `ExprNode` reaches via local flow steps.
+ */
+class LocalSourceNode extends Node {
+  LocalSourceNode() { not simpleLocalFlowStep+(any(ExprNode n), this) }
+
+  /** Holds if this `LocalSourceNode` can flow to `nodeTo` in zero or more local flow steps. */
+  pragma[inline]
+  predicate flowsTo(Node nodeTo) { hasLocalSource(nodeTo, this) }
+
+  /**
+   * Gets a node that this node may flow to using one heap and/or interprocedural step.
+   *
+   * See `TypeTracker` for more details about how to use this.
+   */
+  pragma[inline]
+  LocalSourceNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
+}
+
+predicate hasLocalSource(Node sink, Node source) {
+  // Declaring `source` to be a `LocalSourceNode` currently causes a redundant check in the
+  // recursive case, so instead we check it explicitly here (zero-step base case).
+  source = sink and
+  source instanceof LocalSourceNode
+  or
+  exists(Node mid |
+    hasLocalSource(mid, source) and
+    simpleLocalFlowStep(mid, sink)
+  )
+}
+
 /** Gets a node corresponding to expression `e`. */
 ExprNode exprNode(CfgNodes::ExprCfgNode e) { result.getExprNode() = e }

--- a/ql/src/codeql_ruby/dataflow/internal/SsaImplCommon.qll
+++ b/ql/src/codeql_ruby/dataflow/internal/SsaImplCommon.qll
@@ -1,5 +1,5 @@
 /**
- * Provides a language-independant implementation of static single assignment
+ * Provides a language-independent implementation of static single assignment
 * (SSA) form.
 */

@@ -316,15 +316,23 @@ private module SsaDefReaches {
    )
  }

+  /**
+   * Holds if the reference to `def` at index `i` in basic block `bb` is the
+   * last reference to `v` inside `bb`.
+   */
+  pragma[noinline]
+  predicate lastSsaRef(Definition def, SourceVariable v, BasicBlock bb, int i) {
+    ssaDefRank(def, v, bb, i, _) = maxSsaRefRank(bb, v)
+  }
+
  predicate defOccursInBlock(Definition def, BasicBlock bb, SourceVariable v) {
    exists(ssaDefRank(def, v, bb, _, _))
  }

  pragma[noinline]
-  private BasicBlock getAMaybeLiveSuccessor(Definition def, BasicBlock bb) {
-    result = getABasicBlockSuccessor(bb) and
-    not defOccursInBlock(_, bb, def.getSourceVariable()) and
-    ssaDefReachesEndOfBlock(bb, def, _)
+  private predicate ssaDefReachesThroughBlock(Definition def, BasicBlock bb) {
+    ssaDefReachesEndOfBlock(bb, def, _) and
+    not defOccursInBlock(_, bb, def.getSourceVariable())
  }

  /**
@@ -337,7 +345,11 @@ private module SsaDefReaches {
    defOccursInBlock(def, bb1, _) and
    bb2 = getABasicBlockSuccessor(bb1)
    or
-    exists(BasicBlock mid | varBlockReaches(def, bb1, mid) | bb2 = getAMaybeLiveSuccessor(def, mid))
+    exists(BasicBlock mid |
+      varBlockReaches(def, bb1, mid) and
+      ssaDefReachesThroughBlock(def, mid) and
+      bb2 = getABasicBlockSuccessor(mid)
+    )
  }

  /**
@@ -348,24 +360,16 @@ private module SsaDefReaches {
   */
  predicate defAdjacentRead(Definition def, BasicBlock bb1, BasicBlock bb2, int i2) {
    varBlockReaches(def, bb1, bb2) and
-    ssaRefRank(bb2, i2, def.getSourceVariable(), SsaRead()) = 1 and
-    variableRead(bb2, i2, _, _)
+    ssaRefRank(bb2, i2, def.getSourceVariable(), SsaRead()) = 1
  }
 }

 private import SsaDefReaches

-pragma[noinline]
-private predicate ssaDefReachesEndOfBlockRec(BasicBlock bb, Definition def, SourceVariable v) {
-  exists(BasicBlock idom | ssaDefReachesEndOfBlock(idom, def, v) |
-    // The construction of SSA form ensures that each read of a variable is
-    // dominated by its definition. An SSA definition therefore reaches a
-    // control flow node if it is the _closest_ SSA definition that dominates
-    // the node. If two definitions dominate a node then one must dominate the
-    // other, so therefore the definition of _closest_ is given by the dominator
-    // tree. Thus, reaching definitions can be calculated in terms of dominance.
-    idom = getImmediateBasicBlockDominator(bb)
-  )
+pragma[nomagic]
+predicate liveThrough(BasicBlock bb, SourceVariable v) {
+  liveAtExit(bb, v) and
+  not ssaRef(bb, _, v, SsaDef())
 }

 /**
@@ -382,9 +386,14 @@ predicate ssaDefReachesEndOfBlock(BasicBlock bb, Definition def, SourceVariable
    liveAtExit(bb, v)
  )
  or
-  ssaDefReachesEndOfBlockRec(bb, def, v) and
-  liveAtExit(bb, v) and
-  not ssaRef(bb, _, v, SsaDef())
+  // The construction of SSA form ensures that each read of a variable is
+  // dominated by its definition. An SSA definition therefore reaches a
+  // control flow node if it is the _closest_ SSA definition that dominates
+  // the node. If two definitions dominate a node then one must dominate the
+  // other, so therefore the definition of _closest_ is given by the dominator
+  // tree. Thus, reaching definitions can be calculated in terms of dominance.
+  ssaDefReachesEndOfBlock(getImmediateBasicBlockDominator(bb), def, pragma[only_bind_into](v)) and
+  liveThrough(bb, pragma[only_bind_into](v))
 }

 /**
@@ -433,15 +442,22 @@ predicate adjacentDefRead(Definition def, BasicBlock bb1, int i1, BasicBlock bb2
    bb2 = bb1
  )
  or
-  exists(SourceVariable v | ssaDefRank(def, v, bb1, i1, _) = maxSsaRefRank(bb1, v)) and
+  lastSsaRef(def, _, bb1, i1) and
  defAdjacentRead(def, bb1, bb2, i2)
 }

+pragma[noinline]
+private predicate adjacentDefRead(
+  Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2, SourceVariable v
+) {
+  adjacentDefRead(def, bb1, i1, bb2, i2) and
+  v = def.getSourceVariable()
+}
+
 private predicate adjacentDefReachesRead(
  Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2
 ) {
-  adjacentDefRead(def, bb1, i1, bb2, i2) and
-  exists(SourceVariable v | v = def.getSourceVariable() |
+  exists(SourceVariable v | adjacentDefRead(def, bb1, i1, bb2, i2, v) |
    ssaRef(bb1, i1, v, SsaDef())
    or
    variableRead(bb1, i1, v, true)
@@ -474,17 +490,19 @@ predicate adjacentDefNoUncertainReads(Definition def, BasicBlock bb1, int i1, Ba
 */
 pragma[nomagic]
 predicate lastRefRedef(Definition def, BasicBlock bb, int i, Definition next) {
-  exists(int rnk, SourceVariable v, int j | rnk = ssaDefRank(def, v, bb, i, _) |
+  exists(SourceVariable v |
    // Next reference to `v` inside `bb` is a write
-    next.definesAt(v, bb, j) and
-    rnk + 1 = ssaRefRank(bb, j, v, SsaDef())
+    exists(int rnk, int j |
+      rnk = ssaDefRank(def, v, bb, i, _) and
+      next.definesAt(v, bb, j) and
+      rnk + 1 = ssaRefRank(bb, j, v, SsaDef())
+    )
    or
    // Can reach a write using one or more steps
-    rnk = maxSsaRefRank(bb, v) and
+    lastSsaRef(def, v, bb, i) and
    exists(BasicBlock bb2 |
      varBlockReaches(def, bb, bb2) and
-      next.definesAt(v, bb2, j) and
-      1 = ssaRefRank(bb2, j, v, SsaDef())
+      1 = ssaDefRank(next, v, bb2, _, SsaDef())
    )
  )
 }
@@ -538,7 +556,8 @@ pragma[nomagic]
 predicate lastRef(Definition def, BasicBlock bb, int i) {
  lastRefRedef(def, bb, i, _)
  or
-  exists(SourceVariable v | ssaDefRank(def, v, bb, i, _) = maxSsaRefRank(bb, v) |
+  lastSsaRef(def, _, bb, i) and
+  (
    // Can reach exit directly
    bb instanceof ExitBasicBlock
    or
--- a/ql/src/codeql_ruby/typetracking/TypeTracker.qll
+++ b/ql/src/codeql_ruby/typetracking/TypeTracker.qll
@@ -0,0 +1,420 @@
+/** Step Summaries and Type Tracking */
+
+private import TypeTrackerSpecific
+
+/**
+ * Any string that may appear as the name of a piece of content. This will usually include things like:
+ * - Attribute names (in Python)
+ * - Property names (in JavaScript)
+ *
+ * In general, this can also be used to model things like stores to specific list indices. To ensure
+ * correctness, it is important that
+ *
+ * - different types of content do not have overlapping names, and
+ * - the empty string `""` is not a valid piece of content, as it is used to indicate the absence of
+ *   content instead.
+ */
+class ContentName extends string {
+  ContentName() { this = getPossibleContentName() }
+}
+
+/** Either a content name, or the empty string (representing no content). */
+class OptionalContentName extends string {
+  OptionalContentName() { this instanceof ContentName or this = "" }
+}
+
+/**
+ * A description of a step on an inter-procedural data flow path.
+ */
+private newtype TStepSummary =
+  LevelStep() or
+  CallStep() or
+  ReturnStep() or
+  StoreStep(ContentName content) or
+  LoadStep(ContentName content)
+
+/**
+ * INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
+ *
+ * A description of a step on an inter-procedural data flow path.
+ */
+class StepSummary extends TStepSummary {
+  /** Gets a textual representation of this step summary. */
+  string toString() {
+    this instanceof LevelStep and result = "level"
+    or
+    this instanceof CallStep and result = "call"
+    or
+    this instanceof ReturnStep and result = "return"
+    or
+    exists(string content | this = StoreStep(content) | result = "store " + content)
+    or
+    exists(string content | this = LoadStep(content) | result = "load " + content)
+  }
+}
+
+/** Provides predicates for updating step summaries (`StepSummary`s). */
+module StepSummary {
+  /**
+   * Holds if `summary` corresponds to having taken a forwards
+   * heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
+   */
+  cached
+  predicate step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
+    exists(Node mid | nodeFrom.flowsTo(mid) and smallstep(mid, nodeTo, summary))
+  }
+
+  /**
+   * Holds if `summary` corresponds to having taken a forwards
+   * local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
+   *
+   * Unlike `StepSummary::step`, this predicate does not compress
+   * type-preserving steps.
+   */
+  predicate smallstep(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
+    jumpStep(nodeFrom, nodeTo) and
+    summary = LevelStep()
+    or
+    callStep(nodeFrom, nodeTo) and summary = CallStep()
+    or
+    returnStep(nodeFrom, nodeTo) and
+    summary = ReturnStep()
+    or
+    exists(string content |
+      localSourceStoreStep(nodeFrom, nodeTo, content) and
+      summary = StoreStep(content)
+      or
+      basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content)
+    )
+  }
+
+  /**
+   * Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
+   *
+   * Note that `nodeTo` will always be a local source node that flows to the place where the content
+   * is written in `basicStoreStep`. This may lead to the flow of information going "back in time"
+   * from the point of view of the execution of the program.
+   *
+   * For instance, if we interpret attribute writes in Python as writing to content with the same
+   * name as the attribute and consider the following snippet
+   *
+   * ```python
+   * def foo(y):
+   *    x = Foo()
+   *    bar(x)
+   *    x.attr = y
+   *    baz(x)
+   *
+   * def bar(x):
+   *    z = x.attr
+   * ```
+   * for the attribute write `x.attr = y`, we will have `content` being the literal string `"attr"`,
+   * `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
+   * function. This means we will track the fact that `x.attr` can have the type of `y` into the
+   * assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
+   */
+  predicate localSourceStoreStep(Node nodeFrom, LocalSourceNode nodeTo, string content) {
+    exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
+  }
+}
+
+private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalContentName content)
+
+/**
+ * Summary of the steps needed to track a value to a given dataflow node.
+ *
+ * This can be used to track objects that implement a certain API in order to
+ * recognize calls to that API. Note that type-tracking does not by itself provide a
+ * source/sink relation, that is, it may determine that a node has a given type,
+ * but it won't determine where that type came from.
+ *
+ * It is recommended that all uses of this type are written in the following form,
+ * for tracking some type `myType`:
+ * ```ql
+ * DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
+ *   t.start() and
+ *   result = < source of myType >
+ *   or
+ *   exists (DataFlow::TypeTracker t2 |
+ *     result = myType(t2).track(t2, t)
+ *   )
+ * }
+ *
+ * DataFlow::Node myType() { myType(DataFlow::TypeTracker::end()).flowsTo(result) }
+ * ```
+ *
+ * Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
+ * `t = t2.step(myType(t2), result)`. If you additionally want to track individual
+ * intra-procedural steps, use `t = t2.smallstep(myType(t2), result)`.
+ */
+class TypeTracker extends TTypeTracker {
+  Boolean hasCall;
+  OptionalContentName content;
+
+  TypeTracker() { this = MkTypeTracker(hasCall, content) }
+
+  /** Gets the summary resulting from appending `step` to this type-tracking summary. */
+  cached
+  TypeTracker append(StepSummary step) {
+    step = LevelStep() and result = this
+    or
+    step = CallStep() and result = MkTypeTracker(true, content)
+    or
+    step = ReturnStep() and hasCall = false and result = this
+    or
+    step = LoadStep(content) and result = MkTypeTracker(hasCall, "")
+    or
+    exists(string p | step = StoreStep(p) and content = "" and result = MkTypeTracker(hasCall, p))
+  }
+
+  /** Gets a textual representation of this summary. */
+  string toString() {
+    exists(string withCall, string withContent |
+      (if hasCall = true then withCall = "with" else withCall = "without") and
+      (if content != "" then withContent = " with content " + content else withContent = "") and
+      result = "type tracker " + withCall + " call steps" + withContent
+    )
+  }
+
+  /**
+   * Holds if this is the starting point of type tracking.
+   */
+  predicate start() { hasCall = false and content = "" }
+
+  /**
+   * Holds if this is the starting point of type tracking, and the value starts in the content named `contentName`.
+   * The type tracking only ends after the content has been loaded.
+   */
+  predicate startInContent(ContentName contentName) { hasCall = false and content = contentName }
+
+  /**
+   * Holds if this is the starting point of type tracking
+   * when tracking a parameter into a call, but not out of it.
+   */
+  predicate call() { hasCall = true and content = "" }
+
+  /**
+   * Holds if this is the end point of type tracking.
+   */
+  predicate end() { content = "" }
+
+  /**
+   * INTERNAL. DO NOT USE.
+   *
+   * Holds if this type has been tracked into a call.
+   */
+  boolean hasCall() { result = hasCall }
+
+  /**
+   * INTERNAL. DO NOT USE.
+   *
+   * Gets the content associated with this type tracker.
+   */
+  string getContent() { result = content }
+
+  /**
+   * Gets a type tracker that starts where this one has left off to allow continued
+   * tracking.
+   *
+   * This predicate is only defined if the type is not associated to a piece of content.
+   */
+  TypeTracker continue() { content = "" and result = this }
+
+  /**
+   * Gets the summary that corresponds to having taken a forwards
+   * heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
+   */
+  pragma[inline]
+  TypeTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
+    exists(StepSummary summary |
+      StepSummary::step(nodeFrom, pragma[only_bind_out](nodeTo), pragma[only_bind_into](summary)) and
+      result = this.append(pragma[only_bind_into](summary))
+    )
+  }
+
+  /**
+   * Gets the summary that corresponds to having taken a forwards
+   * local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
+   *
+   * Unlike `TypeTracker::step`, this predicate exposes all edges
+   * in the flow graph, and not just the edges between `LocalSourceNode`s.
+   * It may therefore be less performant.
+   *
+   * Type tracking predicates using small steps typically take the following form:
+   * ```ql
+   * DataFlow::Node myType(DataFlow::TypeTracker t) {
+   *   t.start() and
+   *   result = < source of myType >
+   *   or
+   *   exists (DataFlow::TypeTracker t2 |
+   *     t = t2.smallstep(myType(t2), result)
+   *   )
+   * }
+   *
+   * DataFlow::Node myType() {
+   *   result = myType(DataFlow::TypeTracker::end())
+   * }
+   * ```
+   */
+  pragma[inline]
+  TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
+    exists(StepSummary summary |
+      StepSummary::smallstep(nodeFrom, nodeTo, summary) and
+      result = this.append(summary)
+    )
+    or
+    simpleLocalFlowStep(nodeFrom, nodeTo) and
+    result = this
+  }
+}
+
+/** Provides predicates for implementing custom `TypeTracker`s. */
+module TypeTracker {
+  /**
+   * Gets a valid end point of type tracking.
+   */
+  TypeTracker end() { result.end() }
+}
+
+private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, OptionalContentName content)
+
+/**
+ * Summary of the steps needed to back-track a use of a value to a given dataflow node.
+ *
+ * This can for example be used to track callbacks that are passed to a certain API,
+ * so we can model specific parameters of that callback as having a certain type.
+ *
+ * Note that type back-tracking does not provide a source/sink relation, that is,
+ * it may determine that a node will be used in an API call somewhere, but it won't
+ * determine exactly where that use was, or the path that led to the use.
+ *
+ * It is recommended that all uses of this type are written in the following form,
+ * for back-tracking some callback type `myCallback`:
+ *
+ * ```ql
+ * DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
+ *   t.start() and
+ *   result = (< some API call >).getArgument(< n >).getALocalSource()
+ *   or
+ *   exists (DataFlow::TypeBackTracker t2 |
+ *     result = myCallback(t2).backtrack(t2, t)
+ *   )
+ * }
+ *
+ * DataFlow::LocalSourceNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) }
+ * ```
+ *
+ * Instead of `result = myCallback(t2).backtrack(t2, t)`, you can also use the equivalent
+ * `t2 = t.step(result, myCallback(t2))`. If you additionally want to track individual
+ * intra-procedural steps, use `t2 = t.smallstep(result, myCallback(t2))`.
+ */
+class TypeBackTracker extends TTypeBackTracker {
+  Boolean hasReturn;
+  string content;
+
+  TypeBackTracker() { this = MkTypeBackTracker(hasReturn, content) }
+
+  /** Gets the summary resulting from prepending `step` to this type-tracking summary. */
+  TypeBackTracker prepend(StepSummary step) {
+    step = LevelStep() and result = this
+    or
+    step = CallStep() and hasReturn = false and result = this
+    or
+    step = ReturnStep() and result = MkTypeBackTracker(true, content)
+    or
+    exists(string p |
+      step = LoadStep(p) and content = "" and result = MkTypeBackTracker(hasReturn, p)
+    )
+    or
+    step = StoreStep(content) and result = MkTypeBackTracker(hasReturn, "")
+  }
+
+  /** Gets a textual representation of this summary. */
+  string toString() {
+    exists(string withReturn, string withContent |
+      (if hasReturn = true then withReturn = "with" else withReturn = "without") and
+      (if content != "" then withContent = " with content " + content else withContent = "") and
+      result = "type back-tracker " + withReturn + " return steps" + withContent
+    )
+  }
+
+  /**
+   * Holds if this is the starting point of type back-tracking.
+   */
+  predicate start() { hasReturn = false and content = "" }
+
+  /**
+   * Holds if this is the end point of type back-tracking.
+   */
+  predicate end() { content = "" }
+
+  /**
+   * INTERNAL. DO NOT USE.
+   *
+   * Holds if this type has been back-tracked into a call through return edge.
+   */
+  boolean hasReturn() { result = hasReturn }
+
+  /**
+   * Gets a type back-tracker that starts where this one has left off to allow
+   * continued back-tracking.
+   *
+   * This predicate is only defined if the type has not been tracked into a piece of content.
+   */
+  TypeBackTracker continue() { content = "" and result = this }
+
+  /**
+   * Gets the summary that corresponds to having taken a backwards
+   * heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
+   */
+  pragma[inline]
+  TypeBackTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
+    exists(StepSummary summary |
+      StepSummary::step(pragma[only_bind_out](nodeFrom), nodeTo, pragma[only_bind_into](summary)) and
+      this = result.prepend(pragma[only_bind_into](summary))
+    )
+  }
+
+  /**
+   * Gets the summary that corresponds to having taken a backwards
+   * local, heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
+   *
+   * Unlike `TypeBackTracker::step`, this predicate exposes all edges
+   * in the flowgraph, and not just the edges between
+   * `LocalSourceNode`s. It may therefore be less performant.
+   *
+   * Type tracking predicates using small steps typically take the following form:
+   * ```ql
+   * DataFlow::Node myType(DataFlow::TypeBackTracker t) {
+   *   t.start() and
+   *   result = < some API call >.getArgument(< n >)
+   *   or
+   *   exists (DataFlow::TypeBackTracker t2 |
+   *     t2 = t.smallstep(result, myType(t2))
+   *   )
+   * }
+   *
+   * DataFlow::Node myType() {
+   *   result = myType(DataFlow::TypeBackTracker::end())
+   * }
+   * ```
+   */
+  pragma[inline]
+  TypeBackTracker smallstep(Node nodeFrom, Node nodeTo) {
+    exists(StepSummary summary |
+      StepSummary::smallstep(nodeFrom, nodeTo, summary) and
+      this = result.prepend(summary)
+    )
+    or
+    simpleLocalFlowStep(nodeFrom, nodeTo) and
+    this = result
+  }
+}
+
+/** Provides predicates for implementing custom `TypeBackTracker`s. */
+module TypeBackTracker {
+  /**
+   * Gets a valid end point of type back-tracking.
+   */
+  TypeBackTracker end() { result.end() }
+}
--- a/ql/src/codeql_ruby/typetracking/TypeTrackerSpecific.qll
+++ b/ql/src/codeql_ruby/typetracking/TypeTrackerSpecific.qll
@@ -0,0 +1,118 @@
+private import codeql_ruby.AST as AST
+private import codeql_ruby.dataflow.internal.DataFlowPublic as DataFlowPublic
+private import codeql_ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate
+private import codeql_ruby.dataflow.internal.DataFlowDispatch as DataFlowDispatch
+private import codeql_ruby.controlflow.CfgNodes
+
+class Node = DataFlowPublic::Node;
+
+class LocalSourceNode = DataFlowPublic::LocalSourceNode;
+
+predicate simpleLocalFlowStep = DataFlowPrivate::simpleLocalFlowStep/2;
+
+predicate jumpStep = DataFlowPrivate::jumpStep/2;
+
+/**
+ * Gets the name of a possible piece of content.
+ *
+ * For Ruby, this is currently the attribute name of any setter method call that
+ * is not an element reference (the setter's method name without the trailing `=`).
+ */
+string getPossibleContentName() { result = getSetterCallAttributeName(_) }
+
+/** Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call. */
+predicate callStep(
+  DataFlowPrivate::ArgumentNode nodeFrom, DataFlowPrivate::ExplicitParameterNode nodeTo
+) {
+  exists(DataFlowDispatch::DataFlowCall call, DataFlowDispatch::DataFlowCallable callable, int i |
+    call.getTarget() = callable and
+    nodeFrom.argumentOf(call, i) and
+    nodeTo.isParameterOf(callable, i)
+  )
+}
+
+/** Holds if `nodeFrom` steps to `nodeTo` by being returned from a call. */
+predicate returnStep(DataFlowPrivate::ReturnNode nodeFrom, Node nodeTo) {
+  exists(DataFlowDispatch::DataFlowCall call |
+    nodeFrom.getEnclosingCallable() = call.getTarget() and
+    nodeTo.asExpr().getNode() = call.getNode()
+  )
+}
+
+/**
+ * Holds if `nodeFrom` is being written to the `content` content of the object
+ * in `nodeTo`, where `nodeTo` is the receiver of a setter method call.
+ *
+ * Note that the choice of `nodeTo` does not have to make sense
+ * "chronologically". All we care about is whether the `content` content of
+ * `nodeTo` can have a specific type, and the assumption is that if a specific
+ * type appears here, then any access of that particular content can yield
+ * something of that particular type.
+ *
+ * Thus, in an example such as
+ *
+ * ```rb
+ * def foo(y)
+ *    x = Foo.new
+ *    bar(x)
+ *    x.content = y
+ *    baz(x)
+ * end
+ *
+ * def bar(x)
+ *    z = x.content
+ * end
+ * ```
+ * for the content write `x.content = y`, we will have `content` being the
+ * literal string `"content"`, `nodeFrom` will be `y`, and `nodeTo` will be the
+ * receiver `x` of the setter call. `nodeTo` is deliberately a plain `Node`:
+ * `StepSummary::localSourceStoreStep` maps it back to a local source that
+ * flows to it, which is what lets the write be observed by reads such as the
+ * one in `bar`, even though the write happens _after_ `bar` is called.
+ */
+predicate basicStoreStep(Node nodeFrom, Node nodeTo, string content) {
+  // TODO: support SetterMethodCall inside TuplePattern
+  exists(ExprNodes::AssignmentCfgNode assignment, ExprNodes::MethodCallCfgNode call |
+    assignment.getLhs() = call and
+    content = getSetterCallAttributeName(call.getExpr()) and
+    nodeTo.(DataFlowPublic::ExprNode).getExprNode() = call.getReceiver() and
+    call.getExpr() instanceof AST::SetterMethodCall and
+    assignment.getRhs() = nodeFrom.(DataFlowPublic::ExprNode).getExprNode()
+  )
+}
+
+/**
+ * Gets the name of the attribute being set by the setter method call, i.e.
+ * the name of the setter method without the trailing `=`. In the following
+ * example, the result is `"bar"`.
+ *
+ * ```rb
+ * foo.bar = 1
+ * ```
+ */
+private string getSetterCallAttributeName(AST::SetterMethodCall call) {
+  // TODO: this should be exposed in `SetterMethodCall`
+  not call instanceof AST::ElementReference and
+  exists(string setterName |
+    setterName = call.getMethodName() and result = setterName.prefix(setterName.length() - 1)
+  )
+}
+
+/**
+ * Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`.
+ */
+predicate basicLoadStep(Node nodeFrom, Node nodeTo, string content) {
+  exists(ExprNodes::MethodCallCfgNode call |
+    call.getExpr().getNumberOfArguments() = 0 and
+    content = call.getExpr().(AST::MethodCall).getMethodName() and
+    nodeFrom.asExpr() = call.getReceiver() and
+    nodeTo.asExpr() = call
+  )
+}
+
+/**
+ * A utility class that is equivalent to `boolean` but does not require type joining.
+ */
+class Boolean extends boolean {
+  Boolean() { this = true or this = false }
+}
--- a/scripts/identical-files.json
+++ b/scripts/identical-files.json
@@ -10,5 +10,9 @@
    "DataFlow": [
        "codeql/csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowImpl.qll",
        "ql/src/codeql_ruby/dataflow/internal/DataFlowImpl.qll"
+    ],
+    "TypeTracker": [
+        "codeql/python/ql/src/experimental/typetracking/TypeTracker.qll",
+        "ql/src/codeql_ruby/typetracking/TypeTracker.qll"
    ]
-}
+}