diff --git a/repo-tests/codeql-ruby.txt b/repo-tests/codeql-ruby.txt
new file mode 100644
index 00000000000..a4f62379f8a
--- /dev/null
+++ b/repo-tests/codeql-ruby.txt
@@ -0,0 +1 @@
+236643fc43b8ae09e15dfa13e86bfdb61a106668
diff --git a/repo-tests/codeql-ruby/ql/consistency-queries/AstConsistency.ql b/repo-tests/codeql-ruby/ql/consistency-queries/AstConsistency.ql
new file mode 100644
index 00000000000..8a5ebcdcda7
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/consistency-queries/AstConsistency.ql
@@ -0,0 +1,25 @@
+import codeql.ruby.AST
+import codeql.ruby.ast.internal.Synthesis
+
+query predicate missingParent(AstNode node, string cls) { // non-root nodes outside ERB files that lack a parent
+ not exists(node.getParent()) and
+ not node.getLocation().getFile().getExtension() = "erb" and // `!=` would also drop files that have no extension at all
+ not node instanceof Toplevel and
+ cls = node.getPrimaryQlClasses()
+}
+
+pragma[noinline]
+private AstNode parent(AstNode child, int desugarLevel) {
+ desugarLevel(result) = desugarLevel and // the level is computed for the parent, not for `child`
+ child.getParent() = result
+}
+
+query predicate multipleParents(AstNode node, AstNode parent, string cls) {
+ exists(AstNode first, AstNode second, int level |
+ first != second and
+ first = parent(node, level) and
+ second = parent(node, level)
+ ) and
+ parent = node.getParent() and
+ cls = parent.getPrimaryQlClasses()
+}
diff --git a/repo-tests/codeql-ruby/ql/consistency-queries/CfgConsistency.ql b/repo-tests/codeql-ruby/ql/consistency-queries/CfgConsistency.ql
new file mode 100644
index 00000000000..c2aaaad0ac1
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/consistency-queries/CfgConsistency.ql
@@ -0,0 +1 @@
+import codeql.ruby.controlflow.internal.ControlFlowGraphImplShared::Consistency
diff --git a/repo-tests/codeql-ruby/ql/consistency-queries/DataFlowConsistency.ql b/repo-tests/codeql-ruby/ql/consistency-queries/DataFlowConsistency.ql
new file mode 100644
index 00000000000..f5bc9552ab6
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/consistency-queries/DataFlowConsistency.ql
@@ -0,0 +1 @@
+import codeql.ruby.dataflow.internal.DataFlowImplConsistency::Consistency
diff --git a/repo-tests/codeql-ruby/ql/consistency-queries/SsaConsistency.ql b/repo-tests/codeql-ruby/ql/consistency-queries/SsaConsistency.ql
new file mode 100644
index 00000000000..79289273f95
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/consistency-queries/SsaConsistency.ql
@@ -0,0 +1,22 @@
+import ruby
+import codeql.ruby.dataflow.SSA
+import codeql.ruby.controlflow.ControlFlowGraph
+
+query predicate nonUniqueDef(CfgNode read, Ssa::Definition def) {
+ exists(Ssa::Definition otherDef | otherDef != def and otherDef.getARead() = read) and
+ def.getARead() = read
+}
+
+query predicate readWithoutDef(LocalVariableReadAccess read) {
+ exists(CfgNode cfgNode |
+ not exists(Ssa::Definition def | cfgNode = def.getARead()) and
+ cfgNode = read.getAControlFlowNode()
+ )
+}
+
+query predicate deadDef(Ssa::Definition def, LocalVariable v) {
+ def.getSourceVariable() = v and
+ not exists(Ssa::PhiNode phi | phi.getAnInput() = def) and
+ not exists(def.getARead()) and
+ not v.isCaptured()
+}
diff --git a/repo-tests/codeql-ruby/ql/consistency-queries/VariablesConsistency.ql b/repo-tests/codeql-ruby/ql/consistency-queries/VariablesConsistency.ql
new file mode 100644
index 00000000000..ed2183340d9
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/consistency-queries/VariablesConsistency.ql
@@ -0,0 +1,6 @@
+import codeql.ruby.ast.Variable
+
+query predicate ambiguousVariable(VariableAccess access, Variable variable) {
+ 1 < count(access.getVariable()) and
+ variable = access.getVariable()
+}
diff --git a/repo-tests/codeql-ruby/ql/consistency-queries/qlpack.yml b/repo-tests/codeql-ruby/ql/consistency-queries/qlpack.yml
new file mode 100644
index 00000000000..fa76023b646
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/consistency-queries/qlpack.yml
@@ -0,0 +1,5 @@
+name: codeql/ruby-consistency-queries
+version: 0.0.1
+dependencies:
+ codeql/ruby-all: 0.0.1
+
diff --git a/repo-tests/codeql-ruby/ql/examples/qlpack.yml b/repo-tests/codeql-ruby/ql/examples/qlpack.yml
new file mode 100644
index 00000000000..87a6ffae9c1
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/examples/qlpack.yml
@@ -0,0 +1,4 @@
+name: codeql/ruby-examples
+version: 0.0.2
+dependencies:
+ codeql/ruby-all: ^0.0.2
diff --git a/repo-tests/codeql-ruby/ql/examples/snippets/emptythen.ql b/repo-tests/codeql-ruby/ql/examples/snippets/emptythen.ql
new file mode 100644
index 00000000000..531556fc7fa
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/examples/snippets/emptythen.ql
@@ -0,0 +1,18 @@
+/**
+ * @name If statements with empty then branch
+ * @description Finds 'if' statements where the 'then' branch is
+ * an empty block statement
+ * @id ruby/examples/emptythen
+ * @tags if
+ * then
+ * empty
+ * conditional
+ * branch
+ * statement
+ */
+
+import ruby
+
+from IfExpr i
+where not exists(i.getThen().getAChild()) // no child under `getThen()`; also true when `getThen()` has no result
+select i
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/IDEContextual.qll b/repo-tests/codeql-ruby/ql/lib/codeql/IDEContextual.qll
new file mode 100644
index 00000000000..0e58b1d878b
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/IDEContextual.qll
@@ -0,0 +1,19 @@
+private import codeql.files.FileSystem
+
+/**
+ * Gets the file whose absolute path, once encoded the way the VS Code
+ * extension encodes source-archive file names, is equal to `name`, so that
+ * the result coincides with the output of `.getFile()` on locatable entities.
+ */
+cached
+File getFileBySourceArchiveName(string name) {
+ // The name provided for a file in the source archive by the VS Code extension
+ // has some differences from the absolute path in the database:
+ // 1. colons are replaced by underscores
+ // 2. there's a leading slash, even for Windows paths: "C:/foo/bar" ->
+ // "/C_/foo/bar"
+ // 3. double slashes in UNC prefixes are replaced with a single slash
+ // We can handle 2 and 3 together by unconditionally adding a leading slash
+ // before replacing double slashes.
+ name = ("/" + result.getAbsolutePath().replaceAll(":", "_")).replaceAll("//", "/")
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/Locations.qll b/repo-tests/codeql-ruby/ql/lib/codeql/Locations.qll
new file mode 100644
index 00000000000..bd43633d49a
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/Locations.qll
@@ -0,0 +1,66 @@
+/** Provides classes for working with locations. */
+
+import files.FileSystem
+
+/**
+ * A source location, described by a file together with a start line, a
+ * start column, an end line, and an end column.
+ *
+ * For more information about locations see [Providing locations in CodeQL queries](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+class Location extends @location {
+ /** Gets the file that this location belongs to. */
+ File getFile() { locations_default(this, result, _, _, _, _) }
+
+ /** Gets the 1-based, inclusive line number on which this location starts. */
+ int getStartLine() { locations_default(this, _, result, _, _, _) }
+
+ /** Gets the 1-based, inclusive column number at which this location starts. */
+ int getStartColumn() { locations_default(this, _, _, result, _, _) }
+
+ /** Gets the 1-based, inclusive line number on which this location ends. */
+ int getEndLine() { locations_default(this, _, _, _, result, _) }
+
+ /** Gets the 1-based, inclusive column number at which this location ends. */
+ int getEndColumn() { locations_default(this, _, _, _, _, result) }
+
+ /** Gets the number of lines spanned by this location (start and end lines included). */
+ int getNumLines() { result = this.getEndLine() - this.getStartLine() + 1 }
+
+ /** Gets a textual representation of this location: `path@startline:startcolumn:endline:endcolumn`. */
+ string toString() {
+ exists(string path, int sl, int sc, int el, int ec |
+ this.hasLocationInfo(path, sl, sc, el, ec) and
+ result = path + "@" + sl + ":" + sc + ":" + el + ":" + ec
+ )
+ }
+
+ /**
+ * Holds if this element is at the specified location.
+ * The location runs from column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in the file with path `filepath`.
+ * For more information, see
+ * [Providing locations in CodeQL queries](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ exists(File file |
+ file.getAbsolutePath() = filepath and
+ locations_default(this, file, startline, startcolumn, endline, endcolumn)
+ )
+ }
+
+ /** Holds if the start of this location is strictly before the start of `other` (only lines and columns are compared). */
+ pragma[inline]
+ predicate strictlyBefore(Location other) {
+ other.getStartLine() > this.getStartLine()
+ or
+ other.getStartLine() = this.getStartLine() and other.getStartColumn() > this.getStartColumn()
+ }
+}
+
+/** An empty location: its file path is the empty string and its line and column numbers are all zero. */
+class EmptyLocation extends Location {
+ EmptyLocation() { this.hasLocationInfo("", 0, 0, 0, 0) }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/files/FileSystem.qll b/repo-tests/codeql-ruby/ql/lib/codeql/files/FileSystem.qll
new file mode 100644
index 00000000000..e8b6a8ff691
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/files/FileSystem.qll
@@ -0,0 +1,173 @@
+/** Provides classes for working with files and folders. */
+
+private import codeql.Locations
+
+/** A file or folder. */
+abstract class Container extends @container {
+ /** Gets a file or sub-folder in this container. */
+ Container getAChildContainer() { this = result.getParentContainer() }
+
+ /** Gets a file in this container. */
+ File getAFile() { result = getAChildContainer() }
+
+ /** Gets a sub-folder in this container. */
+ Folder getAFolder() { result = getAChildContainer() }
+
+ /**
+ * Gets the absolute, canonical path of this container, using forward slashes
+ * as path separator.
+ *
+ * The path starts with a _root prefix_ followed by zero or more _path
+ * segments_ separated by forward slashes.
+ *
+ * The root prefix is of one of the following forms:
+ *
+ * 1. A single forward slash `/` (Unix-style)
+ * 2. An upper-case drive letter followed by a colon and a forward slash,
+ * such as `C:/` (Windows-style)
+ * 3. Two forward slashes, a computer name, and then another forward slash,
+ * such as `//FileServer/` (UNC-style)
+ *
+ * Path segments are never empty (that is, absolute paths never contain two
+ * contiguous slashes, except as part of a UNC-style root prefix). Also, path
+ * segments never contain forward slashes, and no path segment is of the
+ * form `.` (one dot) or `..` (two dots).
+ *
+ * Note that an absolute path never ends with a forward slash, except if it is
+ * a bare root prefix, that is, the path has no path segments. A container
+ * whose absolute path has no segments is always a `Folder`, not a `File`.
+ */
+ abstract string getAbsolutePath();
+
+ /**
+ * Gets the base name of this container including extension, that is, the last
+ * segment of its absolute path, or the empty string if it has no segments.
+ *
+ * Here are some examples of absolute paths and the corresponding base names
+ * (surrounded with quotes to avoid ambiguity):
+ *
+ *
+ * | Absolute path | Base name |
+ * | "/tmp/tst.go" | "tst.go" |
+ * | "C:/Program Files (x86)" | "Program Files (x86)" |
+ * | "/" | "" |
+ * | "C:/" | "" |
+ * | "D:/" | "" |
+ * | "//FileServer/" | "" |
+ *
+ */
+ string getBaseName() {
+ result = getAbsolutePath().regexpCapture(".*/(([^/]*?)(?:\\.([^.]*))?)", 1)
+ }
+
+ /**
+ * Gets the extension of this container, that is, the suffix of its base name
+ * after the last dot character, if any.
+ *
+ * In particular,
+ *
+ * - if the name does not include a dot, there is no extension, so this
+ * predicate has no result;
+ * - if the name ends in a dot, the extension is the empty string;
+ * - if the name contains multiple dots, the extension follows the last dot.
+ *
+ * Here are some examples of absolute paths and the corresponding extensions
+ * (surrounded with quotes to avoid ambiguity):
+ *
+ *
+ * | Absolute path | Extension |
+ * | "/tmp/tst.go" | "go" |
+ * | "/tmp/.classpath" | "classpath" |
+ * | "/bin/bash" | not defined |
+ * | "/tmp/tst2." | "" |
+ * | "/tmp/x.tar.gz" | "gz" |
+ *
+ */
+ string getExtension() { result = getAbsolutePath().regexpCapture(".*/([^/]*?)(\\.([^.]*))?", 3) }
+
+ /** Gets the file in this container that has the given `baseName`, if any. */
+ File getFile(string baseName) {
+ result = getAFile() and
+ result.getBaseName() = baseName
+ }
+
+ /** Gets the sub-folder in this container that has the given `baseName`, if any. */
+ Folder getFolder(string baseName) {
+ result = getAFolder() and
+ result.getBaseName() = baseName
+ }
+
+ /** Gets the parent container of this file or folder, if any. */
+ Container getParentContainer() { containerparent(result, this) }
+
+ /**
+ * Gets the relative path of this file or folder from the root folder of the
+ * analyzed source location. The relative path of the root folder itself is
+ * the empty string.
+ *
+ * This has no result if the container is outside the source root, that is,
+ * if the root folder is not a reflexive, transitive parent of this container.
+ */
+ string getRelativePath() {
+ exists(string absPath, string pref |
+ absPath = getAbsolutePath() and sourceLocationPrefix(pref)
+ |
+ absPath = pref and result = "" // this container is the source root itself
+ or
+ absPath = pref.regexpReplaceAll("/$", "") + "/" + result and // strip a trailing slash from the prefix before re-joining
+ not result.matches("/%") // reject a remainder that would itself start with a slash
+ )
+ }
+
+ /**
+ * Gets the stem of this container, that is, the prefix of its base name up to
+ * (but not including) the last dot character if there is one, or the entire
+ * base name if there is not.
+ *
+ * Here are some examples of absolute paths and the corresponding stems
+ * (surrounded with quotes to avoid ambiguity):
+ *
+ *
+ * | Absolute path | Stem |
+ * | "/tmp/tst.go" | "tst" |
+ * | "/tmp/.classpath" | "" |
+ * | "/bin/bash" | "bash" |
+ * | "/tmp/tst2." | "tst2" |
+ * | "/tmp/x.tar.gz" | "x.tar" |
+ *
+ */
+ string getStem() { result = getAbsolutePath().regexpCapture(".*/([^/]*?)(?:\\.([^.]*))?", 1) }
+
+ /**
+ * Gets a URL representing the location of this container.
+ *
+ * For more information see https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/#providing-urls.
+ */
+ abstract string getURL();
+
+ /**
+ * Gets a textual representation of the path of this container.
+ *
+ * This is the absolute path of the container.
+ */
+ string toString() { result = getAbsolutePath() }
+}
+
+/** A folder. */
+class Folder extends Container, @folder {
+ override string getAbsolutePath() { folders(this, result) }
+
+ /** Gets the URL of this folder, of the form `folder://<absolute path>`. */
+ override string getURL() { result = "folder://" + this.getAbsolutePath() }
+}
+
+/** A file. */
+class File extends Container, @file {
+ override string getAbsolutePath() { files(this, result) }
+
+ /** Gets the URL of this file, of the form `file://<absolute path>:0:0:0:0`. */
+ override string getURL() { result = "file://" + this.getAbsolutePath() + ":0:0:0:0" }
+
+ /** Holds if this file was extracted from ordinary source code. */
+ predicate fromSource() { any() } // as written, this holds for every file
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/AST.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/AST.qll
new file mode 100644
index 00000000000..2d006b6312a
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/AST.qll
@@ -0,0 +1,141 @@
+import codeql.Locations
+import ast.Call
+import ast.Control
+import ast.Constant
+import ast.Erb
+import ast.Expr
+import ast.Literal
+import ast.Method
+import ast.Module
+import ast.Parameter
+import ast.Operation
+import ast.Pattern
+import ast.Scope
+import ast.Statement
+import ast.Variable
+private import ast.internal.AST
+private import ast.internal.Scope
+private import ast.internal.Synthesis
+private import ast.internal.TreeSitter
+
+/**
+ * A node in the abstract syntax tree. This class is the base class for all Ruby
+ * program elements.
+ */
+class AstNode extends TAstNode {
+ /**
+ * Gets the name of a primary CodeQL class to which this node belongs.
+ *
+ * This predicate always has a result. If no primary class can be
+ * determined, the result is `"???"`. If multiple primary classes match,
+ * this predicate can have multiple results.
+ */
+ string getAPrimaryQlClass() { result = "???" }
+
+ /**
+ * Gets a comma-separated list of the names of the primary CodeQL classes to
+ * which this element belongs.
+ */
+ final string getPrimaryQlClasses() { result = concat(this.getAPrimaryQlClass(), ",") }
+
+ /** Gets the enclosing module, if any. */
+ ModuleBase getEnclosingModule() {
+ exists(Scope::Range s |
+ s = scopeOf(toGeneratedInclSynth(this)) and
+ toGeneratedInclSynth(result) = s.getEnclosingModule()
+ )
+ }
+
+ /** Gets the enclosing method, if any. */
+ MethodBase getEnclosingMethod() {
+ exists(Scope::Range s |
+ s = scopeOf(toGeneratedInclSynth(this)) and
+ toGeneratedInclSynth(result) = s.getEnclosingMethod()
+ )
+ }
+
+ /** Gets a textual representation of this node. */
+ cached
+ string toString() { none() }
+
+ /** Gets the location of this node. */
+ Location getLocation() { result = getLocation(this) }
+
+ /** Gets the file of this node. */
+ final File getFile() { result = this.getLocation().getFile() }
+
+ /** Gets a child node of this `AstNode`. */
+ final AstNode getAChild() { result = this.getAChild(_) }
+
+ /** Gets the parent of this `AstNode`, if this node is not a root node. */
+ final AstNode getParent() { result.getAChild() = this }
+
+ /**
+ * Gets a child of this node, which can also be retrieved using a predicate
+ * named `pred`.
+ */
+ cached
+ AstNode getAChild(string pred) {
+ pred = "getDesugared" and
+ result = this.getDesugared()
+ }
+
+ /**
+ * Holds if this node was synthesized to represent an implicit AST node not
+ * present in the source code. In the following example method call, the
+ * receiver is an implicit `self` reference, for which there is a synthesized
+ * `Self` node.
+ *
+ * ```rb
+ * foo(123)
+ * ```
+ */
+ final predicate isSynthesized() { this = getSynthChild(_, _) }
+
+ /**
+ * Gets the desugared version of this AST node, if any.
+ *
+ * For example, the desugared version of
+ *
+ * ```rb
+ * x += y
+ * ```
+ *
+ * is
+ *
+ * ```rb
+ * x = x + y
+ * ```
+ *
+ * when `x` is a variable. Whenever an AST node can be desugared,
+ * then the desugared version is used in the control-flow graph.
+ */
+ final AstNode getDesugared() { result = getSynthChild(this, -1) }
+}
+
+/** A Ruby source file. */
+class RubyFile extends File {
+ RubyFile() { ruby_ast_node_parent(_, this, _) }
+
+ /** Gets a token whose location is in this file. */
+ private Ruby::Token getAToken() { result.getLocation().getFile() = this }
+
+ /** Holds if `line` is spanned by a token of this file; `comment` is `true` for a comment token and `false` for any other token. */
+ private predicate line(int line, boolean comment) {
+ exists(Ruby::Token token, Location l |
+ token = this.getAToken() and
+ l = token.getLocation() and
+ line in [l.getStartLine() .. l.getEndLine()] and // a multi-line token contributes every line it spans
+ if token instanceof @ruby_token_comment then comment = true else comment = false
+ )
+ }
+
+ /** Gets the number of lines in this file, computed as the largest end line of any token (0 if the file has no tokens). */
+ int getNumberOfLines() { result = max([0, this.getAToken().getLocation().getEndLine()]) }
+
+ /** Gets the number of lines of code in this file, that is, lines spanned by at least one non-comment token. */
+ int getNumberOfLinesOfCode() { result = count(int line | this.line(line, false)) }
+
+ /** Gets the number of lines of comments in this file, that is, lines spanned by at least one comment token. */
+ int getNumberOfLinesOfComments() { result = count(int line | this.line(line, true)) }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ApiGraphs.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ApiGraphs.qll
new file mode 100644
index 00000000000..f260251cd24
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ApiGraphs.qll
@@ -0,0 +1,408 @@
+/**
+ * Provides an implementation of _API graphs_, which are an abstract representation of the API
+ * surface used and/or defined by a code base.
+ *
+ * The nodes of the API graph represent definitions and uses of API components. The edges are
+ * directed and labeled; they specify how the components represented by nodes relate to each other.
+ */
+
+private import ruby
+import codeql.ruby.DataFlow
+import codeql.ruby.typetracking.TypeTracker
+import codeql.ruby.ast.internal.Module
+private import codeql.ruby.controlflow.CfgNodes
+
+/**
+ * Provides classes and predicates for working with APIs used in a database.
+ */
+module API {
+ /**
+ * An abstract representation of a definition or use of an API component such as a Ruby module,
+ * or the result of a method call.
+ */
+ class Node extends Impl::TApiNode {
+ /**
+ * Gets a data-flow node corresponding to a use of the API component represented by this node.
+ *
+ * For example, `Kernel.format "%s world!", "Hello"` is a use of the return of the `format` function of
+ * the `Kernel` module.
+ *
+ * This includes indirect uses found via data flow.
+ */
+ DataFlow::Node getAUse() {
+ exists(DataFlow::LocalSourceNode src | Impl::use(this, src) |
+ Impl::trackUseNode(src).flowsTo(result)
+ )
+ }
+
+ /**
+ * Gets an immediate use of the API component represented by this node.
+ *
+ * Unlike `getAUse()`, this predicate only gets the immediate references, not the indirect uses
+ * found via data flow.
+ */
+ DataFlow::LocalSourceNode getAnImmediateUse() { Impl::use(this, result) }
+
+ /**
+ * Gets a call to a method on the receiver represented by this API component.
+ */
+ DataFlow::CallNode getAMethodCall(string method) {
+ result = getReturn(method).getAnImmediateUse()
+ }
+
+ /**
+ * Gets a node representing member `m` of this API component.
+ *
+ * For example, a member can be:
+ *
+ * - A submodule of a module
+ * - An attribute of an object
+ */
+ bindingset[m]
+ bindingset[result]
+ Node getMember(string m) { result = getASuccessor(Label::member(m)) }
+
+ /**
+ * Gets a node representing a member of this API component where the name of the member is
+ * not known statically.
+ */
+ Node getUnknownMember() { result = getASuccessor(Label::unknownMember()) }
+
+ /**
+ * Gets a node representing a member of this API component where the name of the member may
+ * or may not be known statically.
+ */
+ Node getAMember() {
+ result = getASuccessor(Label::member(_)) or
+ result = getUnknownMember()
+ }
+
+ /**
+ * Gets a node representing an instance of this API component, that is, an object whose
+ * constructor is the function represented by this node.
+ *
+ * For example, if this node represents a use of some class `A`, then there might be a node
+ * representing instances of `A`, typically corresponding to expressions `A.new` at the
+ * source level.
+ *
+ * This predicate may have multiple results when there are multiple constructor calls invoking this API component.
+ * Consider using `getAnInstantiation()` if there is a need to distinguish between individual constructor calls.
+ */
+ Node getInstance() { result = getASuccessor(Label::instance()) }
+
+ /**
+ * Gets a node representing the result of calling a method on the receiver represented by this node.
+ */
+ Node getReturn(string method) { result = getASuccessor(Label::return(method)) }
+
+ /**
+ * Gets a `new` call to the function represented by this API component.
+ */
+ DataFlow::Node getAnInstantiation() { result = getInstance().getAnImmediateUse() }
+
+ /**
+ * Gets a node representing a subclass of the class represented by this node.
+ */
+ Node getASubclass() { result = getASuccessor(Label::subclass()) }
+
+ /**
+ * Gets a string representation of the lexicographically least among all shortest access paths
+ * from the root to this node.
+ */
+ string getPath() { result = min(string p | p = getAPath(Impl::distanceFromRoot(this)) | p) }
+
+ /**
+ * Gets a node such that there is an edge in the API graph between this node and the other
+ * one, and that edge is labeled with `lbl`.
+ */
+ Node getASuccessor(string lbl) { Impl::edge(this, lbl, result) }
+
+ /**
+ * Gets a node such that there is an edge in the API graph between that other node and
+ * this one, and that edge is labeled with `lbl`
+ */
+ Node getAPredecessor(string lbl) { this = result.getASuccessor(lbl) }
+
+ /**
+ * Gets a node such that there is an edge in the API graph between that other node and
+ * this one.
+ */
+ Node getAPredecessor() { result = getAPredecessor(_) }
+
+ /**
+ * Gets a node such that there is an edge in the API graph between this node and the other
+ * one.
+ */
+ Node getASuccessor() { result = getASuccessor(_) }
+
+ /**
+ * Gets the data-flow node that gives rise to this node, if any.
+ */
+ DataFlow::Node getInducingNode() { this = Impl::MkUse(result) }
+
+ /** Gets the location of this node. */
+ Location getLocation() {
+ result = this.getInducingNode().getLocation()
+ or
+ // For nodes that do not have a meaningful location, `path` is the empty string and all other
+ // parameters are zero.
+ not exists(getInducingNode()) and
+ result instanceof EmptyLocation
+ }
+
+ /**
+ * Gets a textual representation of this element.
+ */
+ abstract string toString();
+
+ /**
+ * Gets a path of the given `length` from the root to this node.
+ */
+ private string getAPath(int length) {
+ this instanceof Impl::MkRoot and
+ length = 0 and
+ result = ""
+ or
+ exists(Node pred, string lbl, string predpath |
+ Impl::edge(pred, lbl, this) and
+ lbl != "" and
+ predpath = pred.getAPath(length - 1) and
+ exists(string dot | if length = 1 then dot = "" else dot = "." |
+ result = predpath + dot + lbl and
+ // avoid producing strings longer than 1MB
+ result.length() < 1000 * 1000
+ )
+ ) and
+ length in [1 .. Impl::distanceFromRoot(this)]
+ }
+
+ /** Gets the shortest distance from the root to this node in the API graph. */
+ int getDepth() { result = Impl::distanceFromRoot(this) }
+ }
+
+ /** The root node of an API graph. */
+ class Root extends Node, Impl::MkRoot {
+ override string toString() { result = "root" }
+ }
+
+ /** A node corresponding to the use of an API component. */
+ class Use extends Node, Impl::MkUse {
+ override string toString() {
+ exists(string type | type = "Use " |
+ result = type + getPath()
+ or
+ not exists(this.getPath()) and result = type + "with no path"
+ )
+ }
+ }
+
+ /** Gets the root node. */
+ Root root() { any() }
+
+ /**
+ * Gets a node corresponding to a top-level member `m` (typically a module).
+ *
+ * This is equivalent to `root().getMember(m)`.
+ *
+ * Note: You should only use this predicate for top level modules or classes. If you want nodes corresponding to a nested module or class,
+ * you should use `.getMember` on the parent module/class. For example, for nodes corresponding to the class `Gem::Version`,
+ * use `getTopLevelMember("Gem").getMember("Version")`.
+ */
+ Node getTopLevelMember(string m) { result = root().getMember(m) }
+
+ /**
+ * Provides the actual implementation of API graphs, cached for performance.
+ *
+ * Ideally, we'd like nodes to correspond to (global) access paths, with edge labels
+ * corresponding to extending the access path by one element. We also want to be able to map
+ * nodes to their definitions and uses in the data-flow graph, and this should happen modulo
+ * (inter-procedural) data flow.
+ *
+ * This, however, is not easy to implement, since access paths can have unbounded length
+ * and we need some way of recognizing cycles to avoid non-termination. Unfortunately, expressing
+ * a condition like "this node hasn't been involved in constructing any predecessor of
+ * this node in the API graph" without negative recursion is tricky.
+ *
+ * So instead most nodes are directly associated with a data-flow node, representing
+ * either a use or a definition of an API component. This ensures that we only have a finite
+ * number of nodes. However, we can now have multiple nodes with the same access
+ * path, which are essentially indistinguishable for a client of the API.
+ *
+ * On the other hand, a single node can have multiple access paths (which is, of
+ * course, unavoidable). We pick as canonical the alphabetically least access path with
+ * shortest length.
+ */
+ cached
+ private module Impl {
+ cached
+ newtype TApiNode =
+ /** The root of the API graph. */
+ MkRoot() or
+ /** A use of an API member at the node `nd`. */
+ MkUse(DataFlow::Node nd) { isUse(nd) }
+
+ private string resolveTopLevel(ConstantReadAccess read) {
+ TResolved(result) = resolveScopeExpr(read) and
+ not result.matches("%::%")
+ }
+
+ /**
+ * Holds if `ref` is a use of a node that should have an incoming edge from the root
+ * node labeled `lbl` in the API graph.
+ */
+ cached
+ predicate useRoot(string lbl, DataFlow::Node ref) {
+ exists(string name, ExprNodes::ConstantAccessCfgNode access, ConstantReadAccess read |
+ access = ref.asExpr() and
+ lbl = Label::member(read.getName()) and
+ read = access.getExpr()
+ |
+ name = resolveTopLevel(read)
+ or
+ name = read.getName() and
+ not exists(resolveTopLevel(read)) and
+ not exists(read.getScopeExpr())
+ )
+ }
+
+ /**
+ * Holds if `ref` is a use of a node that should have an incoming edge from use node
+ * `base` labeled `lbl` in the API graph.
+ */
+ cached
+ predicate useUse(DataFlow::LocalSourceNode base, string lbl, DataFlow::Node ref) {
+ exists(ExprCfgNode node |
+ // First, we find a predecessor of the node `ref` that we want to determine. The predecessor
+ // (`node`) is any expression that the data-flow node `base` reaches according to
+ // `useExpr`, so `node` and `base` both represent uses of the same API component.
+ //
+ // Once we have identified the predecessor, we define its relation to the successor `ref` as
+ // well as the label on the edge from `node` to `ref`. This label describes the nature of
+ // the relationship between `node` and `ref`.
+ useExpr(node, base)
+ |
+ // Referring to a member on a node that is a use of `base`:
+ // node = `Rails` part of `Rails::Whatever`
+ // lbl = `getMember("Whatever")`
+ // ref = `Rails::Whatever`
+ exists(ExprNodes::ConstantAccessCfgNode c, ConstantReadAccess read |
+ not exists(resolveTopLevel(read)) and
+ node = c.getScopeExpr() and
+ lbl = Label::member(read.getName()) and
+ ref.asExpr() = c and
+ read = c.getExpr()
+ )
+ or
+ // Calling a method on a node that is a use of `base`
+ exists(ExprNodes::MethodCallCfgNode call, string name |
+ node = call.getReceiver() and
+ name = call.getExpr().getMethodName() and
+ lbl = Label::return(name) and
+ name != "new" and
+ ref.asExpr() = call
+ )
+ or
+ // Calling the `new` method on a node that is a use of `base`, which creates a new instance
+ exists(ExprNodes::MethodCallCfgNode call |
+ node = call.getReceiver() and
+ lbl = Label::instance() and
+ call.getExpr().getMethodName() = "new" and
+ ref.asExpr() = call
+ )
+ )
+ }
+
+ pragma[nomagic]
+ private predicate isUse(DataFlow::Node nd) {
+ useRoot(_, nd)
+ or
+ useUse(_, _, nd)
+ }
+
+ pragma[nomagic]
+ private predicate useExpr(ExprCfgNode node, DataFlow::LocalSourceNode src) {
+ exists(DataFlow::LocalSourceNode pred |
+ pred = trackUseNode(src) and
+ pred.flowsTo(any(DataFlow::ExprNode n | n.getExprNode() = node))
+ )
+ }
+
+ /**
+ * Holds if `ref` is a use of node `nd`.
+ */
+ cached
+ predicate use(TApiNode nd, DataFlow::Node ref) { nd = MkUse(ref) }
+
+ /**
+ * Gets a data-flow node to which `src`, which is a use of an API-graph node, flows.
+ *
+ * The flow from `src` to that node may be inter-procedural.
+ */
+ private DataFlow::LocalSourceNode trackUseNode(DataFlow::Node src, TypeTracker t) {
+ // Declaring `src` to be a `LocalSourceNode` currently causes a redundant check in the
+ // recursive case, so instead we check it explicitly here.
+ src instanceof DataFlow::LocalSourceNode and
+ t.start() and
+ isUse(src) and
+ result = src
+ or
+ exists(TypeTracker t2 | result = trackUseNode(src, t2).track(t2, t))
+ }
+
+ /**
+ * Gets a data-flow node to which `src`, which is a use of an API-graph node, flows.
+ *
+ * The flow from `src` to that node may be inter-procedural.
+ */
+ cached
+ DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) {
+ result = trackUseNode(src, TypeTracker::end())
+ }
+
+ /**
+ * Holds if there is an edge from `pred` to `succ` in the API graph that is labeled with `lbl`.
+ */
+ cached
+ predicate edge(TApiNode pred, string lbl, TApiNode succ) {
+ /* Every node that is a use of an API component is itself added to the API graph. */
+ exists(DataFlow::LocalSourceNode ref | succ = MkUse(ref) |
+ pred = MkRoot() and
+ useRoot(lbl, ref)
+ or
+ exists(DataFlow::Node nd |
+ pred = MkUse(nd) and
+ useUse(nd, lbl, ref)
+ )
+ )
+ }
+
+ /**
+ * Holds if there is an edge from `pred` to `succ` in the API graph.
+ */
+ private predicate edge(TApiNode pred, TApiNode succ) { edge(pred, _, succ) }
+
+ /** Gets the shortest distance from the root to `nd` in the API graph. */
+ cached
+ int distanceFromRoot(TApiNode nd) = shortestDistances(MkRoot/0, edge/2)(_, nd, result)
+ }
+}
+
+private module Label {
+ /** Gets the `member` edge label for member `m`. */
+ bindingset[m]
+ bindingset[result]
+ string member(string m) { result = "getMember(\"" + m + "\")" }
+
+ /** Gets the `member` edge label for the unknown member. */
+ string unknownMember() { result = "getUnknownMember()" }
+
+ /** Gets the `instance` edge label. */
+ string instance() { result = "instance" }
+
+ /** Gets the `return` edge label. */
+ bindingset[m]
+ bindingset[result]
+ string return(string m) { result = "getReturn(\"" + m + "\")" }
+
+ string subclass() { result = "getASubclass()" } // the `subclass` edge label
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/CFG.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/CFG.qll
new file mode 100644
index 00000000000..77507b05a7f
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/CFG.qll
@@ -0,0 +1,5 @@
+/** Provides classes representing the control flow graph. */
+
+import controlflow.ControlFlowGraph
+import controlflow.CfgNodes as CfgNodes
+import controlflow.BasicBlocks
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/Concepts.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/Concepts.qll
new file mode 100644
index 00000000000..f06995d1d36
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/Concepts.qll
@@ -0,0 +1,585 @@
+/**
+ * Provides abstract classes representing generic concepts such as file system
+ * access or system command execution, for which individual framework libraries
+ * provide concrete subclasses.
+ */
+
+private import codeql.ruby.AST
+private import codeql.ruby.CFG
+private import codeql.ruby.DataFlow
+private import codeql.ruby.Frameworks
+private import codeql.ruby.dataflow.RemoteFlowSources
+private import codeql.ruby.ApiGraphs
+
+/**
+ * A data-flow node that executes SQL statements.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `SqlExecution::Range` instead.
+ */
+class SqlExecution extends DataFlow::Node instanceof SqlExecution::Range {
+ /** Gets the argument that specifies the SQL statements to be executed (delegates to the `Range`). */
+ DataFlow::Node getSql() { result = super.getSql() }
+}
+
+/** Provides a class for modeling new SQL execution APIs. */
+module SqlExecution {
+ /**
+ * A data-flow node that executes SQL statements.
+ *
+ * Extend this class to model new APIs. If you want to refine existing API models,
+ * extend `SqlExecution` instead.
+ */
+ abstract class Range extends DataFlow::Node {
+ /** Gets the argument that specifies the SQL statements to be executed; every concrete model must define it. */
+ abstract DataFlow::Node getSql();
+ }
+}
+
+/**
+ * A data-flow node that performs a file system access, including reading and writing data,
+ * creating and deleting files and folders, checking and updating permissions, and so on.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `FileSystemAccess::Range` instead.
+ */
+class FileSystemAccess extends DataFlow::Node instanceof FileSystemAccess::Range {
+ /** Gets an argument to this file system access that is interpreted as a path. */
+ DataFlow::Node getAPathArgument() { result = super.getAPathArgument() }
+}
+
+/** Provides a class for modeling new file system access APIs. */
+module FileSystemAccess {
+ /**
+ * A data-flow node that performs a file system access, including reading and writing data,
+ * creating and deleting files and folders, checking and updating permissions, and so on.
+ *
+ * Extend this class to model new APIs. If you want to refine existing API models,
+ * extend `FileSystemAccess` instead.
+ */
+ abstract class Range extends DataFlow::Node {
+ /** Gets an argument to this file system access that is interpreted as a path; every concrete model must define it. */
+ abstract DataFlow::Node getAPathArgument();
+ }
+}
+
+/**
+ * A data-flow node that reads data from the file system.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `FileSystemReadAccess::Range` instead.
+ */
+class FileSystemReadAccess extends FileSystemAccess instanceof FileSystemReadAccess::Range {
+ /**
+ * Gets a node that represents data read from the file system by this access.
+ */
+ DataFlow::Node getADataNode() { result = FileSystemReadAccess::Range.super.getADataNode() }
+}
+
+/** Provides a class for modeling new file system reads. */
+module FileSystemReadAccess {
+ /**
+ * A data-flow node that reads data from the file system.
+ *
+ * Extend this class to model new APIs. If you want to refine existing API models,
+ * extend `FileSystemReadAccess` instead.
+ */
+ abstract class Range extends FileSystemAccess::Range {
+ /**
+ * Gets a node that represents data read from the file system.
+ */
+ abstract DataFlow::Node getADataNode();
+ }
+}
+
+/**
+ * A data-flow node that sets the permissions for one or more files.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `FileSystemPermissionModification::Range` instead.
+ */
+class FileSystemPermissionModification extends DataFlow::Node instanceof FileSystemPermissionModification::Range {
+ /**
+ * Gets an argument to this permission modification that is interpreted as a
+ * set of permissions.
+ */
+ DataFlow::Node getAPermissionNode() { result = super.getAPermissionNode() }
+}
+
+/** Provides a class for modeling new file system permission modifications. */
+module FileSystemPermissionModification {
+ /**
+ * A data-flow node that sets the permissions for one or more files.
+ *
+ * Extend this class to model new APIs. If you want to refine existing API models,
+ * extend `FileSystemPermissionModification` instead.
+ */
+ abstract class Range extends DataFlow::Node {
+ /**
+ * Gets an argument to this permission modification that is interpreted as a
+ * set of permissions.
+ */
+ abstract DataFlow::Node getAPermissionNode();
+ }
+}
+
+/**
+ * A data-flow node that contains a file name or an array of file names from the local file system.
+ */
+abstract class FileNameSource extends DataFlow::Node { }
+
+/**
+ * A data-flow node that escapes meta-characters, which could be used to prevent
+ * injection attacks.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `Escaping::Range` instead.
+ */
+class Escaping extends DataFlow::Node instanceof Escaping::Range {
+ Escaping() {
+ // a `Range` that lacks an input or lacks an output is not treated as a valid escape
+ exists(super.getAnInput()) and
+ exists(super.getOutput())
+ }
+
+ /** Gets an input that will be escaped. */
+ DataFlow::Node getAnInput() { result = super.getAnInput() }
+
+ /** Gets the output that contains the escaped data. */
+ DataFlow::Node getOutput() { result = super.getOutput() }
+
+ /**
+ * Gets the context that this function escapes for, such as `html` or `url`.
+ */
+ string getKind() { result = super.getKind() }
+}
+
+/** Provides a class for modeling new escaping APIs. */
+module Escaping {
+ /**
+ * A data-flow node that escapes meta-characters, which could be used to prevent
+ * injection attacks.
+ *
+ * Extend this class to model new APIs. If you want to refine existing API models,
+ * extend `Escaping` instead.
+ */
+ abstract class Range extends DataFlow::Node {
+ /** Gets an input that will be escaped. */
+ abstract DataFlow::Node getAnInput();
+
+ /** Gets the output that contains the escaped data. */
+ abstract DataFlow::Node getOutput();
+
+ /**
+ * Gets the context that this function escapes for.
+ *
+ * While kinds are represented as strings, this should not be relied upon. Use the
+ * predicates in the `Escaping` module, such as `Escaping::getHtmlKind`, instead.
+ */
+ abstract string getKind();
+ }
+
+ /** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
+ string getHtmlKind() { result = "html" }
+}
+
+/**
+ * An escape of a string so it can be safely included in
+ * the body of an HTML element, for example, replacing `{}` in
+ * `<p>{}</p>`.
+ */
+class HtmlEscaping extends Escaping {
+ HtmlEscaping() { super.getKind() = Escaping::getHtmlKind() }
+}
+
+/** Provides classes for modeling HTTP-related APIs. */
+module HTTP {
+ /** Provides classes for modeling HTTP servers. */
+ module Server {
+ /**
+ * A data-flow node that sets up a route on a server.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `RouteSetup::Range` instead.
+ */
+ class RouteSetup extends DataFlow::Node instanceof RouteSetup::Range {
+ /** Gets the URL pattern for this route, if it can be statically determined. */
+ string getUrlPattern() { result = super.getUrlPattern() }
+
+ /**
+ * Gets a function that will handle incoming requests for this route, if any.
+ *
+ * NOTE: This will be modified in the near future to have a `RequestHandler` result, instead of a `Method`.
+ */
+ Method getARequestHandler() { result = super.getARequestHandler() }
+
+ /**
+ * Gets a parameter that will receive parts of the URL when handling incoming
+ * requests for this route, if any. These automatically become a `RemoteFlowSource`.
+ */
+ Parameter getARoutedParameter() { result = super.getARoutedParameter() }
+
+ /** Gets a string that identifies the framework used for this route setup. */
+ string getFramework() { result = super.getFramework() }
+ }
+
+ /** Provides a class for modeling new HTTP routing APIs. */
+ module RouteSetup {
+ /**
+ * A data-flow node that sets up a route on a server.
+ *
+ * Extend this class to model new APIs. If you want to refine existing API models,
+ * extend `RouteSetup` instead.
+ */
+ abstract class Range extends DataFlow::Node {
+ /** Gets the argument used to set the URL pattern. */
+ abstract DataFlow::Node getUrlPatternArg();
+
+ /** Gets the URL pattern for this route, if a string-like literal is a local source of `getUrlPatternArg()`. */
+ string getUrlPattern() {
+ exists(CfgNodes::ExprNodes::StringlikeLiteralCfgNode strNode |
+ this.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(strNode) and
+ result = strNode.getExpr().getValueText()
+ )
+ }
+
+ /**
+ * Gets a function that will handle incoming requests for this route, if any.
+ *
+ * NOTE: This will be modified in the near future to have a `RequestHandler` result, instead of a `Method`.
+ */
+ abstract Method getARequestHandler();
+
+ /**
+ * Gets a parameter that will receive parts of the URL when handling incoming
+ * requests for this route, if any. These automatically become a `RemoteFlowSource`.
+ */
+ abstract Parameter getARoutedParameter();
+
+ /** Gets a string that identifies the framework used for this route setup. */
+ abstract string getFramework();
+ }
+ }
+
+ /**
+ * A function that will handle incoming HTTP requests.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `RequestHandler::Range` instead.
+ */
+ class RequestHandler extends Method instanceof RequestHandler::Range {
+ /**
+ * Gets a parameter that could receive parts of the URL when handling incoming
+ * requests, if any. These automatically become a `RemoteFlowSource`.
+ */
+ Parameter getARoutedParameter() { result = super.getARoutedParameter() }
+
+ /** Gets a string that identifies the framework used for this request handler. */
+ string getFramework() { result = super.getFramework() }
+ }
+
+ /** Provides a class for modeling new HTTP request handlers. */
+ module RequestHandler {
+ /**
+ * A function that will handle incoming HTTP requests.
+ *
+ * Extend this class to model new APIs. If you want to refine existing API models,
+ * extend `RequestHandler` instead.
+ *
+ * Only extend this class if you can't provide a `RouteSetup`, since we handle that case automatically.
+ */
+ abstract class Range extends Method {
+ /**
+ * Gets a parameter that could receive parts of the URL when handling incoming
+ * requests, if any. These automatically become a `RemoteFlowSource`.
+ */
+ abstract Parameter getARoutedParameter();
+
+ /** Gets a string that identifies the framework used for this request handler. */
+ abstract string getFramework();
+ }
+ }
+
+ private class RequestHandlerFromRouteSetup extends RequestHandler::Range {
+ RouteSetup rs; // a route setup that names this method as one of its request handlers
+
+ RequestHandlerFromRouteSetup() { this = rs.getARequestHandler() }
+
+ override Parameter getARoutedParameter() {
+ result = rs.getARoutedParameter() and
+ result = this.getAParameter() // keep only routed parameters that belong to this method
+ }
+
+ override string getFramework() { result = rs.getFramework() }
+ }
+
+ /** A parameter that will receive parts of the URL when handling an incoming request. */
+ private class RoutedParameter extends RemoteFlowSource::Range, DataFlow::ParameterNode {
+ RequestHandler handler;
+
+ RoutedParameter() { this.getParameter() = handler.getARoutedParameter() }
+
+ override string getSourceType() { result = handler.getFramework() + " RoutedParameter" }
+ }
+
+ /**
+ * A data-flow node that creates an HTTP response on a server.
+ *
+ * Note: we don't require that this response must be sent to a client (a kind of
+ * "if a tree falls in a forest and nobody hears it" situation).
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `HttpResponse::Range` instead.
+ */
+ class HttpResponse extends DataFlow::Node instanceof HttpResponse::Range {
+ /** Gets the data-flow node that specifies the body of this HTTP response. */
+ DataFlow::Node getBody() { result = super.getBody() }
+
+ /** Gets the mimetype of this HTTP response, if it can be statically determined. */
+ string getMimetype() { result = super.getMimetype() }
+ }
+
+ /** Provides a class for modeling new HTTP response APIs. */
+ module HttpResponse {
+ /**
+ * A data-flow node that creates an HTTP response on a server.
+ *
+ * Note: we don't require that this response must be sent to a client (a kind of
+ * "if a tree falls in a forest and nobody hears it" situation).
+ *
+ * Extend this class to model new APIs. If you want to refine existing API models,
+ * extend `HttpResponse` instead.
+ */
+ abstract class Range extends DataFlow::Node {
+ /** Gets the data-flow node that specifies the body of this HTTP response. */
+ abstract DataFlow::Node getBody();
+
+ /** Gets the data-flow node that specifies the content-type/mimetype of this HTTP response, if any. */
+ abstract DataFlow::Node getMimetypeOrContentTypeArg();
+
+ /** Gets the default mimetype that should be used if `getMimetypeOrContentTypeArg` has no results. */
+ abstract string getMimetypeDefault();
+
+ /** Gets the mimetype of this HTTP response, if it can be statically determined: the part before the first `;` of a string-like literal argument, or `getMimetypeDefault()` when there is no argument node. */
+ string getMimetype() {
+ exists(CfgNodes::ExprNodes::StringlikeLiteralCfgNode strNode |
+ this.getMimetypeOrContentTypeArg().getALocalSource() = DataFlow::exprNode(strNode) and
+ result = strNode.getExpr().getValueText().splitAt(";", 0)
+ )
+ or
+ not exists(this.getMimetypeOrContentTypeArg()) and
+ result = this.getMimetypeDefault()
+ }
+ }
+ }
+
+ /**
+ * A data-flow node that creates an HTTP redirect response on a server.
+ *
+ * Note: we don't require that this redirect must be sent to a client (a kind of
+ * "if a tree falls in a forest and nobody hears it" situation).
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `HttpRedirectResponse::Range` instead.
+ */
+ class HttpRedirectResponse extends HttpResponse instanceof HttpRedirectResponse::Range {
+ /** Gets the data-flow node that specifies the location of this HTTP redirect response. */
+ DataFlow::Node getRedirectLocation() { result = super.getRedirectLocation() }
+ }
+
+ /** Provides a class for modeling new HTTP redirect response APIs. */
+ module HttpRedirectResponse {
+ /**
+ * A data-flow node that creates an HTTP redirect response on a server.
+ *
+ * Note: we don't require that this redirect must be sent to a client (a kind of
+ * "if a tree falls in a forest and nobody hears it" situation).
+ *
+ * Extend this class to model new APIs. If you want to refine existing API models,
+ * extend `HttpRedirectResponse` instead.
+ */
+ abstract class Range extends HTTP::Server::HttpResponse::Range {
+ /** Gets the data-flow node that specifies the location of this HTTP redirect response. */
+ abstract DataFlow::Node getRedirectLocation();
+ }
+ }
+ }
+
+ /** Provides classes for modeling HTTP clients. */
+ module Client {
+ /**
+ * A method call that makes an outgoing HTTP request.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `Request::Range` instead.
+ */
+ class Request extends MethodCall instanceof Request::Range {
+ /** Gets a node which returns the body of the response. */
+ DataFlow::Node getResponseBody() { result = super.getResponseBody() }
+
+ /** Gets a string that identifies the framework used for this request. */
+ string getFramework() { result = super.getFramework() }
+
+ /**
+ * Holds if this request is made using a mode that disables SSL/TLS
+ * certificate validation, where `disablingNode` represents the point at
+ * which the validation was disabled.
+ */
+ predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
+ super.disablesCertificateValidation(disablingNode)
+ }
+ }
+
+ /** Provides a class for modeling new HTTP requests. */
+ module Request {
+ /**
+ * A method call that makes an outgoing HTTP request.
+ *
+ * Extend this class to model new APIs. If you want to refine existing API models,
+ * extend `Request` instead.
+ */
+ abstract class Range extends MethodCall {
+ /** Gets a node which returns the body of the response. */
+ abstract DataFlow::Node getResponseBody();
+
+ /** Gets a string that identifies the framework used for this request. */
+ abstract string getFramework();
+
+ /**
+ * Holds if this request is made using a mode that disables SSL/TLS
+ * certificate validation, where `disablingNode` represents the point at
+ * which the validation was disabled.
+ */
+ abstract predicate disablesCertificateValidation(DataFlow::Node disablingNode);
+ }
+ }
+
+ /** The response body from an outgoing HTTP request, considered as a remote flow source; its source type is the request's framework string. */
+ private class RequestResponseBody extends RemoteFlowSource::Range, DataFlow::Node {
+ Request request;
+
+ RequestResponseBody() { this = request.getResponseBody() }
+
+ override string getSourceType() { result = request.getFramework() }
+ }
+ }
+}
+
+/**
+ * A data-flow node that executes an operating system command, for instance by spawning a new
+ * process. If you want to model new APIs, extend `SystemCommandExecution::Range` instead.
+ */
+class SystemCommandExecution extends DataFlow::Node instanceof SystemCommandExecution::Range {
+ /** Holds if a shell interprets `arg`. */
+ predicate isShellInterpreted(DataFlow::Node arg) { super.isShellInterpreted(arg) }
+
+ /** Gets an argument to this execution that specifies the command or an argument to it. */
+ DataFlow::Node getAnArgument() { result = super.getAnArgument() }
+}
+
+/** Provides a class for modeling new operating system command APIs. */
+module SystemCommandExecution {
+ /**
+ * A data-flow node that executes an operating system command, for instance by spawning a new
+ * process.
+ *
+ * Extend this class to model new APIs. If you want to refine existing API models,
+ * extend `SystemCommandExecution` instead.
+ */
+ abstract class Range extends DataFlow::Node {
+ /** Gets an argument to this execution that specifies the command or an argument to it. */
+ abstract DataFlow::Node getAnArgument();
+
+ /** Holds if a shell interprets `arg`. Never holds by default; override it in models that go through a shell. */
+ predicate isShellInterpreted(DataFlow::Node arg) { none() }
+ }
+}
+
+/**
+ * A data-flow node that dynamically executes Ruby code.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `CodeExecution::Range` instead.
+ */
+class CodeExecution extends DataFlow::Node instanceof CodeExecution::Range {
+ /** Gets the argument that specifies the code to be executed (delegates to the `Range`). */
+ DataFlow::Node getCode() { result = super.getCode() }
+}
+
+/** Provides a class for modeling new dynamic code execution APIs. */
+module CodeExecution {
+ /**
+ * A data-flow node that dynamically executes Ruby code.
+ *
+ * Extend this class to model new APIs. If you want to refine existing API models,
+ * extend `CodeExecution` instead.
+ */
+ abstract class Range extends DataFlow::Node {
+ /** Gets the argument that specifies the code to be executed; every concrete model must define it. */
+ abstract DataFlow::Node getCode();
+ }
+}
+
+/**
+ * A data-flow node that parses XML content.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `XmlParserCall::Range` instead.
+ *
+ * Like the other concept classes in this file, this class is declared with
+ * `instanceof` rather than a `range` field, and every member predicate simply
+ * delegates to the underlying `XmlParserCall::Range`.
+ */
+class XmlParserCall extends DataFlow::Node instanceof XmlParserCall::Range {
+ /** Gets the argument that specifies the XML content to be parsed. */
+ DataFlow::Node getInput() { result = super.getInput() }
+
+ /** Holds if this XML parser call is configured to process external entities. */
+ predicate externalEntitiesEnabled() { super.externalEntitiesEnabled() }
+}
+
+/** Provides a class for modeling new XML parsing APIs. */
+module XmlParserCall {
+ /**
+ * A data-flow node that parses XML content.
+ *
+ * Extend this class to model new APIs. If you want to refine existing API models,
+ * extend `XmlParserCall` instead.
+ */
+ abstract class Range extends DataFlow::Node {
+ /** Gets the argument that specifies the XML content to be parsed. */
+ abstract DataFlow::Node getInput();
+
+ /** Holds if this XML parser call is configured to process external entities. */
+ abstract predicate externalEntitiesEnabled();
+ }
+}
+
+/**
+ * A data-flow node that may represent a database object in an ORM system.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `OrmInstantiation::Range` instead.
+ */
+class OrmInstantiation extends DataFlow::Node instanceof OrmInstantiation::Range {
+ /** Holds if a call to `methodName` on this instance may return a field of this ORM object; `methodName` must be bound by the caller. */
+ bindingset[methodName]
+ predicate methodCallMayAccessField(string methodName) {
+ super.methodCallMayAccessField(methodName)
+ }
+}
+
+/** Provides a class for modeling new ORM object instantiation APIs. */
+module OrmInstantiation {
+ /**
+ * A data-flow node that may represent a database object in an ORM system.
+ *
+ * Extend this class to model new APIs. If you want to refine existing API models,
+ * extend `OrmInstantiation` instead.
+ */
+ abstract class Range extends DataFlow::Node {
+ /** Holds if a call to `methodName` on this instance may return a field of this ORM object; `methodName` must be bound by the caller. */
+ bindingset[methodName]
+ abstract predicate methodCallMayAccessField(string methodName);
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/DataFlow.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/DataFlow.qll
new file mode 100644
index 00000000000..e7645ce0c10
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/DataFlow.qll
@@ -0,0 +1,7 @@
+/**
+ * Provides classes for performing local (intra-procedural) and
+ * global (inter-procedural) data-flow analyses, re-exported from `DataFlowImpl`.
+ */
+module DataFlow {
+ import codeql.ruby.dataflow.internal.DataFlowImpl
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/DataFlow2.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/DataFlow2.qll
new file mode 100644
index 00000000000..7486f52052d
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/DataFlow2.qll
@@ -0,0 +1,7 @@
+/**
+ * Provides classes for performing local (intra-procedural) and
+ * global (inter-procedural) data-flow analyses, re-exported from `DataFlowImpl2`.
+ */
+module DataFlow2 {
+ import codeql.ruby.dataflow.internal.DataFlowImpl2
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/Diagnostics.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/Diagnostics.qll
new file mode 100644
index 00000000000..b8995c01bc2
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/Diagnostics.qll
@@ -0,0 +1,52 @@
+private import codeql.Locations
+
+/** A diagnostic emitted during extraction, such as a parse error. */
+class Diagnostic extends @diagnostic {
+ int severity;
+ string tag;
+ string message;
+ string fullMessage;
+ Location location;
+
+ Diagnostic() { diagnostics(this, severity, tag, message, fullMessage, location) }
+
+ /**
+ * Gets the numerical severity level associated with this diagnostic.
+ */
+ int getSeverity() { result = severity }
+
+ /** Gets a string representation of the severity of this diagnostic: `Debug` (10), `Info` (20), `Warning` (30) or `Error` (40); there is no result for any other level. */
+ string getSeverityText() {
+ severity = 10 and result = "Debug"
+ or
+ severity = 20 and result = "Info"
+ or
+ severity = 30 and result = "Warning"
+ or
+ severity = 40 and result = "Error"
+ }
+
+ /** Gets the error code associated with this diagnostic, e.g. `parse_error`. */
+ string getTag() { result = tag }
+
+ /**
+ * Gets the error message text associated with this diagnostic.
+ */
+ string getMessage() { result = message }
+
+ /**
+ * Gets the full error message text associated with this diagnostic.
+ */
+ string getFullMessage() { result = fullMessage }
+
+ /** Gets the source location of this diagnostic. */
+ Location getLocation() { result = location }
+
+ /** Gets a textual representation of this diagnostic, namely its message. */
+ string toString() { result = this.getMessage() }
+}
+
+/** A diagnostic relating to a particular error in extracting a file, identified by the `parse_error` tag. */
+class ExtractionError extends Diagnostic, @diagnostic_error {
+ ExtractionError() { this.getTag() = "parse_error" }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/Frameworks.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/Frameworks.qll
new file mode 100644
index 00000000000..bd75177c401
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/Frameworks.qll
@@ -0,0 +1,11 @@
+/**
+ * Helper file that imports all framework modeling.
+ */
+
+private import codeql.ruby.frameworks.ActionController
+private import codeql.ruby.frameworks.ActiveRecord
+private import codeql.ruby.frameworks.ActionView
+private import codeql.ruby.frameworks.StandardLibrary
+private import codeql.ruby.frameworks.Files
+private import codeql.ruby.frameworks.HttpClients
+private import codeql.ruby.frameworks.XmlParsing
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/TaintTracking.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/TaintTracking.qll
new file mode 100755
index 00000000000..e443b294273
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/TaintTracking.qll
@@ -0,0 +1,7 @@
+/**
+ * Provides classes for performing local (intra-procedural) and
+ * global (inter-procedural) taint-tracking analyses, re-exported from `TaintTrackingImpl`.
+ */
+module TaintTracking {
+ import codeql.ruby.dataflow.internal.tainttracking1.TaintTrackingImpl
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Call.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Call.qll
new file mode 100644
index 00000000000..d34034f14cd
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Call.qll
@@ -0,0 +1,215 @@
+private import codeql.ruby.AST
+private import internal.AST
+private import internal.Call
+private import internal.TreeSitter
+private import codeql.ruby.dataflow.internal.DataFlowDispatch
+private import codeql.ruby.dataflow.internal.DataFlowImplCommon
+
+/**
+ * A call.
+ */
+class Call extends Expr instanceof CallImpl {
+ override string getAPrimaryQlClass() { result = "Call" }
+
+ /**
+ * Gets the `n`th argument of this method call. In the following example, the
+ * result for n=0 is the `IntegerLiteral` 0, while for n=1 the result is a
+ * `Pair` (whose `getKey` returns the `SymbolLiteral` for `bar`, and
+ * `getValue` returns the `IntegerLiteral` 1). Keyword arguments like this
+ * can be accessed more naturally using the
+ * `getKeywordArgument(string keyword)` predicate.
+ * ```rb
+ * foo(0, bar: 1)
+ * yield 0, bar: 1
+ * ```
+ */
+ final Expr getArgument(int n) { result = super.getArgumentImpl(n) }
+
+ /**
+ * Gets an argument of this method call.
+ */
+ final Expr getAnArgument() { result = this.getArgument(_) }
+
+ /**
+ * Gets the value of the keyword argument whose key is `keyword`, if any. For
+ * example, the result for `getKeywordArgument("qux")` in the following
+ * example is the `IntegerLiteral` 123.
+ * ```rb
+ * foo :bar, "baz", qux: 123
+ * ```
+ */
+ final Expr getKeywordArgument(string keyword) {
+ exists(Pair p |
+ p = this.getAnArgument() and
+ p.getKey().(SymbolLiteral).getValueText() = keyword and
+ result = p.getValue()
+ )
+ }
+
+ /**
+ * Gets the number of arguments of this method call.
+ */
+ final int getNumberOfArguments() { result = super.getNumberOfArgumentsImpl() }
+
+ /** Gets a potential target of this call, if any. */
+ final Callable getATarget() {
+ exists(DataFlowCall c | this = c.asCall().getExpr() |
+ TCfgScope(result) = [viableCallable(c), viableCallableLambda(c, _)]
+ )
+ }
+
+ override AstNode getAChild(string pred) {
+ result = Expr.super.getAChild(pred)
+ or
+ pred = "getArgument" and result = this.getArgument(_)
+ }
+}
+
+/**
+ * A method call, that is, a call with a method name and an optional receiver and block.
+ */
+class MethodCall extends Call instanceof MethodCallImpl {
+ override string getAPrimaryQlClass() { result = "MethodCall" }
+
+ /**
+ * Gets the receiver of this call, if any. For example:
+ *
+ * ```rb
+ * foo.bar
+ * Baz::qux
+ * corge()
+ * ```
+ *
+ * The result for the call to `bar` is the `Expr` for `foo`; the result for
+ * the call to `qux` is the `Expr` for `Baz`; for the call to `corge` there
+ * is no result.
+ */
+ final Expr getReceiver() { result = super.getReceiverImpl() }
+
+ /**
+ * Gets the name of the method being called. For example, in:
+ *
+ * ```rb
+ * foo.bar x, y
+ * ```
+ *
+ * the result is `"bar"`.
+ */
+ final string getMethodName() { result = super.getMethodNameImpl() }
+
+ /**
+ * Gets the block of this method call, if any.
+ * ```rb
+ * foo.each { |x| puts x }
+ * ```
+ */
+ final Block getBlock() { result = super.getBlockImpl() }
+
+ override string toString() { result = "call to " + this.getMethodName() }
+
+ override AstNode getAChild(string pred) {
+ result = Call.super.getAChild(pred)
+ or
+ pred = "getReceiver" and result = this.getReceiver()
+ or
+ pred = "getBlock" and result = this.getBlock()
+ }
+}
+
+/**
+ * A call to a setter method.
+ * ```rb
+ * self.foo = 10
+ * a[0] = 10
+ * ```
+ */
+class SetterMethodCall extends MethodCall, TMethodCallSynth {
+ SetterMethodCall() { this = TMethodCallSynth(_, _, _, true, _) } // NOTE(review): `true` presumably marks setter calls; confirm against `TMethodCallSynth`
+
+ final override string getAPrimaryQlClass() { result = "SetterMethodCall" }
+}
+
+/**
+ * An element reference, that is, a call to the `[]` method; printed as `...[...]`.
+ * ```rb
+ * a[0]
+ * ```
+ */
+class ElementReference extends MethodCall instanceof ElementReferenceImpl {
+ final override string getAPrimaryQlClass() { result = "ElementReference" }
+
+ final override string toString() { result = "...[...]" }
+}
+
+/**
+ * A call to `yield`, which is a `Call` but not a `MethodCall`.
+ * ```rb
+ * yield x, y
+ * ```
+ */
+class YieldCall extends Call instanceof YieldCallImpl {
+ final override string getAPrimaryQlClass() { result = "YieldCall" }
+
+ final override string toString() { result = "yield ..." }
+}
+
+/**
+ * A call to `super`, modeled as a `MethodCall`.
+ * ```rb
+ * class Foo < Bar
+ * def baz
+ * super
+ * end
+ * end
+ * ```
+ */
+class SuperCall extends MethodCall instanceof SuperCallImpl {
+ final override string getAPrimaryQlClass() { result = "SuperCall" }
+}
+
+/**
+ * A block argument (`&expr`) in a method call.
+ * ```rb
+ * foo(&block)
+ * ```
+ */
+class BlockArgument extends Expr, TBlockArgument {
+ private Ruby::BlockArgument g;
+
+ BlockArgument() { this = TBlockArgument(g) }
+
+ final override string getAPrimaryQlClass() { result = "BlockArgument" }
+
+ /**
+ * Gets the underlying expression representing the block. In the following
+ * example, the result is the `Expr` for `bar`:
+ * ```rb
+ * foo(&bar)
+ * ```
+ */
+ final Expr getValue() { toGenerated(result) = g.getChild() }
+
+ final override string toString() { result = "&..." }
+
+ final override AstNode getAChild(string pred) {
+ result = super.getAChild(pred)
+ or
+ pred = "getValue" and result = this.getValue()
+ }
+}
+
+/**
+ * A `...` expression that contains forwarded arguments; printed as `...`.
+ * ```rb
+ * foo(...)
+ * ```
+ */
+class ForwardedArguments extends Expr, TForwardArgument {
+ private Ruby::ForwardArgument g;
+
+ ForwardedArguments() { this = TForwardArgument(g) }
+
+ final override string getAPrimaryQlClass() { result = "ForwardedArguments" }
+
+ final override string toString() { result = "..." }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Constant.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Constant.qll
new file mode 100644
index 00000000000..11683d694b7
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Constant.qll
@@ -0,0 +1,210 @@
+private import codeql.ruby.AST
+private import internal.AST
+private import internal.Module
+private import internal.Variable
+private import internal.TreeSitter
+
+/** An expression that accesses a constant. */
+class ConstantAccess extends Expr, TConstantAccess {
+  /**
+   * Gets the name of the accessed constant. This base implementation has no
+   * result.
+   */
+  string getName() { none() }
+
+  /** Holds if `name` is the name of the accessed constant. */
+  final predicate hasName(string name) { name = this.getName() }
+
+  /**
+   * Holds if this access refers to the global scope through the scope
+   * resolution operator, as in:
+   *
+   * ```rb
+   * ::MESSAGE
+   * ```
+   *
+   * This base implementation never holds.
+   */
+  predicate hasGlobalScope() { none() }
+
+  /**
+   * Gets the expression on the left of the scope resolution operator, if any.
+   * In the following example this is the `Call` expression for `foo()`:
+   *
+   * ```rb
+   * foo()::MESSAGE
+   * ```
+   *
+   * A plain access such as the following has no scope resolution operation
+   * and therefore no result:
+   *
+   * ```rb
+   * MESSAGE
+   * ```
+   *
+   * This base implementation has no result.
+   */
+  Expr getScopeExpr() { none() }
+
+  /** Gets a child of this node; `pred` names the predicate that yields it. */
+  override AstNode getAChild(string pred) {
+    pred = "getScopeExpr" and result = this.getScopeExpr()
+    or
+    result = super.getAChild(pred)
+  }
+
+  /** Gets the name of the constant as the textual representation. */
+  override string toString() { result = this.getName() }
+}
+
+/** A constant access backed by a single generated `Ruby::Constant` token. */
+private class TokenConstantAccess extends ConstantAccess, TTokenConstantAccess {
+  private Ruby::Constant token;
+
+  TokenConstantAccess() { this = TTokenConstantAccess(token) }
+
+  /** Gets the value of the underlying token as the constant name. */
+  final override string getName() { token.getValue() = result }
+}
+
+/**
+ * A constant access backed by a generated `Ruby::ScopeResolution` node
+ * together with the `Ruby::Constant` that supplies the name.
+ */
+private class ScopeResolutionConstantAccess extends ConstantAccess, TScopeResolutionConstantAccess {
+  private Ruby::ScopeResolution resolution;
+  private Ruby::Constant nameToken;
+
+  ScopeResolutionConstantAccess() { this = TScopeResolutionConstantAccess(resolution, nameToken) }
+
+  /** Holds if the scope resolution node has no scope. */
+  final override predicate hasGlobalScope() { not exists(resolution.getScope()) }
+
+  /** Gets the expression corresponding to the scope of the scope resolution node. */
+  final override Expr getScopeExpr() { resolution.getScope() = toGenerated(result) }
+
+  /** Gets the value of the name token. */
+  final override string getName() { nameToken.getValue() = result }
+}
+
+/**
+ * A synthesized constant read access. The stored string `value` is the
+ * constant name, prefixed with `::` when the access refers to the global scope.
+ */
+private class ConstantReadAccessSynth extends ConstantAccess, TConstantReadAccessSynth {
+  private string value;
+
+  ConstantReadAccessSynth() { this = TConstantReadAccessSynth(_, _, value) }
+
+  /** Holds if the stored value starts with `::`. */
+  final override predicate hasGlobalScope() { value.matches("::%") }
+
+  /** Gets the synthesized child at index 0, if any. */
+  final override Expr getScopeExpr() { synthChild(this, 0, result) }
+
+  /** Gets the stored value with any leading `::` removed. */
+  final override string getName() {
+    this.hasGlobalScope() and result = value.suffix(2)
+    or
+    not this.hasGlobalScope() and result = value
+  }
+}
+
+/**
+ * A use (read) of a constant.
+ *
+ * For example, the right-hand side of the assignment in:
+ *
+ * ```rb
+ * x = Foo
+ * ```
+ *
+ * Or the superclass `Bar` in this example:
+ *
+ * ```rb
+ * class Foo < Bar
+ * end
+ * ```
+ */
+class ConstantReadAccess extends ConstantAccess {
+ /**
+  * A constant access is a read if it is not a write, if it is the left
+  * operand of an assignment operation, or if it is a synthesized read access.
+  */
+ ConstantReadAccess() {
+ not this instanceof ConstantWriteAccess
+ or
+ // `X` in `X ||= 10` is considered both a read and a write
+ this = any(AssignOperation a).getLeftOperand()
+ or
+ this instanceof TConstantReadAccessSynth
+ }
+
+ /**
+ * Gets the value being read, if any. For example, in
+ *
+ * ```rb
+ * module M
+ * CONST = "const"
+ * end
+ *
+ * puts M::CONST
+ * ```
+ *
+ * the value being read at `M::CONST` is `"const"`.
+ *
+ * Three lookups contribute results: an access without a scope expression is
+ * looked up in the modules of its (transitively) enclosing modules; an access
+ * with global scope is looked up in `Object`; and an access with a scope
+ * expression is looked up in the module that the scope expression resolves to.
+ */
+ Expr getValue() {
+ not exists(this.getScopeExpr()) and
+ result = lookupConst(this.getEnclosingModule+().getModule(), this.getName()) and
+ // For now, we restrict the scope of top-level declarations to their file.
+ // This may remove some plausible targets, but also removes a lot of
+ // implausible targets
+ if result.getEnclosingModule() instanceof Toplevel
+ then result.getFile() = this.getFile()
+ else any()
+ or
+ this.hasGlobalScope() and
+ result = lookupConst(TResolved("Object"), this.getName())
+ or
+ result = lookupConst(resolveScopeExpr(this.getScopeExpr()), this.getName())
+ }
+
+ /** Gets the value text of the value being read, as given by `getValue()`. */
+ override string getValueText() { result = this.getValue().getValueText() }
+
+ /** Gets the name of the primary QL class of this node, `"ConstantReadAccess"`. */
+ final override string getAPrimaryQlClass() { result = "ConstantReadAccess" }
+}
+
+/**
+ * A definition of a constant.
+ *
+ * Examples:
+ *
+ * ```rb
+ * Foo = 1 # defines constant Foo as an integer
+ * M::Foo = 1 # defines constant Foo as an integer in module M
+ *
+ * class Bar; end # defines constant Bar as a class
+ * class M::Bar; end # defines constant Bar as a class in module M
+ *
+ * module Baz; end # defines constant Baz as a module
+ * module M::Baz; end # defines constant Baz as a module in module M
+ * ```
+ */
+class ConstantWriteAccess extends ConstantAccess {
+ /**
+  * A constant access is a write if its generated node is an explicit
+  * assignment node or if it is a namespace.
+  */
+ ConstantWriteAccess() {
+ explicitAssignmentNode(toGenerated(this), _) or this instanceof TNamespace
+ }
+
+ /** Gets the name of the primary QL class of this node, `"ConstantWriteAccess"`. */
+ override string getAPrimaryQlClass() { result = "ConstantWriteAccess" }
+
+ /**
+ * Gets the fully qualified name for this constant, based on the context in
+ * which it is defined.
+ *
+ * For example, given
+ * ```rb
+ * module Foo
+ *   module Bar
+ *     class Baz
+ *     end
+ *   end
+ *   CONST_A = "a"
+ * end
+ * ```
+ *
+ * the constant `Baz` has the fully qualified name `Foo::Bar::Baz`, and
+ * `CONST_A` has the fully qualified name `Foo::CONST_A`.
+ */
+ string getQualifiedName() {
+ /* get the qualified name for the parent module, then append `::` and the name of this constant */
+ exists(ConstantWriteAccess parent | parent = this.getEnclosingModule() |
+ result = parent.getQualifiedName() + "::" + this.getName()
+ )
+ or
+ /* base case - there's no parent module that is itself a constant write access */
+ not exists(ConstantWriteAccess parent | parent = this.getEnclosingModule()) and
+ result = this.getName()
+ }
+}
+
+/**
+ * A definition of a constant via assignment. For example, the left-hand
+ * operand in the following example:
+ *
+ * ```rb
+ * MAX_SIZE = 100
+ * ```
+ */
+class ConstantAssignment extends ConstantWriteAccess, LhsExpr {
+ /** Gets the name of the primary QL class of this node, `"ConstantAssignment"`. */
+ override string getAPrimaryQlClass() { result = "ConstantAssignment" }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Control.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Control.qll
new file mode 100644
index 00000000000..33f52c02413
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Control.qll
@@ -0,0 +1,611 @@
+private import codeql.ruby.AST
+private import internal.AST
+private import internal.TreeSitter
+
+/**
+ * A control expression. This is the common base class of the following:
+ * - `case`
+ * - `if`/`unless` (including expression-modifier variants)
+ * - ternary-if (`?:`)
+ * - `while`/`until` (including expression-modifier variants)
+ * - `for`
+ */
+class ControlExpr extends Expr, TControlExpr { }
+
+/**
+ * A conditional expression. That is, an `if`/`unless` expression (including
+ * the expression-modifier variants) or a ternary-if (`?:`) expression.
+ */
+class ConditionalExpr extends ControlExpr, TConditionalExpr {
+  /**
+   * Gets the branch, if any, that is taken when the condition evaluates to
+   * `cond`. This base implementation has no result.
+   */
+  Stmt getBranch(boolean cond) { none() }
+
+  /**
+   * Gets the condition expression, such as `foo` in:
+   * ```rb
+   * if foo
+   *   bar = 1
+   * end
+   * ```
+   * This base implementation has no result.
+   */
+  Expr getCondition() { none() }
+
+  /** Gets a child of this node; `pred` names the predicate that yields it. */
+  override AstNode getAChild(string pred) {
+    pred = "getBranch" and result = this.getBranch(_)
+    or
+    pred = "getCondition" and result = this.getCondition()
+    or
+    result = super.getAChild(pred)
+  }
+}
+
+/**
+ * An `if` or `elsif` expression.
+ * ```rb
+ * if x
+ *   a += 1
+ * elsif y
+ *   a += 2
+ * end
+ * ```
+ */
+class IfExpr extends ConditionalExpr, TIfExpr {
+ final override string getAPrimaryQlClass() { result = "IfExpr" }
+
+ /** Holds if this is an `elsif` expression. */
+ predicate isElsif() { none() }
+
+ /** Gets the 'then' branch of this `if`/`elsif` expression. */
+ Stmt getThen() { none() }
+
+ /**
+ * Gets the `elsif`/`else` branch of this `if`/`elsif` expression, if any. In
+ * the following example, the result is a `StmtSequence` containing `b`.
+ * ```rb
+ * if foo
+ *   a
+ * else
+ *   b
+ * end
+ * ```
+ * But there is no result for the following:
+ * ```rb
+ * if foo
+ *   a
+ * end
+ * ```
+ * There can be at most one result, since `elsif` branches nest. In the
+ * following example, `ifExpr.getElse()` returns an `IfExpr` for which
+ * `isElsif()` holds, and the `else` branch is nested inside that. To get the
+ * `StmtSequence` for the `else` branch, i.e. the one containing `c`, use
+ * `getElse().(IfExpr).getElse()`.
+ * ```rb
+ * if foo
+ *   a
+ * elsif bar
+ *   b
+ * else
+ *   c
+ * end
+ * ```
+ */
+ Stmt getElse() { none() }
+
+ /** Gets the 'then' branch when `cond` is `true`, and the `elsif`/`else` branch when `cond` is `false`. */
+ final override Stmt getBranch(boolean cond) {
+ cond = true and result = this.getThen()
+ or
+ cond = false and result = this.getElse()
+ }
+
+ /** Gets a child of this node; `pred` names the predicate that yields it. */
+ override AstNode getAChild(string pred) {
+ result = super.getAChild(pred)
+ or
+ pred = "getThen" and result = this.getThen()
+ or
+ pred = "getElse" and result = this.getElse()
+ }
+}
+
+/** An `if` expression backed by a generated `Ruby::If` node. */
+private class If extends IfExpr, TIf {
+  private Ruby::If ifNode;
+
+  If() { this = TIf(ifNode) }
+
+  final override string toString() { result = "if ..." }
+
+  final override Expr getCondition() { ifNode.getCondition() = toGenerated(result) }
+
+  final override Stmt getThen() { ifNode.getConsequence() = toGenerated(result) }
+
+  final override Stmt getElse() { ifNode.getAlternative() = toGenerated(result) }
+}
+
+/** An `elsif` expression backed by a generated `Ruby::Elsif` node. */
+private class Elsif extends IfExpr, TElsif {
+  private Ruby::Elsif elsifNode;
+
+  Elsif() { this = TElsif(elsifNode) }
+
+  final override string toString() { result = "elsif ..." }
+
+  /** Always holds: every node of this class is an `elsif`. */
+  final override predicate isElsif() { any() }
+
+  final override Expr getCondition() { elsifNode.getCondition() = toGenerated(result) }
+
+  final override Stmt getThen() { elsifNode.getConsequence() = toGenerated(result) }
+
+  final override Stmt getElse() { elsifNode.getAlternative() = toGenerated(result) }
+}
+
+/**
+ * An `unless` expression.
+ * ```rb
+ * unless x == 0
+ *   y /= x
+ * end
+ * ```
+ */
+class UnlessExpr extends ConditionalExpr, TUnlessExpr {
+  private Ruby::Unless unlessNode;
+
+  UnlessExpr() { this = TUnlessExpr(unlessNode) }
+
+  final override string toString() { result = "unless ..." }
+
+  final override string getAPrimaryQlClass() { result = "UnlessExpr" }
+
+  final override Expr getCondition() { unlessNode.getCondition() = toGenerated(result) }
+
+  /**
+   * Gets the 'then' branch of this `unless` expression, i.e. the
+   * `StmtSequence` containing `foo` in:
+   * ```rb
+   * unless a == b then
+   *   foo
+   * else
+   *   bar
+   * end
+   * ```
+   */
+  final Stmt getThen() { unlessNode.getConsequence() = toGenerated(result) }
+
+  /**
+   * Gets the 'else' branch of this `unless` expression, i.e. the
+   * `StmtSequence` containing `bar` in:
+   * ```rb
+   * unless a == b then
+   *   foo
+   * else
+   *   bar
+   * end
+   * ```
+   */
+  final Stmt getElse() { unlessNode.getAlternative() = toGenerated(result) }
+
+  /**
+   * Gets the 'else' branch when `cond` is `true`, and the 'then' branch when
+   * `cond` is `false`.
+   */
+  final override Expr getBranch(boolean cond) {
+    cond = true and result = this.getElse()
+    or
+    cond = false and result = this.getThen()
+  }
+
+  /** Gets a child of this node; `pred` names the predicate that yields it. */
+  override AstNode getAChild(string pred) {
+    pred = "getThen" and result = this.getThen()
+    or
+    pred = "getElse" and result = this.getElse()
+    or
+    result = ConditionalExpr.super.getAChild(pred)
+  }
+}
+
+/**
+ * An expression with a trailing `if` modifier.
+ * ```rb
+ * foo if bar
+ * ```
+ */
+class IfModifierExpr extends ConditionalExpr, TIfModifierExpr {
+  private Ruby::IfModifier modifier;
+
+  IfModifierExpr() { this = TIfModifierExpr(modifier) }
+
+  final override string toString() { result = "... if ..." }
+
+  final override string getAPrimaryQlClass() { result = "IfModifierExpr" }
+
+  /**
+   * Gets the conditionally evaluated statement, i.e. the `Expr` for `foo` in:
+   * ```rb
+   * foo if bar
+   * ```
+   */
+  final Stmt getBody() { modifier.getBody() = toGenerated(result) }
+
+  final override Expr getCondition() { modifier.getCondition() = toGenerated(result) }
+
+  /** Gets the body when `cond` is `true`; there is no branch for `false`. */
+  final override Stmt getBranch(boolean cond) { result = this.getBody() and cond = true }
+
+  /** Gets a child of this node; `pred` names the predicate that yields it. */
+  override AstNode getAChild(string pred) {
+    pred = "getBody" and result = this.getBody()
+    or
+    result = ConditionalExpr.super.getAChild(pred)
+  }
+}
+
+/**
+ * An expression with a trailing `unless` modifier.
+ * ```rb
+ * y /= x unless x == 0
+ * ```
+ */
+class UnlessModifierExpr extends ConditionalExpr, TUnlessModifierExpr {
+  private Ruby::UnlessModifier modifier;
+
+  UnlessModifierExpr() { this = TUnlessModifierExpr(modifier) }
+
+  final override string toString() { result = "... unless ..." }
+
+  final override string getAPrimaryQlClass() { result = "UnlessModifierExpr" }
+
+  /**
+   * Gets the conditionally evaluated statement, i.e. the `Expr` for `foo` in:
+   * ```rb
+   * foo unless bar
+   * ```
+   */
+  final Stmt getBody() { modifier.getBody() = toGenerated(result) }
+
+  final override Expr getCondition() { modifier.getCondition() = toGenerated(result) }
+
+  /** Gets the body when `cond` is `false`; there is no branch for `true`. */
+  final override Stmt getBranch(boolean cond) { result = this.getBody() and cond = false }
+
+  /** Gets a child of this node; `pred` names the predicate that yields it. */
+  override AstNode getAChild(string pred) {
+    pred = "getBody" and result = this.getBody()
+    or
+    result = ConditionalExpr.super.getAChild(pred)
+  }
+}
+
+/**
+ * A ternary (`?:`) conditional expression.
+ * ```rb
+ * (a > b) ? a : b
+ * ```
+ */
+class TernaryIfExpr extends ConditionalExpr, TTernaryIfExpr {
+  private Ruby::Conditional conditional;
+
+  TernaryIfExpr() { this = TTernaryIfExpr(conditional) }
+
+  final override string toString() { result = "... ? ... : ..." }
+
+  final override string getAPrimaryQlClass() { result = "TernaryIfExpr" }
+
+  final override Expr getCondition() { conditional.getCondition() = toGenerated(result) }
+
+  /** Gets the 'then' branch of this ternary if expression. */
+  final Stmt getThen() { conditional.getConsequence() = toGenerated(result) }
+
+  /** Gets the 'else' branch of this ternary if expression. */
+  final Stmt getElse() { conditional.getAlternative() = toGenerated(result) }
+
+  /**
+   * Gets the 'then' branch when `cond` is `true`, and the 'else' branch when
+   * `cond` is `false`.
+   */
+  final override Stmt getBranch(boolean cond) {
+    cond = false and result = this.getElse()
+    or
+    cond = true and result = this.getThen()
+  }
+
+  /** Gets a child of this node; `pred` names the predicate that yields it. */
+  override AstNode getAChild(string pred) {
+    pred = "getThen" and result = this.getThen()
+    or
+    pred = "getElse" and result = this.getElse()
+    or
+    result = ConditionalExpr.super.getAChild(pred)
+  }
+}
+
+/**
+ * A `case` expression.
+ * ```rb
+ * case foo
+ * when 0
+ *   puts 'zero'
+ * else
+ *   puts 'other'
+ * end
+ * ```
+ */
+class CaseExpr extends ControlExpr, TCaseExpr {
+  private Ruby::Case g;
+
+  CaseExpr() { this = TCaseExpr(g) }
+
+  final override string getAPrimaryQlClass() { result = "CaseExpr" }
+
+  /**
+   * Gets the expression being compared, if any. For example, `foo` in the following example.
+   * ```rb
+   * case foo
+   * when 0
+   *   puts 'zero'
+   * when 1
+   *   puts 'one'
+   * end
+   * ```
+   * There is no result for the following example:
+   * ```rb
+   * case
+   * when a then 0
+   * when b then 1
+   * else 2
+   * end
+   * ```
+   */
+  final Expr getValue() { toGenerated(result) = g.getValue() }
+
+  /**
+   * Gets the `n`th branch of this case expression, either a `WhenExpr` or a
+   * `StmtSequence`.
+   */
+  final Expr getBranch(int n) { toGenerated(result) = g.getChild(n) }
+
+  /**
+   * Gets a branch of this case expression, either a `WhenExpr` or a
+   * `StmtSequence`.
+   */
+  final Expr getABranch() { result = this.getBranch(_) }
+
+  /** Gets a `when` branch of this case expression. */
+  final WhenExpr getAWhenBranch() { result = this.getABranch() }
+
+  /**
+   * Gets the `else` branch of this case expression, if any. This is any
+   * branch that is a `StmtSequence`.
+   */
+  final StmtSequence getElseBranch() { result = this.getABranch() }
+
+  /**
+   * Gets the number of branches of this case expression.
+   */
+  final int getNumberOfBranches() { result = count(this.getBranch(_)) }
+
+  final override string toString() { result = "case ..." }
+
+  /** Gets a child of this node; `pred` names the predicate that yields it. */
+  override AstNode getAChild(string pred) {
+    result = super.getAChild(pred)
+    or
+    pred = "getValue" and result = this.getValue()
+    or
+    pred = "getBranch" and result = this.getBranch(_)
+  }
+}
+
+/**
+ * A `when` branch inside a `case` expression.
+ * ```rb
+ * case
+ * when a > b then x
+ * end
+ * ```
+ */
+class WhenExpr extends Expr, TWhenExpr {
+  private Ruby::When whenNode;
+
+  WhenExpr() { this = TWhenExpr(whenNode) }
+
+  final override string toString() { result = "when ..." }
+
+  final override string getAPrimaryQlClass() { result = "WhenExpr" }
+
+  /**
+   * Gets the `n`th pattern (or condition) of this case-when expression. In
+   * the example below, `x` is pattern 0, `y` is pattern 1 and `z` is
+   * pattern 2.
+   * ```rb
+   * case foo
+   * when x, y, z
+   *   puts 'x/y/z'
+   * end
+   * ```
+   */
+  final Expr getPattern(int n) { whenNode.getPattern(n).getChild() = toGenerated(result) }
+
+  /** Gets any pattern (or condition) of this case-when expression. */
+  final Expr getAPattern() { result = this.getPattern(_) }
+
+  /** Gets the number of patterns of this case-when expression. */
+  final int getNumberOfPatterns() { result = count(this.getPattern(_)) }
+
+  /** Gets the body of this case-when expression. */
+  final Stmt getBody() { whenNode.getBody() = toGenerated(result) }
+
+  /** Gets a child of this node; `pred` names the predicate that yields it. */
+  override AstNode getAChild(string pred) {
+    pred = "getPattern" and result = this.getPattern(_)
+    or
+    pred = "getBody" and result = this.getBody()
+    or
+    result = super.getAChild(pred)
+  }
+}
+
+/**
+ * A loop: a `for` loop, a `while` or `until` loop, or one of the
+ * expression-modifier variants of the latter two.
+ */
+class Loop extends ControlExpr, TLoop {
+  /** Gets the body of this loop. This base implementation has no result. */
+  Stmt getBody() { none() }
+
+  /** Gets a child of this node; `pred` names the predicate that yields it. */
+  override AstNode getAChild(string pred) {
+    pred = "getBody" and result = this.getBody()
+    or
+    result = super.getAChild(pred)
+  }
+}
+
+/**
+ * A loop controlled by a condition expression: a `while` or `until` loop, or
+ * one of their expression-modifier variants.
+ */
+class ConditionalLoop extends Loop, TConditionalLoop {
+  /**
+   * Holds if the loop body is entered when the condition is `condValue`.
+   * This base implementation never holds.
+   */
+  predicate entersLoopWhenConditionIs(boolean condValue) { none() }
+
+  /**
+   * Gets the condition expression of this loop. This base implementation has
+   * no result.
+   */
+  Expr getCondition() { none() }
+
+  /** Gets a child of this node; `pred` names the predicate that yields it. */
+  override AstNode getAChild(string pred) {
+    pred = "getCondition" and result = this.getCondition()
+    or
+    result = Loop.super.getAChild(pred)
+  }
+}
+
+/**
+ * A `while` loop.
+ * ```rb
+ * while a < b
+ *   p a
+ *   a += 2
+ * end
+ * ```
+ */
+class WhileExpr extends ConditionalLoop, TWhileExpr {
+  private Ruby::While whileNode;
+
+  WhileExpr() { this = TWhileExpr(whileNode) }
+
+  final override string toString() { result = "while ..." }
+
+  final override string getAPrimaryQlClass() { result = "WhileExpr" }
+
+  final override Expr getCondition() { whileNode.getCondition() = toGenerated(result) }
+
+  /** Gets the body of this `while` loop. */
+  final override Stmt getBody() { whileNode.getBody() = toGenerated(result) }
+
+  /**
+   * Holds if the loop body is entered when the condition is `condValue`. A
+   * `while` loop enters its body when `condValue` is true.
+   */
+  final override predicate entersLoopWhenConditionIs(boolean condValue) { condValue = true }
+}
+
+/**
+ * An `until` loop.
+ * ```rb
+ * until a >= b
+ *   p a
+ *   a += 1
+ * end
+ * ```
+ */
+class UntilExpr extends ConditionalLoop, TUntilExpr {
+  private Ruby::Until untilNode;
+
+  UntilExpr() { this = TUntilExpr(untilNode) }
+
+  final override string toString() { result = "until ..." }
+
+  final override string getAPrimaryQlClass() { result = "UntilExpr" }
+
+  final override Expr getCondition() { untilNode.getCondition() = toGenerated(result) }
+
+  /** Gets the body of this `until` loop. */
+  final override Stmt getBody() { untilNode.getBody() = toGenerated(result) }
+
+  /**
+   * Holds if the loop body is entered when the condition is `condValue`. An
+   * `until` loop enters its body when `condValue` is false.
+   */
+  final override predicate entersLoopWhenConditionIs(boolean condValue) { condValue = false }
+}
+
+/**
+ * An expression that is repeated by a trailing `while` modifier.
+ * ```rb
+ * foo while bar
+ * ```
+ */
+class WhileModifierExpr extends ConditionalLoop, TWhileModifierExpr {
+  private Ruby::WhileModifier modifier;
+
+  WhileModifierExpr() { this = TWhileModifierExpr(modifier) }
+
+  final override string toString() { result = "... while ..." }
+
+  final override string getAPrimaryQlClass() { result = "WhileModifierExpr" }
+
+  final override Expr getCondition() { modifier.getCondition() = toGenerated(result) }
+
+  final override Stmt getBody() { modifier.getBody() = toGenerated(result) }
+
+  /**
+   * Holds if the loop body is entered when the condition is `condValue`. A
+   * `while`-modifier loop enters its body when `condValue` is true.
+   */
+  final override predicate entersLoopWhenConditionIs(boolean condValue) { condValue = true }
+}
+
+/**
+ * An expression that is repeated by a trailing `until` modifier.
+ * ```rb
+ * foo until bar
+ * ```
+ */
+class UntilModifierExpr extends ConditionalLoop, TUntilModifierExpr {
+  private Ruby::UntilModifier modifier;
+
+  UntilModifierExpr() { this = TUntilModifierExpr(modifier) }
+
+  final override string toString() { result = "... until ..." }
+
+  final override string getAPrimaryQlClass() { result = "UntilModifierExpr" }
+
+  final override Expr getCondition() { modifier.getCondition() = toGenerated(result) }
+
+  final override Stmt getBody() { modifier.getBody() = toGenerated(result) }
+
+  /**
+   * Holds if the loop body is entered when the condition is `condValue`. An
+   * `until`-modifier loop enters its body when `condValue` is false.
+   */
+  final override predicate entersLoopWhenConditionIs(boolean condValue) { condValue = false }
+}
+
+/**
+ * A `for` loop.
+ * ```rb
+ * for val in 1..n
+ *   sum += val
+ * end
+ * ```
+ */
+class ForExpr extends Loop, TForExpr {
+  private Ruby::For forNode;
+
+  ForExpr() { this = TForExpr(forNode) }
+
+  final override string toString() { result = "for ... in ..." }
+
+  final override string getAPrimaryQlClass() { result = "ForExpr" }
+
+  /**
+   * Gets the value that is iterated over, such as the expression `1..10` in:
+   * ```rb
+   * for n in 1..10 do
+   *   puts n
+   * end
+   * ```
+   */
+  final Expr getValue() { forNode.getValue().getChild() = toGenerated(result) }
+
+  /** Gets the pattern representing the iteration argument. */
+  final Pattern getPattern() { forNode.getPattern() = toGenerated(result) }
+
+  /** Gets the body of this `for` loop. */
+  final override Stmt getBody() { forNode.getBody() = toGenerated(result) }
+
+  /** Gets a child of this node; `pred` names the predicate that yields it. */
+  override AstNode getAChild(string pred) {
+    pred = "getValue" and result = this.getValue()
+    or
+    pred = "getPattern" and result = this.getPattern()
+    or
+    result = Loop.super.getAChild(pred)
+  }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Erb.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Erb.qll
new file mode 100644
index 00000000000..52b14b70aa6
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Erb.qll
@@ -0,0 +1,313 @@
+private import codeql.Locations
+private import codeql.ruby.AST
+private import internal.Erb
+private import internal.TreeSitter
+
+/**
+ * A node in the abstract syntax tree of an ERB file. All ERB elements derive
+ * from this class.
+ */
+class ErbAstNode extends TAstNode {
+  /**
+   * Gets the name of a primary CodeQL class to which this node belongs.
+   *
+   * There is always a result: `"???"` when no primary class can be
+   * determined, and possibly several results when multiple primary classes
+   * match.
+   */
+  string getAPrimaryQlClass() { result = "???" }
+
+  /** Gets the location of this node. */
+  Location getLocation() { getLocation(this) = result }
+
+  /**
+   * Gets a textual representation of this node. This base implementation has
+   * no result.
+   */
+  cached
+  string toString() { none() }
+}
+
+/**
+ * An ERB template. A template can hold several directives that are executed
+ * when the template is compiled.
+ */
+class ErbTemplate extends TTemplate, ErbAstNode {
+  private Erb::Template generated;
+
+  ErbTemplate() { this = TTemplate(generated) }
+
+  /** Gets a child node of this template. */
+  ErbAstNode getAChildNode() { generated.getChild(_) = toGenerated(result) }
+
+  final override string getAPrimaryQlClass() { result = "ErbTemplate" }
+
+  override string toString() { result = "erb template" }
+}
+
+// Limits the display text of a token to at most 32 characters: a longer value
+// is cut to its first 29 characters followed by "...".
+bindingset[val]
+private string displayToken(string val) {
+  if val.length() > 32 then result = val.prefix(29) + "..." else result = val
+}
+
+/**
+ * A token in an ERB file: embedded code, a comment, or arbitrary text.
+ */
+class ErbToken extends TTokenNode, ErbAstNode {
+  /** Gets the string value of this token. */
+  string getValue() {
+    result = any(Erb::Token tok | this = fromGenerated(tok)).getValue()
+  }
+
+  override string getAPrimaryQlClass() { result = "ErbToken" }
+
+  /** Gets the token value, shortened for display when it is long. */
+  override string toString() { result = displayToken(this.getValue()) }
+}
+
+/**
+ * An ERB token that is the content of a comment directive.
+ */
+class ErbComment extends ErbToken {
+  private Erb::Comment comment;
+
+  ErbComment() { this = TComment(comment) }
+
+  final override string getAPrimaryQlClass() { result = "ErbComment" }
+
+  /** Gets the string value of the underlying comment token. */
+  override string getValue() { comment.getValue() = result }
+}
+
+/**
+ * An ERB token that is the content of a code directive. Depending on context,
+ * this is typically interpreted as Ruby code or as a GraphQL query.
+ */
+class ErbCode extends ErbToken {
+  private Erb::Code code;
+
+  ErbCode() { this = TCode(code) }
+
+  final override string getAPrimaryQlClass() { result = "ErbCode" }
+
+  /** Gets the string value of the underlying code token. */
+  override string getValue() { code.getValue() = result }
+}
+
+/**
+ * Holds if the position (`line`, `col`) lies within `loc`, with both the
+ * start and the end position of `loc` included.
+ */
+bindingset[line, col]
+private predicate locationIncludesPosition(Location loc, int line, int col) {
+  exists(int firstLine, int lastLine |
+    firstLine = loc.getStartLine() and lastLine = loc.getEndLine()
+  |
+    // strictly between the first and the last line: any column matches
+    firstLine < line and
+    line < lastLine
+    or
+    // multi-line location: only the outer lines restrict the column
+    not firstLine = lastLine and
+    (
+      line = firstLine and col >= loc.getStartColumn()
+      or
+      line = lastLine and col <= loc.getEndColumn()
+    )
+    or
+    // single-line location: the column must lie between both bounds
+    firstLine = lastLine and
+    line = firstLine and
+    col >= loc.getStartColumn() and
+    col <= loc.getEndColumn()
+  )
+}
+
+/** A file in which at least one ERB directive is located. */
+private class ErbDirectiveFile extends File {
+  pragma[nomagic]
+  ErbDirectiveFile() { any(ErbDirective dir).getLocation().getFile() = this }
+
+  /**
+   * Gets a statement in this file whose location starts at line `startLine`
+   * and column `startColumn`.
+   */
+  pragma[nomagic]
+  Stmt getAStmt(int startLine, int startColumn) {
+    exists(Location stmtLoc | stmtLoc = result.getLocation() |
+      this = stmtLoc.getFile() and
+      startLine = stmtLoc.getStartLine() and
+      startColumn = stmtLoc.getStartColumn()
+    )
+  }
+}
+
+/**
+ * A directive in an ERB template.
+ */
+class ErbDirective extends TDirectiveNode, ErbAstNode {
+ /** Holds if this directive spans line `line` in the file `file`. */
+ pragma[nomagic]
+ private predicate spans(ErbDirectiveFile file, int line) {
+ exists(Location loc |
+ loc = this.getLocation() and
+ file = loc.getFile() and
+ line in [loc.getStartLine() .. loc.getEndLine()]
+ )
+ }
+
+ /**
+  * Holds if `s` is a statement, other than a `Toplevel`, whose start position
+  * lies within the location of this directive.
+  */
+ private predicate containsStmtStart(Stmt s) {
+ // `Toplevel` statements are not contained within individual directives,
+ // though their start location may appear within a directive location
+ not s instanceof Toplevel and
+ exists(ErbDirectiveFile file, int startLine, int startColumn |
+ this.spans(file, startLine) and
+ s = file.getAStmt(startLine, startColumn) and
+ locationIncludesPosition(this.getLocation(), startLine, startColumn)
+ )
+ }
+
+ /**
+ * Gets a statement that starts in this directive and whose parent does not
+ * also start in this directive.
+ */
+ Stmt getAChildStmt() {
+ this.containsStmtStart(result) and
+ not this.containsStmtStart(result.getParent())
+ }
+
+ /**
+ * Gets the last child statement in this directive, that is, the child
+ * statement that every other child statement is located strictly before.
+ * See `getAChildStmt` for more details.
+ */
+ Stmt getTerminalStmt() {
+ result = this.getAChildStmt() and
+ forall(Stmt s | s = this.getAChildStmt() and not s = result |
+ s.getLocation().strictlyBefore(result.getLocation())
+ )
+ }
+
+ /** Gets the child token of this directive. */
+ ErbToken getToken() {
+ exists(Erb::Directive g | this = fromGenerated(g) | toGenerated(result) = g.getChild())
+ }
+
+ override string toString() { result = "erb directive" }
+
+ override string getAPrimaryQlClass() { result = "ErbDirective" }
+}
+
+/**
+ * A comment directive (`<%# ... %>`) in an ERB template.
+ * ```erb
+ * <%#= 2 + 2 %>
+ * <%# for x in xs do %>
+ * ```
+ */
+class ErbCommentDirective extends ErbDirective {
+  private Erb::CommentDirective directive;
+
+  ErbCommentDirective() { this = TCommentDirective(directive) }
+
+  final override string getAPrimaryQlClass() { result = "ErbCommentDirective" }
+
+  /** Gets the comment token contained in this directive. */
+  override ErbComment getToken() { directive.getChild() = toGenerated(result) }
+
+  /** Gets the token text wrapped in the `<%#` and `%>` delimiters. */
+  final override string toString() { result = "<%#" + this.getToken().toString() + "%>" }
+}
+
+/**
+ * A GraphQL directive (`<%graphql ... %>`) in an ERB template.
+ * ```erb
+ * <%graphql
+ *   fragment Foo on Bar {
+ *     some {
+ *       queryText
+ *       moreProperties
+ *     }
+ *   }
+ * %>
+ * ```
+ */
+class ErbGraphqlDirective extends ErbDirective {
+  private Erb::GraphqlDirective directive;
+
+  ErbGraphqlDirective() { this = TGraphqlDirective(directive) }
+
+  final override string getAPrimaryQlClass() { result = "ErbGraphqlDirective" }
+
+  /** Gets the code token contained in this directive. */
+  override ErbCode getToken() { directive.getChild() = toGenerated(result) }
+
+  /** Gets the token text wrapped in the `<%graphql` and `%>` delimiters. */
+  final override string toString() { result = "<%graphql" + this.getToken().toString() + "%>" }
+}
+
+/**
+ * An output directive in an ERB template.
+ * ```erb
+ * <%= 2 + 2 %>
+ * ```
+ */
+class ErbOutputDirective extends ErbDirective {
+ private Erb::OutputDirective g;
+
+ ErbOutputDirective() { this = TOutputDirective(g) }
+
+ /** Gets the code token contained in this directive. */
+ override ErbCode getToken() { toGenerated(result) = g.getChild() }
+
+ /** Gets the token text wrapped in the `<%=` and `%>` delimiters. */
+ final override string toString() { result = "<%=" + this.getToken().toString() + "%>" }
+
+ final override string getAPrimaryQlClass() { result = "ErbOutputDirective" }
+}
+
+/**
+ * An execution directive (`<% ... %>`) in an ERB template. Its code is
+ * executed as Ruby, but the result is not rendered.
+ * ```erb
+ * <% books = author.books
+ *    for book in books do %>
+ * ```
+ */
+class ErbExecutionDirective extends ErbDirective {
+  private Erb::Directive directive;
+
+  ErbExecutionDirective() { this = TDirective(directive) }
+
+  final override string getAPrimaryQlClass() { result = "ErbExecutionDirective" }
+
+  /** Gets the token text wrapped in the `<%` and `%>` delimiters. */
+  final override string toString() { result = "<%" + this.getToken().toString() + "%>" }
+}
+
+/**
+ * A `File` that contains an Embedded Ruby (ERB) template. Such a file
+ * typically holds snippets of Ruby code that can be evaluated to produce a
+ * compiled version of the file.
+ */
+class ErbFile extends File {
+  private ErbTemplate template;
+
+  ErbFile() { template.getLocation().getFile() = this }
+
+  /**
+   * Gets the ERB template contained in this file.
+   */
+  ErbTemplate getTemplate() { result = template }
+
+  /**
+   * Gets the base template name associated with this ERB file.
+   * For example, `foo.html.erb` has the template name `foo`, and the partial
+   * template file `_item.html.erb` has the template name `item`.
+   *
+   * This base implementation has no result.
+   */
+  string getTemplateName() { none() }
+
+  /**
+   * Holds if this file is a partial, i.e. a template that is rendered in the
+   * context of another template. This is the case when the first character of
+   * the file's stem is an underscore.
+   */
+  predicate isPartial() { "_" = this.getStem().charAt(0) }
+}
+
+/** An ERB file that is a partial. */
+private class PartialErbFile extends ErbFile {
+  PartialErbFile() { this.isPartial() }
+
+  // The template name is the part of the stem before the first ".", without
+  // its leading underscore.
+  override string getTemplateName() {
+    exists(string head | head = this.getStem().splitAt(".", 0) | result = head.suffix(1))
+  }
+}
+
+/** An ERB file that is not a partial. */
+private class FullErbFile extends ErbFile {
+  FullErbFile() { not this.isPartial() }
+
+  // The template name is the part of the stem before the first ".".
+  override string getTemplateName() {
+    exists(string stem | stem = this.getStem() | result = stem.splitAt(".", 0))
+  }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Expr.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Expr.qll
new file mode 100644
index 00000000000..46b5bdd3d36
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Expr.qll
@@ -0,0 +1,456 @@
+private import codeql.ruby.AST
+private import codeql.ruby.CFG
+private import internal.AST
+private import internal.TreeSitter
+
+/**
+ * An expression.
+ *
+ * This is the root QL class for all expressions.
+ */
+class Expr extends Stmt, TExpr {
+ /**
+ * Gets the textual (constant) value of this expression, if any.
+ *
+ * There is a result only if this expression has at least one control-flow
+ * node and every one of its control-flow nodes reports this same value.
+ */
+ string getValueText() {
+ forex(CfgNodes::ExprCfgNode n | n = this.getAControlFlowNode() | result = n.getValueText())
+ }
+}
+
+/**
+ * A reference to the current object. For example:
+ * - `self == other`
+ * - `self.method_name`
+ * - `def self.method_name ... end`
+ *
+ * This also includes implicit references to the current object in method
+ * calls. For example, the method call `foo(123)` has an implicit `self`
+ * receiver, and is equivalent to the explicit `self.foo(123)`.
+ */
+class Self extends Expr, TSelf {
+ final override string getAPrimaryQlClass() { result = "Self" }
+
+ // Always rendered as the keyword `self`.
+ final override string toString() { result = "self" }
+}
+
+/**
+ * A sequence of expressions in the right-hand side of an assignment or
+ * a `return`, `break` or `next` statement.
+ * ```rb
+ * x = 1, *items, 3, *more
+ * return 1, 2
+ * next *list
+ * break **map
+ * return 1, 2, *items, k: 5, **map
+ * ```
+ */
+class ArgumentList extends Expr, TArgumentList {
+ // The underlying generated node; `getElement` reads its children as either a
+ // `Ruby::ArgumentList` or a `Ruby::RightAssignmentList`.
+ private Ruby::AstNode g;
+
+ ArgumentList() { this = TArgumentList(g) }
+
+ /** Gets the `i`th element in this argument list. */
+ Expr getElement(int i) {
+ toGenerated(result) in [
+ g.(Ruby::ArgumentList).getChild(i), g.(Ruby::RightAssignmentList).getChild(i)
+ ]
+ }
+
+ final override string getAPrimaryQlClass() { result = "ArgumentList" }
+
+ final override string toString() { result = "..., ..." }
+
+ /** Gets a child of this node; elements are reported under the name `getElement`. */
+ final override AstNode getAChild(string pred) {
+ result = super.getAChild(pred)
+ or
+ pred = "getElement" and result = this.getElement(_)
+ }
+}
+
+/** A sequence of expressions. */
+class StmtSequence extends Expr, TStmtSequence {
+ override string getAPrimaryQlClass() { result = "StmtSequence" }
+
+ /**
+ * Gets the `n`th statement in this sequence.
+ *
+ * This base definition has no result; subclasses override it.
+ */
+ Stmt getStmt(int n) { none() }
+
+ /** Gets a statement in this sequence. */
+ final Stmt getAStmt() { result = this.getStmt(_) }
+
+ /**
+ * Gets the last statement in this sequence, if any, that is, the statement
+ * at index `getNumberOfStatements() - 1`.
+ */
+ final Stmt getLastStmt() { result = this.getStmt(this.getNumberOfStatements() - 1) }
+
+ /** Gets the number of statements in this sequence. */
+ final int getNumberOfStatements() { result = count(this.getAStmt()) }
+
+ /** Holds if this sequence has no statements. */
+ final predicate isEmpty() { this.getNumberOfStatements() = 0 }
+
+ /** Gets a child of this node; statements are reported under the name `getStmt`. */
+ override AstNode getAChild(string pred) {
+ result = super.getAChild(pred)
+ or
+ pred = "getStmt" and result = this.getStmt(_)
+ }
+}
+
+/** A synthesized statement sequence whose statements are given by `synthChild`. */
+private class StmtSequenceSynth extends StmtSequence, TStmtSequenceSynth {
+ final override Stmt getStmt(int n) { synthChild(this, n, result) }
+
+ final override string toString() { result = "..." }
+}
+
+/** A statement sequence backed by a `Ruby::Then` node, rendered as `then ...`. */
+private class Then extends StmtSequence, TThen {
+ private Ruby::Then g;
+
+ Then() { this = TThen(g) }
+
+ override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) }
+
+ final override string toString() { result = "then ..." }
+}
+
+/** A statement sequence backed by a `Ruby::Else` node, rendered as `else ...`. */
+private class Else extends StmtSequence, TElse {
+ private Ruby::Else g;
+
+ Else() { this = TElse(g) }
+
+ override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) }
+
+ final override string toString() { result = "else ..." }
+}
+
+/** A statement sequence backed by a `Ruby::Do` node, rendered as `do ...`. */
+private class Do extends StmtSequence, TDo {
+ private Ruby::Do g;
+
+ Do() { this = TDo(g) }
+
+ override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) }
+
+ final override string toString() { result = "do ..." }
+}
+
+/** A statement sequence backed by a `Ruby::Ensure` node, rendered as `ensure ...`. */
+private class Ensure extends StmtSequence, TEnsure {
+ private Ruby::Ensure g;
+
+ Ensure() { this = TEnsure(g) }
+
+ override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) }
+
+ final override string toString() { result = "ensure ..." }
+}
+
+/**
+ * A sequence of statements representing the body of a method, class, module,
+ * or do-block. That is, any body that may also include rescue/ensure/else
+ * statements.
+ */
+class BodyStmt extends StmtSequence, TBodyStmt {
+  // Gets the `i`th child of the generated node underlying this body, for each
+  // kind of node that can carry a body.
+  // Not defined by dispatch, as it should not be exposed
+  private Ruby::AstNode getChild(int i) {
+    result = any(Ruby::Method g | this = TMethod(g)).getChild(i)
+    or
+    result = any(Ruby::SingletonMethod g | this = TSingletonMethod(g)).getChild(i)
+    or
+    exists(Ruby::Lambda g | this = TLambda(g) |
+      result = g.getBody().(Ruby::DoBlock).getChild(i) or
+      result = g.getBody().(Ruby::Block).getChild(i)
+    )
+    or
+    result = any(Ruby::DoBlock g | this = TDoBlock(g)).getChild(i)
+    or
+    // `and` binds tighter than `or`: only this disjunct filters out
+    // `Ruby::BeginBlock` children
+    result = any(Ruby::Program g | this = TToplevel(g)).getChild(i) and
+    not result instanceof Ruby::BeginBlock
+    or
+    result = any(Ruby::Class g | this = TClassDeclaration(g)).getChild(i)
+    or
+    result = any(Ruby::SingletonClass g | this = TSingletonClass(g)).getChild(i)
+    or
+    result = any(Ruby::Module g | this = TModuleDeclaration(g)).getChild(i)
+    or
+    result = any(Ruby::Begin g | this = TBeginExpr(g)).getChild(i)
+  }
+
+  /**
+   * Gets the `n`th statement in this body.
+   *
+   * `else`, `rescue` and `ensure` clauses are not statements of the body; the
+   * remaining children are renumbered consecutively from 0 in child order.
+   */
+  final override Stmt getStmt(int n) {
+    result =
+      rank[n + 1](AstNode node, int i |
+        toGenerated(node) = this.getChild(i) and
+        not node instanceof Else and
+        not node instanceof RescueClause and
+        not node instanceof Ensure
+      |
+        node order by i
+      )
+  }
+
+  /** Gets the `n`th rescue clause in this block. */
+  final RescueClause getRescue(int n) {
+    result =
+      rank[n + 1](RescueClause node, int i |
+        toGenerated(node) = this.getChild(i)
+      |
+        node order by i
+      )
+  }
+
+  /** Gets a rescue clause in this block. */
+  final RescueClause getARescue() { result = this.getRescue(_) }
+
+  /** Gets the `else` clause in this block, if any. */
+  final StmtSequence getElse() { result = unique(Else s | toGenerated(s) = this.getChild(_)) }
+
+  /** Gets the `ensure` clause in this block, if any. */
+  final StmtSequence getEnsure() {
+    result = unique(Ensure s | toGenerated(s) = this.getChild(_))
+  }
+
+  /** Holds if this block has an `ensure` clause. */
+  final predicate hasEnsure() { exists(this.getEnsure()) }
+
+  /**
+   * Gets a child of this node. In addition to the statements, the clauses are
+   * reported under the names `getRescue`, `getElse` and `getEnsure`.
+   */
+  override AstNode getAChild(string pred) {
+    result = StmtSequence.super.getAChild(pred)
+    or
+    pred = "getRescue" and result = this.getRescue(_)
+    or
+    pred = "getElse" and result = this.getElse()
+    or
+    pred = "getEnsure" and result = this.getEnsure()
+  }
+}
+
+/**
+ * A parenthesized expression sequence, typically containing a single expression:
+ * ```rb
+ * (x + 1)
+ * ```
+ * However, they can also contain multiple expressions (the value of the parenthesized
+ * expression is the last expression):
+ * ```rb
+ * (foo; bar)
+ * ```
+ * or even an empty sequence (value is `nil`):
+ * ```rb
+ * ()
+ * ```
+ */
+class ParenthesizedExpr extends StmtSequence, TParenthesizedExpr {
+ // The underlying `Ruby::ParenthesizedStatements` node.
+ private Ruby::ParenthesizedStatements g;
+
+ ParenthesizedExpr() { this = TParenthesizedExpr(g) }
+
+ // The statements are the children of the underlying node, in order.
+ final override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) }
+
+ final override string getAPrimaryQlClass() { result = "ParenthesizedExpr" }
+
+ final override string toString() { result = "( ... )" }
+}
+
+/**
+ * A pair expression. For example, in a hash:
+ * ```rb
+ * { foo: bar }
+ * ```
+ * Or a keyword argument:
+ * ```rb
+ * baz(qux: 1)
+ * ```
+ */
+class Pair extends Expr, TPair {
+ private Ruby::Pair g;
+
+ Pair() { this = TPair(g) }
+
+ final override string getAPrimaryQlClass() { result = "Pair" }
+
+ /**
+ * Gets the key expression of this pair. For example, the `SymbolLiteral`
+ * representing the keyword `foo` in the following example:
+ * ```rb
+ * bar(foo: 123)
+ * ```
+ * Or the `StringLiteral` for `'foo'` in the following hash pair:
+ * ```rb
+ * { 'foo' => 123 }
+ * ```
+ */
+ final Expr getKey() { toGenerated(result) = g.getKey() }
+
+ /**
+ * Gets the value expression of this pair. For example, the `IntegerLiteral`
+ * 123 in the following hash pair:
+ * ```rb
+ * { 'foo' => 123 }
+ * ```
+ */
+ final Expr getValue() { toGenerated(result) = g.getValue() }
+
+ final override string toString() { result = "Pair" }
+
+ /** Gets a child of this node, reported under the name `getKey` or `getValue`. */
+ override AstNode getAChild(string pred) {
+ result = super.getAChild(pred)
+ or
+ pred = "getKey" and result = this.getKey()
+ or
+ pred = "getValue" and result = this.getValue()
+ }
+}
+
+/**
+ * A rescue clause. For example:
+ * ```rb
+ * begin
+ * write_file
+ * rescue StandardError => msg
+ * puts msg
+ * end
+ * ```
+ */
+class RescueClause extends Expr, TRescueClause {
+ private Ruby::Rescue g;
+
+ RescueClause() { this = TRescueClause(g) }
+
+ final override string getAPrimaryQlClass() { result = "RescueClause" }
+
+ /**
+ * Gets the `n`th exception to match, if any. For example `FirstError` or `SecondError` in:
+ * ```rb
+ * begin
+ * do_something
+ * rescue FirstError, SecondError => e
+ * handle_error(e)
+ * end
+ * ```
+ */
+ final Expr getException(int n) { toGenerated(result) = g.getExceptions().getChild(n) }
+
+ /**
+ * Gets an exception to match, if any. For example `FirstError` or `SecondError` in:
+ * ```rb
+ * begin
+ * do_something
+ * rescue FirstError, SecondError => e
+ * handle_error(e)
+ * end
+ * ```
+ */
+ final Expr getAnException() { result = this.getException(_) }
+
+ /**
+ * Gets the variable to which to assign the matched exception, if any.
+ * For example `err` in:
+ * ```rb
+ * begin
+ * do_something
+ * rescue StandardError => err
+ * handle_error(err)
+ * end
+ * ```
+ */
+ final LhsExpr getVariableExpr() { toGenerated(result) = g.getVariable().getChild() }
+
+ /**
+ * Gets the exception handler body.
+ */
+ final StmtSequence getBody() { toGenerated(result) = g.getBody() }
+
+ final override string toString() { result = "rescue ..." }
+
+ /**
+ * Gets a child of this node, reported under the name `getException`,
+ * `getVariableExpr` or `getBody`.
+ */
+ override AstNode getAChild(string pred) {
+ result = super.getAChild(pred)
+ or
+ pred = "getException" and result = this.getException(_)
+ or
+ pred = "getVariableExpr" and result = this.getVariableExpr()
+ or
+ pred = "getBody" and result = this.getBody()
+ }
+}
+
+/**
+ * An expression with a `rescue` modifier. For example:
+ * ```rb
+ * contents = read_file rescue ""
+ * ```
+ */
+class RescueModifierExpr extends Expr, TRescueModifierExpr {
+ // The underlying `Ruby::RescueModifier` node.
+ private Ruby::RescueModifier g;
+
+ RescueModifierExpr() { this = TRescueModifierExpr(g) }
+
+ final override string getAPrimaryQlClass() { result = "RescueModifierExpr" }
+
+ /**
+ * Gets the body of this `RescueModifierExpr`.
+ * ```rb
+ * body rescue handler
+ * ```
+ */
+ final Stmt getBody() { toGenerated(result) = g.getBody() }
+
+ /**
+ * Gets the exception handler of this `RescueModifierExpr`.
+ * ```rb
+ * body rescue handler
+ * ```
+ */
+ final Stmt getHandler() { toGenerated(result) = g.getHandler() }
+
+ final override string toString() { result = "... rescue ..." }
+
+ /** Gets a child of this node, reported under the name `getBody` or `getHandler`. */
+ override AstNode getAChild(string pred) {
+ result = super.getAChild(pred)
+ or
+ pred = "getBody" and result = this.getBody()
+ or
+ pred = "getHandler" and result = this.getHandler()
+ }
+}
+
+/**
+ * A concatenation of string literals.
+ *
+ * ```rb
+ * "foo" "bar" "baz"
+ * ```
+ */
+class StringConcatenation extends Expr, TStringConcatenation {
+ private Ruby::ChainedString g;
+
+ StringConcatenation() { this = TStringConcatenation(g) }
+
+ final override string getAPrimaryQlClass() { result = "StringConcatenation" }
+
+ /** Gets the `n`th string literal in this concatenation. */
+ final StringLiteral getString(int n) { toGenerated(result) = g.getChild(n) }
+
+ /** Gets a string literal in this concatenation. */
+ final StringLiteral getAString() { result = this.getString(_) }
+
+ /** Gets the number of string literals in this concatenation. */
+ final int getNumberOfStrings() { result = count(this.getString(_)) }
+
+ /**
+ * Gets the result of concatenating all the string literals, if and only if
+ * they do not contain any interpolations.
+ *
+ * For the following example, the result is `"foobar"`:
+ *
+ * ```rb
+ * "foo" 'bar'
+ * ```
+ *
+ * And for the following example, where one of the string literals includes
+ * an interpolation, there is no result:
+ *
+ * ```rb
+ * "foo" "bar#{ n }"
+ * ```
+ */
+ final string getConcatenatedValueText() {
+ // Require every part to have value text: `concat` on its own would only
+ // range over the parts that do, and silently leave the others out.
+ forall(StringLiteral c | c = this.getString(_) | exists(c.getValueText())) and
+ result =
+ concat(string valueText, int i |
+ valueText = this.getString(i).getValueText()
+ |
+ valueText order by i
+ )
+ }
+
+ final override string toString() { result = "\"...\" \"...\"" }
+
+ /** Gets a child of this node; the literals are reported under the name `getString`. */
+ override AstNode getAChild(string pred) {
+ result = super.getAChild(pred)
+ or
+ pred = "getString" and result = this.getString(_)
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Literal.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Literal.qll
new file mode 100644
index 00000000000..3e9714e3ce6
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Literal.qll
@@ -0,0 +1,892 @@
+private import codeql.ruby.AST
+private import codeql.ruby.regexp.RegExpTreeView as RETV
+private import internal.AST
+private import internal.Scope
+private import internal.TreeSitter
+
+/**
+ * A literal.
+ *
+ * This is the QL root class for all literals.
+ */
+class Literal extends Expr, TLiteral {
+ /**
+ * Gets the source text for this literal, if this is a simple literal.
+ *
+ * For complex literals, such as arrays, hashes, and strings with
+ * interpolations, this predicate has no result.
+ *
+ * This base definition has no result; subclasses override it.
+ */
+ override string getValueText() { none() }
+}
+
+/**
+ * A numeric literal, i.e. an integer, floating-point, rational, or complex
+ * value.
+ *
+ * ```rb
+ * 123
+ * 0xff
+ * 3.14159
+ * 1.0E2
+ * 7r
+ * 1i
+ * ```
+ */
+class NumericLiteral extends Literal, TNumericLiteral { }
+
+/**
+ * An integer literal.
+ *
+ * ```rb
+ * 123
+ * 0xff
+ * ```
+ */
+class IntegerLiteral extends NumericLiteral, TIntegerLiteral {
+ /**
+ * Gets the numerical value of this integer literal.
+ *
+ * This base definition has no result; subclasses override it.
+ */
+ int getValue() { none() }
+
+ final override string toString() { result = this.getValueText() }
+
+ final override string getAPrimaryQlClass() { result = "IntegerLiteral" }
+}
+
+/** An integer literal that appears in the source code. */
+private class IntegerLiteralReal extends IntegerLiteral, TIntegerLiteralReal {
+  // The underlying `Ruby::Integer` node.
+  private Ruby::Integer g;
+
+  IntegerLiteralReal() { this = TIntegerLiteralReal(g) }
+
+  final override string getValueText() { result = g.getValue() }
+
+  /**
+   * Gets the numerical value of this literal.
+   *
+   * The radix is chosen from the (case-insensitive) prefix: `0b` is binary,
+   * `0x` is hexadecimal, `0o` or a bare leading `0` is octal, and `0d` or no
+   * prefix is decimal. Underscore digit separators are ignored.
+   */
+  final override int getValue() {
+    exists(string s, string values, string digits |
+      s = this.getValueText().toLowerCase() and
+      (
+        s.matches("0b%") and
+        values = "01" and
+        digits = s.suffix(2).replaceAll("_", "")
+        or
+        s.matches("0x%") and
+        values = "0123456789abcdef" and
+        digits = s.suffix(2).replaceAll("_", "")
+        or
+        s.matches("0o%") and
+        values = "01234567" and
+        digits = s.suffix(2).replaceAll("_", "")
+        or
+        // `0d` is Ruby's explicit decimal prefix; it must not be read as octal
+        s.matches("0d%") and
+        values = "0123456789" and
+        digits = s.suffix(2).replaceAll("_", "")
+        or
+        // a leading zero without a radix letter means octal
+        s.charAt(0) = "0" and
+        not s.charAt(1) = ["b", "x", "o", "d"] and
+        values = "01234567" and
+        digits = s.suffix(1).replaceAll("_", "")
+        or
+        s.charAt(0) != "0" and
+        values = "0123456789" and
+        digits = s.replaceAll("_", "")
+      )
+    |
+      // Positional notation: a digit's index in `values` is its numeric value,
+      // and `values.length()` is the radix. `s` is already lower-case.
+      result =
+        sum(int index, string c, int v, int exp |
+          c = digits.charAt(index) and
+          v = values.indexOf(c) and
+          exp = digits.length() - index - 1
+        |
+          v * values.length().pow(exp)
+        )
+    )
+  }
+}
+
+/** A synthesized integer literal, which carries its value directly. */
+private class IntegerLiteralSynth extends IntegerLiteral, TIntegerLiteralSynth {
+ private int value;
+
+ IntegerLiteralSynth() { this = TIntegerLiteralSynth(_, _, value) }
+
+ final override string getValueText() { result = value.toString() }
+
+ final override int getValue() { result = value }
+}
+
+/**
+ * A floating-point literal.
+ *
+ * ```rb
+ * 1.3
+ * 2.7e+5
+ * ```
+ */
+class FloatLiteral extends NumericLiteral, TFloatLiteral {
+ // The underlying `Ruby::Float` node.
+ private Ruby::Float g;
+
+ FloatLiteral() { this = TFloatLiteral(g) }
+
+ // The value text is the value reported by the underlying node.
+ final override string getValueText() { result = g.getValue() }
+
+ final override string toString() { result = this.getValueText() }
+
+ final override string getAPrimaryQlClass() { result = "FloatLiteral" }
+}
+
+/**
+ * A rational literal.
+ *
+ * ```rb
+ * 123r
+ * ```
+ */
+class RationalLiteral extends NumericLiteral, TRationalLiteral {
+ // The underlying `Ruby::Rational` node.
+ private Ruby::Rational g;
+
+ RationalLiteral() { this = TRationalLiteral(g) }
+
+ // The value text is the child token's value followed by the `r` suffix.
+ final override string getValueText() { result = g.getChild().(Ruby::Token).getValue() + "r" }
+
+ final override string toString() { result = this.getValueText() }
+
+ final override string getAPrimaryQlClass() { result = "RationalLiteral" }
+}
+
+/**
+ * A complex literal.
+ *
+ * ```rb
+ * 1i
+ * ```
+ */
+class ComplexLiteral extends NumericLiteral, TComplexLiteral {
+ // The underlying `Ruby::Complex` node.
+ private Ruby::Complex g;
+
+ ComplexLiteral() { this = TComplexLiteral(g) }
+
+ // The value text is the value reported by the underlying node.
+ final override string getValueText() { result = g.getValue() }
+
+ final override string toString() { result = this.getValueText() }
+
+ final override string getAPrimaryQlClass() { result = "ComplexLiteral" }
+}
+
+/** A `nil` literal. */
+class NilLiteral extends Literal, TNilLiteral {
+ // The underlying `Ruby::Nil` node.
+ private Ruby::Nil g;
+
+ NilLiteral() { this = TNilLiteral(g) }
+
+ // The value text is the value reported by the underlying node.
+ final override string getValueText() { result = g.getValue() }
+
+ final override string toString() { result = this.getValueText() }
+
+ final override string getAPrimaryQlClass() { result = "NilLiteral" }
+}
+
+/**
+ * A Boolean literal.
+ * ```rb
+ * true
+ * false
+ * TRUE
+ * FALSE
+ * ```
+ */
+class BooleanLiteral extends Literal, TBooleanLiteral {
+ final override string getAPrimaryQlClass() { result = "BooleanLiteral" }
+
+ final override string toString() { result = this.getValueText() }
+
+ /**
+ * Holds if the Boolean literal is `true` or `TRUE`.
+ *
+ * This base definition never holds; a subclass overrides it.
+ */
+ predicate isTrue() { none() }
+
+ /**
+ * Holds if the Boolean literal is `false` or `FALSE`.
+ *
+ * This base definition never holds; a subclass overrides it.
+ */
+ predicate isFalse() { none() }
+
+ /** Gets the value of this Boolean literal, derived from `isTrue` and `isFalse`. */
+ boolean getValue() {
+ this.isTrue() and result = true
+ or
+ this.isFalse() and result = false
+ }
+}
+
+/** A Boolean literal backed by a `Ruby::True` node; `isTrue` always holds. */
+private class TrueLiteral extends BooleanLiteral, TTrueLiteral {
+ private Ruby::True g;
+
+ TrueLiteral() { this = TTrueLiteral(g) }
+
+ final override string getValueText() { result = g.getValue() }
+
+ final override predicate isTrue() { any() }
+}
+
+/** A Boolean literal backed by a `Ruby::False` node; `isFalse` always holds. */
+private class FalseLiteral extends BooleanLiteral, TFalseLiteral {
+ private Ruby::False g;
+
+ FalseLiteral() { this = TFalseLiteral(g) }
+
+ final override string getValueText() { result = g.getValue() }
+
+ final override predicate isFalse() { any() }
+}
+
+/**
+ * The base class for a component of a string: `StringTextComponent`,
+ * `StringEscapeSequenceComponent`, or `StringInterpolationComponent`.
+ */
+class StringComponent extends AstNode, TStringComponent {
+ /**
+ * Gets the source text for this string component. Has no result if this is
+ * a `StringInterpolationComponent`.
+ *
+ * This base definition has no result; subclasses override it.
+ */
+ string getValueText() { none() }
+}
+
+/**
+ * A component of a string (or string-like) literal that is simply text.
+ *
+ * For example, the following string literals all contain `StringTextComponent`
+ * components whose `getValueText()` returns `"foo"`:
+ *
+ * ```rb
+ * 'foo'
+ * "#{ bar() }foo"
+ * "foo#{ bar() } baz"
+ * ```
+ */
+class StringTextComponent extends StringComponent, TStringTextComponent {
+ // The underlying token; its value serves as both `toString` and value text.
+ private Ruby::Token g;
+
+ StringTextComponent() { this = TStringTextComponent(g) }
+
+ final override string toString() { result = g.getValue() }
+
+ final override string getValueText() { result = g.getValue() }
+
+ final override string getAPrimaryQlClass() { result = "StringTextComponent" }
+}
+
+/**
+ * An escape sequence component of a string or string-like literal.
+ */
+class StringEscapeSequenceComponent extends StringComponent, TStringEscapeSequenceComponent {
+ // The underlying `Ruby::EscapeSequence` node.
+ private Ruby::EscapeSequence g;
+
+ StringEscapeSequenceComponent() { this = TStringEscapeSequenceComponent(g) }
+
+ final override string toString() { result = g.getValue() }
+
+ // NOTE(review): presumably this is the escape sequence as written in the
+ // source, not the character it denotes - confirm against `Ruby::Token`.
+ final override string getValueText() { result = g.getValue() }
+
+ final override string getAPrimaryQlClass() { result = "StringEscapeSequenceComponent" }
+}
+
+/**
+ * An interpolation expression component of a string or string-like literal.
+ */
+class StringInterpolationComponent extends StringComponent, StmtSequence,
+ TStringInterpolationComponent {
+ // The underlying `Ruby::Interpolation` node.
+ private Ruby::Interpolation g;
+
+ StringInterpolationComponent() { this = TStringInterpolationComponent(g) }
+
+ final override string toString() { result = "#{...}" }
+
+ // The interpolated statements are the children of the underlying node.
+ final override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) }
+
+ // An interpolation has no constant source text.
+ final override string getValueText() { none() }
+
+ final override string getAPrimaryQlClass() { result = "StringInterpolationComponent" }
+}
+
+/**
+ * A string, symbol, regexp, or subshell literal.
+ */
+class StringlikeLiteral extends Literal, TStringlikeLiteral {
+ /**
+ * Gets the `n`th component of this string or string-like literal. The result
+ * will be one of `StringTextComponent`, `StringInterpolationComponent`, and
+ * `StringEscapeSequenceComponent`.
+ *
+ * In the following example, the result for `n = 0` is the
+ * `StringTextComponent` for `foo_`, and the result for `n = 1` is the
+ * `StringInterpolationComponent` for `Time.now`.
+ *
+ * ```rb
+ * "foo_#{ Time.now }"
+ * ```
+ *
+ * This base definition has no result; subclasses override it.
+ */
+ StringComponent getComponent(int n) { none() }
+
+ /**
+ * Gets the number of components in this string or string-like literal.
+ *
+ * For the empty string `""`, the result is 0.
+ *
+ * For the string `"foo"`, the result is 1: there is a single
+ * `StringTextComponent`.
+ *
+ * For the following example, the result is 3: there is a
+ * `StringTextComponent` for the substring `"foo"`; a
+ * `StringEscapeSequenceComponent` for the escaped quote; and a
+ * `StringInterpolationComponent` for the interpolation.
+ *
+ * ```rb
+ * "foo\"#{bar}"
+ * ```
+ */
+ final int getNumberOfComponents() { result = count(this.getComponent(_)) }
+
+ /** Gets the opening delimiter that `toString` prints for this kind of literal. */
+ private string getStartDelimiter() {
+ this instanceof TStringLiteral and
+ result = "\""
+ or
+ this instanceof TRegExpLiteral and
+ result = "/"
+ or
+ this instanceof TSimpleSymbolLiteral and
+ result = ":"
+ or
+ this instanceof TComplexSymbolLiteral and
+ result = ":\""
+ or
+ this instanceof THashKeySymbolLiteral and
+ result = ""
+ or
+ this instanceof TSubshellLiteral and
+ result = "`"
+ or
+ this instanceof THereDoc and
+ result = ""
+ }
+
+ /** Gets the closing delimiter that `toString` prints for this kind of literal. */
+ private string getEndDelimiter() {
+ this instanceof TStringLiteral and
+ result = "\""
+ or
+ this instanceof TRegExpLiteral and
+ result = "/"
+ or
+ this instanceof TSimpleSymbolLiteral and
+ result = ""
+ or
+ this instanceof TComplexSymbolLiteral and
+ result = "\""
+ or
+ this instanceof THashKeySymbolLiteral and
+ result = ""
+ or
+ this instanceof TSubshellLiteral and
+ result = "`"
+ or
+ this instanceof THereDoc and
+ result = ""
+ }
+
+ /**
+ * Gets the concatenated value text of all components, provided that none of
+ * them is an interpolation.
+ */
+ override string getValueText() {
+ // 0 components should result in the empty string
+ // if there are any interpolations, there should be no result
+ // otherwise, concatenate all the components
+ forall(StringComponent c | c = this.getComponent(_) |
+ not c instanceof StringInterpolationComponent
+ ) and
+ result =
+ concat(StringComponent c, int i | c = this.getComponent(i) | c.getValueText() order by i)
+ }
+
+ /**
+ * Gets a summary of this literal: the text of its components, with each
+ * component that is not a token shown as `#{...}`, truncated to 32 characters
+ * and wrapped in the delimiters for this kind of literal.
+ */
+ override string toString() {
+ exists(string full, string summary |
+ full =
+ concat(StringComponent c, int i, string s |
+ c = this.getComponent(i) and
+ (
+ s = toGenerated(c).(Ruby::Token).getValue()
+ or
+ not toGenerated(c) instanceof Ruby::Token and
+ s = "#{...}"
+ )
+ |
+ s order by i
+ ) and
+ (
+ // summary should be 32 chars max (incl. ellipsis)
+ full.length() > 32 and summary = full.substring(0, 29) + "..."
+ or
+ full.length() <= 32 and summary = full
+ ) and
+ result = this.getStartDelimiter() + summary + this.getEndDelimiter()
+ )
+ }
+
+ /** Gets a child of this node; components are reported under the name `getComponent`. */
+ final override AstNode getAChild(string pred) {
+ result = super.getAChild(pred)
+ or
+ pred = "getComponent" and result = this.getComponent(_)
+ }
+}
+
+/**
+ * A string literal.
+ *
+ * ```rb
+ * 'hello'
+ * "hello, #{name}"
+ * ```
+ */
+class StringLiteral extends StringlikeLiteral, TStringLiteral {
+ final override string getAPrimaryQlClass() { result = "StringLiteral" }
+}
+
+/** A string literal backed by a `Ruby::String` node. */
+private class RegularStringLiteral extends StringLiteral, TRegularStringLiteral {
+ private Ruby::String g;
+
+ RegularStringLiteral() { this = TRegularStringLiteral(g) }
+
+ final override StringComponent getComponent(int n) { toGenerated(result) = g.getChild(n) }
+}
+
+/** A string literal backed by a `Ruby::BareString` node. */
+private class BareStringLiteral extends StringLiteral, TBareStringLiteral {
+ private Ruby::BareString g;
+
+ BareStringLiteral() { this = TBareStringLiteral(g) }
+
+ final override StringComponent getComponent(int n) { toGenerated(result) = g.getChild(n) }
+}
+
+/**
+ * A regular expression literal.
+ *
+ * ```rb
+ * /[a-z]+/
+ * ```
+ */
+class RegExpLiteral extends StringlikeLiteral, TRegExpLiteral {
+ private Ruby::Regex g;
+
+ RegExpLiteral() { this = TRegExpLiteral(g) }
+
+ final override string getAPrimaryQlClass() { result = "RegExpLiteral" }
+
+ final override StringComponent getComponent(int i) { toGenerated(result) = g.getChild(i) }
+
+ /**
+ * Gets the regexp flags as a string.
+ *
+ * ```rb
+ * /foo/ # => ""
+ * /foo/i # => "i"
+ * /foo/imxo # => "imxo"
+ * ```
+ */
+ final string getFlagString() {
+ // For `/foo/i`, there should be an `/i` token in the database with `this`
+ // as its parent. Strip the delimiter, which can vary.
+ // The token with the greatest parent index is the one that is used.
+ result =
+ max(Ruby::Token t | t.getParent() = g | t.getValue().suffix(1) order by t.getParentIndex())
+ }
+
+ /**
+ * Holds if the regexp was specified using the `i` flag to indicate case
+ * insensitivity, as in the following example:
+ *
+ * ```rb
+ * /foo/i
+ * ```
+ */
+ final predicate hasCaseInsensitiveFlag() { this.getFlagString().charAt(_) = "i" }
+
+ /**
+ * Holds if the regex was specified using the `m` flag to indicate multiline
+ * mode. For example:
+ *
+ * ```rb
+ * /foo/m
+ * ```
+ */
+ final predicate hasMultilineFlag() { this.getFlagString().charAt(_) = "m" }
+
+ /**
+ * Holds if the regex was specified using the `x` flag to indicate
+ * 'free-spacing' mode (also known as 'extended' mode), meaning that
+ * whitespace and comments in the pattern are ignored. For example:
+ *
+ * ```rb
+ * %r{
+ * [a-zA-Z_] # starts with a letter or underscore
+ * \w* # and then zero or more letters/digits/underscores
+ * }x
+ * ```
+ */
+ final predicate hasFreeSpacingFlag() { this.getFlagString().charAt(_) = "x" }
+
+ /** Gets the root node of the parse tree of this regular expression. */
+ final RETV::RegExpTerm getParsed() { result = RETV::getParsedRegExp(this) }
+}
+
+/**
+ * A symbol literal.
+ *
+ * ```rb
+ * :foo
+ * :"foo bar"
+ * :"foo bar #{baz}"
+ * ```
+ */
+class SymbolLiteral extends StringlikeLiteral, TSymbolLiteral {
+ // No result for symbols that are also a `MethodName`.
+ // NOTE(review): presumably `MethodName` reports its own primary class - confirm.
+ final override string getAPrimaryQlClass() {
+ not this instanceof MethodName and result = "SymbolLiteral"
+ }
+}
+
+/** A symbol literal backed by a `Ruby::SimpleSymbol` node. */
+private class SimpleSymbolLiteral extends SymbolLiteral, TSimpleSymbolLiteral {
+ private Ruby::SimpleSymbol g;
+
+ SimpleSymbolLiteral() { this = TSimpleSymbolLiteral(g) }
+
+ // Tree-sitter gives us value text including the colon, which we skip.
+ final override string getValueText() { result = g.getValue().suffix(1) }
+
+ // Unlike `getValueText`, the textual representation keeps the leading colon.
+ final override string toString() { result = g.getValue() }
+}
+
+/** A symbol literal that is either a `DelimitedSymbolLiteral` or a `BareSymbolLiteral`. */
+private class ComplexSymbolLiteral extends SymbolLiteral, TComplexSymbolLiteral { }
+
+/** A symbol literal backed by a `Ruby::DelimitedSymbol` node. */
+private class DelimitedSymbolLiteral extends ComplexSymbolLiteral, TDelimitedSymbolLiteral {
+ private Ruby::DelimitedSymbol g;
+
+ DelimitedSymbolLiteral() { this = TDelimitedSymbolLiteral(g) }
+
+ final override StringComponent getComponent(int i) { toGenerated(result) = g.getChild(i) }
+}
+
+/** A symbol literal backed by a `Ruby::BareSymbol` node. */
+private class BareSymbolLiteral extends ComplexSymbolLiteral, TBareSymbolLiteral {
+ private Ruby::BareSymbol g;
+
+ BareSymbolLiteral() { this = TBareSymbolLiteral(g) }
+
+ final override StringComponent getComponent(int i) { toGenerated(result) = g.getChild(i) }
+}
+
+/** A symbol literal backed by a `Ruby::HashKeySymbol` node. */
+private class HashKeySymbolLiteral extends SymbolLiteral, THashKeySymbolLiteral {
+ private Ruby::HashKeySymbol g;
+
+ HashKeySymbolLiteral() { this = THashKeySymbolLiteral(g) }
+
+ final override string getValueText() { result = g.getValue() }
+
+ // Rendered with a leading colon prepended to the value text.
+ final override string toString() { result = ":" + this.getValueText() }
+}
+
+/**
+ * A subshell literal.
+ *
+ * ```rb
+ * `ls -l`
+ * %x(/bin/sh foo.sh)
+ * ```
+ */
+class SubshellLiteral extends StringlikeLiteral, TSubshellLiteral {
+ // The underlying `Ruby::Subshell` node.
+ private Ruby::Subshell g;
+
+ SubshellLiteral() { this = TSubshellLiteral(g) }
+
+ final override string getAPrimaryQlClass() { result = "SubshellLiteral" }
+
+ // The components are the children of the underlying node, in order.
+ final override StringComponent getComponent(int i) { toGenerated(result) = g.getChild(i) }
+}
+
+/**
+ * A character literal.
+ *
+ * ```rb
+ * ?a
+ * ?\u{61}
+ * ```
+ */
+class CharacterLiteral extends Literal, TCharacterLiteral {
+ // The underlying `Ruby::Character` node; its value serves as both value text
+ // and textual representation.
+ private Ruby::Character g;
+
+ CharacterLiteral() { this = TCharacterLiteral(g) }
+
+ final override string getValueText() { result = g.getValue() }
+
+ final override string toString() { result = g.getValue() }
+
+ final override string getAPrimaryQlClass() { result = "CharacterLiteral" }
+}
+
+/**
+ * A "here document". For example:
+ * ```rb
+ * query = <<SQL
+ * SELECT * FROM person
+ * WHERE age > 21
+ * SQL
+ * ```
+ */
+class HereDoc extends StringlikeLiteral, THereDoc {
+ private Ruby::HeredocBeginning g;
+
+ HereDoc() { this = THereDoc(g) }
+
+ final override string getAPrimaryQlClass() { result = "HereDoc" }
+
+ /**
+ * Holds if this here document is executed in a subshell.
+ * ```rb
+ * <<`COMMAND`
+ * echo "Hello world!"
+ * COMMAND
+ * ```
+ */
+ final predicate isSubShell() { getQuoteStyle() = "`" }
+
+ /**
+ * Gets the quotation mark (`"`, `'` or `` ` ``) that surrounds the here document identifier, if any.
+ * ```rb
+ * <<"IDENTIFIER"
+ * <<'IDENTIFIER'
+ * <<`IDENTIFIER`
+ * ```
+ */
+ final string getQuoteStyle() {
+ exists(string s |
+ s = g.getValue() and
+ s.charAt(s.length() - 1) = result and
+ result = ["'", "`", "\""]
+ )
+ }
+
+ /**
+ * Gets the indentation modifier (`-` or `~`) of the here document identifier, if any.
+ * ```rb
+ * <<~IDENTIFIER
+ * <<-IDENTIFIER
+ * < i
+ )
+ or
+ // Top-level methods are private members of the Object class
+ this.getEnclosingModule() instanceof Toplevel
+ }
+
+ final override Parameter getParameter(int n) {
+ toGenerated(result) = g.getParameters().getChild(n)
+ }
+
+ final override string toString() { result = this.getName() }
+}
+
+/** A singleton method. */
+class SingletonMethod extends MethodBase, TSingletonMethod {
+ private Ruby::SingletonMethod g;
+
+ SingletonMethod() { this = TSingletonMethod(g) }
+
+ final override string getAPrimaryQlClass() { result = "SingletonMethod" }
+
+ /** Gets the object of this singleton method. */
+ final Expr getObject() { toGenerated(result) = g.getObject() }
+
+ // The name is either the value of a plain token, or, when the name node is a
+ // `Ruby::Setter`, the setter's name with `=` appended.
+ final override string getName() {
+ result = g.getName().(Ruby::Token).getValue()
+ or
+ result = g.getName().(Ruby::Setter).getName().getValue() + "="
+ }
+
+ final override Parameter getParameter(int n) {
+ toGenerated(result) = g.getParameters().getChild(n)
+ }
+
+ final override string toString() { result = this.getName() }
+
+ /** Gets a child of this node; the object is reported under the name `getObject`. */
+ final override AstNode getAChild(string pred) {
+ result = super.getAChild(pred)
+ or
+ pred = "getObject" and result = this.getObject()
+ }
+}
+
+/**
+ * A lambda (anonymous method). For example:
+ * ```rb
+ * -> (x) { x + 1 }
+ * ```
+ */
+class Lambda extends Callable, BodyStmt, TLambda {
+ // The underlying `Ruby::Lambda` node.
+ private Ruby::Lambda g;
+
+ Lambda() { this = TLambda(g) }
+
+ final override string getAPrimaryQlClass() { result = "Lambda" }
+
+ final override Parameter getParameter(int n) {
+ toGenerated(result) = g.getParameters().getChild(n)
+ }
+
+ final override string toString() { result = "-> { ... }" }
+
+ // Combines the children reported by both supertypes.
+ final override AstNode getAChild(string pred) {
+ result = Callable.super.getAChild(pred)
+ or
+ result = BodyStmt.super.getAChild(pred)
+ }
+}
+
+/** A block. */
+class Block extends Callable, StmtSequence, Scope, TBlock {
+ // Combines the children reported by `Callable` and `StmtSequence`.
+ override AstNode getAChild(string pred) {
+ result = Callable.super.getAChild(pred)
+ or
+ result = StmtSequence.super.getAChild(pred)
+ }
+}
+
+/** A block enclosed within `do` and `end`. */
+class DoBlock extends Block, BodyStmt, TDoBlock {
+ // The underlying `Ruby::DoBlock` node.
+ private Ruby::DoBlock g;
+
+ DoBlock() { this = TDoBlock(g) }
+
+ final override Parameter getParameter(int n) {
+ toGenerated(result) = g.getParameters().getChild(n)
+ }
+
+ final override string toString() { result = "do ... end" }
+
+ // Combines the children reported by `Block` and `BodyStmt`.
+ final override AstNode getAChild(string pred) {
+ result = Block.super.getAChild(pred)
+ or
+ result = BodyStmt.super.getAChild(pred)
+ }
+
+ final override string getAPrimaryQlClass() { result = "DoBlock" }
+}
+
+/**
+ * A block defined using curly braces, e.g. in the following code:
+ * ```rb
+ * names.each { |name| puts name }
+ * ```
+ */
+class BraceBlock extends Block, TBraceBlock {
+ // The underlying `Ruby::Block` node.
+ private Ruby::Block g;
+
+ BraceBlock() { this = TBraceBlock(g) }
+
+ final override Parameter getParameter(int n) {
+ toGenerated(result) = g.getParameters().getChild(n)
+ }
+
+ // The statements are the children of the underlying node, in order.
+ final override Stmt getStmt(int i) { toGenerated(result) = g.getChild(i) }
+
+ final override string toString() { result = "{ ... }" }
+
+ final override string getAPrimaryQlClass() { result = "BraceBlock" }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Module.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Module.qll
new file mode 100644
index 00000000000..6a67c35a30d
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Module.qll
@@ -0,0 +1,365 @@
+private import codeql.ruby.AST
+private import codeql.ruby.ast.Constant
+private import internal.AST
+private import internal.Module
+private import internal.TreeSitter
+
+/**
+ * A run-time `module` or `class` value, either resolved to a name or left unresolved.
+ */
+class Module extends TModule {
+  /** Gets a declaration of this module, if any. */
+  ModuleBase getADeclaration() { this = result.getModule() }
+
+  /** Gets the super class of this module, if any. */
+  Module getSuperClass() { result = getSuperClass(this) }
+
+  /** Gets a module that this module `include`s. */
+  Module getAnIncludedModule() { result = getAnIncludedModule(this) }
+
+  /** Gets a module that this module `prepend`s. */
+  Module getAPrependedModule() { result = getAPrependedModule(this) }
+
+  /** Holds if at least one declaration of this module is a `ClassDeclaration`. */
+  pragma[noinline]
+  predicate isClass() { exists(ClassDeclaration decl | decl = this.getADeclaration()) }
+
+  /** Gets a textual representation of this module. */
+  string toString() {
+    exists(Namespace decl | this = TUnresolved(decl) and result = "...::" + decl.toString())
+    or
+    this = TResolved(result)
+  }
+
+  /** Gets the location of this module; a resolved module uses its best-ranked declaration. */
+  Location getLocation() {
+    result =
+      min(Namespace n, string qName, Location loc, int weight |
+        this = TResolved(qName) and
+        qName = namespaceDeclaration(n) and
+        loc = n.getLocation() and
+        if exists(loc.getFile().getRelativePath()) then weight = 0 else weight = 1
+      |
+        loc
+        order by
+          weight, count(n.getAStmt()) desc, loc.getFile().getAbsolutePath(), loc.getStartLine(),
+          loc.getStartColumn()
+      )
+    or
+    exists(Namespace decl | this = TUnresolved(decl) and result = decl.getLocation())
+  }
+}
+
+/**
+ * The base class for classes, singleton classes, modules, and source files (`Toplevel`).
+ */
+class ModuleBase extends BodyStmt, Scope, TModuleBase {
+  /** Gets a method that is a statement of this module/class. */
+  MethodBase getAMethod() { this.getAStmt() = result }
+
+  /** Gets the method named `name` in this module/class, if any. */
+  MethodBase getMethod(string name) { result.getName() = name and result = this.getAMethod() }
+
+  /** Gets a class declaration that is a statement of this module/class. */
+  ClassDeclaration getAClass() { this.getAStmt() = result }
+
+  /** Gets the class named `name` in this module/class, if any. */
+  ClassDeclaration getClass(string name) { result.getName() = name and result = this.getAClass() }
+
+  /** Gets a module declaration that is a statement of this module/class. */
+  ModuleDeclaration getAModule() { this.getAStmt() = result }
+
+  /** Gets the module named `name` in this module/class, if any. */
+  ModuleDeclaration getModule(string name) {
+    result.getName() = name and result = this.getAModule()
+  }
+
+  /**
+   * Gets the right-hand side assigned to the unqualified constant `name`, if any.
+   *
+   * For example, the value of `CONST` is `"const"` in
+   * ```rb
+   * module M
+   *   CONST = "const"
+   * end
+   * ```
+   */
+  Expr getConstant(string name) {
+    exists(AssignExpr assign, ConstantWriteAccess lhs |
+      lhs.getName() = name and
+      not exists(lhs.getScopeExpr()) and
+      lhs = assign.getLeftOperand() and
+      assign = this.getAStmt() and
+      result = assign.getRightOperand()
+    )
+  }
+
+  /** Gets the run-time value of this module or class; no result unless overridden. */
+  Module getModule() { none() }
+}
+
+/**
+ * A Ruby source file.
+ *
+ * ```rb
+ * def main
+ *   puts "hello world!"
+ * end
+ * main
+ * ```
+ */
+class Toplevel extends ModuleBase, TToplevel {
+  private Ruby::Program g;
+
+  Toplevel() { this = TToplevel(g) }
+
+  final override string getAPrimaryQlClass() { result = "Toplevel" }
+
+  /**
+   * Gets the `n`th (zero-based) `BEGIN` block, ranked by child index.
+   */
+  final BeginBlock getBeginBlock(int n) {
+    toGenerated(result) = rank[n + 1](int i, Ruby::BeginBlock b | b = g.getChild(i) | b order by i)
+  }
+
+  /** Gets a `BEGIN` block. */
+  final BeginBlock getABeginBlock() { result = this.getBeginBlock(_) }
+
+  final override AstNode getAChild(string pred) {
+    result = super.getAChild(pred)
+    or
+    pred = "getBeginBlock" and result = this.getBeginBlock(_)
+  }
+
+  /** Gets the resolved module `Object`, which represents every top-level. */
+  final override Module getModule() { result = TResolved("Object") }
+
+  /** Gets the base name of the file containing this top-level. */
+  final override string toString() { result = g.getLocation().getFile().getBaseName() }
+}
+
+/**
+ * A class or module definition.
+ *
+ * ```rb
+ * class Foo
+ *   def bar
+ *   end
+ * end
+ * module Bar
+ *   class Baz
+ *   end
+ * end
+ * ```
+ */
+class Namespace extends ModuleBase, ConstantWriteAccess, TNamespace {
+  override string getAPrimaryQlClass() { result = "Namespace" }
+
+  /**
+   * Gets the name of the module/class. In the following example, the result is
+   * `"Foo"`.
+   * ```rb
+   * class Foo
+   * end
+   * ```
+   *
+   * N.B. when the name is written with the scope resolution operator, as in the
+   * example below, the result is only the name being resolved, i.e. `"Bar"`.
+   * Use `getScopeExpr` to get the `Expr` for `Foo`.
+   * ```rb
+   * module Foo::Bar
+   * end
+   * ```
+   */
+  override string getName() { none() }
+
+  /**
+   * Gets the scope expression used in the scope resolution operation of the
+   * module/class name, if any.
+   *
+   * In the following example, the result is the `Expr` for `Foo`.
+   *
+   * ```rb
+   * module Foo::Bar
+   * end
+   * ```
+   *
+   * The following example has no result, because its name involves no scope
+   * resolution operation.
+   *
+   * ```rb
+   * module Baz
+   * end
+   * ```
+   */
+  override Expr getScopeExpr() { none() }
+
+  /**
+   * Holds if the module/class name uses the scope resolution operator to refer to
+   * the global scope, as in this example:
+   *
+   * ```rb
+   * class ::Foo
+   * end
+   * ```
+   */
+  override predicate hasGlobalScope() { none() }
+
+  final override Module getModule() {
+    // The unresolved module, plus the resolved one for each `namespaceDeclaration` name.
+    result = TUnresolved(this) or
+    result = TResolved(namespaceDeclaration(this))
+  }
+
+  override AstNode getAChild(string pred) {
+    result = ConstantWriteAccess.super.getAChild(pred) or
+    result = ModuleBase.super.getAChild(pred)
+  }
+
+  final override string toString() { result = ConstantWriteAccess.super.toString() }
+}
+
+/**
+ * A class definition.
+ *
+ * ```rb
+ * class Foo
+ *   def bar
+ *   end
+ * end
+ * ```
+ */
+class ClassDeclaration extends Namespace, TClassDeclaration {
+  private Ruby::Class g;
+
+  ClassDeclaration() { this = TClassDeclaration(g) }
+
+  final override string getAPrimaryQlClass() { result = "ClassDeclaration" }
+
+  /**
+   * Gets the `Expr` used as the superclass in the class definition, if any.
+   *
+   * For a plain constant superclass, as below, the result is a `ConstantReadAccess`.
+   * ```rb
+   * class Foo < Bar
+   * end
+   * ```
+   *
+   * When the superclass is a call expression, as below, the result is a
+   * `Call`.
+   * ```rb
+   * class C < foo()
+   * end
+   * ```
+   */
+  final Expr getSuperclassExpr() { g.getSuperclass().getChild() = toGenerated(result) }
+
+  final override string getName() {
+    result = g.getName().(Ruby::ScopeResolution).getName().(Ruby::Token).getValue() or
+    result = g.getName().(Ruby::Token).getValue()
+  }
+
+  final override Expr getScopeExpr() {
+    g.getName().(Ruby::ScopeResolution).getScope() = toGenerated(result)
+  }
+
+  final override predicate hasGlobalScope() {
+    exists(Ruby::ScopeResolution qualifiedName |
+      not exists(qualifiedName.getScope()) and
+      qualifiedName = g.getName()
+    )
+  }
+
+  final override AstNode getAChild(string pred) {
+    pred = "getSuperclassExpr" and result = this.getSuperclassExpr()
+    or
+    result = super.getAChild(pred)
+  }
+}
+
+/**
+ * A singleton class definition, written `class << object`.
+ *
+ * ```rb
+ * class << foo
+ *   def bar
+ *     p 'bar'
+ *   end
+ * end
+ * ```
+ */
+class SingletonClass extends ModuleBase, TSingletonClass {
+  private Ruby::SingletonClass g;
+
+  SingletonClass() { this = TSingletonClass(g) }
+
+  final override string getAPrimaryQlClass() { result = "SingletonClass" }
+
+  final override string toString() { result = "class << ..." }
+
+  /**
+   * Gets the expression that evaluates to the object whose singleton class is
+   * being defined. In the following example, the result is the `Expr` for `foo`:
+   *
+   * ```rb
+   * class << foo
+   * end
+   * ```
+   */
+  final Expr getValue() { g.getValue() = toGenerated(result) }
+
+  final override AstNode getAChild(string pred) {
+    pred = "getValue" and result = this.getValue()
+    or
+    result = super.getAChild(pred)
+  }
+}
+
+/**
+ * A module definition.
+ *
+ * ```rb
+ * module Foo
+ *   class Bar
+ *   end
+ * end
+ * ```
+ *
+ * N.B. each instance of this class is one syntactic module definition. In the
+ * following example, classes `Bar` and `Baz` are both defined in the module
+ * `Foo`, but through two syntactically distinct definitions, so the database
+ * will contain two instances of `ModuleDeclaration`.
+ *
+ * ```rb
+ * module Foo
+ *   class Bar; end
+ * end
+ *
+ * module Foo
+ *   class Baz; end
+ * end
+ * ```
+ */
+class ModuleDeclaration extends Namespace, TModuleDeclaration {
+  private Ruby::Module g;
+
+  ModuleDeclaration() { this = TModuleDeclaration(g) }
+
+  final override string getAPrimaryQlClass() { result = "ModuleDeclaration" }
+
+  final override predicate hasGlobalScope() {
+    exists(Ruby::ScopeResolution qualifiedName |
+      not exists(qualifiedName.getScope()) and
+      qualifiedName = g.getName()
+    )
+  }
+
+  final override Expr getScopeExpr() {
+    g.getName().(Ruby::ScopeResolution).getScope() = toGenerated(result)
+  }
+
+  final override string getName() {
+    result = g.getName().(Ruby::ScopeResolution).getName().(Ruby::Token).getValue() or
+    result = g.getName().(Ruby::Token).getValue()
+  }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Operation.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Operation.qll
new file mode 100644
index 00000000000..236439700ce
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Operation.qll
@@ -0,0 +1,620 @@
+private import codeql.ruby.AST
+private import internal.AST
+private import internal.TreeSitter
+private import internal.Operation
+
+/**
+ * An operation.
+ *
+ * The QL root class of all operations: unary and binary operations, and assignments.
+ */
+class Operation extends Expr instanceof OperationImpl {
+  /** Gets the operator of this operation. */
+  final string getOperator() { super.getOperatorImpl() = result }
+
+  /** Gets an operand of this operation. */
+  final Expr getAnOperand() { super.getAnOperandImpl() = result }
+
+  override AstNode getAChild(string pred) {
+    pred = "getAnOperand" and result = this.getAnOperand()
+    or
+    result = Expr.super.getAChild(pred)
+  }
+}
+
+/** A unary operation, i.e. an operator applied to a single operand. */
+class UnaryOperation extends Operation, MethodCall instanceof UnaryOperationImpl {
+  /** Gets the operand of this unary operation. */
+  final Expr getOperand() { super.getOperandImpl() = result }
+
+  final override string toString() { result = this.getOperator() + " ..." }
+
+  final override AstNode getAChild(string pred) {
+    pred = "getOperand" and result = this.getOperand()
+    or
+    result = MethodCall.super.getAChild(pred)
+    or
+    result = Operation.super.getAChild(pred)
+  }
+}
+
+/** A unary logical operation, such as a `NotExpr`. */
+class UnaryLogicalOperation extends UnaryOperation, TUnaryLogicalOperation { }
+
+/**
+ * A logical NOT operation, written with either the `!` or the `not` operator.
+ * ```rb
+ * !x.nil?
+ * not params.empty?
+ * ```
+ */
+class NotExpr extends UnaryLogicalOperation, TNotExpr {
+ final override string getAPrimaryQlClass() { result = "NotExpr" }
+}
+
+/** A unary arithmetic operation, such as a `UnaryPlusExpr` or a `UnaryMinusExpr`. */
+class UnaryArithmeticOperation extends UnaryOperation, TUnaryArithmeticOperation { }
+
+/**
+ * A unary plus expression, i.e. the unary `+` operator applied to an operand.
+ * ```rb
+ * + a
+ * ```
+ */
+class UnaryPlusExpr extends UnaryArithmeticOperation, TUnaryPlusExpr {
+ final override string getAPrimaryQlClass() { result = "UnaryPlusExpr" }
+}
+
+/**
+ * A unary minus expression, i.e. the unary `-` operator applied to an operand.
+ * ```rb
+ * - a
+ * ```
+ */
+class UnaryMinusExpr extends UnaryArithmeticOperation, TUnaryMinusExpr {
+ final override string getAPrimaryQlClass() { result = "UnaryMinusExpr" }
+}
+
+/**
+ * A splat expression, such as `*args` in the following call:
+ * ```rb
+ * foo(*args)
+ * ```
+ */
+class SplatExpr extends UnaryOperation, TSplatExpr {
+ final override string getAPrimaryQlClass() { result = "SplatExpr" }
+}
+
+/**
+ * A hash-splat (or 'double-splat') expression, such as `**options` in the following call:
+ * ```rb
+ * foo(**options)
+ * ```
+ */
+class HashSplatExpr extends UnaryOperation, THashSplatExpr {
+ private Ruby::HashSplatArgument g;
+
+ HashSplatExpr() { this = THashSplatExpr(g) }
+
+ final override string getAPrimaryQlClass() { result = "HashSplatExpr" }
+}
+
+/** A unary bitwise operation, such as a `ComplementExpr`. */
+class UnaryBitwiseOperation extends UnaryOperation, TUnaryBitwiseOperation { }
+
+/**
+ * A complement (bitwise NOT) expression, i.e. the unary `~` operator applied to an operand.
+ * ```rb
+ * ~x
+ * ```
+ */
+class ComplementExpr extends UnaryBitwiseOperation, TComplementExpr {
+ final override string getAPrimaryQlClass() { result = "ComplementExpr" }
+}
+
+/**
+ * A `defined?` expression, i.e. a use of the special `defined?` operator.
+ * ```rb
+ * defined? some_method
+ * ```
+ */
+class DefinedExpr extends UnaryOperation, TDefinedExpr {
+ final override string getAPrimaryQlClass() { result = "DefinedExpr" }
+}
+
+/** A binary operation, i.e. an operator applied to a left and a right operand. */
+class BinaryOperation extends Operation, MethodCall instanceof BinaryOperationImpl {
+  /** Gets the left operand of this binary operation. */
+  final Stmt getLeftOperand() { super.getLeftOperandImpl() = result }
+
+  /** Gets the right operand of this binary operation. */
+  final Stmt getRightOperand() { super.getRightOperandImpl() = result }
+
+  final override string toString() { result = "... " + this.getOperator() + " ..." }
+
+  override AstNode getAChild(string pred) {
+    pred = "getLeftOperand" and result = this.getLeftOperand()
+    or
+    pred = "getRightOperand" and result = this.getRightOperand()
+    or
+    result = MethodCall.super.getAChild(pred)
+    or
+    result = Operation.super.getAChild(pred)
+  }
+}
+
+/**
+ * A binary arithmetic operation, such as an `AddExpr`, a `MulExpr` or an `ExponentExpr`.
+ */
+class BinaryArithmeticOperation extends BinaryOperation, TBinaryArithmeticOperation { }
+
+/**
+ * An add expression, i.e. a binary `+` operation.
+ * ```rb
+ * x + 1
+ * ```
+ */
+class AddExpr extends BinaryArithmeticOperation, TAddExpr {
+ final override string getAPrimaryQlClass() { result = "AddExpr" }
+}
+
+/**
+ * A subtract expression, i.e. a binary `-` operation.
+ * ```rb
+ * x - 3
+ * ```
+ */
+class SubExpr extends BinaryArithmeticOperation, TSubExpr {
+ final override string getAPrimaryQlClass() { result = "SubExpr" }
+}
+
+/**
+ * A multiply expression, i.e. a binary `*` operation.
+ * ```rb
+ * x * 10
+ * ```
+ */
+class MulExpr extends BinaryArithmeticOperation, TMulExpr {
+ final override string getAPrimaryQlClass() { result = "MulExpr" }
+}
+
+/**
+ * A divide expression, i.e. a binary `/` operation.
+ * ```rb
+ * x / y
+ * ```
+ */
+class DivExpr extends BinaryArithmeticOperation, TDivExpr {
+ final override string getAPrimaryQlClass() { result = "DivExpr" }
+}
+
+/**
+ * A modulo expression, i.e. a binary `%` operation.
+ * ```rb
+ * x % 2
+ * ```
+ */
+class ModuloExpr extends BinaryArithmeticOperation, TModuloExpr {
+ final override string getAPrimaryQlClass() { result = "ModuloExpr" }
+}
+
+/**
+ * An exponent expression, i.e. a binary `**` operation.
+ * ```rb
+ * x ** 2
+ * ```
+ */
+class ExponentExpr extends BinaryArithmeticOperation, TExponentExpr {
+ final override string getAPrimaryQlClass() { result = "ExponentExpr" }
+}
+
+/**
+ * A binary logical operation, such as a `LogicalAndExpr` or a `LogicalOrExpr`.
+ */
+class BinaryLogicalOperation extends BinaryOperation, TBinaryLogicalOperation { }
+
+/**
+ * A logical AND operation, written with either the `and` or the `&&` operator.
+ * ```rb
+ * x and y
+ * a && b
+ * ```
+ */
+class LogicalAndExpr extends BinaryLogicalOperation, TLogicalAndExpr {
+ final override string getAPrimaryQlClass() { result = "LogicalAndExpr" }
+}
+
+/**
+ * A logical OR operation, written with either the `or` or the `||` operator.
+ * ```rb
+ * x or y
+ * a || b
+ * ```
+ */
+class LogicalOrExpr extends BinaryLogicalOperation, TLogicalOrExpr {
+ final override string getAPrimaryQlClass() { result = "LogicalOrExpr" }
+}
+
+/**
+ * A binary bitwise operation, such as a shift or a bitwise AND, OR or XOR.
+ */
+class BinaryBitwiseOperation extends BinaryOperation, TBinaryBitwiseOperation { }
+
+/**
+ * A left-shift operation, i.e. a binary `<<` operation.
+ * ```rb
+ * x << n
+ * ```
+ */
+class LShiftExpr extends BinaryBitwiseOperation, TLShiftExpr {
+ final override string getAPrimaryQlClass() { result = "LShiftExpr" }
+}
+
+/**
+ * A right-shift operation, i.e. a binary `>>` operation.
+ * ```rb
+ * x >> n
+ * ```
+ */
+class RShiftExpr extends BinaryBitwiseOperation, TRShiftExpr {
+ final override string getAPrimaryQlClass() { result = "RShiftExpr" }
+}
+
+/**
+ * A bitwise AND operation, i.e. a binary `&` operation.
+ * ```rb
+ * x & 0xff
+ * ```
+ */
+class BitwiseAndExpr extends BinaryBitwiseOperation, TBitwiseAndExpr {
+ final override string getAPrimaryQlClass() { result = "BitwiseAndExpr" }
+}
+
+/**
+ * A bitwise OR operation, i.e. a binary `|` operation.
+ * ```rb
+ * x | 0x01
+ * ```
+ */
+class BitwiseOrExpr extends BinaryBitwiseOperation, TBitwiseOrExpr {
+ final override string getAPrimaryQlClass() { result = "BitwiseOrExpr" }
+}
+
+/**
+ * An XOR (exclusive OR) operation, i.e. a binary `^` operation.
+ * ```rb
+ * x ^ y
+ * ```
+ */
+class BitwiseXorExpr extends BinaryBitwiseOperation, TBitwiseXorExpr {
+ final override string getAPrimaryQlClass() { result = "BitwiseXorExpr" }
+}
+
+/**
+ * A comparison operation. That is, either an equality operation
+ * (`EqualityOperation`) or a relational operation (`RelationalOperation`).
+ */
+class ComparisonOperation extends BinaryOperation, TComparisonOperation { }
+
+/**
+ * An equality operation, such as an `EqExpr`, an `NEExpr` or a `CaseEqExpr`.
+ */
+class EqualityOperation extends ComparisonOperation, TEqualityOperation { }
+
+/**
+ * An equals expression, i.e. a binary `==` operation.
+ * ```rb
+ * x == y
+ * ```
+ */
+class EqExpr extends EqualityOperation, TEqExpr {
+ final override string getAPrimaryQlClass() { result = "EqExpr" }
+}
+
+/**
+ * A not-equals expression, i.e. a binary `!=` operation.
+ * ```rb
+ * x != y
+ * ```
+ */
+class NEExpr extends EqualityOperation, TNEExpr {
+ final override string getAPrimaryQlClass() { result = "NEExpr" }
+}
+
+/**
+ * A case-equality (or 'threequals') expression, i.e. a binary `===` operation.
+ * ```rb
+ * String === "foo"
+ * ```
+ */
+class CaseEqExpr extends EqualityOperation, TCaseEqExpr {
+ final override string getAPrimaryQlClass() { result = "CaseEqExpr" }
+}
+
+/**
+ * A relational operation, that is, one of `<=`, `<`, `>`, or `>=`.
+ */
+class RelationalOperation extends ComparisonOperation, TRelationalOperation {
+  /** Gets the lesser operand; no result unless overridden. */
+  Expr getLesserOperand() { none() }
+
+  /** Gets the greater operand; no result unless overridden. */
+  Expr getGreaterOperand() { none() }
+
+  final override AstNode getAChild(string pred) {
+    pred = "getLesserOperand" and result = this.getLesserOperand()
+    or
+    pred = "getGreaterOperand" and result = this.getGreaterOperand()
+    or
+    result = super.getAChild(pred)
+  }
+}
+
+/**
+ * A greater-than expression; its left operand is the greater operand.
+ * ```rb
+ * x > 0
+ * ```
+ */
+class GTExpr extends RelationalOperation, TGTExpr {
+  final override string getAPrimaryQlClass() { result = "GTExpr" }
+
+  final override Expr getLesserOperand() { this.getRightOperand() = result }
+
+  final override Expr getGreaterOperand() { this.getLeftOperand() = result }
+}
+
+/**
+ * A greater-than-or-equal expression; its left operand is the greater operand.
+ * ```rb
+ * x >= 0
+ * ```
+ */
+class GEExpr extends RelationalOperation, TGEExpr {
+  final override string getAPrimaryQlClass() { result = "GEExpr" }
+
+  final override Expr getLesserOperand() { this.getRightOperand() = result }
+
+  final override Expr getGreaterOperand() { this.getLeftOperand() = result }
+}
+
+/**
+ * A less-than expression; its right operand is the greater operand.
+ * ```rb
+ * x < 10
+ * ```
+ */
+class LTExpr extends RelationalOperation, TLTExpr {
+  final override string getAPrimaryQlClass() { result = "LTExpr" }
+
+  final override Expr getLesserOperand() { this.getLeftOperand() = result }
+
+  final override Expr getGreaterOperand() { this.getRightOperand() = result }
+}
+
+/**
+ * A less-than-or-equal expression; its right operand is the greater operand.
+ * ```rb
+ * x <= 10
+ * ```
+ */
+class LEExpr extends RelationalOperation, TLEExpr {
+  final override string getAPrimaryQlClass() { result = "LEExpr" }
+
+  final override Expr getLesserOperand() { this.getLeftOperand() = result }
+
+  final override Expr getGreaterOperand() { this.getRightOperand() = result }
+}
+
+/**
+ * A three-way comparison ('spaceship') expression, i.e. a binary `<=>` operation.
+ * ```rb
+ * a <=> b
+ * ```
+ */
+class SpaceshipExpr extends BinaryOperation, TSpaceshipExpr {
+ final override string getAPrimaryQlClass() { result = "SpaceshipExpr" }
+}
+
+/**
+ * A regexp match expression, i.e. a binary `=~` operation.
+ * ```rb
+ * input =~ /\d/
+ * ```
+ */
+class RegExpMatchExpr extends BinaryOperation, TRegExpMatchExpr {
+ final override string getAPrimaryQlClass() { result = "RegExpMatchExpr" }
+}
+
+/**
+ * A regexp-doesn't-match expression, i.e. a binary `!~` operation.
+ * ```rb
+ * input !~ /\d/
+ * ```
+ */
+class NoRegExpMatchExpr extends BinaryOperation, TNoRegExpMatchExpr {
+ final override string getAPrimaryQlClass() { result = "NoRegExpMatchExpr" }
+}
+
+/**
+ * A binary assignment operation, including `=`, `+=`, `&=`, etc.
+ *
+ * This is the QL base class for all assignments (`AssignExpr` and `AssignOperation`).
+ */
+class Assignment extends Operation instanceof AssignmentImpl {
+  /** Gets the left hand side of this assignment. */
+  final Pattern getLeftOperand() { result = super.getLeftOperandImpl() }
+
+  /** Gets the right hand side of this assignment. */
+  final Expr getRightOperand() { result = super.getRightOperandImpl() }
+
+  final override string toString() { result = "... " + this.getOperator() + " ..." }
+
+  override AstNode getAChild(string pred) {
+    result = Operation.super.getAChild(pred)
+    or
+    pred = "getLeftOperand" and result = this.getLeftOperand()
+    or
+    pred = "getRightOperand" and result = this.getRightOperand()
+  }
+}
+
+/**
+ * A plain assignment, i.e. an assignment operation with the operator `=`.
+ * ```rb
+ * x = 123
+ * ```
+ */
+class AssignExpr extends Assignment, TAssignExpr {
+ final override string getAPrimaryQlClass() { result = "AssignExpr" }
+}
+
+/**
+ * A binary assignment operation other than `=`, such as `+=`, `||=` or `<<=`.
+ */
+class AssignOperation extends Assignment instanceof AssignOperationImpl { }
+
+/**
+ * An arithmetic assignment operation, i.e. one of `+=`, `-=`, `*=`, `/=`, `**=`, and `%=`.
+ */
+class AssignArithmeticOperation extends AssignOperation, TAssignArithmeticOperation { }
+
+/**
+ * An add-and-assign expression, using the `+=` operator.
+ * ```rb
+ * x += 1
+ * ```
+ */
+class AssignAddExpr extends AssignArithmeticOperation, TAssignAddExpr {
+ final override string getAPrimaryQlClass() { result = "AssignAddExpr" }
+}
+
+/**
+ * A subtract-and-assign expression, using the `-=` operator.
+ * ```rb
+ * x -= 3
+ * ```
+ */
+class AssignSubExpr extends AssignArithmeticOperation, TAssignSubExpr {
+ final override string getAPrimaryQlClass() { result = "AssignSubExpr" }
+}
+
+/**
+ * A multiply-and-assign expression, using the `*=` operator.
+ * ```rb
+ * x *= 10
+ * ```
+ */
+class AssignMulExpr extends AssignArithmeticOperation, TAssignMulExpr {
+ final override string getAPrimaryQlClass() { result = "AssignMulExpr" }
+}
+
+/**
+ * A divide-and-assign expression, using the `/=` operator.
+ * ```rb
+ * x /= y
+ * ```
+ */
+class AssignDivExpr extends AssignArithmeticOperation, TAssignDivExpr {
+ final override string getAPrimaryQlClass() { result = "AssignDivExpr" }
+}
+
+/**
+ * A modulo-and-assign expression, using the `%=` operator.
+ * ```rb
+ * x %= 4
+ * ```
+ */
+class AssignModuloExpr extends AssignArithmeticOperation, TAssignModuloExpr {
+ final override string getAPrimaryQlClass() { result = "AssignModuloExpr" }
+}
+
+/**
+ * An exponentiate-and-assign expression, using the `**=` operator.
+ * ```rb
+ * x **= 2
+ * ```
+ */
+class AssignExponentExpr extends AssignArithmeticOperation, TAssignExponentExpr {
+ final override string getAPrimaryQlClass() { result = "AssignExponentExpr" }
+}
+
+/**
+ * A logical assignment operation, i.e. one of `&&=` and `||=`.
+ */
+class AssignLogicalOperation extends AssignOperation, TAssignLogicalOperation { }
+
+/**
+ * A logical AND assignment operation, using the `&&=` operator.
+ * ```rb
+ * x &&= y.even?
+ * ```
+ */
+class AssignLogicalAndExpr extends AssignLogicalOperation, TAssignLogicalAndExpr {
+ final override string getAPrimaryQlClass() { result = "AssignLogicalAndExpr" }
+}
+
+/**
+ * A logical OR assignment operation, using the `||=` operator.
+ * ```rb
+ * x ||= y
+ * ```
+ */
+class AssignLogicalOrExpr extends AssignLogicalOperation, TAssignLogicalOrExpr {
+ final override string getAPrimaryQlClass() { result = "AssignLogicalOrExpr" }
+}
+
+/**
+ * A bitwise assignment operation, i.e. one of `<<=`, `>>=`, `&=`, `|=` and `^=`.
+ */
+class AssignBitwiseOperation extends AssignOperation, TAssignBitwiseOperation { }
+
+/**
+ * A left-shift assignment operation, using the `<<=` operator.
+ * ```rb
+ * x <<= 3
+ * ```
+ */
+class AssignLShiftExpr extends AssignBitwiseOperation, TAssignLShiftExpr {
+ final override string getAPrimaryQlClass() { result = "AssignLShiftExpr" }
+}
+
+/**
+ * A right-shift assignment operation, using the `>>=` operator.
+ * ```rb
+ * x >>= 3
+ * ```
+ */
+class AssignRShiftExpr extends AssignBitwiseOperation, TAssignRShiftExpr {
+ final override string getAPrimaryQlClass() { result = "AssignRShiftExpr" }
+}
+
+/**
+ * A bitwise AND assignment operation, using the `&=` operator.
+ * ```rb
+ * x &= 0xff
+ * ```
+ */
+class AssignBitwiseAndExpr extends AssignBitwiseOperation, TAssignBitwiseAndExpr {
+ final override string getAPrimaryQlClass() { result = "AssignBitwiseAndExpr" }
+}
+
+/**
+ * A bitwise OR assignment operation, using the `|=` operator.
+ * ```rb
+ * x |= 0x01
+ * ```
+ */
+class AssignBitwiseOrExpr extends AssignBitwiseOperation, TAssignBitwiseOrExpr {
+ final override string getAPrimaryQlClass() { result = "AssignBitwiseOrExpr" }
+}
+
+/**
+ * An XOR (exclusive OR) assignment operation, using the `^=` operator.
+ * ```rb
+ * x ^= y
+ * ```
+ */
+class AssignBitwiseXorExpr extends AssignBitwiseOperation, TAssignBitwiseXorExpr {
+ final override string getAPrimaryQlClass() { result = "AssignBitwiseXorExpr" }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Parameter.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Parameter.qll
new file mode 100644
index 00000000000..6e6b5395d43
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Parameter.qll
@@ -0,0 +1,248 @@
+private import codeql.ruby.AST
+private import internal.AST
+private import internal.Variable
+private import internal.Parameter
+private import internal.TreeSitter
+
+/** A parameter of a callable. */
+class Parameter extends AstNode, TParameter {
+  /** Gets the callable that this parameter belongs to. */
+  final Callable getCallable() { this = result.getAParameter() }
+
+  /** Gets the zero-based position of this parameter. */
+  final int getPosition() { exists(Callable c | this = c.getParameter(result)) }
+
+  /** Gets a variable introduced by this parameter; no result unless overridden. */
+  LocalVariable getAVariable() { none() }
+
+  /** Gets the variable named `name` introduced by this parameter. */
+  final LocalVariable getVariable(string name) {
+    result.getName() = name and
+    result = this.getAVariable()
+  }
+}
+
+/**
+ * A parameter defined using a pattern, i.e. a parameter that is also a `Pattern`.
+ *
+ * This includes both simple parameters and tuple parameters (`TuplePatternParameter`).
+ */
+class PatternParameter extends Parameter, Pattern, TPatternParameter {
+ override LocalVariable getAVariable() { result = Pattern.super.getAVariable() }
+}
+
+/** A parameter that is written as a tuple pattern. */
+class TuplePatternParameter extends PatternParameter, TuplePattern, TTuplePatternParameter {
+  final override string getAPrimaryQlClass() { result = "TuplePatternParameter" }
+
+  final override LocalVariable getAVariable() { TuplePattern.super.getAVariable() = result }
+
+  override AstNode getAChild(string pred) { TuplePattern.super.getAChild(pred) = result }
+}
+
+/** A parameter that has a name. */
+class NamedParameter extends Parameter, TNamedParameter {
+  /** Gets the name of this parameter; no result unless overridden. */
+  string getName() { none() }
+
+  /** Holds if the name of this parameter is `name`. */
+  final predicate hasName(string name) { name = this.getName() }
+
+  /** Gets the variable introduced by this parameter; no result unless overridden. */
+  LocalVariable getVariable() { none() }
+
+  override LocalVariable getAVariable() { this.getVariable() = result }
+
+  /** Gets the access that defines the underlying local variable. */
+  final VariableAccess getDefiningAccess() { this.getVariable().getDefiningAccess() = result }
+
+  /** Gets an access to this parameter. */
+  final VariableAccess getAnAccess() { this.getVariable().getAnAccess() = result }
+
+  override AstNode getAChild(string pred) {
+    pred = "getDefiningAccess" and
+    result = this.getDefiningAccess()
+    or
+    result = super.getAChild(pred)
+  }
+}
+
+/** A simple (normal) parameter, written as a plain identifier. */
+class SimpleParameter extends NamedParameter, PatternParameter, VariablePattern, TSimpleParameter {
+  private Ruby::Identifier g;
+
+  SimpleParameter() { this = TSimpleParameter(g) }
+
+  final override string getAPrimaryQlClass() { result = "SimpleParameter" }
+
+  final override string getName() { g.getValue() = result }
+
+  final override string toString() { result = this.getName() }
+
+  final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g) }
+
+  final override LocalVariable getAVariable() { this.getVariable() = result }
+}
+
+/**
+ * A block parameter, i.e. a parameter prefixed with `&`. For example, `&bar` in:
+ * ```rb
+ * def foo(&bar)
+ *   bar.call if block_given?
+ * end
+ * ```
+ */
+class BlockParameter extends NamedParameter, TBlockParameter {
+  private Ruby::BlockParameter g;
+
+  BlockParameter() { this = TBlockParameter(g) }
+
+  final override string getAPrimaryQlClass() { result = "BlockParameter" }
+
+  final override string getName() { g.getName().getValue() = result }
+
+  final override string toString() { result = "&" + this.getName() }
+
+  final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g.getName()) }
+}
+
+/**
+ * A hash-splat (or double-splat) parameter, i.e. a parameter prefixed with
+ * `**`. For example, `**options` in the following code:
+ * ```rb
+ * def foo(bar, **options)
+ *   ...
+ * end
+ * ```
+ */
+class HashSplatParameter extends NamedParameter, THashSplatParameter {
+  private Ruby::HashSplatParameter g;
+
+  HashSplatParameter() { this = THashSplatParameter(g) }
+
+  final override string getAPrimaryQlClass() { result = "HashSplatParameter" }
+
+  final override string getName() { g.getName().getValue() = result }
+
+  final override string toString() { result = "**" + this.getName() }
+
+  final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g.getName()) }
+}
+
+/**
+ * A keyword parameter, including a default value if the parameter is optional.
+ * In the following example, `foo` is a keyword parameter with a default value
+ * of `0`, and `bar` is a mandatory keyword parameter (one with no default
+ * value).
+ * ```rb
+ * def f(foo: 0, bar:)
+ *   foo * 10 + bar
+ * end
+ * ```
+ */
+class KeywordParameter extends NamedParameter, TKeywordParameter {
+  private Ruby::KeywordParameter g;
+
+  KeywordParameter() { this = TKeywordParameter(g) }
+
+  final override string getAPrimaryQlClass() { result = "KeywordParameter" }
+
+  final override string getName() { g.getName().getValue() = result }
+
+  /**
+   * Gets the default value, i.e. the value the parameter takes when the caller
+   * does not supply one. This predicate has no result for a mandatory
+   * parameter, since it has no default value.
+   */
+  final Expr getDefaultValue() { g.getValue() = toGenerated(result) }
+
+  /**
+   * Holds if the parameter is optional, that is, it has a default value that is
+   * used when the caller omits this parameter.
+   */
+  final predicate isOptional() { exists(this.getDefaultValue()) }
+
+  final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g.getName()) }
+
+  final override string toString() { result = this.getName() }
+
+  final override Location getLocation() { g.getName().getLocation() = result }
+
+  final override AstNode getAChild(string pred) {
+    pred = "getDefaultValue" and result = this.getDefaultValue()
+    or
+    result = super.getAChild(pred)
+  }
+}
+
+/**
+ * An optional parameter, i.e. a parameter declared with a default value. For
+ * example, the parameter `name` in the following code:
+ * ```rb
+ * def say_hello(name = 'Anon')
+ *   puts "hello #{name}"
+ * end
+ * ```
+ */
+class OptionalParameter extends NamedParameter, TOptionalParameter {
+  private Ruby::OptionalParameter g;
+
+  OptionalParameter() { this = TOptionalParameter(g) }
+
+  final override string getAPrimaryQlClass() { result = "OptionalParameter" }
+
+  final override string getName() { g.getName().getValue() = result }
+
+  /**
+   * Gets the default value, i.e. the value the parameter takes when the caller
+   * does not supply one.
+   */
+  final Expr getDefaultValue() { g.getValue() = toGenerated(result) }
+
+  final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g.getName()) }
+
+  final override string toString() { result = this.getName() }
+
+  final override Location getLocation() { g.getName().getLocation() = result }
+
+  final override AstNode getAChild(string pred) {
+    pred = "getDefaultValue" and result = this.getDefaultValue()
+    or
+    result = super.getAChild(pred)
+  }
+}
+
+/**
+ * A splat parameter, i.e. a parameter prefixed with `*`. For example, `*values` in:
+ * ```rb
+ * def foo(bar, *values)
+ *   ...
+ * end
+ * ```
+ */
+class SplatParameter extends NamedParameter, TSplatParameter {
+  private Ruby::SplatParameter g;
+
+  SplatParameter() { this = TSplatParameter(g) }
+
+  final override string getAPrimaryQlClass() { result = "SplatParameter" }
+
+  final override string getName() { g.getName().getValue() = result }
+
+  final override string toString() { result = "*" + this.getName() }
+
+  final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g.getName()) }
+}
+
+/**
+ * The special `...` parameter, which forwards positional/keyword/block arguments:
+ * ```rb
+ * def foo(...)
+ * end
+ * ```
+ */
+class ForwardParameter extends Parameter, TForwardParameter {
+  final override string toString() { result = "..." }
+
+  final override string getAPrimaryQlClass() { result = "ForwardParameter" }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Pattern.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Pattern.qll
new file mode 100644
index 00000000000..7275894b57d
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Pattern.qll
@@ -0,0 +1,96 @@
+private import codeql.ruby.AST
+private import codeql.Locations
+private import internal.AST
+private import internal.Pattern
+private import internal.TreeSitter
+private import internal.Variable
+
+/** A pattern. */
+class Pattern extends AstNode {
+ Pattern() {
+ explicitAssignmentNode(toGenerated(this), _)
+ or
+ implicitAssignmentNode(toGenerated(this))
+ or
+ implicitParameterAssignmentNode(toGenerated(this), _)
+ or
+ this = getSynthChild(any(AssignExpr ae), 0)
+ }
+
+ /** Gets a variable used in (or introduced by) this pattern. */
+ Variable getAVariable() { none() }
+}
+
+private class LhsExpr_ =
+ TVariableAccess or TTokenConstantAccess or TScopeResolutionConstantAccess or TMethodCall or
+ TSimpleParameter;
+
+/**
+ * A "left-hand-side" expression. An `LhsExpr` can occur on the left-hand side of
+ * operator assignments (`AssignOperation`), in patterns (`Pattern`) on the left-hand side of
+ * an assignment (`AssignExpr`) or for loop (`ForExpr`), and as the exception
+ * variable of a `rescue` clause (`RescueClause`).
+ *
+ * An `LhsExpr` can be a simple variable, a constant, a call, or an element reference:
+ * ```rb
+ * var = 1
+ * var += 1
+ * E = 1
+ * foo.bar = 1
+ * foo[0] = 1
+ * rescue E => var
+ * ```
+ */
+class LhsExpr extends Pattern, LhsExpr_, Expr {
+ override Variable getAVariable() { result = this.(VariableAccess).getVariable() }
+}
+
+private class TVariablePattern = TVariableAccess or TSimpleParameter;
+
+/** A simple variable pattern. */
+class VariablePattern extends Pattern, LhsExpr, TVariablePattern { }
+
+/**
+ * A tuple pattern.
+ *
+ * This includes both tuple patterns in parameters and assignments. Example patterns:
+ * ```rb
+ * a, self.b = value
+ * (a, b), c[3] = value
+ * a, b, *rest, c, d = value
+ * ```
+ */
+class TuplePattern extends Pattern, TTuplePattern {
+ override string getAPrimaryQlClass() { result = "TuplePattern" }
+
+ private TuplePatternImpl getImpl() { result = toGenerated(this) }
+
+ private Ruby::AstNode getChild(int i) { result = this.getImpl().getChildNode(i) }
+
+ /** Gets the `i`th pattern in this tuple pattern (for `*rest`, the pattern under the `*`). */
+ final Pattern getElement(int i) {
+ exists(Ruby::AstNode c | c = this.getChild(i) |
+ toGenerated(result) = c.(Ruby::RestAssignment).getChild()
+ or
+ toGenerated(result) = c
+ )
+ }
+
+ /** Gets a sub pattern in this tuple pattern. */
+ final Pattern getAnElement() { result = this.getElement(_) }
+
+ /**
+ * Gets the index of the pattern with the `*` marker on it, if it exists.
+ * In the example below the index is `2`.
+ * ```rb
+ * a, b, *rest, c, d = value
+ * ```
+ */
+ final int getRestIndex() { result = this.getImpl().getRestIndex() }
+
+ override Variable getAVariable() { result = this.getElement(_).getAVariable() }
+
+ override string toString() { result = "(..., ...)" }
+
+ override AstNode getAChild(string pred) { pred = "getElement" and result = this.getElement(_) }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Scope.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Scope.qll
new file mode 100644
index 00000000000..45fb00ae731
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Scope.qll
@@ -0,0 +1,22 @@
+private import codeql.ruby.AST
+private import internal.AST
+private import internal.Scope
+private import internal.TreeSitter
+
+/** A scope in which variables can be declared. Scopes may be nested in other scopes. */
+class Scope extends AstNode, TScopeType {
+ private Scope::Range range;
+
+ Scope() { range = toGenerated(this) }
+
+ /** Gets the scope in which this scope is nested, if any. */
+ Scope getOuterScope() { toGenerated(result) = range.getOuterScope() }
+
+ /** Gets a variable that is declared in this scope. */
+ final Variable getAVariable() { result.getDeclaringScope() = this }
+
+ /** Gets the variable declared in this scope with the given name, if any. */
+ final Variable getVariable(string name) {
+ result = this.getAVariable() and result.getName() = name
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Statement.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Statement.qll
new file mode 100644
index 00000000000..e3d77c2010c
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Statement.qll
@@ -0,0 +1,248 @@
+private import codeql.ruby.AST
+private import codeql.ruby.CFG
+private import internal.AST
+private import internal.TreeSitter
+private import internal.Variable
+private import codeql.ruby.controlflow.internal.ControlFlowGraphImpl
+
+/**
+ * A statement.
+ *
+ * This is the root QL class for all statements.
+ */
+class Stmt extends AstNode, TStmt {
+ /** Gets a control-flow node for this statement, if any. */
+ CfgNodes::AstCfgNode getAControlFlowNode() { result.getNode() = this }
+
+ /** Gets the control-flow scope of this statement, if any. */
+ CfgScope getCfgScope() { result = getCfgScope(this) }
+
+ /** Gets the enclosing callable, if any. */
+ Callable getEnclosingCallable() { result = this.getCfgScope() }
+}
+
+/**
+ * An empty statement (`;`).
+ */
+class EmptyStmt extends Stmt, TEmptyStmt {
+ final override string getAPrimaryQlClass() { result = "EmptyStmt" }
+
+ final override string toString() { result = ";" }
+}
+
+/**
+ * A `begin` statement.
+ * ```rb
+ * begin
+ * puts "hello world"
+ * end
+ * ```
+ */
+class BeginExpr extends BodyStmt, TBeginExpr {
+ final override string getAPrimaryQlClass() { result = "BeginExpr" }
+
+ final override string toString() { result = "begin ... " }
+}
+
+/**
+ * A `BEGIN` block.
+ * ```rb
+ * BEGIN { puts "starting ..." }
+ * ```
+ */
+class BeginBlock extends StmtSequence, TBeginBlock {
+ private Ruby::BeginBlock g;
+
+ BeginBlock() { this = TBeginBlock(g) }
+
+ final override string getAPrimaryQlClass() { result = "BeginBlock" }
+
+ final override string toString() { result = "BEGIN { ... }" }
+
+ final override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) }
+}
+
+/**
+ * An `END` block.
+ * ```rb
+ * END { puts "shutting down" }
+ * ```
+ */
+class EndBlock extends StmtSequence, TEndBlock {
+ private Ruby::EndBlock g;
+
+ EndBlock() { this = TEndBlock(g) }
+
+ final override string getAPrimaryQlClass() { result = "EndBlock" }
+
+ final override string toString() { result = "END { ... }" }
+
+ final override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) }
+}
+
+/**
+ * An `undef` statement. For example:
+ * ```rb
+ * undef method_name
+ * undef ==, :method_name
+ * undef :"method_#{ name }"
+ * ```
+ */
+class UndefStmt extends Stmt, TUndefStmt {
+ private Ruby::Undef g;
+
+ UndefStmt() { this = TUndefStmt(g) }
+
+ /** Gets the `n`th method name to undefine. */
+ final MethodName getMethodName(int n) { toGenerated(result) = g.getChild(n) }
+
+ /** Gets a method name to undefine. */
+ final MethodName getAMethodName() { result = this.getMethodName(_) }
+
+ final override string getAPrimaryQlClass() { result = "UndefStmt" }
+
+ final override string toString() { result = "undef ..." }
+
+ final override AstNode getAChild(string pred) {
+ result = super.getAChild(pred)
+ or
+ pred = "getMethodName" and result = this.getMethodName(_)
+ }
+}
+
+/**
+ * An `alias` statement. For example:
+ * ```rb
+ * alias alias_name method_name
+ * alias foo :method_name
+ * alias bar :"method_#{ name }"
+ * ```
+ */
+class AliasStmt extends Stmt, TAliasStmt {
+ private Ruby::Alias g;
+
+ AliasStmt() { this = TAliasStmt(g) }
+
+ /** Gets the new method name. */
+ final MethodName getNewName() { toGenerated(result) = g.getName() }
+
+ /** Gets the original method name. */
+ final MethodName getOldName() { toGenerated(result) = g.getAlias() }
+
+ final override string getAPrimaryQlClass() { result = "AliasStmt" }
+
+ final override string toString() { result = "alias ..." }
+
+ final override AstNode getAChild(string pred) {
+ result = super.getAChild(pred)
+ or
+ pred = "getNewName" and result = this.getNewName()
+ or
+ pred = "getOldName" and result = this.getOldName()
+ }
+}
+
+/**
+ * A statement that may return a value: `return`, `break` and `next`.
+ *
+ * ```rb
+ * return
+ * return value
+ * break
+ * break value
+ * next
+ * next value
+ * ```
+ */
+class ReturningStmt extends Stmt, TReturningStmt {
+ private Ruby::ArgumentList getArgumentList() {
+ result = any(Ruby::Return g | this = TReturnStmt(g)).getChild()
+ or
+ result = any(Ruby::Break g | this = TBreakStmt(g)).getChild()
+ or
+ result = any(Ruby::Next g | this = TNextStmt(g)).getChild()
+ }
+
+ /** Gets the returned value, if any; several values yield the whole argument list. */
+ final Expr getValue() {
+ toGenerated(result) =
+ any(Ruby::AstNode res |
+ exists(Ruby::ArgumentList a, int c |
+ a = this.getArgumentList() and c = count(a.getChild(_))
+ |
+ res = a.getChild(0) and c = 1
+ or
+ res = a and c > 1
+ )
+ )
+ }
+
+ final override AstNode getAChild(string pred) {
+ result = super.getAChild(pred)
+ or
+ pred = "getValue" and result = this.getValue()
+ }
+}
+
+/**
+ * A `return` statement.
+ * ```rb
+ * return
+ * return value
+ * ```
+ */
+class ReturnStmt extends ReturningStmt, TReturnStmt {
+ final override string getAPrimaryQlClass() { result = "ReturnStmt" }
+
+ final override string toString() { result = "return" }
+}
+
+/**
+ * A `break` statement.
+ * ```rb
+ * break
+ * break value
+ * ```
+ */
+class BreakStmt extends ReturningStmt, TBreakStmt {
+ final override string getAPrimaryQlClass() { result = "BreakStmt" }
+
+ final override string toString() { result = "break" }
+}
+
+/**
+ * A `next` statement.
+ * ```rb
+ * next
+ * next value
+ * ```
+ */
+class NextStmt extends ReturningStmt, TNextStmt {
+ final override string getAPrimaryQlClass() { result = "NextStmt" }
+
+ final override string toString() { result = "next" }
+}
+
+/**
+ * A `redo` statement.
+ * ```rb
+ * redo
+ * ```
+ */
+class RedoStmt extends Stmt, TRedoStmt {
+ final override string getAPrimaryQlClass() { result = "RedoStmt" }
+
+ final override string toString() { result = "redo" }
+}
+
+/**
+ * A `retry` statement.
+ * ```rb
+ * retry
+ * ```
+ */
+class RetryStmt extends Stmt, TRetryStmt {
+ final override string getAPrimaryQlClass() { result = "RetryStmt" }
+
+ final override string toString() { result = "retry" }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Variable.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Variable.qll
new file mode 100644
index 00000000000..b16d046d886
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/Variable.qll
@@ -0,0 +1,187 @@
+/** Provides classes for modeling program variables. */
+
+private import codeql.ruby.AST
+private import codeql.Locations
+private import internal.AST
+private import internal.TreeSitter
+private import internal.Variable
+
+/** A variable declared in a scope. */
+class Variable instanceof VariableImpl {
+ /** Gets the name of this variable. */
+ final string getName() { result = super.getNameImpl() }
+
+ /** Holds if the name of this variable is `name`. */
+ final predicate hasName(string name) { this.getName() = name }
+
+ /** Gets a textual representation of this variable. */
+ final string toString() { result = this.getName() }
+
+ /** Gets the location of this variable. */
+ final Location getLocation() { result = super.getLocationImpl() }
+
+ /** Gets the scope this variable is declared in. */
+ final Scope getDeclaringScope() {
+ toGenerated(result) = this.(VariableReal).getDeclaringScopeImpl()
+ }
+
+ /** Gets an access to this variable. */
+ VariableAccess getAnAccess() { result.getVariable() = this }
+}
+
+/** A local variable. */
+class LocalVariable extends Variable, TLocalVariable {
+ override LocalVariableAccess getAnAccess() { result.getVariable() = this }
+
+ /** Gets the access where this local variable is first introduced. */
+ VariableAccess getDefiningAccess() { result = this.(LocalVariableReal).getDefiningAccessImpl() }
+
+ /**
+ * Holds if this variable is captured. For example in
+ *
+ * ```rb
+ * def m x
+ * x.times do |y|
+ * puts x
+ * end
+ * puts x
+ * end
+ * ```
+ *
+ * `x` is a captured variable, whereas `y` is not.
+ */
+ predicate isCaptured() { this.getAnAccess().isCapturedAccess() }
+}
+
+/** A global variable. */
+class GlobalVariable extends Variable instanceof GlobalVariableImpl {
+ final override GlobalVariableAccess getAnAccess() { result.getVariable() = this }
+}
+
+/** An instance variable. */
+class InstanceVariable extends Variable instanceof InstanceVariableImpl {
+ /** Holds if this variable is a class instance variable. */
+ final predicate isClassInstanceVariable() { super.isClassInstanceVariable() }
+
+ final override InstanceVariableAccess getAnAccess() { result.getVariable() = this }
+}
+
+/** A class variable. */
+class ClassVariable extends Variable instanceof ClassVariableImpl {
+ final override ClassVariableAccess getAnAccess() { result.getVariable() = this }
+}
+
+/** An access to a variable. */
+class VariableAccess extends Expr instanceof VariableAccessImpl {
+ /** Gets the variable this access refers to. */
+ final Variable getVariable() { result = super.getVariableImpl() }
+
+ /**
+ * Holds if this access is a write access belonging to the explicit
+ * assignment `assignment`. For example, in
+ *
+ * ```rb
+ * a, b = foo
+ * ```
+ *
+ * both `a` and `b` are write accesses belonging to the same assignment.
+ */
+ predicate isExplicitWrite(AstNode assignment) {
+ explicitWriteAccess(toGenerated(this), toGenerated(assignment))
+ or
+ this = assignment.(AssignExpr).getLeftOperand()
+ }
+
+ /**
+ * Holds if this access is a write access belonging to an implicit assignment.
+ * For example, in
+ *
+ * ```rb
+ * def m elements
+ * for e in elements do
+ * puts e
+ * end
+ * end
+ * ```
+ *
+ * the access to `elements` in the parameter list is an implicit assignment,
+ * as is the first access to `e`.
+ */
+ predicate isImplicitWrite() { implicitWriteAccess(toGenerated(this)) }
+
+ final override string toString() { result = VariableAccessImpl.super.toString() }
+}
+
+/** An access to a variable where the value is updated. */
+class VariableWriteAccess extends VariableAccess {
+ VariableWriteAccess() {
+ this.isExplicitWrite(_) or
+ this.isImplicitWrite()
+ }
+}
+
+/** An access to a variable where the value is read. */
+class VariableReadAccess extends VariableAccess {
+ VariableReadAccess() { not this instanceof VariableWriteAccess }
+}
+
+/** An access to a local variable. */
+class LocalVariableAccess extends VariableAccess instanceof LocalVariableAccessImpl {
+ final override string getAPrimaryQlClass() { result = "LocalVariableAccess" }
+
+ /**
+ * Holds if this access is a captured variable access. For example in
+ *
+ * ```rb
+ * def m x
+ * x.times do |y|
+ * puts x
+ * end
+ * puts x
+ * end
+ * ```
+ *
+ * the access to `x` in the first `puts x` is a captured access, while
+ * the access to `x` in the second `puts x` is not.
+ */
+ final predicate isCapturedAccess() { isCapturedAccess(this) }
+}
+
+/** An access to a local variable where the value is updated. */
+class LocalVariableWriteAccess extends LocalVariableAccess, VariableWriteAccess { }
+
+/** An access to a local variable where the value is read. */
+class LocalVariableReadAccess extends LocalVariableAccess, VariableReadAccess { }
+
+/** An access to a global variable. */
+class GlobalVariableAccess extends VariableAccess instanceof GlobalVariableAccessImpl {
+ final override string getAPrimaryQlClass() { result = "GlobalVariableAccess" }
+}
+
+/** An access to a global variable where the value is updated. */
+class GlobalVariableWriteAccess extends GlobalVariableAccess, VariableWriteAccess { }
+
+/** An access to a global variable where the value is read. */
+class GlobalVariableReadAccess extends GlobalVariableAccess, VariableReadAccess { }
+
+/** An access to an instance variable. */
+class InstanceVariableAccess extends VariableAccess instanceof InstanceVariableAccessImpl {
+ final override string getAPrimaryQlClass() { result = "InstanceVariableAccess" }
+}
+
+/** An access to an instance variable where the value is updated. */
+class InstanceVariableWriteAccess extends InstanceVariableAccess, VariableWriteAccess { }
+
+/** An access to an instance variable where the value is read. */
+class InstanceVariableReadAccess extends InstanceVariableAccess, VariableReadAccess { }
+
+/** An access to a class variable. */
+class ClassVariableAccess extends VariableAccess instanceof ClassVariableAccessRealImpl {
+ final override string getAPrimaryQlClass() { result = "ClassVariableAccess" }
+}
+
+/** An access to a class variable where the value is updated. */
+class ClassVariableWriteAccess extends ClassVariableAccess, VariableWriteAccess { }
+
+/** An access to a class variable where the value is read. */
+class ClassVariableReadAccess extends ClassVariableAccess, VariableReadAccess { }
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/AST.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/AST.qll
new file mode 100644
index 00000000000..7df09c9b5d8
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/AST.qll
@@ -0,0 +1,704 @@
+import codeql.Locations
+private import TreeSitter
+private import codeql.ruby.ast.internal.Call
+private import codeql.ruby.ast.internal.Parameter
+private import codeql.ruby.ast.internal.Variable
+private import codeql.ruby.AST as AST
+private import Synthesis
+
+module MethodName {
+ predicate range(Ruby::UnderscoreMethodName g) {
+ exists(Ruby::Undef u | u.getChild(_) = g)
+ or
+ exists(Ruby::Alias a | a.getName() = g or a.getAlias() = g)
+ }
+
+ class Token =
+ @ruby_setter or @ruby_token_class_variable or @ruby_token_constant or
+ @ruby_token_global_variable or @ruby_token_identifier or @ruby_token_instance_variable or
+ @ruby_token_operator;
+}
+
+private predicate mkSynthChild(SynthKind kind, AST::AstNode parent, int i) {
+ any(Synthesis s).child(parent, i, SynthChild(kind))
+}
+
+cached
+private module Cached {
+ cached
+ newtype TAstNode =
+ TAddExprReal(Ruby::Binary g) { g instanceof @ruby_binary_plus } or
+ TAddExprSynth(AST::AstNode parent, int i) { mkSynthChild(AddExprKind(), parent, i) } or
+ TAliasStmt(Ruby::Alias g) or
+ TArgumentList(Ruby::AstNode g) {
+ (
+ g.getParent() instanceof Ruby::Break or
+ g.getParent() instanceof Ruby::Return or
+ g.getParent() instanceof Ruby::Next or
+ g.getParent() instanceof Ruby::Assignment or
+ g.getParent() instanceof Ruby::OperatorAssignment
+ ) and
+ (
+ strictcount(g.(Ruby::ArgumentList).getChild(_)) > 1
+ or
+ g instanceof Ruby::RightAssignmentList
+ )
+ } or
+ TAssignAddExpr(Ruby::OperatorAssignment g) { g instanceof @ruby_operator_assignment_plusequal } or
+ TAssignBitwiseAndExpr(Ruby::OperatorAssignment g) {
+ g instanceof @ruby_operator_assignment_ampersandequal
+ } or
+ TAssignBitwiseOrExpr(Ruby::OperatorAssignment g) {
+ g instanceof @ruby_operator_assignment_pipeequal
+ } or
+ TAssignBitwiseXorExpr(Ruby::OperatorAssignment g) {
+ g instanceof @ruby_operator_assignment_caretequal
+ } or
+ TAssignDivExpr(Ruby::OperatorAssignment g) { g instanceof @ruby_operator_assignment_slashequal } or
+ TAssignExponentExpr(Ruby::OperatorAssignment g) {
+ g instanceof @ruby_operator_assignment_starstarequal
+ } or
+ TAssignExprReal(Ruby::Assignment g) or
+ TAssignExprSynth(AST::AstNode parent, int i) { mkSynthChild(AssignExprKind(), parent, i) } or
+ TAssignLShiftExpr(Ruby::OperatorAssignment g) {
+ g instanceof @ruby_operator_assignment_langlelangleequal
+ } or
+ TAssignLogicalAndExpr(Ruby::OperatorAssignment g) {
+ g instanceof @ruby_operator_assignment_ampersandampersandequal
+ } or
+ TAssignLogicalOrExpr(Ruby::OperatorAssignment g) {
+ g instanceof @ruby_operator_assignment_pipepipeequal
+ } or
+ TAssignModuloExpr(Ruby::OperatorAssignment g) {
+ g instanceof @ruby_operator_assignment_percentequal
+ } or
+ TAssignMulExpr(Ruby::OperatorAssignment g) { g instanceof @ruby_operator_assignment_starequal } or
+ TAssignRShiftExpr(Ruby::OperatorAssignment g) {
+ g instanceof @ruby_operator_assignment_ranglerangleequal
+ } or
+ TAssignSubExpr(Ruby::OperatorAssignment g) { g instanceof @ruby_operator_assignment_minusequal } or
+ TBareStringLiteral(Ruby::BareString g) or
+ TBareSymbolLiteral(Ruby::BareSymbol g) or
+ TBeginBlock(Ruby::BeginBlock g) or
+ TBeginExpr(Ruby::Begin g) or
+ TBitwiseAndExprReal(Ruby::Binary g) { g instanceof @ruby_binary_ampersand } or
+ TBitwiseAndExprSynth(AST::AstNode parent, int i) {
+ mkSynthChild(BitwiseAndExprKind(), parent, i)
+ } or
+ TBitwiseOrExprReal(Ruby::Binary g) { g instanceof @ruby_binary_pipe } or
+ TBitwiseOrExprSynth(AST::AstNode parent, int i) { mkSynthChild(BitwiseOrExprKind(), parent, i) } or
+ TBitwiseXorExprReal(Ruby::Binary g) { g instanceof @ruby_binary_caret } or
+ TBitwiseXorExprSynth(AST::AstNode parent, int i) {
+ mkSynthChild(BitwiseXorExprKind(), parent, i)
+ } or
+ TBlockArgument(Ruby::BlockArgument g) or
+ TBlockParameter(Ruby::BlockParameter g) or
+ TBraceBlock(Ruby::Block g) { not g.getParent() instanceof Ruby::Lambda } or
+ TBreakStmt(Ruby::Break g) or
+ TCaseEqExpr(Ruby::Binary g) { g instanceof @ruby_binary_equalequalequal } or
+ TCaseExpr(Ruby::Case g) or
+ TCharacterLiteral(Ruby::Character g) or
+ TClassDeclaration(Ruby::Class g) or
+ TClassVariableAccessReal(Ruby::ClassVariable g, AST::ClassVariable v) {
+ ClassVariableAccess::range(g, v)
+ } or
+ TClassVariableAccessSynth(AST::AstNode parent, int i, AST::ClassVariable v) {
+ mkSynthChild(ClassVariableAccessKind(v), parent, i)
+ } or
+ TComplementExpr(Ruby::Unary g) { g instanceof @ruby_unary_tilde } or
+ TComplexLiteral(Ruby::Complex g) or
+ TConstantReadAccessSynth(AST::AstNode parent, int i, string value) {
+ mkSynthChild(ConstantReadAccessKind(value), parent, i)
+ } or
+ TDefinedExpr(Ruby::Unary g) { g instanceof @ruby_unary_definedquestion } or
+ TDelimitedSymbolLiteral(Ruby::DelimitedSymbol g) or
+ TDestructuredLeftAssignment(Ruby::DestructuredLeftAssignment g) {
+ not strictcount(int i | exists(g.getParent().(Ruby::LeftAssignmentList).getChild(i))) = 1
+ } or
+ TDivExprReal(Ruby::Binary g) { g instanceof @ruby_binary_slash } or
+ TDivExprSynth(AST::AstNode parent, int i) { mkSynthChild(DivExprKind(), parent, i) } or
+ TDo(Ruby::Do g) or
+ TDoBlock(Ruby::DoBlock g) { not g.getParent() instanceof Ruby::Lambda } or
+ TElementReference(Ruby::ElementReference g) or
+ TElse(Ruby::Else g) or
+ TElsif(Ruby::Elsif g) or
+ TEmptyStmt(Ruby::EmptyStatement g) or
+ TEndBlock(Ruby::EndBlock g) or
+ TEnsure(Ruby::Ensure g) or
+ TEqExpr(Ruby::Binary g) { g instanceof @ruby_binary_equalequal } or
+ TExponentExprReal(Ruby::Binary g) { g instanceof @ruby_binary_starstar } or
+ TExponentExprSynth(AST::AstNode parent, int i) { mkSynthChild(ExponentExprKind(), parent, i) } or
+ TFalseLiteral(Ruby::False g) or
+ TFloatLiteral(Ruby::Float g) { not any(Ruby::Rational r).getChild() = g } or
+ TForExpr(Ruby::For g) or
+ TForIn(Ruby::In g) or // TODO REMOVE
+ TForwardParameter(Ruby::ForwardParameter g) or
+ TForwardArgument(Ruby::ForwardArgument g) or
+ TGEExpr(Ruby::Binary g) { g instanceof @ruby_binary_rangleequal } or
+ TGTExpr(Ruby::Binary g) { g instanceof @ruby_binary_rangle } or
+ TGlobalVariableAccessReal(Ruby::GlobalVariable g, AST::GlobalVariable v) {
+ GlobalVariableAccess::range(g, v)
+ } or
+ TGlobalVariableAccessSynth(AST::AstNode parent, int i, AST::GlobalVariable v) {
+ mkSynthChild(GlobalVariableAccessKind(v), parent, i)
+ } or
+ THashKeySymbolLiteral(Ruby::HashKeySymbol g) or
+ THashLiteral(Ruby::Hash g) or
+ THashSplatExpr(Ruby::HashSplatArgument g) or
+ THashSplatParameter(Ruby::HashSplatParameter g) or
+ THereDoc(Ruby::HeredocBeginning g) or
+ TIdentifierMethodCall(Ruby::Identifier g) { isIdentifierMethodCall(g) } or
+ TIf(Ruby::If g) or
+ TIfModifierExpr(Ruby::IfModifier g) or
+ TInstanceVariableAccessReal(Ruby::InstanceVariable g, AST::InstanceVariable v) {
+ InstanceVariableAccess::range(g, v)
+ } or
+ TInstanceVariableAccessSynth(AST::AstNode parent, int i, AST::InstanceVariable v) {
+ mkSynthChild(InstanceVariableAccessKind(v), parent, i)
+ } or
+ TIntegerLiteralReal(Ruby::Integer g) { not any(Ruby::Rational r).getChild() = g } or
+ TIntegerLiteralSynth(AST::AstNode parent, int i, int value) {
+ mkSynthChild(IntegerLiteralKind(value), parent, i)
+ } or
+ TKeywordParameter(Ruby::KeywordParameter g) or
+ TLEExpr(Ruby::Binary g) { g instanceof @ruby_binary_langleequal } or
+ TLShiftExprReal(Ruby::Binary g) { g instanceof @ruby_binary_langlelangle } or
+ TLShiftExprSynth(AST::AstNode parent, int i) { mkSynthChild(LShiftExprKind(), parent, i) } or
+ TLTExpr(Ruby::Binary g) { g instanceof @ruby_binary_langle } or
+ TLambda(Ruby::Lambda g) or
+ TLeftAssignmentList(Ruby::LeftAssignmentList g) or
+ TLocalVariableAccessReal(Ruby::Identifier g, AST::LocalVariable v) {
+ LocalVariableAccess::range(g, v)
+ } or
+ TLocalVariableAccessSynth(AST::AstNode parent, int i, AST::LocalVariable v) {
+ mkSynthChild(LocalVariableAccessRealKind(v), parent, i)
+ or
+ mkSynthChild(LocalVariableAccessSynthKind(v), parent, i)
+ } or
+ TLogicalAndExprReal(Ruby::Binary g) {
+ g instanceof @ruby_binary_and or g instanceof @ruby_binary_ampersandampersand
+ } or
+ TLogicalAndExprSynth(AST::AstNode parent, int i) {
+ mkSynthChild(LogicalAndExprKind(), parent, i)
+ } or
+ TLogicalOrExprReal(Ruby::Binary g) {
+ g instanceof @ruby_binary_or or g instanceof @ruby_binary_pipepipe
+ } or
+ TLogicalOrExprSynth(AST::AstNode parent, int i) { mkSynthChild(LogicalOrExprKind(), parent, i) } or
+ TMethod(Ruby::Method g) or
+ TMethodCallSynth(AST::AstNode parent, int i, string name, boolean setter, int arity) {
+ mkSynthChild(MethodCallKind(name, setter, arity), parent, i)
+ } or
+ TModuleDeclaration(Ruby::Module g) or
+ TModuloExprReal(Ruby::Binary g) { g instanceof @ruby_binary_percent } or
+ TModuloExprSynth(AST::AstNode parent, int i) { mkSynthChild(ModuloExprKind(), parent, i) } or
+ TMulExprReal(Ruby::Binary g) { g instanceof @ruby_binary_star } or
+ TMulExprSynth(AST::AstNode parent, int i) { mkSynthChild(MulExprKind(), parent, i) } or
+ TNEExpr(Ruby::Binary g) { g instanceof @ruby_binary_bangequal } or
+ TNextStmt(Ruby::Next g) or
+ TNilLiteral(Ruby::Nil g) or
+ TNoRegExpMatchExpr(Ruby::Binary g) { g instanceof @ruby_binary_bangtilde } or
+ TNotExpr(Ruby::Unary g) { g instanceof @ruby_unary_bang or g instanceof @ruby_unary_not } or
+ TOptionalParameter(Ruby::OptionalParameter g) or
+ TPair(Ruby::Pair g) or
+ TParenthesizedExpr(Ruby::ParenthesizedStatements g) or
+ TRShiftExprReal(Ruby::Binary g) { g instanceof @ruby_binary_ranglerangle } or
+ TRShiftExprSynth(AST::AstNode parent, int i) { mkSynthChild(RShiftExprKind(), parent, i) } or
+ TRangeLiteralReal(Ruby::Range g) or
+ TRangeLiteralSynth(AST::AstNode parent, int i, boolean inclusive) {
+ mkSynthChild(RangeLiteralKind(inclusive), parent, i)
+ } or
+ TRationalLiteral(Ruby::Rational g) or
+ TRedoStmt(Ruby::Redo g) or
+ TRegExpLiteral(Ruby::Regex g) or
+ TRegExpMatchExpr(Ruby::Binary g) { g instanceof @ruby_binary_equaltilde } or
+ TRegularArrayLiteral(Ruby::Array g) or
+ TRegularMethodCall(Ruby::Call g) { isRegularMethodCall(g) } or
+ TRegularStringLiteral(Ruby::String g) or
+ TRegularSuperCall(Ruby::Call g) { g.getMethod() instanceof Ruby::Super } or
+ TRescueClause(Ruby::Rescue g) or
+ TRescueModifierExpr(Ruby::RescueModifier g) or
+ TRetryStmt(Ruby::Retry g) or
+ TReturnStmt(Ruby::Return g) or
+ TScopeResolutionConstantAccess(Ruby::ScopeResolution g, Ruby::Constant constant) {
+ constant = g.getName() and
+ (
+ // A tree-sitter `scope_resolution` node with a `constant` name field is a
+ // read of that constant in any context where an identifier would be a
+ // vcall.
+ vcall(g)
+ or
+ explicitAssignmentNode(g, _)
+ )
+ } or
+ TScopeResolutionMethodCall(Ruby::ScopeResolution g, Ruby::Identifier i) {
+ isScopeResolutionMethodCall(g, i)
+ } or
+ TSelfReal(Ruby::Self g) or
+ TSelfSynth(AST::AstNode parent, int i) { mkSynthChild(SelfKind(), parent, i) } or
+ TSimpleParameter(Ruby::Identifier g) { g instanceof Parameter::Range } or
+ TSimpleSymbolLiteral(Ruby::SimpleSymbol g) or
+ TSingletonClass(Ruby::SingletonClass g) or
+ TSingletonMethod(Ruby::SingletonMethod g) or
+ TSpaceshipExpr(Ruby::Binary g) { g instanceof @ruby_binary_langleequalrangle } or
+ TSplatExprReal(Ruby::SplatArgument g) or
+ TSplatExprSynth(AST::AstNode parent, int i) { mkSynthChild(SplatExprKind(), parent, i) } or
+ TSplatParameter(Ruby::SplatParameter g) or
+ TStmtSequenceSynth(AST::AstNode parent, int i) { mkSynthChild(StmtSequenceKind(), parent, i) } or
+ TStringArrayLiteral(Ruby::StringArray g) or
+ TStringConcatenation(Ruby::ChainedString g) or
+ TStringEscapeSequenceComponent(Ruby::EscapeSequence g) or
+ TStringInterpolationComponent(Ruby::Interpolation g) or
+ TStringTextComponent(Ruby::Token g) {
+ g instanceof Ruby::StringContent or g instanceof Ruby::HeredocContent
+ } or
+ TSubExprReal(Ruby::Binary g) { g instanceof @ruby_binary_minus } or
+ TSubExprSynth(AST::AstNode parent, int i) { mkSynthChild(SubExprKind(), parent, i) } or
+ TSubshellLiteral(Ruby::Subshell g) or
+ TSymbolArrayLiteral(Ruby::SymbolArray g) or
+ TTernaryIfExpr(Ruby::Conditional g) or
+ TThen(Ruby::Then g) or
+ TTokenConstantAccess(Ruby::Constant g) {
+ // A tree-sitter `constant` token is a read of that constant in any context
+ // where an identifier would be a vcall.
+ vcall(g)
+ or
+ explicitAssignmentNode(g, _)
+ } or
+ TTokenMethodName(MethodName::Token g) { MethodName::range(g) } or
+ TTokenSuperCall(Ruby::Super g) { vcall(g) } or
+ TToplevel(Ruby::Program g) or
+ TTrueLiteral(Ruby::True g) or
+ TTuplePatternParameter(Ruby::DestructuredParameter g) or
+ TUnaryMinusExpr(Ruby::Unary g) { g instanceof @ruby_unary_minus } or
+ TUnaryPlusExpr(Ruby::Unary g) { g instanceof @ruby_unary_plus } or
+ TUndefStmt(Ruby::Undef g) or
+ TUnlessExpr(Ruby::Unless g) or
+ TUnlessModifierExpr(Ruby::UnlessModifier g) or
+ TUntilExpr(Ruby::Until g) or
+ TUntilModifierExpr(Ruby::UntilModifier g) or
+ TWhenExpr(Ruby::When g) or
+ TWhileExpr(Ruby::While g) or
+ TWhileModifierExpr(Ruby::WhileModifier g) or
+ TYieldCall(Ruby::Yield g)
+
+ /**
+ * Gets the underlying TreeSitter entity for a given AST node. This does not
+ * include synthesized AST nodes, because they are not the primary AST node
+ * for any given generated node.
+ */
+ cached
+ Ruby::AstNode toGenerated(AST::AstNode n) {
+ n = TAddExprReal(result) or
+ n = TAliasStmt(result) or
+ n = TArgumentList(result) or
+ n = TAssignAddExpr(result) or
+ n = TAssignBitwiseAndExpr(result) or
+ n = TAssignBitwiseOrExpr(result) or
+ n = TAssignBitwiseXorExpr(result) or
+ n = TAssignDivExpr(result) or
+ n = TAssignExponentExpr(result) or
+ n = TAssignExprReal(result) or
+ n = TAssignLShiftExpr(result) or
+ n = TAssignLogicalAndExpr(result) or
+ n = TAssignLogicalOrExpr(result) or
+ n = TAssignModuloExpr(result) or
+ n = TAssignMulExpr(result) or
+ n = TAssignRShiftExpr(result) or
+ n = TAssignSubExpr(result) or
+ n = TBareStringLiteral(result) or
+ n = TBareSymbolLiteral(result) or
+ n = TBeginBlock(result) or
+ n = TBeginExpr(result) or
+ n = TBitwiseAndExprReal(result) or
+ n = TBitwiseOrExprReal(result) or
+ n = TBitwiseXorExprReal(result) or
+ n = TBlockArgument(result) or
+ n = TBlockParameter(result) or
+ n = TBraceBlock(result) or
+ n = TBreakStmt(result) or
+ n = TCaseEqExpr(result) or
+ n = TCaseExpr(result) or
+ n = TCharacterLiteral(result) or
+ n = TClassDeclaration(result) or
+ n = TClassVariableAccessReal(result, _) or
+ n = TComplementExpr(result) or
+ n = TComplexLiteral(result) or
+ n = TDefinedExpr(result) or
+ n = TDelimitedSymbolLiteral(result) or
+ n = TDestructuredLeftAssignment(result) or
+ n = TDivExprReal(result) or
+ n = TDo(result) or
+ n = TDoBlock(result) or
+ n = TElementReference(result) or
+ n = TElse(result) or
+ n = TElsif(result) or
+ n = TEmptyStmt(result) or
+ n = TEndBlock(result) or
+ n = TEnsure(result) or
+ n = TEqExpr(result) or
+ n = TExponentExprReal(result) or
+ n = TFalseLiteral(result) or
+ n = TFloatLiteral(result) or
+ n = TForExpr(result) or
+ n = TForIn(result) or // TODO REMOVE
+ n = TForwardArgument(result) or
+ n = TForwardParameter(result) or
+ n = TGEExpr(result) or
+ n = TGTExpr(result) or
+ n = TGlobalVariableAccessReal(result, _) or
+ n = THashKeySymbolLiteral(result) or
+ n = THashLiteral(result) or
+ n = THashSplatExpr(result) or
+ n = THashSplatParameter(result) or
+ n = THereDoc(result) or
+ n = TIdentifierMethodCall(result) or
+ n = TIf(result) or
+ n = TIfModifierExpr(result) or
+ n = TInstanceVariableAccessReal(result, _) or
+ n = TIntegerLiteralReal(result) or
+ n = TKeywordParameter(result) or
+ n = TLEExpr(result) or
+ n = TLShiftExprReal(result) or
+ n = TLTExpr(result) or
+ n = TLambda(result) or
+ n = TLeftAssignmentList(result) or
+ n = TLocalVariableAccessReal(result, _) or
+ n = TLogicalAndExprReal(result) or
+ n = TLogicalOrExprReal(result) or
+ n = TMethod(result) or
+ n = TModuleDeclaration(result) or
+ n = TModuloExprReal(result) or
+ n = TMulExprReal(result) or
+ n = TNEExpr(result) or
+ n = TNextStmt(result) or
+ n = TNilLiteral(result) or
+ n = TNoRegExpMatchExpr(result) or
+ n = TNotExpr(result) or
+ n = TOptionalParameter(result) or
+ n = TPair(result) or
+ n = TParenthesizedExpr(result) or
+ n = TRShiftExprReal(result) or
+ n = TRangeLiteralReal(result) or
+ n = TRationalLiteral(result) or
+ n = TRedoStmt(result) or
+ n = TRegExpLiteral(result) or
+ n = TRegExpMatchExpr(result) or
+ n = TRegularArrayLiteral(result) or
+ n = TRegularMethodCall(result) or
+ n = TRegularStringLiteral(result) or
+ n = TRegularSuperCall(result) or
+ n = TRescueClause(result) or
+ n = TRescueModifierExpr(result) or
+ n = TRetryStmt(result) or
+ n = TReturnStmt(result) or
+ n = TScopeResolutionConstantAccess(result, _) or
+ n = TScopeResolutionMethodCall(result, _) or
+ n = TSelfReal(result) or
+ n = TSimpleParameter(result) or
+ n = TSimpleSymbolLiteral(result) or
+ n = TSingletonClass(result) or
+ n = TSingletonMethod(result) or
+ n = TSpaceshipExpr(result) or
+ n = TSplatExprReal(result) or
+ n = TSplatParameter(result) or
+ n = TStringArrayLiteral(result) or
+ n = TStringConcatenation(result) or
+ n = TStringEscapeSequenceComponent(result) or
+ n = TStringInterpolationComponent(result) or
+ n = TStringTextComponent(result) or
+ n = TSubExprReal(result) or
+ n = TSubshellLiteral(result) or
+ n = TSymbolArrayLiteral(result) or
+ n = TTernaryIfExpr(result) or
+ n = TThen(result) or
+ n = TTokenConstantAccess(result) or
+ n = TTokenMethodName(result) or
+ n = TTokenSuperCall(result) or
+ n = TToplevel(result) or
+ n = TTrueLiteral(result) or
+ n = TTuplePatternParameter(result) or
+ n = TUnaryMinusExpr(result) or
+ n = TUnaryPlusExpr(result) or
+ n = TUndefStmt(result) or
+ n = TUnlessExpr(result) or
+ n = TUnlessModifierExpr(result) or
+ n = TUntilExpr(result) or
+ n = TUntilModifierExpr(result) or
+ n = TWhenExpr(result) or
+ n = TWhileExpr(result) or
+ n = TWhileModifierExpr(result) or
+ n = TYieldCall(result)
+ }
+
+ /**
+  * Gets the `i`th synthesized child of `parent`.
+  *
+  * This is any node built by one of the `T...Synth` constructors below with
+  * `parent` and `i` as its first two arguments.
+  */
+ cached
+ AST::AstNode getSynthChild(AST::AstNode parent, int i) {
+   // assignments
+   result = TAssignExprSynth(parent, i)
+   or
+   // binary arithmetic operations
+   result = TAddExprSynth(parent, i) or
+   result = TSubExprSynth(parent, i) or
+   result = TMulExprSynth(parent, i) or
+   result = TDivExprSynth(parent, i) or
+   result = TModuloExprSynth(parent, i) or
+   result = TExponentExprSynth(parent, i)
+   or
+   // binary bitwise operations
+   result = TLShiftExprSynth(parent, i) or
+   result = TRShiftExprSynth(parent, i) or
+   result = TBitwiseAndExprSynth(parent, i) or
+   result = TBitwiseOrExprSynth(parent, i) or
+   result = TBitwiseXorExprSynth(parent, i)
+   or
+   // binary logical operations
+   result = TLogicalAndExprSynth(parent, i) or
+   result = TLogicalOrExprSynth(parent, i)
+   or
+   // splats
+   result = TSplatExprSynth(parent, i)
+   or
+   // `self` and variable accesses
+   result = TSelfSynth(parent, i) or
+   result = TLocalVariableAccessSynth(parent, i, _) or
+   result = TGlobalVariableAccessSynth(parent, i, _) or
+   result = TInstanceVariableAccessSynth(parent, i, _) or
+   result = TClassVariableAccessSynth(parent, i, _)
+   or
+   // constant reads
+   result = TConstantReadAccessSynth(parent, i, _)
+   or
+   // literals
+   result = TIntegerLiteralSynth(parent, i, _) or
+   result = TRangeLiteralSynth(parent, i, _)
+   or
+   // method calls
+   result = TMethodCallSynth(parent, i, _, _, _)
+   or
+   // statement sequences
+   result = TStmtSequenceSynth(parent, i)
+ }
+
+ /**
+  * Holds if `child` is the `i`th child of `parent`, where at least one of
+  * the two nodes is synthesized.
+  */
+ cached
+ predicate synthChild(AST::AstNode parent, int i, AST::AstNode child) {
+   exists(Synthesis synth | synth.child(parent, i, RealChild(child)))
+   or
+   getSynthChild(parent, i) = child
+ }
+
+ /**
+  * Gets the generated node for `n`. Where `toGenerated` has no result for
+  * `n`, this falls back to the generated node of a `synthChild` parent of
+  * `n`, recursively.
+  */
+ cached
+ Ruby::AstNode toGeneratedInclSynth(AST::AstNode n) {
+   toGenerated(n) = result
+   or
+   not exists(toGenerated(n)) and
+   exists(AST::AstNode owner |
+     synthChild(owner, _, n) and result = toGeneratedInclSynth(owner)
+   )
+ }
+
+ /** Gets the location of `n`; synthesized nodes without their own location use their parent's. */
+ cached
+ Location getLocation(AST::AstNode n) {
+   toGenerated(n).getLocation() = result
+   or
+   synthLocation(n, result)
+   or
+   n.isSynthesized() and not synthLocation(n, _) and
+   result = getLocation(n.getParent())
+ }
+}
+
+import Cached
+
+TAstNode fromGenerated(Ruby::AstNode n) { toGenerated(result) = n }
+
+class TCall = TMethodCall or TYieldCall;
+
+class TMethodCall =
+ TMethodCallSynth or TIdentifierMethodCall or TScopeResolutionMethodCall or TRegularMethodCall or
+ TElementReference or TSuperCall or TUnaryOperation or TBinaryOperation; // operators count as method calls
+
+class TSuperCall = TTokenSuperCall or TRegularSuperCall;
+
+class TConstantAccess =
+ TTokenConstantAccess or TScopeResolutionConstantAccess or TNamespace or TConstantReadAccessSynth;
+
+class TControlExpr = TConditionalExpr or TCaseExpr or TLoop;
+
+class TConditionalExpr =
+ TIfExpr or TUnlessExpr or TIfModifierExpr or TUnlessModifierExpr or TTernaryIfExpr;
+
+class TIfExpr = TIf or TElsif; // an `elsif` branch is grouped with `if`
+
+class TConditionalLoop = TWhileExpr or TUntilExpr or TWhileModifierExpr or TUntilModifierExpr;
+
+class TLoop = TConditionalLoop or TForExpr;
+
+class TSelf = TSelfReal or TSelfSynth; // backed by a generated node, or synthesized
+
+class TExpr =
+ TSelf or TArgumentList or TRescueClause or TRescueModifierExpr or TPair or TStringConcatenation or
+ TCall or TBlockArgument or TConstantAccess or TControlExpr or TWhenExpr or TLiteral or
+ TCallable or TVariableAccess or TStmtSequence or TOperation or TSimpleParameter or
+ TForwardArgument;
+
+class TSplatExpr = TSplatExprReal or TSplatExprSynth;
+
+class TStmtSequence =
+ TBeginBlock or TEndBlock or TThen or TElse or TDo or TEnsure or TStringInterpolationComponent or
+ TBlock or TBodyStmt or TParenthesizedExpr or TStmtSequenceSynth;
+
+class TBodyStmt = TBeginExpr or TModuleBase or TMethod or TLambda or TDoBlock or TSingletonMethod;
+
+class TLiteral =
+ TNumericLiteral or TNilLiteral or TBooleanLiteral or TStringlikeLiteral or TCharacterLiteral or
+ TArrayLiteral or THashLiteral or TRangeLiteral or TTokenMethodName; // TRangeLiteral is declared next to the shift types
+
+class TNumericLiteral = TIntegerLiteral or TFloatLiteral or TRationalLiteral or TComplexLiteral;
+
+class TIntegerLiteral = TIntegerLiteralReal or TIntegerLiteralSynth;
+
+class TBooleanLiteral = TTrueLiteral or TFalseLiteral;
+
+class TStringComponent =
+ TStringTextComponent or TStringEscapeSequenceComponent or TStringInterpolationComponent;
+
+class TStringlikeLiteral =
+ TStringLiteral or TRegExpLiteral or TSymbolLiteral or TSubshellLiteral or THereDoc;
+
+class TStringLiteral = TRegularStringLiteral or TBareStringLiteral;
+
+class TSymbolLiteral = TSimpleSymbolLiteral or TComplexSymbolLiteral or THashKeySymbolLiteral;
+
+class TComplexSymbolLiteral = TDelimitedSymbolLiteral or TBareSymbolLiteral;
+
+class TArrayLiteral = TRegularArrayLiteral or TStringArrayLiteral or TSymbolArrayLiteral;
+
+class TCallable = TMethodBase or TLambda or TBlock;
+
+class TMethodBase = TMethod or TSingletonMethod;
+
+class TBlock = TDoBlock or TBraceBlock;
+
+class TModuleBase = TToplevel or TNamespace or TSingletonClass; // the top level counts as a module base too
+
+class TNamespace = TClassDeclaration or TModuleDeclaration;
+
+class TOperation = TUnaryOperation or TBinaryOperation or TAssignment;
+
+class TUnaryOperation =
+ TUnaryLogicalOperation or TUnaryArithmeticOperation or TUnaryBitwiseOperation or TDefinedExpr or
+ TSplatExpr or THashSplatExpr;
+
+class TUnaryLogicalOperation = TNotExpr;
+
+class TUnaryArithmeticOperation = TUnaryPlusExpr or TUnaryMinusExpr;
+
+class TUnaryBitwiseOperation = TComplementExpr;
+
+class TBinaryOperation =
+ TBinaryArithmeticOperation or TBinaryLogicalOperation or TBinaryBitwiseOperation or
+ TComparisonOperation or TSpaceshipExpr or TRegExpMatchExpr or TNoRegExpMatchExpr;
+
+class TBinaryArithmeticOperation =
+ TAddExpr or TSubExpr or TMulExpr or TDivExpr or TModuloExpr or TExponentExpr;
+
+class TAddExpr = TAddExprReal or TAddExprSynth;
+
+class TSubExpr = TSubExprReal or TSubExprSynth;
+
+class TMulExpr = TMulExprReal or TMulExprSynth;
+
+class TDivExpr = TDivExprReal or TDivExprSynth;
+
+class TModuloExpr = TModuloExprReal or TModuloExprSynth;
+
+class TExponentExpr = TExponentExprReal or TExponentExprSynth;
+
+class TBinaryLogicalOperation = TLogicalAndExpr or TLogicalOrExpr;
+
+class TLogicalAndExpr = TLogicalAndExprReal or TLogicalAndExprSynth;
+
+class TLogicalOrExpr = TLogicalOrExprReal or TLogicalOrExprSynth;
+
+class TBinaryBitwiseOperation =
+ TLShiftExpr or TRShiftExpr or TBitwiseAndExpr or TBitwiseOrExpr or TBitwiseXorExpr;
+
+class TLShiftExpr = TLShiftExprReal or TLShiftExprSynth;
+
+class TRangeLiteral = TRangeLiteralReal or TRangeLiteralSynth; // a member of TLiteral, not of TBinaryBitwiseOperation
+
+class TRShiftExpr = TRShiftExprReal or TRShiftExprSynth;
+
+class TBitwiseAndExpr = TBitwiseAndExprReal or TBitwiseAndExprSynth;
+
+class TBitwiseOrExpr = TBitwiseOrExprReal or TBitwiseOrExprSynth;
+
+class TBitwiseXorExpr = TBitwiseXorExprReal or TBitwiseXorExprSynth;
+
+class TComparisonOperation = TEqualityOperation or TRelationalOperation;
+
+class TEqualityOperation = TEqExpr or TNEExpr or TCaseEqExpr;
+
+class TRelationalOperation = TGTExpr or TGEExpr or TLTExpr or TLEExpr;
+
+class TAssignExpr = TAssignExprReal or TAssignExprSynth;
+
+class TAssignment = TAssignExpr or TAssignOperation;
+
+class TAssignOperation =
+ TAssignArithmeticOperation or TAssignLogicalOperation or TAssignBitwiseOperation;
+
+class TAssignArithmeticOperation =
+ TAssignAddExpr or TAssignSubExpr or TAssignMulExpr or TAssignDivExpr or TAssignModuloExpr or
+ TAssignExponentExpr;
+
+class TAssignLogicalOperation = TAssignLogicalAndExpr or TAssignLogicalOrExpr;
+
+class TAssignBitwiseOperation =
+ TAssignLShiftExpr or TAssignRShiftExpr or TAssignBitwiseAndExpr or TAssignBitwiseOrExpr or
+ TAssignBitwiseXorExpr;
+
+class TStmt =
+ TEmptyStmt or TBodyStmt or TStmtSequence or TUndefStmt or TAliasStmt or TReturningStmt or
+ TRedoStmt or TRetryStmt or TExpr; // every TExpr is also a TStmt
+
+class TReturningStmt = TReturnStmt or TBreakStmt or TNextStmt;
+
+class TParameter =
+ TPatternParameter or TBlockParameter or THashSplatParameter or TKeywordParameter or
+ TOptionalParameter or TSplatParameter or TForwardParameter;
+
+class TPatternParameter = TSimpleParameter or TTuplePatternParameter;
+
+class TNamedParameter =
+ TSimpleParameter or TBlockParameter or THashSplatParameter or TKeywordParameter or
+ TOptionalParameter or TSplatParameter; // TParameter without tuple-pattern and forward parameters
+
+class TTuplePattern = TTuplePatternParameter or TDestructuredLeftAssignment or TLeftAssignmentList;
+
+class TVariableAccess =
+ TLocalVariableAccess or TGlobalVariableAccess or TInstanceVariableAccess or TClassVariableAccess;
+
+class TLocalVariableAccess = TLocalVariableAccessReal or TLocalVariableAccessSynth;
+
+class TGlobalVariableAccess = TGlobalVariableAccessReal or TGlobalVariableAccessSynth;
+
+class TInstanceVariableAccess = TInstanceVariableAccessReal or TInstanceVariableAccessSynth;
+
+class TClassVariableAccess = TClassVariableAccessReal or TClassVariableAccessSynth;
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Call.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Call.qll
new file mode 100644
index 00000000000..43681e1d58f
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Call.qll
@@ -0,0 +1,186 @@
+private import TreeSitter
+private import Variable
+private import codeql.ruby.AST
+private import codeql.ruby.ast.internal.AST
+
+predicate isIdentifierMethodCall(Ruby::Identifier g) { not access(g, _) and vcall(g) }
+
+predicate isRegularMethodCall(Ruby::Call g) { not g.getMethod() instanceof Ruby::Super }
+
+/** Holds if `i` is the name of scope resolution `g`, and `g` is not the method of any call. */
+predicate isScopeResolutionMethodCall(Ruby::ScopeResolution g, Ruby::Identifier i) {
+  g.getName() = i and not g = any(Ruby::Call c).getMethod()
+}
+
+abstract class CallImpl extends Expr, TCall {
+ abstract AstNode getArgumentImpl(int n); // the argument at index `n`
+
+ /**
+ * Gets the number of arguments of this call. This cannot be defined as
+ *
+ * ```ql
+ * result = count(this.getArgumentImpl(_))
+ * ```
+ *
+ * since that will result in a non-monotonicity error.
+ */
+ abstract int getNumberOfArgumentsImpl();
+}
+
+abstract class MethodCallImpl extends CallImpl, TMethodCall {
+ abstract AstNode getReceiverImpl(); // no result for calls without a receiver (e.g. `super`)
+
+ abstract string getMethodNameImpl();
+
+ abstract Block getBlockImpl(); // no result for calls without a block
+}
+
+/** A synthesized method call: child 0 is the receiver, children 1 and up are the arguments. */
+class MethodCallSynth extends MethodCallImpl, TMethodCallSynth {
+  final override AstNode getReceiverImpl() { synthChild(this, 0, result) }
+
+  final override string getMethodNameImpl() {
+    exists(boolean isSetter, string base | this = TMethodCallSynth(_, _, base, isSetter, _) |
+      // setter calls get `=` appended to the stored name
+      if isSetter = true then result = base + "=" else result = base
+    )
+  }
+
+  final override int getNumberOfArgumentsImpl() { this = TMethodCallSynth(_, _, _, _, result) }
+
+  final override AstNode getArgumentImpl(int n) { n >= 0 and synthChild(this, n + 1, result) }
+
+  final override Block getBlockImpl() { none() }
+}
+
+class IdentifierMethodCall extends MethodCallImpl, TIdentifierMethodCall {
+  private Ruby::Identifier ident; // the identifier that is treated as a call
+
+  IdentifierMethodCall() { TIdentifierMethodCall(ident) = this }
+
+  final override AstNode getReceiverImpl() { TSelfSynth(this, 0) = result }
+
+  final override string getMethodNameImpl() { ident.getValue() = result }
+
+  final override int getNumberOfArgumentsImpl() { result = 0 }
+
+  final override Expr getArgumentImpl(int n) { none() }
+
+  final override Block getBlockImpl() { none() }
+}
+
+class ScopeResolutionMethodCall extends MethodCallImpl, TScopeResolutionMethodCall {
+  private Ruby::ScopeResolution scopeRes; // supplies the receiver (its scope)
+  private Ruby::Identifier methodName; // supplies the method name
+
+  ScopeResolutionMethodCall() { TScopeResolutionMethodCall(scopeRes, methodName) = this }
+
+  final override Expr getReceiverImpl() { scopeRes.getScope() = toGenerated(result) }
+
+  final override string getMethodNameImpl() { methodName.getValue() = result }
+
+  final override int getNumberOfArgumentsImpl() { result = 0 }
+
+  final override Expr getArgumentImpl(int n) { none() }
+
+  final override Block getBlockImpl() { none() }
+}
+
+class RegularMethodCall extends MethodCallImpl, TRegularMethodCall {
+ private Ruby::Call g;
+
+ RegularMethodCall() { this = TRegularMethodCall(g) }
+
+ final override Expr getReceiverImpl() {
+ toGenerated(result) = g.getReceiver()
+ or
+ not exists(g.getReceiver()) and
+ toGenerated(result) = g.getMethod().(Ruby::ScopeResolution).getScope()
+ or
+ result = TSelfSynth(this, 0) // a synthesized `self`, where one exists for this call
+ }
+
+ final override string getMethodNameImpl() {
+ isRegularMethodCall(g) and
+ (
+ result = "call" and g.getMethod() instanceof Ruby::ArgumentList // presumably `recv.(args)` — TODO confirm
+ or
+ result = g.getMethod().(Ruby::Token).getValue()
+ or
+ result = g.getMethod().(Ruby::ScopeResolution).getName().(Ruby::Token).getValue()
+ )
+ }
+
+ final override Expr getArgumentImpl(int n) {
+ toGenerated(result) = g.getArguments().getChild(n)
+ or
+ toGenerated(result) = g.getMethod().(Ruby::ArgumentList).getChild(n) // arguments held in the method slot
+ }
+
+ final override int getNumberOfArgumentsImpl() {
+ result =
+ count(g.getArguments().getChild(_)) + count(g.getMethod().(Ruby::ArgumentList).getChild(_))
+ }
+
+ final override Block getBlockImpl() { toGenerated(result) = g.getBlock() }
+}
+
+class ElementReferenceImpl extends MethodCallImpl, TElementReference {
+  private Ruby::ElementReference ref; // the underlying element-reference node
+
+  ElementReferenceImpl() { TElementReference(ref) = this }
+
+  final override string getMethodNameImpl() { result = "[]" }
+
+  final override Expr getReceiverImpl() { ref.getObject() = toGenerated(result) }
+
+  final override int getNumberOfArgumentsImpl() { count(ref.getChild(_)) = result }
+
+  final override Expr getArgumentImpl(int n) { ref.getChild(n) = toGenerated(result) }
+
+  final override Block getBlockImpl() { none() }
+}
+
+abstract class SuperCallImpl extends MethodCallImpl, TSuperCall { } // common base of both `super` call forms
+
+class TokenSuperCall extends SuperCallImpl, TTokenSuperCall {
+  private Ruby::Super superToken; // the `super` token itself
+
+  TokenSuperCall() { TTokenSuperCall(superToken) = this }
+
+  final override string getMethodNameImpl() { superToken.getValue() = result }
+
+  final override Expr getReceiverImpl() { none() }
+
+  final override int getNumberOfArgumentsImpl() { result = 0 }
+
+  final override Expr getArgumentImpl(int n) { none() }
+
+  final override Block getBlockImpl() { none() }
+}
+
+class RegularSuperCall extends SuperCallImpl, TRegularSuperCall {
+  private Ruby::Call c; // a call node; its method is read as a `super` token below
+
+  RegularSuperCall() { TRegularSuperCall(c) = this }
+
+  final override string getMethodNameImpl() { c.getMethod().(Ruby::Super).getValue() = result }
+
+  final override Expr getReceiverImpl() { none() }
+
+  final override int getNumberOfArgumentsImpl() { count(c.getArguments().getChild(_)) = result }
+
+  final override Expr getArgumentImpl(int n) { c.getArguments().getChild(n) = toGenerated(result) }
+
+  final override Block getBlockImpl() { c.getBlock() = toGenerated(result) }
+}
+
+class YieldCallImpl extends CallImpl, TYieldCall {
+  Ruby::Yield g;
+
+  YieldCallImpl() { TYieldCall(g) = this }
+
+  final override int getNumberOfArgumentsImpl() { count(g.getChild().getChild(_)) = result }
+
+  final override Expr getArgumentImpl(int n) { g.getChild().getChild(n) = toGenerated(result) }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Erb.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Erb.qll
new file mode 100644
index 00000000000..7a69bf5b783
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Erb.qll
@@ -0,0 +1,43 @@
+import codeql.Locations
+private import TreeSitter
+private import codeql.ruby.ast.Erb
+
+cached
+private module Cached {
+  cached
+  newtype TAstNode =
+    TCommentDirective(Erb::CommentDirective directive) or
+    TDirective(Erb::Directive directive) or
+    TGraphqlDirective(Erb::GraphqlDirective directive) or
+    TOutputDirective(Erb::OutputDirective directive) or
+    TTemplate(Erb::Template template) or
+    TToken(Erb::Token token) or
+    TComment(Erb::Comment comment) or
+    TCode(Erb::Code code)
+
+  /**
+   * Gets the TreeSitter node that the erb AST node `n` wraps.
+   */
+  cached
+  Erb::AstNode toGenerated(ErbAstNode n) {
+    TCommentDirective(result) = n or
+    TDirective(result) = n or
+    TGraphqlDirective(result) = n or
+    TOutputDirective(result) = n or
+    TTemplate(result) = n or
+    TToken(result) = n or
+    TComment(result) = n or
+    TCode(result) = n
+  }
+
+  cached
+  Location getLocation(ErbAstNode n) { toGenerated(n).getLocation() = result }
+}
+
+import Cached
+
+TAstNode fromGenerated(Erb::AstNode n) { toGenerated(result) = n }
+
+class TDirectiveNode = TDirective or TCommentDirective or TGraphqlDirective or TOutputDirective;
+
+class TTokenNode = TToken or TCode or TComment;
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Module.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Module.qll
new file mode 100644
index 00000000000..247573b59e5
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Module.qll
@@ -0,0 +1,409 @@
+private import codeql.Locations
+private import codeql.ruby.AST
+private import codeql.ruby.ast.Call
+private import codeql.ruby.ast.Constant
+private import codeql.ruby.ast.Expr
+private import codeql.ruby.ast.Module
+private import codeql.ruby.ast.Operation
+private import codeql.ruby.ast.Scope
+
+/** Gets the name of one of the built-in modules and classes known to this library. */
+private string builtin() {
+  [
+    "Object", "Kernel", "BasicObject", "Class", "Module", "NilClass", "FalseClass",
+    "TrueClass", "Numeric", "Integer", "Float", "Rational", "Complex", "Array", "Hash",
+    "Symbol", "Proc"
+  ] = result
+}
+
+cached
+private module Cached {
+ cached
+ newtype TModule =
+   // A module identified by its qualified name: either a built-in or a name
+   // that `namespaceDeclaration` computes for some namespace.
+   TResolved(string qName) { qName = [builtin(), namespaceDeclaration(_)] } or
+   // A namespace for which `namespaceDeclaration` computes no name; it is
+   // identified by the declaration itself.
+   TUnresolved(Namespace n) { not exists(namespaceDeclaration(n)) }
+
+ /** Gets a qualified name for `n`, i.e. its name prefixed by that of its enclosing scope, if any. */
+ cached
+ string namespaceDeclaration(Namespace n) {
+   result = n.getName() and isToplevel(n)
+   or
+   not isToplevel(n) and
+   not exists(n.getScopeExpr()) and
+   result = scopeAppend(namespaceDeclaration(n.getEnclosingModule()), n.getName())
+   or
+   exists(string outer | resolveScopeExpr(n.getScopeExpr()) = TResolved(outer) |
+     result = scopeAppend(outer, n.getName())
+   )
+ }
+
+ /** Gets a superclass of `cls`. Built-in classes have theirs hard-coded. */
+ cached
+ Module getSuperClass(Module cls) {
+   cls = TResolved("Object") and result = TResolved("BasicObject")
+   or
+   cls = TResolved(["Module", "Numeric", "Array", "Hash", "FalseClass", "TrueClass", "NilClass"]) and
+   result = TResolved("Object")
+   or
+   // `Symbol` and `Proc` are built-ins as well, so they also need a hard-coded superclass.
+   cls = TResolved(["Symbol", "Proc"]) and result = TResolved("Object")
+   or
+   cls = TResolved(["Integer", "Float", "Rational", "Complex"]) and result = TResolved("Numeric")
+   or
+   cls = TResolved("Class") and result = TResolved("Module")
+   or
+   // Otherwise: a declared superclass, or `Object` if no declaration has a resolvable one.
+   not cls = TResolved(builtin()) and
+   (
+     result = resolveScopeExpr(cls.getADeclaration().(ClassDeclaration).getSuperclassExpr())
+     or
+     result = TResolved("Object") and
+     forex(ClassDeclaration d | d = cls.getADeclaration() |
+       not exists(resolveScopeExpr(d.getSuperclassExpr()))
+     )
+   )
+ }
+
+ /** Gets a module that `m` includes (`Object` always includes `Kernel`). */
+ cached
+ Module getAnIncludedModule(Module m) {
+   result = TResolved("Kernel") and m = TResolved("Object")
+   or
+   exists(IncludeOrPrependCall call | call.getMethodName() = "include" |
+     result = resolveScopeExpr(call.getAnArgument()) and
+     (
+       resolveScopeExpr(call.getReceiver()) = m
+       or
+       call.getReceiver() instanceof Self and
+       enclosingModule(call).getModule() = m
+     )
+   )
+ }
+
+ /** Gets a module that `m` prepends. */
+ cached
+ Module getAPrependedModule(Module m) {
+   exists(IncludeOrPrependCall call | call.getMethodName() = "prepend" |
+     result = resolveScopeExpr(call.getAnArgument()) and
+     (
+       resolveScopeExpr(call.getReceiver()) = m
+       or
+       call.getReceiver() instanceof Self and
+       enclosingModule(call).getModule() = m
+     )
+   )
+ }
+
+ /**
+  * Gets the resolved module whose qualified name is `resolveConstant(r)`.
+  */
+ cached
+ TResolved resolveScopeExpr(ConstantReadAccess r) {
+   result = TResolved(resolveConstant(r))
+ }
+
+ /**
+ * Resolve constant access (class, module or otherwise) to a qualified module name.
+ * This picks the best (lowest priority number) result of `resolveScopeExpr/2`
+ * that names a defined constant. That name is returned as is; if it is also the
+ * name of a constant assignment, then the resolution of the right-hand side of
+ * that assignment is returned too.
+ */
+ cached
+ string resolveConstant(ConstantReadAccess r) {
+ exists(string qname |
+ qname =
+ min(string qn, int p |
+ isDefinedConstant(qn) and
+ qn = resolveScopeExpr(r, p) and
+ // prevent classes/modules that contain/extend themselves
+ not exists(ConstantWriteAccess w | qn = constantDefinition0(w) |
+ r = w.getScopeExpr()
+ or
+ r = w.(ClassDeclaration).getSuperclassExpr()
+ )
+ |
+ qn order by p
+ )
+ |
+ result = qname
+ or
+ exists(ConstantAssignment a |
+ qname = constantDefinition0(a) and
+ result = resolveConstant(a.getParent().(Assignment).getRightOperand())
+ )
+ )
+ }
+
+ cached
+ Method lookupMethod(Module m, string name) { lookupMethodOrConst(m, name) = TMethod(result) }
+
+ /** Gets an expression for constant `name` in `m`: via lookup, or the value assigned to it. */
+ cached
+ Expr lookupConst(Module m, string name) {
+   lookupMethodOrConst(m, name) = TExpr(result)
+   or
+   exists(AssignExpr assign, ConstantWriteAccess lhs | lhs = assign.getLeftOperand() |
+     lhs.getName() = name and
+     resolveScopeExpr(lhs.getScopeExpr()) = m and
+     assign.getRightOperand() = result
+   )
+ }
+}
+
+import Cached
+
+/** Holds if `n` has no scope expression and is globally scoped or enclosed by a `Toplevel`. */
+private predicate isToplevel(ConstantAccess n) {
+  not exists(n.getScopeExpr()) and
+  (
+    n.getEnclosingModule() instanceof Toplevel or
+    n.hasGlobalScope()
+  )
+}
+
+private predicate isDefinedConstant(string qualifiedModuleName) {
+ qualifiedModuleName = [builtin(), constantDefinition0(_)] // a built-in, or the name of some constant write
+}
+
+private int maxDepth() { result = 1 + max(int level | exists(enclosing(_, level))) } // exceeds every nesting level
+
+private ModuleBase enclosing(ModuleBase m, int level) {
+ result = m and level = 0 // level 0 is `m` itself
+ or
+ result = enclosing(m.getEnclosingModule(), level - 1) // each enclosing module adds one level
+}
+
+/** Gets the namespace `priority` levels out from `c`'s enclosing module; `name` is `c`'s name. */
+pragma[noinline]
+private Namespace enclosingNameSpaceConstantReadAccess(
+  ConstantReadAccess c, int priority, string name
+) {
+  c.getName() = name and enclosing(c.getEnclosingModule(), priority) = result
+}
+
+/**
+ * Resolve constant read access (typically a scope expression) to a qualified name. The
+ * `priority` value indicates the precedence of the solution with respect to the lookup order.
+ * A constant name without scope specifier is resolved against its enclosing modules (inner-most first);
+ * if the constant is not found in any of the enclosing modules, then the constant will be resolved
+ * with respect to the ancestors (prepends, includes, super classes, and their ancestors) of the
+ * directly enclosing module.
+ */
+private string resolveScopeExpr(ConstantReadAccess c, int priority) {
+ c.hasGlobalScope() and result = c.getName() and priority = 0 // globally scoped: the name itself, best priority
+ or
+ exists(string name |
+ result = qualifiedModuleName(resolveScopeExprConstantReadAccess(c, priority, name), name)
+ )
+ or
+ not exists(c.getScopeExpr()) and
+ not c.hasGlobalScope() and
+ (
+ exists(string name |
+ exists(Namespace n |
+ n = enclosingNameSpaceConstantReadAccess(c, priority, name) and // priority = nesting distance to `n`
+ result = qualifiedModuleName(constantDefinition0(n), name)
+ )
+ or
+ result =
+ qualifiedModuleName(ancestors(qualifiedModuleNameConstantReadAccess(c, name),
+ priority - maxDepth()), name) // ancestor kinds 0..3 rank after every enclosing namespace
+ )
+ or
+ priority = maxDepth() + 4 and // last resort: the bare name of `c`
+ qualifiedModuleNameConstantReadAccess(c, result) != "BasicObject"
+ )
+}
+
+/** Gets a resolution of the scope expression of `c` at `priority`; `name` is the name of `c`. */
+pragma[nomagic]
+private string resolveScopeExprConstantReadAccess(ConstantReadAccess c, int priority, string name) {
+  c.getName() = name and resolveScopeExpr(c.getScopeExpr(), priority) = result
+}
+
+bindingset[qualifier, name]
+private string scopeAppend(string qualifier, string name) {
+  qualifier = "Object" and result = name or qualifier != "Object" and result = qualifier + "::" + name
+}
+
+private string qualifiedModuleName(ModuleBase m) {
+  m instanceof Toplevel and result = "Object"
+  or
+  constantDefinition0(m) = result
+}
+
+/** Gets a qualified name of the module enclosing `c`; `name` is the name of `c`. */
+pragma[noinline]
+private string qualifiedModuleNameConstantWriteAccess(ConstantWriteAccess c, string name) {
+  c.getName() = name and qualifiedModuleName(c.getEnclosingModule()) = result
+}
+
+/** Gets a qualified name of the module enclosing `c`; `name` is the name of `c`. */
+pragma[noinline]
+private string qualifiedModuleNameConstantReadAccess(ConstantReadAccess c, string name) {
+  c.getName() = name and qualifiedModuleName(c.getEnclosingModule()) = result
+}
+
+/**
+ * Gets a qualified name for the constant written by `c`. There can be several,
+ * because scope resolutions are over-approximated and lookup order precedence
+ * is ignored; honoring the lookup order here would make the recursion
+ * non-monotonic.
+ */
+private string constantDefinition0(ConstantWriteAccess c) {
+  result = c.getName() and c.hasGlobalScope()
+  or
+  not c.hasGlobalScope() and
+  not exists(c.getScopeExpr()) and
+  exists(string name | result = scopeAppend(qualifiedModuleNameConstantWriteAccess(c, name), name))
+  or
+  result = scopeAppend(resolveScopeExpr(c.getScopeExpr(), _), c.getName())
+}
+
+/**
+ * The qualified names of the ancestors of a class/module. The ancestors should be an ordered list
+ * of the ancestors of `prepend`ed modules, the module itself, the ancestors of `include`d modules
+ * and the ancestors of the super class. The priority value only distinguishes the kind of ancestor;
+ * it does not order the ancestors within a group of the same kind. This is an over-approximation; however,
+ * computing the precise order is tricky because it depends on the evaluation/file loading order.
+ */
+// TODO: the order of super classes can be determined more precisely even without knowing the evaluation
+// order, so we should be able to make this more precise.
+private string ancestors(string qname, int priority) {
+ result = ancestors(prepends(qname), _) and priority = 0
+ or
+ result = qname and priority = 1 and isDefinedConstant(qname)
+ or
+ result = ancestors(includes(qname), _) and priority = 2
+ or
+ result = ancestors(superclass(qname), _) and priority = 3
+}
+
+/** A call to `include` or `prepend`. */
+private class IncludeOrPrependCall extends MethodCall {
+  IncludeOrPrependCall() { this.getMethodName() = ["prepend", "include"] }
+
+  /** Gets a qualified name that an argument of this call may resolve to. */
+  string getAModule() { resolveScopeExpr(this.getAnArgument(), _) = result }
+
+  /** Gets a qualified name of the module that this call includes or prepends into. */
+  string getTarget() {
+    resolveScopeExpr(this.getReceiver(), _) = result
+    or
+    // no receiver, or a `self` receiver: the target is the enclosing module
+    (not exists(this.getReceiver()) or this.getReceiver() instanceof Self) and
+    qualifiedModuleName(enclosingModule(this)) = result
+  }
+}
+
+/**
+ * A variant of AstNode::getEnclosingModule that excludes
+ * results that are enclosed in a block. This is a bit wrong because
+ * it could lead to false negatives. However, `include` statements in
+ * blocks are very rare in normal code. The majority of cases are in calls
+ * to methods like `module_eval` and `RSpec.describe` / `RSpec.context`. These
+ * methods evaluate the block in the context of some other module/class instead of
+ * the enclosing one.
+ */
+private ModuleBase enclosingModule(AstNode node) { result = parent*(node).getParent() }
+
+/** Gets the parent of `n`, unless that parent is a module base or a block. */
+private AstNode parent(AstNode n) {
+  n.getParent() = result and
+  not (result instanceof Block or result instanceof ModuleBase)
+}
+
+/** Gets a qualified name of a module prepended to the module named `qname`. */
+private string prepends(string qname) {
+  exists(IncludeOrPrependCall call | call.getMethodName() = "prepend" |
+    call.getTarget() = qname and
+    call.getAModule() = result
+  )
+}
+
+/** Gets a qualified name of a module included into the module named `qname`. */
+private string includes(string qname) {
+  // `Object` always includes `Kernel`
+  result = "Kernel" and qname = "Object"
+  or
+  exists(IncludeOrPrependCall call | call.getMethodName() = "include" |
+    call.getTarget() = qname and
+    call.getAModule() = result
+  )
+}
+
+private Expr superexpr(string qname) {
+  result = any(ClassDeclaration decl | qname = constantDefinition0(decl)).getSuperclassExpr()
+}
+
+private string superclass(string qname) {
+  resolveScopeExpr(superexpr(qname), _) = result
+  or
+  result = "BasicObject" and qname = "Object"
+}
+
+private string qualifiedModuleName(string container, string name) {
+  isDefinedConstant(result) and
+  (
+    name = result and container = "Object" // the whole name, viewed as a member of `Object`
+    or
+    result.regexpCapture("(.+)::([^:]+)", 1) = container and
+    result.regexpCapture("(.+)::([^:]+)", 2) = name
+  )
+}
+
+private Module getAncestors(Module m) {
+  result = getAncestors([m.getAPrependedModule(), m.getAnIncludedModule()])
+  or
+  result = m
+}
+
+private newtype TMethodOrExpr =
+  TMethod(Method method) or
+  TExpr(Expr expr)
+
+private TMethodOrExpr getMethodOrConst(TModule owner, string name) {
+  exists(ModuleBase base | owner = base.getModule() |
+    TExpr(base.getConstant(name)) = result
+    or
+    TMethod(base.getMethod(name)) = result
+  )
+}
+
+module ExposedForTestingOnly {
+  Method getMethod(TModule owner, string name) { getMethodOrConst(owner, name) = TMethod(result) }
+
+  Expr getConst(TModule owner, string name) { getMethodOrConst(owner, name) = TExpr(result) }
+}
+
+private TMethodOrExpr lookupMethodOrConst0(Module m, string name) {
+ result = lookupMethodOrConst0(m.getAPrependedModule(), name) // prepended modules are searched first
+ or
+ not exists(getMethodOrConst(getAncestors(m.getAPrependedModule()), name)) and
+ (
+ result = getMethodOrConst(m, name) // then `m` itself
+ or
+ not exists(getMethodOrConst(m, name)) and
+ result = lookupMethodOrConst0(m.getAnIncludedModule(), name) // and finally included modules
+ )
+}
+
+private AstNode getNode(TMethodOrExpr e) { TExpr(result) = e or TMethod(result) = e }
+
+private TMethodOrExpr lookupMethodOrConst(Module m, string name) {
+ result = lookupMethodOrConst0(m, name) // prepends, `m` itself and includes win over superclasses
+ or
+ not exists(lookupMethodOrConst0(m, name)) and
+ result = lookupMethodOrConst(m.getSuperClass(), name) and
+ // For now, we restrict the scope of top-level declarations to their file.
+ // This may remove some plausible targets, but also removes a lot of
+ // implausible targets.
+ if getNode(result).getEnclosingModule() instanceof Toplevel
+ then getNode(result).getFile() = m.getADeclaration().getFile()
+ else any()
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Operation.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Operation.qll
new file mode 100644
index 00000000000..3571c97e9dc
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Operation.qll
@@ -0,0 +1,198 @@
+private import codeql.ruby.AST
+private import AST
+private import TreeSitter
+private import Call
+
+abstract class OperationImpl extends Expr, TOperation {
+ abstract string getOperatorImpl();
+ /** Gets an operand of this operation. */
+ abstract Expr getAnOperandImpl();
+}
+
+/**
+ * A unary operation, modeled as a call to the operator method on its operand without arguments.
+ */
+abstract class UnaryOperationImpl extends OperationImpl, MethodCallImpl, TUnaryOperation {
+  abstract Expr getOperandImpl();
+
+  final override Expr getAnOperandImpl() { result = this.getOperandImpl() }
+
+  // Method-call view: the operand is the receiver; there are no arguments and no block.
+  final override AstNode getReceiverImpl() { result = this.getOperandImpl() }
+  final override string getMethodNameImpl() { result = this.getOperatorImpl() }
+  final override int getNumberOfArgumentsImpl() { result = 0 }
+  final override Expr getArgumentImpl(int n) { none() }
+  final override Block getBlockImpl() { none() }
+}
+
+/** A unary operation backed by a `Ruby::Unary` node of the parse tree. */
+class UnaryOperationGenerated extends UnaryOperationImpl {
+  private Ruby::Unary g;
+
+  UnaryOperationGenerated() { toGenerated(this) = g }
+
+  final override string getOperatorImpl() { result = g.getOperator() }
+  final override Expr getOperandImpl() { g.getOperand() = toGenerated(result) }
+}
+
+/** A splat expression (operator `*`) backed by a `Ruby::SplatArgument` node. */
+class SplatExprReal extends UnaryOperationImpl, TSplatExprReal {
+  private Ruby::SplatArgument g;
+
+  SplatExprReal() { TSplatExprReal(g) = this }
+
+  final override Expr getOperandImpl() { g.getChild() = toGenerated(result) }
+  final override string getOperatorImpl() { result = "*" }
+}
+
+/** A synthesized splat expression (operator `*`); its operand is synthesized child 0. */
+class SplatExprSynth extends UnaryOperationImpl, TSplatExprSynth {
+  final override Expr getOperandImpl() { synthChild(this, 0, result) }
+  final override string getOperatorImpl() { result = "*" }
+}
+
+/** A hash splat expression (operator `**`) backed by a `Ruby::HashSplatArgument` node. */
+class HashSplatExprImpl extends UnaryOperationImpl, THashSplatExpr {
+  private Ruby::HashSplatArgument g;
+
+  HashSplatExprImpl() { THashSplatExpr(g) = this }
+
+  final override string getOperatorImpl() { result = "**" }
+  final override Expr getOperandImpl() { g.getChild() = toGenerated(result) }
+}
+
+/**
+ * A binary operation, modeled as a call to the operator method on the left operand with the
+ * right operand as its only argument.
+ */
+abstract class BinaryOperationImpl extends OperationImpl, MethodCallImpl, TBinaryOperation {
+  abstract Stmt getLeftOperandImpl();
+
+  abstract Stmt getRightOperandImpl();
+
+  final override Expr getAnOperandImpl() {
+    result = [this.getLeftOperandImpl(), this.getRightOperandImpl()]
+  }
+
+  // Method-call view: `left.op(right)`, never with a block.
+  final override AstNode getReceiverImpl() { result = this.getLeftOperandImpl() }
+  final override string getMethodNameImpl() { result = this.getOperatorImpl() }
+
+  final override int getNumberOfArgumentsImpl() { result = 1 }
+  final override Expr getArgumentImpl(int n) { n = 0 and result = this.getRightOperandImpl() }
+  final override Block getBlockImpl() { none() }
+}
+
+/** A binary operation backed by a `Ruby::Binary` node of the parse tree. */
+class BinaryOperationReal extends BinaryOperationImpl {
+  private Ruby::Binary g;
+
+  BinaryOperationReal() { toGenerated(this) = g }
+
+  final override string getOperatorImpl() { result = g.getOperator() }
+
+  final override Stmt getLeftOperandImpl() { g.getLeft() = toGenerated(result) }
+  final override Stmt getRightOperandImpl() { g.getRight() = toGenerated(result) }
+}
+
+abstract class BinaryOperationSynth extends BinaryOperationImpl {
+ final override Stmt getLeftOperandImpl() { synthChild(this, 0, result) }
+ // Operands are synthesized children: index 0 is the left operand, index 1 the right one.
+ final override Stmt getRightOperandImpl() { synthChild(this, 1, result) }
+}
+/** A synthesized `+` operation. */
+class AddExprSynth extends BinaryOperationSynth, TAddExprSynth {
+ final override string getOperatorImpl() { result = "+" }
+}
+/** A synthesized `-` operation. */
+class SubExprSynth extends BinaryOperationSynth, TSubExprSynth {
+ final override string getOperatorImpl() { result = "-" }
+}
+/** A synthesized `*` operation. */
+class MulExprSynth extends BinaryOperationSynth, TMulExprSynth {
+ final override string getOperatorImpl() { result = "*" }
+}
+/** A synthesized `/` operation. */
+class DivExprSynth extends BinaryOperationSynth, TDivExprSynth {
+ final override string getOperatorImpl() { result = "/" }
+}
+/** A synthesized `%` operation. */
+class ModuloExprSynth extends BinaryOperationSynth, TModuloExprSynth {
+ final override string getOperatorImpl() { result = "%" }
+}
+/** A synthesized `**` operation. */
+class ExponentExprSynth extends BinaryOperationSynth, TExponentExprSynth {
+ final override string getOperatorImpl() { result = "**" }
+}
+/** A synthesized `&&` operation. */
+class LogicalAndExprSynth extends BinaryOperationSynth, TLogicalAndExprSynth {
+ final override string getOperatorImpl() { result = "&&" }
+}
+/** A synthesized `||` operation. */
+class LogicalOrExprSynth extends BinaryOperationSynth, TLogicalOrExprSynth {
+ final override string getOperatorImpl() { result = "||" }
+}
+/** A synthesized `<<` operation. */
+class LShiftExprSynth extends BinaryOperationSynth, TLShiftExprSynth {
+ final override string getOperatorImpl() { result = "<<" }
+}
+/** A synthesized `>>` operation. */
+class RShiftExprSynth extends BinaryOperationSynth, TRShiftExprSynth {
+ final override string getOperatorImpl() { result = ">>" }
+}
+/** A synthesized `&` operation. */
+class BitwiseAndSynthExpr extends BinaryOperationSynth, TBitwiseAndExprSynth {
+ final override string getOperatorImpl() { result = "&" }
+}
+/** A synthesized `|` operation. */
+class BitwiseOrSynthExpr extends BinaryOperationSynth, TBitwiseOrExprSynth {
+ final override string getOperatorImpl() { result = "|" }
+}
+/** A synthesized `^` operation. */
+class BitwiseXorSynthExpr extends BinaryOperationSynth, TBitwiseXorExprSynth {
+ final override string getOperatorImpl() { result = "^" }
+}
+
+/** An assignment, viewed as an operation on a left-hand pattern and a right-hand expression. */
+abstract class AssignmentImpl extends OperationImpl, TAssignment {
+  abstract Pattern getLeftOperandImpl();
+
+  abstract Expr getRightOperandImpl();
+
+  final override Expr getAnOperandImpl() {
+    result = this.getLeftOperandImpl() or
+    result = this.getRightOperandImpl()
+  }
+}
+
+/** An assignment (operator `=`) backed by a `Ruby::Assignment` node of the parse tree. */
+class AssignExprReal extends AssignmentImpl, TAssignExprReal {
+  private Ruby::Assignment g;
+
+  AssignExprReal() { TAssignExprReal(g) = this }
+
+  final override string getOperatorImpl() { result = "=" }
+
+  final override Pattern getLeftOperandImpl() { g.getLeft() = toGenerated(result) }
+  final override Expr getRightOperandImpl() { g.getRight() = toGenerated(result) }
+}
+
+/** A synthesized assignment (`=`): synthesized child 0 is the left operand, child 1 the right. */
+class AssignExprSynth extends AssignmentImpl, TAssignExprSynth {
+  final override Pattern getLeftOperandImpl() { synthChild(this, 0, result) }
+  final override Expr getRightOperandImpl() { synthChild(this, 1, result) }
+
+  final override string getOperatorImpl() { result = "=" }
+}
+
+/** An operator assignment backed by a `Ruby::OperatorAssignment` node of the parse tree. */
+class AssignOperationImpl extends AssignmentImpl, TAssignOperation {
+  Ruby::OperatorAssignment g;
+
+  AssignOperationImpl() { toGenerated(this) = g }
+
+  final override string getOperatorImpl() { result = g.getOperator() }
+
+  final override Pattern getLeftOperandImpl() { g.getLeft() = toGenerated(result) }
+  final override Expr getRightOperandImpl() { g.getRight() = toGenerated(result) }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Parameter.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Parameter.qll
new file mode 100644
index 00000000000..f888d89c1ac
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Parameter.qll
@@ -0,0 +1,19 @@
+private import codeql.ruby.AST
+private import AST
+private import TreeSitter
+
+module Parameter {
+  /** A child of a block, method, or lambda parameter list, together with its position. */
+  class Range extends Ruby::AstNode {
+    private int pos;
+    Range() {
+      exists(Ruby::BlockParameters bp | this = bp.getChild(pos))
+      or
+      exists(Ruby::MethodParameters mp | this = mp.getChild(pos))
+      or
+      exists(Ruby::LambdaParameters lp | this = lp.getChild(pos))
+    }
+    /** Gets the index of this node within its parameter list. */
+    int getPosition() { result = pos }
+  }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Pattern.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Pattern.qll
new file mode 100644
index 00000000000..ce18e77f222
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Pattern.qll
@@ -0,0 +1,32 @@
+private import codeql.ruby.AST
+private import AST
+private import TreeSitter
+
+abstract class TuplePatternImpl extends Ruby::AstNode {
+  abstract Ruby::AstNode getChildNode(int i);
+  /** Gets the index of the `Ruby::RestAssignment` child node, if there is exactly one. */
+  final int getRestIndex() {
+    result = unique(int idx | this.getChildNode(idx) instanceof Ruby::RestAssignment | idx)
+  }
+}
+
+class TuplePatternParameterImpl extends TuplePatternImpl, Ruby::DestructuredParameter {
+ override Ruby::AstNode getChildNode(int i) { result = this.getChild(i) }
+}
+/** A destructured left assignment as a tuple pattern; its child nodes are its direct children. */
+class DestructuredLeftAssignmentImpl extends TuplePatternImpl, Ruby::DestructuredLeftAssignment {
+ override Ruby::AstNode getChildNode(int i) { result = this.getChild(i) }
+}
+
+/** A left assignment list; a sole destructured child is unwrapped and its children are used. */
+class LeftAssignmentListImpl extends TuplePatternImpl, Ruby::LeftAssignmentList {
+  override Ruby::AstNode getChildNode(int i) {
+    exists(Ruby::LeftAssignmentList lal | lal = this |
+      if
+        lal.getChild(0) instanceof Ruby::DestructuredLeftAssignment and
+        strictcount(int j | exists(lal.getChild(j))) = 1
+      then result = lal.getChild(0).(Ruby::DestructuredLeftAssignment).getChild(i)
+      else result = lal.getChild(i)
+    )
+  }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Scope.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Scope.qll
new file mode 100644
index 00000000000..1cc64fac885
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Scope.qll
@@ -0,0 +1,109 @@
+private import TreeSitter
+private import codeql.ruby.ast.Scope
+private import codeql.ruby.ast.internal.AST
+private import codeql.ruby.ast.internal.Parameter
+
+class TScopeType = TMethodBase or TModuleLike or TBlockLike;
+/** The block-like scope types: do-blocks, lambdas, blocks and end-blocks. */
+private class TBlockLike = TDoBlock or TLambda or TBlock or TEndBlock;
+/** The module-like scope types: top-levels, module/class declarations and singleton classes. */
+private class TModuleLike = TToplevel or TModuleDeclaration or TClassDeclaration or TSingletonClass;
+
+module Scope {
+  class TypeRange = Callable::TypeRange or ModuleBase::TypeRange or @ruby_end_block;
+  /** A tree-sitter node that opens a scope; the body of a lambda is excluded. */
+  class Range extends Ruby::AstNode, TypeRange {
+    Range() { not exists(Ruby::Lambda l | this = l.getBody()) }
+
+    /** Gets the scope that `scopeOf` reports for this scope node. */
+    Range getOuterScope() { result = scopeOf(this) }
+    /** Gets this scope if it is a `ModuleBase::Range`, or else the one of its outer scope. */
+    ModuleBase::Range getEnclosingModule() {
+      if this instanceof ModuleBase::Range
+      then result = this
+      else result = this.getOuterScope().getEnclosingModule()
+    }
+    /** Gets the method that is, or encloses, this scope; a `ModuleBase::Range` ends the search. */
+    MethodBase::Range getEnclosingMethod() {
+      result = this
+      or
+      not (this instanceof MethodBase::Range or this instanceof ModuleBase::Range) and
+      result = this.getOuterScope().getEnclosingMethod()
+    }
+  }
+}
+
+module MethodBase {
+ class TypeRange = @ruby_method or @ruby_singleton_method;
+ /** A scope that is a method or a singleton method. */
+ class Range extends Scope::Range, TypeRange { }
+}
+
+module Callable {
+ class TypeRange = MethodBase::TypeRange or @ruby_do_block or @ruby_lambda or @ruby_block;
+ /** A scope with parameters: a method, singleton method, do-block, lambda or block. */
+ class Range extends Scope::Range, TypeRange {
+ Parameter::Range getParameter(int i) {
+ result = this.(Ruby::Method).getParameters().getChild(i) or
+ result = this.(Ruby::SingletonMethod).getParameters().getChild(i) or
+ result = this.(Ruby::DoBlock).getParameters().getChild(i) or
+ result = this.(Ruby::Lambda).getParameters().getChild(i) or
+ result = this.(Ruby::Block).getParameters().getChild(i)
+ }
+ }
+}
+
+module ModuleBase {
+ class TypeRange = @ruby_program or @ruby_module or @ruby_class or @ruby_singleton_class;
+ /** A scope that is a program, module, class or singleton class. */
+ class Range extends Scope::Range, TypeRange { }
+}
+
+/** Holds if `b` is the `i`th heredoc body in file `f`, ordered by start line, then column. */
+pragma[noinline]
+private predicate rankHeredocBody(File f, Ruby::HeredocBody b, int i) {
+  b =
+    rank[i](Ruby::HeredocBody body | f = body.getLocation().getFile() |
+      body order by body.getLocation().getStartLine(), body.getLocation().getStartColumn()
+    )
+}
+
+/** Gets the heredoc body of beginning `g`: the `i`th body and `i`th beginning of a file pair up. */
+Ruby::HeredocBody getHereDocBody(Ruby::HeredocBeginning g) {
+  exists(int i, File f |
+    rankHeredocBody(f, result, i) and
+    g =
+      rank[i](Ruby::HeredocBeginning start |
+        f = start.getLocation().getFile()
+      |
+        start order by start.getLocation().getStartLine(), start.getLocation().getStartColumn()
+      )
+  )
+}
+
+/** Gets the node acting as parent of `n` for scoping; for a heredoc body, also its beginning. */
+private Ruby::AstNode parentOf(Ruby::AstNode n) {
+  n = getHereDocBody(result)
+  or
+  exists(Ruby::AstNode p | p = n.getParent() |
+    // a declaration's name, superclass, value or object is attached to the declaration's parent
+    if
+      n = [
+          p.(Ruby::Module).getName(), p.(Ruby::Class).getName(), p.(Ruby::Class).getSuperclass(),
+          p.(Ruby::SingletonClass).getValue(), p.(Ruby::Method).getName(),
+          p.(Ruby::SingletonMethod).getName(), p.(Ruby::SingletonMethod).getObject()
+        ]
+    then result = p.getParent()
+    else result = p
+  )
+}
+
+/** Gets the innermost scope enclosing node `n`, found by following `parentOf` upwards. */
+cached
+Scope::Range scopeOf(Ruby::AstNode n) {
+ exists(Ruby::AstNode p | p = parentOf(n) |
+ p = result
+ or
+ not p instanceof Scope::Range and result = scopeOf(p)
+ )
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Synthesis.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Synthesis.qll
new file mode 100644
index 00000000000..a8673050148
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Synthesis.qll
@@ -0,0 +1,797 @@
+/** Provides predicates for synthesizing AST nodes. */
+
+private import AST
+private import TreeSitter
+private import codeql.ruby.ast.internal.Call
+private import codeql.ruby.ast.internal.Variable
+private import codeql.ruby.ast.internal.Pattern
+private import codeql.ruby.AST
+
+/** A synthesized AST node kind. */
+newtype SynthKind =
+ AddExprKind() or
+ AssignExprKind() or
+ BitwiseAndExprKind() or
+ BitwiseOrExprKind() or
+ BitwiseXorExprKind() or
+ ClassVariableAccessKind(ClassVariable v) or
+ DivExprKind() or
+ ExponentExprKind() or
+ GlobalVariableAccessKind(GlobalVariable v) or
+ InstanceVariableAccessKind(InstanceVariable v) or
+ IntegerLiteralKind(int i) { i in [-1000 .. 1000] } or
+ LShiftExprKind() or
+ LocalVariableAccessRealKind(LocalVariableReal v) or
+ LocalVariableAccessSynthKind(TLocalVariableSynth v) or
+ LogicalAndExprKind() or
+ LogicalOrExprKind() or
+ MethodCallKind(string name, boolean setter, int arity) {
+ any(Synthesis s).methodCall(name, setter, arity)
+ } or
+ ModuloExprKind() or
+ MulExprKind() or
+ RangeLiteralKind(boolean inclusive) { inclusive in [false, true] } or
+ RShiftExprKind() or
+ SplatExprKind() or
+ StmtSequenceKind() or
+ SelfKind() or
+ SubExprKind() or
+ ConstantReadAccessKind(string value) { any(Synthesis s).constantReadAccess(value) }
+
+/**
+ * An AST child.
+ *
+ * Either a new synthesized node or a reference to an existing node.
+ */
+newtype Child =
+ SynthChild(SynthKind k) or
+ RealChild(AstNode n)
+
+private newtype TSynthesis = MkSynthesis()
+
+/** A class used for synthesizing AST nodes. */
+class Synthesis extends TSynthesis {
+ /**
+ * Holds if a node should be synthesized as the `i`th child of `parent`, or if
+ * a non-synthesized node should be the `i`th child of synthesized node `parent`.
+ *
+ * `i = -1` is used to represent that the synthesized node is a desugared version
+ * of its parent.
+ */
+ predicate child(AstNode parent, int i, Child child) { none() }
+
+ /**
+ * Holds if synthesized node `n` should have location `l`. Synthesized nodes for
+ * which this predicate does not hold, inherit their location (recursively) from
+ * their parent node.
+ */
+ predicate location(AstNode n, Location l) { none() }
+
+ /**
+ * Holds if a local variable, identified by `i`, should be synthesized for AST
+ * node `n`.
+ */
+ predicate localVariable(AstNode n, int i) { none() }
+
+ /**
+ * Holds if a method call to `name` with arity `arity` is needed; `setter` marks `name=` calls.
+ */
+ predicate methodCall(string name, boolean setter, int arity) { none() }
+
+ /**
+ * Holds if a constant read access of `name` is needed.
+ */
+ predicate constantReadAccess(string name) { none() }
+
+ /**
+ * Holds if `n` should be excluded from `ControlFlowTree` in the CFG construction.
+ */
+ predicate excludeFromControlFlowTree(AstNode n) { none() }
+
+ final string toString() { none() }
+}
+
+private class Desugared extends AstNode {
+ Desugared() { this = any(AstNode sugar).getDesugared() }
+ /** Gets this node or a node reachable from it through zero or more `getAChild` steps. */
+ AstNode getADescendant() { result = this.getAChild*() }
+}
+
+/**
+ * Gets the desugaring level of `n`. That is, the number of desugaring
+ * transformations required before the context in which `n` occurs is
+ * fully desugared.
+ */
+int desugarLevel(AstNode n) { result = count(Desugared desugared | n = desugared.getADescendant()) }
+
+/**
+ * Use this predicate in `Synthesis::child` to generate an assignment of `value` to
+ * synthesized variable `v`, where the assignment is a child of `assignParent` at
+ * index `assignIndex`.
+ */
+bindingset[v, assignParent, assignIndex, value]
+private predicate assign(
+ AstNode parent, int i, Child child, TLocalVariableSynth v, AstNode assignParent, int assignIndex,
+ AstNode value
+) {
+ parent = assignParent and
+ i = assignIndex and
+ child = SynthChild(AssignExprKind())
+ or
+ parent = TAssignExprSynth(assignParent, assignIndex) and
+ (
+ i = 0 and
+ child = SynthChild(LocalVariableAccessSynthKind(v))
+ or
+ i = 1 and
+ child = RealChild(value)
+ )
+}
+
+/** Holds if synthesized node `n` should have location `l`. */
+predicate synthLocation(AstNode n, Location l) {
+ n.isSynthesized() and any(Synthesis s).location(n, l)
+}
+
+/** Holds if `l` is the location of the node generating `n`, or a synthesized location of `n`. */
+private predicate hasLocation(AstNode n, Location l) {
+  l = toGenerated(n).getLocation() or
+  synthLocation(n, l)
+}
+
+private module ImplicitSelfSynthesis {
+ pragma[nomagic]
+ private predicate identifierMethodCallSelfSynthesis(AstNode mc, int i, Child child) {
+ child = SynthChild(SelfKind()) and
+ mc = TIdentifierMethodCall(_) and
+ i = 0
+ }
+ /** Synthesizes a `self` node as child 0 of every identifier method call. */
+ private class IdentifierMethodCallSelfSynthesis extends Synthesis {
+ final override predicate child(AstNode parent, int i, Child child) {
+ identifierMethodCallSelfSynthesis(parent, i, child)
+ }
+ }
+ /** Holds if `mc` is a regular method call without a receiver, needing `self` as child 0. */
+ pragma[nomagic]
+ private predicate regularMethodCallSelfSynthesis(TRegularMethodCall mc, int i, Child child) {
+ exists(Ruby::AstNode g |
+ mc = TRegularMethodCall(g) and
+ // If there's no explicit receiver (or scope resolution that acts like a
+ // receiver), then the receiver is implicitly `self`. N.B. `::Foo()` is
+ // not valid Ruby.
+ not exists(g.(Ruby::Call).getReceiver()) and
+ not exists(g.(Ruby::Call).getMethod().(Ruby::ScopeResolution).getScope())
+ ) and
+ child = SynthChild(SelfKind()) and
+ i = 0
+ }
+ /** Synthesizes a `self` node as child 0 of regular method calls without a receiver. */
+ private class RegularMethodCallSelfSynthesis extends Synthesis {
+ final override predicate child(AstNode parent, int i, Child child) {
+ regularMethodCallSelfSynthesis(parent, i, child)
+ }
+ }
+}
+
+private module SetterDesugar {
+ /** An assignment where the left-hand side is a method call. */
+ private class SetterAssignExpr extends AssignExpr {
+ private MethodCall mc;
+
+ pragma[nomagic]
+ SetterAssignExpr() { mc = this.getLeftOperand() }
+
+ MethodCall getMethodCall() { result = mc }
+
+ pragma[nomagic]
+ MethodCallKind getCallKind(boolean setter, int arity) {
+ result = MethodCallKind(mc.getMethodName(), setter, arity)
+ }
+
+ pragma[nomagic]
+ Expr getReceiver() { result = mc.getReceiver() }
+
+ pragma[nomagic]
+ Expr getArgument(int i) { result = mc.getArgument(i) }
+
+ pragma[nomagic]
+ int getNumberOfArguments() { result = mc.getNumberOfArguments() }
+
+ pragma[nomagic]
+ Location getMethodCallLocation() { hasLocation(mc, result) }
+ }
+
+ /**
+  * Holds if `child` is the `i`th child of `parent` in the desugaring of an assignment whose
+  * left-hand side is a method call, that is, when
+  * `x.foo = y` is turned into `x.foo=(__synth__0 = y); __synth__0`.
+  */
+ pragma[nomagic]
+ private predicate setterMethodCallSynthesis(AstNode parent, int i, Child child) {
+   exists(SetterAssignExpr sae |
+     // the assignment itself desugars (index -1) to a statement sequence
+     parent = sae and
+     i = -1 and
+     child = SynthChild(StmtSequenceKind())
+     or
+     exists(AstNode seq | seq = TStmtSequenceSynth(sae, -1) |
+       // statement 0: the setter call, with one argument more than the original call
+       parent = seq and
+       i = 0 and
+       child = SynthChild(sae.getCallKind(true, sae.getNumberOfArguments() + 1))
+       or
+       exists(AstNode call | call = TMethodCallSynth(seq, 0, _, _, _) |
+         // child 0 of the call: the original receiver
+         parent = call and
+         i = 0 and
+         child = RealChild(sae.getReceiver())
+         or
+         // children 1..n of the call: the n original arguments
+         parent = call and
+         child = RealChild(sae.getArgument(i - 1))
+         or
+         // child n+1 of the call: the assignment `__synth__0 = y`, built with the shared
+         // `assign` helper
+         assign(parent, i, child, TLocalVariableSynth(sae, 0), call,
+           sae.getNumberOfArguments() + 1, sae.getRightOperand())
+       )
+       or
+       // statement 1: a read of `__synth__0`
+       parent = seq and
+       i = 1 and
+       child = SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(sae, 0)))
+     )
+   )
+ }
+
+ /**
+ * ```rb
+ * x.foo = y
+ * ```
+ *
+ * desugars to
+ *
+ * ```rb
+ * x.foo=(__synth__0 = y);
+ * __synth__0;
+ * ```
+ */
+ private class SetterMethodCallSynthesis extends Synthesis {
+ final override predicate child(AstNode parent, int i, Child child) {
+ setterMethodCallSynthesis(parent, i, child)
+ }
+
+ final override predicate location(AstNode n, Location l) {
+ exists(SetterAssignExpr sae, StmtSequence seq |
+ seq = sae.getDesugared() and
+ l = sae.getMethodCallLocation() and
+ n = seq.getAStmt()
+ )
+ }
+
+ final override predicate excludeFromControlFlowTree(AstNode n) {
+ n = any(SetterAssignExpr sae).getMethodCall()
+ }
+
+ final override predicate localVariable(AstNode n, int i) {
+ n instanceof SetterAssignExpr and
+ i = 0
+ }
+
+ final override predicate methodCall(string name, boolean setter, int arity) {
+ exists(SetterAssignExpr sae |
+ name = sae.getMethodCall().getMethodName() and
+ setter = true and
+ arity = sae.getNumberOfArguments() + 1
+ )
+ }
+ }
+}
+
+private module AssignOperationDesugar {
+ /**
+ * Gets the operator kind to synthesize for operator assignment `ao`.
+ */
+ private SynthKind getKind(AssignOperation ao) {
+ ao instanceof AssignAddExpr and result = AddExprKind()
+ or
+ ao instanceof AssignSubExpr and result = SubExprKind()
+ or
+ ao instanceof AssignMulExpr and result = MulExprKind()
+ or
+ ao instanceof AssignDivExpr and result = DivExprKind()
+ or
+ ao instanceof AssignModuloExpr and result = ModuloExprKind()
+ or
+ ao instanceof AssignExponentExpr and result = ExponentExprKind()
+ or
+ ao instanceof AssignLogicalAndExpr and result = LogicalAndExprKind()
+ or
+ ao instanceof AssignLogicalOrExpr and result = LogicalOrExprKind()
+ or
+ ao instanceof AssignLShiftExpr and result = LShiftExprKind()
+ or
+ ao instanceof AssignRShiftExpr and result = RShiftExprKind()
+ or
+ ao instanceof AssignBitwiseAndExpr and result = BitwiseAndExprKind()
+ or
+ ao instanceof AssignBitwiseOrExpr and result = BitwiseOrExprKind()
+ or
+ ao instanceof AssignBitwiseXorExpr and result = BitwiseXorExprKind()
+ }
+
+ /** Gets the location of the token at index 1 (presumably the operator) of assignment `ao`. */
+ private Location getAssignOperationLocation(AssignOperation ao) {
+   exists(Ruby::Token op |
+     op.getParent() = toGenerated(ao).(Ruby::OperatorAssignment) and
+     op.getParentIndex() = 1 and
+     result = op.getLocation()
+   )
+ }
+
+ /** An assignment operation where the left-hand side is a variable. */
+ private class VariableAssignOperation extends AssignOperation {
+ private Variable v;
+
+ pragma[nomagic]
+ VariableAssignOperation() { v = this.getLeftOperand().(VariableAccess).getVariable() }
+
+ pragma[nomagic]
+ SynthKind getVariableAccessKind() {
+ result in [
+ LocalVariableAccessRealKind(v).(SynthKind), InstanceVariableAccessKind(v),
+ ClassVariableAccessKind(v), GlobalVariableAccessKind(v)
+ ]
+ }
+ }
+
+ pragma[nomagic]
+ private predicate variableAssignOperationSynthesis(AstNode parent, int i, Child child) {
+ exists(VariableAssignOperation vao |
+ parent = vao and
+ i = -1 and
+ child = SynthChild(AssignExprKind())
+ or
+ exists(AstNode assign | assign = TAssignExprSynth(vao, -1) |
+ parent = assign and
+ i = 0 and
+ child = RealChild(vao.getLeftOperand())
+ or
+ parent = assign and
+ i = 1 and
+ child = SynthChild(getKind(vao))
+ or
+ parent = getSynthChild(assign, 1) and
+ (
+ i = 0 and
+ child = SynthChild(vao.getVariableAccessKind())
+ or
+ i = 1 and
+ child = RealChild(vao.getRightOperand())
+ )
+ )
+ )
+ }
+
+ /**
+ * ```rb
+ * x += y
+ * ```
+ *
+ * desugars to
+ *
+ * ```rb
+ * x = x + y
+ * ```
+ *
+ * when `x` is a variable.
+ */
+ private class VariableAssignOperationSynthesis extends Synthesis {
+ final override predicate child(AstNode parent, int i, Child child) {
+ variableAssignOperationSynthesis(parent, i, child)
+ }
+
+ final override predicate location(AstNode n, Location l) {
+ exists(VariableAssignOperation vao, BinaryOperation bo |
+ bo = vao.getDesugared().(AssignExpr).getRightOperand()
+ |
+ n = bo and
+ l = getAssignOperationLocation(vao)
+ or
+ n = bo.getLeftOperand() and
+ hasLocation(vao.getLeftOperand(), l)
+ )
+ }
+ }
+
+ /** An assignment operation where the left-hand side is a method call. */
+ private class SetterAssignOperation extends AssignOperation {
+ private MethodCall mc;
+
+ pragma[nomagic]
+ SetterAssignOperation() { mc = this.getLeftOperand() }
+
+ MethodCall getMethodCall() { result = mc }
+
+ pragma[nomagic]
+ MethodCallKind getCallKind(boolean setter, int arity) {
+ result = MethodCallKind(mc.getMethodName(), setter, arity)
+ }
+
+ pragma[nomagic]
+ Expr getReceiver() { result = mc.getReceiver() }
+
+ pragma[nomagic]
+ Expr getArgument(int i) { result = mc.getArgument(i) }
+
+ pragma[nomagic]
+ int getNumberOfArguments() { result = mc.getNumberOfArguments() }
+
+ pragma[nomagic]
+ Location getMethodCallLocation() { hasLocation(mc, result) }
+ }
+
+ pragma[nomagic]
+ private predicate methodCallAssignOperationSynthesis(AstNode parent, int i, Child child) {
+ exists(SetterAssignOperation sao |
+ parent = sao and
+ i = -1 and
+ child = SynthChild(StmtSequenceKind())
+ or
+ exists(AstNode seq | seq = TStmtSequenceSynth(sao, -1) |
+ // `__synth__0 = foo`
+ assign(parent, i, child, TLocalVariableSynth(sao, 0), seq, 0, sao.getReceiver())
+ or
+ // `__synth__1 = bar`
+ exists(Expr arg, int j | arg = sao.getArgument(j - 1) |
+ assign(parent, i, child, TLocalVariableSynth(sao, j), seq, j, arg)
+ )
+ or
+ // `__synth__2 = __synth__0.[](__synth__1) + y`
+ exists(int opAssignIndex | opAssignIndex = sao.getNumberOfArguments() + 1 |
+ parent = seq and
+ i = opAssignIndex and
+ child = SynthChild(AssignExprKind())
+ or
+ exists(AstNode assign | assign = TAssignExprSynth(seq, opAssignIndex) |
+ parent = assign and
+ i = 0 and
+ child =
+ SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(sao, opAssignIndex)))
+ or
+ parent = assign and
+ i = 1 and
+ child = SynthChild(getKind(sao))
+ or
+ // `__synth__0.[](__synth__1) + y`
+ exists(AstNode op | op = getSynthChild(assign, 1) |
+ parent = op and
+ i = 0 and
+ child = SynthChild(sao.getCallKind(false, sao.getNumberOfArguments()))
+ or
+ parent = TMethodCallSynth(op, 0, _, _, _) and
+ child = SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(sao, i))) and
+ i in [0 .. sao.getNumberOfArguments()]
+ or
+ parent = op and
+ i = 1 and
+ child = RealChild(sao.getRightOperand())
+ )
+ )
+ or
+ // `__synth__0.[]=(__synth__1, __synth__2);`
+ parent = seq and
+ i = opAssignIndex + 1 and
+ child = SynthChild(sao.getCallKind(true, opAssignIndex))
+ or
+ exists(AstNode setter | setter = TMethodCallSynth(seq, opAssignIndex + 1, _, _, _) |
+ parent = setter and
+ child = SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(sao, i))) and
+ i in [0 .. sao.getNumberOfArguments()]
+ or
+ parent = setter and
+ i = opAssignIndex + 1 and
+ child =
+ SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(sao, opAssignIndex)))
+ )
+ or
+ parent = seq and
+ i = opAssignIndex + 2 and
+ child = SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(sao, opAssignIndex)))
+ )
+ )
+ )
+ }
+
+ /**
+ * ```rb
+ * foo[bar] += y
+ * ```
+ *
+ * desugars to
+ *
+ * ```rb
+ * __synth__0 = foo;
+ * __synth__1 = bar;
+ * __synth__2 = __synth__0.[](__synth__1) + y;
+ * __synth__0.[]=(__synth__1, __synth__2);
+ * __synth__2;
+ * ```
+ */
+ private class MethodCallAssignOperationSynthesis extends Synthesis {
+ final override predicate child(AstNode parent, int i, Child child) {
+ methodCallAssignOperationSynthesis(parent, i, child)
+ }
+
+ final override predicate location(AstNode n, Location l) {
+ exists(SetterAssignOperation sao, StmtSequence seq | seq = sao.getDesugared() |
+ n = seq.getStmt(0) and
+ hasLocation(sao.getReceiver(), l)
+ or
+ exists(int i |
+ n = seq.getStmt(i + 1) and
+ hasLocation(sao.getArgument(i), l)
+ )
+ or
+ exists(AssignExpr ae, int opAssignIndex |
+ opAssignIndex = sao.getNumberOfArguments() + 1 and
+ ae = seq.getStmt(opAssignIndex)
+ |
+ l = getAssignOperationLocation(sao) and
+ n = ae
+ or
+ exists(BinaryOperation bo | bo = ae.getRightOperand() |
+ n = bo.getLeftOperand() and
+ l = sao.getMethodCallLocation()
+ or
+ exists(MethodCall mc | mc = bo.getLeftOperand() |
+ n = mc.getReceiver() and
+ hasLocation(sao.getReceiver(), l)
+ or
+ exists(int i |
+ n = mc.getArgument(i) and
+ hasLocation(sao.getArgument(i), l)
+ )
+ )
+ )
+ or
+ exists(MethodCall mc | mc = seq.getStmt(opAssignIndex + 1) |
+ n = mc and
+ l = sao.getMethodCallLocation()
+ or
+ n = mc.getReceiver() and
+ hasLocation(sao.getReceiver(), l)
+ or
+ exists(int i | n = mc.getArgument(i) |
+ hasLocation(sao.getArgument(i), l)
+ or
+ i = opAssignIndex and
+ l = getAssignOperationLocation(sao)
+ )
+ )
+ or
+ n = seq.getStmt(opAssignIndex + 2) and
+ l = getAssignOperationLocation(sao)
+ )
+ )
+ }
+
+ final override predicate localVariable(AstNode n, int i) {
+ n = any(SetterAssignOperation sao | i in [0 .. sao.getNumberOfArguments() + 1])
+ }
+
+ final override predicate methodCall(string name, boolean setter, int arity) {
+ exists(SetterAssignOperation sao | name = sao.getMethodCall().getMethodName() |
+ setter = false and
+ arity = sao.getNumberOfArguments()
+ or
+ setter = true and
+ arity = sao.getNumberOfArguments() + 1
+ )
+ }
+
+ final override predicate excludeFromControlFlowTree(AstNode n) {
+ n = any(SetterAssignOperation sao).getMethodCall()
+ }
+ }
+}
+
+private module CompoundAssignDesugar {
+ /** An assignment where the left-hand side is a tuple pattern. */
+ private class TupleAssignExpr extends AssignExpr {
+ private TuplePattern tp;
+
+ pragma[nomagic]
+ TupleAssignExpr() { tp = this.getLeftOperand() }
+
+ TuplePattern getTuplePattern() { result = tp }
+
+ pragma[nomagic]
+ Pattern getElement(int i) { result = tp.getElement(i) }
+
+ pragma[nomagic]
+ int getNumberOfElements() {
+ toGenerated(tp) = any(TuplePatternImpl impl | result = count(impl.getChildNode(_)))
+ }
+
+ pragma[nomagic]
+ int getRestIndexOrNumberOfElements() {
+ result = tp.getRestIndex()
+ or
+ toGenerated(tp) = any(TuplePatternImpl impl | not exists(impl.getRestIndex())) and
+ result = this.getNumberOfElements()
+ }
+ }
+
+ pragma[nomagic]
+ private predicate compoundAssignSynthesis(AstNode parent, int i, Child child) {
+ exists(TupleAssignExpr tae |
+ parent = tae and
+ i = -1 and
+ child = SynthChild(StmtSequenceKind())
+ or
+ exists(AstNode seq | seq = TStmtSequenceSynth(tae, -1) |
+ parent = seq and
+ i = 0 and
+ child = SynthChild(AssignExprKind())
+ or
+ exists(AstNode assign | assign = TAssignExprSynth(seq, 0) |
+ parent = assign and
+ i = 0 and
+ child = SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(tae, 0)))
+ or
+ parent = assign and
+ i = 1 and
+ child = SynthChild(SplatExprKind())
+ or
+ parent = TSplatExprSynth(assign, 1) and
+ i = 0 and
+ child = RealChild(tae.getRightOperand())
+ )
+ or
+ exists(Pattern p, int j, int restIndex |
+ p = tae.getElement(j) and
+ restIndex = tae.getRestIndexOrNumberOfElements()
+ |
+ parent = seq and
+ i = j + 1 and
+ child = SynthChild(AssignExprKind())
+ or
+ exists(AstNode assign | assign = TAssignExprSynth(seq, j + 1) |
+ parent = assign and
+ i = 0 and
+ child = RealChild(p)
+ or
+ parent = assign and
+ i = 1 and
+ child = SynthChild(MethodCallKind("[]", false, 1))
+ or
+ parent = TMethodCallSynth(assign, 1, _, _, _) and
+ i = 0 and
+ child = SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(tae, 0)))
+ or
+ j < restIndex and
+ parent = TMethodCallSynth(assign, 1, _, _, _) and
+ i = 1 and
+ child = SynthChild(IntegerLiteralKind(j))
+ or
+ j = restIndex and
+ (
+ parent = TMethodCallSynth(assign, 1, _, _, _) and
+ i = 1 and
+ child = SynthChild(RangeLiteralKind(true))
+ or
+ exists(AstNode call |
+ call = TMethodCallSynth(assign, 1, _, _, _) and
+ parent = TRangeLiteralSynth(call, 1, _)
+ |
+ i = 0 and
+ child = SynthChild(IntegerLiteralKind(j))
+ or
+ i = 1 and
+ child = SynthChild(IntegerLiteralKind(restIndex - tae.getNumberOfElements()))
+ )
+ )
+ or
+ j > restIndex and
+ parent = TMethodCallSynth(assign, 1, _, _, _) and
+ i = 1 and
+ child = SynthChild(IntegerLiteralKind(j - tae.getNumberOfElements()))
+ )
+ )
+ )
+ )
+ }
+
+ /**
+ * ```rb
+ * x, *y, z = w
+ * ```
+ * desugars to
+ *
+ * ```rb
+ * __synth__0 = *w;
+ * x = __synth__0[0];
+ * y = __synth__0[1..-2];
+ * z = __synth__0[-1];
+ * ```
+ */
+ private class CompoundAssignSynthesis extends Synthesis {
+ final override predicate child(AstNode parent, int i, Child child) {
+ compoundAssignSynthesis(parent, i, child)
+ }
+
+ final override predicate location(AstNode n, Location l) {
+ exists(TupleAssignExpr tae, StmtSequence seq | seq = tae.getDesugared() |
+ n = seq.getStmt(0) and
+ hasLocation(tae.getRightOperand(), l)
+ or
+ exists(Pattern p, int j |
+ p = tae.getElement(j) and
+ n = seq.getStmt(j + 1) and
+ hasLocation(p, l)
+ )
+ )
+ }
+
+ final override predicate localVariable(AstNode n, int i) {
+ n instanceof TupleAssignExpr and
+ i = 0
+ }
+
+ final override predicate methodCall(string name, boolean setter, int arity) {
+ name = "[]" and
+ setter = false and
+ arity = 1
+ }
+
+ final override predicate excludeFromControlFlowTree(AstNode n) {
+ n = any(TupleAssignExpr tae).getTuplePattern()
+ }
+ }
+}
+
+private module ArrayLiteralDesugar {
+  pragma[nomagic]
+  private predicate arrayLiteralSynthesis(AstNode parent, int i, Child child) {
+    exists(ArrayLiteral al |
+      parent = al and
+      i = -1 and
+      // arity = number of arguments; the receiver (child 0) is not counted
+      child = SynthChild(MethodCallKind("[]", false, al.getNumberOfElements()))
+      or
+      parent = TMethodCallSynth(al, -1, _, _, _) and
+      (
+        i = 0 and
+        child = SynthChild(ConstantReadAccessKind("::Array"))
+        or
+        child = RealChild(al.getElement(i - 1))
+      )
+    )
+  }
+
+  /**
+   * ```rb
+   * [1, 2, 3]
+   * ```
+   * desugars to
+   *
+   * ```rb
+   * ::Array.[](1, 2, 3)
+   * ```
+   */
+  private class ArrayLiteralSynthesis extends Synthesis {
+    final override predicate child(AstNode parent, int i, Child child) {
+      arrayLiteralSynthesis(parent, i, child)
+    }
+
+    final override predicate methodCall(string name, boolean setter, int arity) {
+      name = "[]" and
+      setter = false and
+      arity = any(ArrayLiteral al).getNumberOfElements()
+    }
+
+    final override predicate constantReadAccess(string name) { name = "::Array" }
+  }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/TreeSitter.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/TreeSitter.qll
new file mode 100644
index 00000000000..d054d15b675
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/TreeSitter.qll
@@ -0,0 +1,2000 @@
+/*
+ * CodeQL library for Ruby
+ * Automatically generated from the tree-sitter grammar; do not edit
+ */
+
+private import codeql.files.FileSystem
+private import codeql.Locations
+
+module Ruby {
+ /** The base class for all AST nodes */
+ class AstNode extends @ruby_ast_node {
+ /** Gets a string representation of this element (a primary QL class name of this node). */
+ string toString() { result = this.getAPrimaryQlClass() }
+
+ /** Gets the location of this element. The base implementation has no result. */
+ Location getLocation() { none() }
+
+ /** Gets the parent of this element. */
+ AstNode getParent() { ruby_ast_node_parent(this, result, _) }
+
+ /** Gets the index of this node among the children of its parent. */
+ int getParentIndex() { ruby_ast_node_parent(this, _, result) }
+
+ /** Gets a field or child node of this node. The base implementation has no result. */
+ AstNode getAFieldOrChild() { none() }
+
+ /** Gets the name of the primary QL class for this element (`"???"` in this base implementation). */
+ string getAPrimaryQlClass() { result = "???" }
+
+ /** Gets a comma-separated list of the names of the primary CodeQL classes to which this element belongs. */
+ string getPrimaryQlClasses() { result = concat(this.getAPrimaryQlClass(), ",") }
+ }
+
+ /** A token. */
+ class Token extends @ruby_token, AstNode {
+ /** Gets the value of this token. */
+ string getValue() { ruby_tokeninfo(this, _, result, _) }
+
+ /** Gets the location of this token. */
+ override Location getLocation() { ruby_tokeninfo(this, _, _, result) }
+
+ /** Gets a string representation of this element, which is the value of the token. */
+ override string toString() { result = this.getValue() }
+
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Token" }
+ }
+
+ /** A reserved word. */
+ class ReservedWord extends @ruby_reserved_word, Token {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "ReservedWord" }
+ }
+
+ class UnderscoreArg extends @ruby_underscore_arg, AstNode { }
+
+ class UnderscoreLhs extends @ruby_underscore_lhs, AstNode { }
+
+ class UnderscoreMethodName extends @ruby_underscore_method_name, AstNode { }
+
+ class UnderscorePrimary extends @ruby_underscore_primary, AstNode { }
+
+ class UnderscoreStatement extends @ruby_underscore_statement, AstNode { }
+
+ class UnderscoreVariable extends @ruby_underscore_variable, AstNode { }
+
+ /** A class representing `alias` nodes. */
+ class Alias extends @ruby_alias, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Alias" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_alias_def(this, _, _, result) }
+
+ /** Gets the node corresponding to the field `alias`. */
+ UnderscoreMethodName getAlias() { ruby_alias_def(this, result, _, _) }
+
+ /** Gets the node corresponding to the field `name`. */
+ UnderscoreMethodName getName() { ruby_alias_def(this, _, result, _) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_alias_def(this, result, _, _) or ruby_alias_def(this, _, result, _)
+ }
+ }
+
+ /** A class representing `argument_list` nodes. */
+ class ArgumentList extends @ruby_argument_list, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "ArgumentList" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_argument_list_def(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_argument_list_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_argument_list_child(this, _, result) }
+ }
+
+ /** A class representing `array` nodes. */
+ class Array extends @ruby_array, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Array" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_array_def(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_array_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_array_child(this, _, result) }
+ }
+
+ /** A class representing `assignment` nodes. */
+ class Assignment extends @ruby_assignment, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Assignment" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_assignment_def(this, _, _, result) }
+
+ /** Gets the node corresponding to the field `left`. */
+ AstNode getLeft() { ruby_assignment_def(this, result, _, _) }
+
+ /** Gets the node corresponding to the field `right`. */
+ AstNode getRight() { ruby_assignment_def(this, _, result, _) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_assignment_def(this, result, _, _) or ruby_assignment_def(this, _, result, _)
+ }
+ }
+
+ /** A class representing `bare_string` nodes. */
+ class BareString extends @ruby_bare_string, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "BareString" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_bare_string_def(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_bare_string_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_bare_string_child(this, _, result) }
+ }
+
+ /** A class representing `bare_symbol` nodes. */
+ class BareSymbol extends @ruby_bare_symbol, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "BareSymbol" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_bare_symbol_def(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_bare_symbol_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_bare_symbol_child(this, _, result) }
+ }
+
+ /** A class representing `begin` nodes. */
+ class Begin extends @ruby_begin, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Begin" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_begin_def(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_begin_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_begin_child(this, _, result) }
+ }
+
+ /** A class representing `begin_block` nodes. */
+ class BeginBlock extends @ruby_begin_block, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "BeginBlock" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_begin_block_def(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_begin_block_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_begin_block_child(this, _, result) }
+ }
+
+ /** A class representing `binary` nodes. */
+ class Binary extends @ruby_binary, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Binary" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_binary_def(this, _, _, _, result) }
+
+ /** Gets the node corresponding to the field `left`. */
+ AstNode getLeft() { ruby_binary_def(this, result, _, _, _) }
+
+ /** Gets the operator text for the field `operator`, decoded from its integer code in `ruby_binary_def`. */
+ string getOperator() {
+ exists(int value | ruby_binary_def(this, _, value, _, _) |
+ result = "!=" and value = 0
+ or
+ result = "!~" and value = 1
+ or
+ result = "%" and value = 2
+ or
+ result = "&" and value = 3
+ or
+ result = "&&" and value = 4
+ or
+ result = "*" and value = 5
+ or
+ result = "**" and value = 6
+ or
+ result = "+" and value = 7
+ or
+ result = "-" and value = 8
+ or
+ result = "/" and value = 9
+ or
+ result = "<" and value = 10
+ or
+ result = "<<" and value = 11
+ or
+ result = "<=" and value = 12
+ or
+ result = "<=>" and value = 13
+ or
+ result = "==" and value = 14
+ or
+ result = "===" and value = 15
+ or
+ result = "=~" and value = 16
+ or
+ result = ">" and value = 17
+ or
+ result = ">=" and value = 18
+ or
+ result = ">>" and value = 19
+ or
+ result = "^" and value = 20
+ or
+ result = "and" and value = 21
+ or
+ result = "or" and value = 22
+ or
+ result = "|" and value = 23
+ or
+ result = "||" and value = 24
+ )
+ }
+
+ /** Gets the node corresponding to the field `right`. */
+ AstNode getRight() { ruby_binary_def(this, _, _, result, _) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_binary_def(this, result, _, _, _) or ruby_binary_def(this, _, _, result, _)
+ }
+ }
+
+ /** A class representing `block` nodes. */
+ class Block extends @ruby_block, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Block" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_block_def(this, result) }
+
+ /** Gets the node corresponding to the field `parameters`. */
+ BlockParameters getParameters() { ruby_block_parameters(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_block_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_block_parameters(this, result) or ruby_block_child(this, _, result)
+ }
+ }
+
+ /** A class representing `block_argument` nodes. */
+ class BlockArgument extends @ruby_block_argument, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "BlockArgument" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_block_argument_def(this, _, result) }
+
+ /** Gets the child of this node. */
+ UnderscoreArg getChild() { ruby_block_argument_def(this, result, _) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_block_argument_def(this, result, _) }
+ }
+
+ /** A class representing `block_parameter` nodes. */
+ class BlockParameter extends @ruby_block_parameter, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "BlockParameter" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_block_parameter_def(this, _, result) }
+
+ /** Gets the node corresponding to the field `name`. */
+ Identifier getName() { ruby_block_parameter_def(this, result, _) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_block_parameter_def(this, result, _) }
+ }
+
+ /** A class representing `block_parameters` nodes. */
+ class BlockParameters extends @ruby_block_parameters, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "BlockParameters" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_block_parameters_def(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_block_parameters_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_block_parameters_child(this, _, result) }
+ }
+
+ /** A class representing `break` nodes. */
+ class Break extends @ruby_break, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Break" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_break_def(this, result) }
+
+ /** Gets the child of this node. */
+ ArgumentList getChild() { ruby_break_child(this, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_break_child(this, result) }
+ }
+
+ /** A class representing `call` nodes. */
+ class Call extends @ruby_call, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Call" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_call_def(this, _, result) }
+
+ /** Gets the node corresponding to the field `arguments`. */
+ ArgumentList getArguments() { ruby_call_arguments(this, result) }
+
+ /** Gets the node corresponding to the field `block`. */
+ AstNode getBlock() { ruby_call_block(this, result) }
+
+ /** Gets the node corresponding to the field `method`. */
+ AstNode getMethod() { ruby_call_def(this, result, _) }
+
+ /** Gets the node corresponding to the field `receiver`. */
+ AstNode getReceiver() { ruby_call_receiver(this, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_call_arguments(this, result) or
+ ruby_call_block(this, result) or
+ ruby_call_def(this, result, _) or
+ ruby_call_receiver(this, result)
+ }
+ }
+
+ /** A class representing `case` nodes. */
+ class Case extends @ruby_case__, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Case" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_case_def(this, result) }
+
+ /** Gets the node corresponding to the field `value`. */
+ UnderscoreStatement getValue() { ruby_case_value(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_case_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_case_value(this, result) or ruby_case_child(this, _, result)
+ }
+ }
+
+ /** A class representing `chained_string` nodes. */
+ class ChainedString extends @ruby_chained_string, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "ChainedString" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_chained_string_def(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ String getChild(int i) { ruby_chained_string_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_chained_string_child(this, _, result) }
+ }
+
+ /** A class representing `character` tokens. */
+ class Character extends @ruby_token_character, Token {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Character" }
+ }
+
+ /** A class representing `class` nodes. */
+ class Class extends @ruby_class, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Class" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_class_def(this, _, result) }
+
+ /** Gets the node corresponding to the field `name`. */
+ AstNode getName() { ruby_class_def(this, result, _) }
+
+ /** Gets the node corresponding to the field `superclass`. */
+ Superclass getSuperclass() { ruby_class_superclass(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_class_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_class_def(this, result, _) or
+ ruby_class_superclass(this, result) or
+ ruby_class_child(this, _, result)
+ }
+ }
+
+ /** A class representing `class_variable` tokens. */
+ class ClassVariable extends @ruby_token_class_variable, Token {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "ClassVariable" }
+ }
+
+ /** A class representing `comment` tokens. */
+ class Comment extends @ruby_token_comment, Token {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Comment" }
+ }
+
+ /** A class representing `complex` tokens. */
+ class Complex extends @ruby_token_complex, Token {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Complex" }
+ }
+
+ /** A class representing `conditional` nodes. */
+ class Conditional extends @ruby_conditional, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Conditional" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_conditional_def(this, _, _, _, result) }
+
+ /** Gets the node corresponding to the field `alternative`. */
+ UnderscoreArg getAlternative() { ruby_conditional_def(this, result, _, _, _) }
+
+ /** Gets the node corresponding to the field `condition`. */
+ UnderscoreArg getCondition() { ruby_conditional_def(this, _, result, _, _) }
+
+ /** Gets the node corresponding to the field `consequence`. */
+ UnderscoreArg getConsequence() { ruby_conditional_def(this, _, _, result, _) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_conditional_def(this, result, _, _, _) or
+ ruby_conditional_def(this, _, result, _, _) or
+ ruby_conditional_def(this, _, _, result, _)
+ }
+ }
+
+ /** A class representing `constant` tokens. */
+ class Constant extends @ruby_token_constant, Token {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Constant" }
+ }
+
+ /** A class representing `delimited_symbol` nodes. */
+ class DelimitedSymbol extends @ruby_delimited_symbol, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "DelimitedSymbol" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_delimited_symbol_def(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_delimited_symbol_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_delimited_symbol_child(this, _, result) }
+ }
+
+ /** A class representing `destructured_left_assignment` nodes. */
+ class DestructuredLeftAssignment extends @ruby_destructured_left_assignment, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "DestructuredLeftAssignment" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_destructured_left_assignment_def(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_destructured_left_assignment_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_destructured_left_assignment_child(this, _, result) }
+ }
+
+ /** A class representing `destructured_parameter` nodes. */
+ class DestructuredParameter extends @ruby_destructured_parameter, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "DestructuredParameter" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_destructured_parameter_def(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_destructured_parameter_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_destructured_parameter_child(this, _, result) }
+ }
+
+ /** A class representing `do` nodes. */
+ class Do extends @ruby_do, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Do" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_do_def(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_do_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_do_child(this, _, result) }
+ }
+
+ /** A class representing `do_block` nodes. */
+ class DoBlock extends @ruby_do_block, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "DoBlock" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_do_block_def(this, result) }
+
+ /** Gets the node corresponding to the field `parameters`. */
+ BlockParameters getParameters() { ruby_do_block_parameters(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_do_block_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_do_block_parameters(this, result) or ruby_do_block_child(this, _, result)
+ }
+ }
+
+ /** A class representing `element_reference` nodes. */
+ class ElementReference extends @ruby_element_reference, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "ElementReference" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_element_reference_def(this, _, result) }
+
+ /** Gets the node corresponding to the field `object`. */
+ UnderscorePrimary getObject() { ruby_element_reference_def(this, result, _) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_element_reference_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_element_reference_def(this, result, _) or ruby_element_reference_child(this, _, result)
+ }
+ }
+
+ /** A class representing `else` nodes. */
+ class Else extends @ruby_else, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Else" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_else_def(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_else_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_else_child(this, _, result) }
+ }
+
+ /** A class representing `elsif` nodes. */
+ class Elsif extends @ruby_elsif, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Elsif" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_elsif_def(this, _, result) }
+
+ /** Gets the node corresponding to the field `alternative`. */
+ AstNode getAlternative() { ruby_elsif_alternative(this, result) }
+
+ /** Gets the node corresponding to the field `condition`. */
+ UnderscoreStatement getCondition() { ruby_elsif_def(this, result, _) }
+
+ /** Gets the node corresponding to the field `consequence`. */
+ Then getConsequence() { ruby_elsif_consequence(this, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_elsif_alternative(this, result) or
+ ruby_elsif_def(this, result, _) or
+ ruby_elsif_consequence(this, result)
+ }
+ }
+
+ /** A class representing `empty_statement` tokens. */
+ class EmptyStatement extends @ruby_token_empty_statement, Token {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "EmptyStatement" }
+ }
+
+ /** A class representing `end_block` nodes. */
+ class EndBlock extends @ruby_end_block, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "EndBlock" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_end_block_def(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_end_block_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_end_block_child(this, _, result) }
+ }
+
+ /** A class representing `ensure` nodes. */
+ class Ensure extends @ruby_ensure, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Ensure" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_ensure_def(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_ensure_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_ensure_child(this, _, result) }
+ }
+
+ /** A class representing `escape_sequence` tokens. */
+ class EscapeSequence extends @ruby_token_escape_sequence, Token {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "EscapeSequence" }
+ }
+
+ /** A class representing `exception_variable` nodes. */
+ class ExceptionVariable extends @ruby_exception_variable, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "ExceptionVariable" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_exception_variable_def(this, _, result) }
+
+ /** Gets the child of this node. */
+ UnderscoreLhs getChild() { ruby_exception_variable_def(this, result, _) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_exception_variable_def(this, result, _) }
+ }
+
+ /** A class representing `exceptions` nodes. */
+ class Exceptions extends @ruby_exceptions, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Exceptions" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_exceptions_def(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_exceptions_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_exceptions_child(this, _, result) }
+ }
+
+ /** A class representing `false` tokens. */
+ class False extends @ruby_token_false, Token {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "False" }
+ }
+
+ /** A class representing `float` tokens. */
+ class Float extends @ruby_token_float, Token {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Float" }
+ }
+
+ /** A class representing `for` nodes. */
+ class For extends @ruby_for, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "For" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_for_def(this, _, _, _, result) }
+
+ /** Gets the node corresponding to the field `body`. */
+ Do getBody() { ruby_for_def(this, result, _, _, _) }
+
+ /** Gets the node corresponding to the field `pattern`. */
+ AstNode getPattern() { ruby_for_def(this, _, result, _, _) }
+
+ /** Gets the node corresponding to the field `value`. */
+ In getValue() { ruby_for_def(this, _, _, result, _) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_for_def(this, result, _, _, _) or
+ ruby_for_def(this, _, result, _, _) or
+ ruby_for_def(this, _, _, result, _)
+ }
+ }
+
+ /** A class representing `forward_argument` tokens. */
+ class ForwardArgument extends @ruby_token_forward_argument, Token {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "ForwardArgument" }
+ }
+
+ /** A class representing `forward_parameter` tokens. */
+ class ForwardParameter extends @ruby_token_forward_parameter, Token {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "ForwardParameter" }
+ }
+
+ /** A class representing `global_variable` tokens. */
+ class GlobalVariable extends @ruby_token_global_variable, Token {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "GlobalVariable" }
+ }
+
+ /** A class representing `hash` nodes. */
+ class Hash extends @ruby_hash, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "Hash" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_hash_def(this, result) }
+
+ /** Gets the `i`th child of this node. */
+ AstNode getChild(int i) { ruby_hash_child(this, i, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_hash_child(this, _, result) }
+ }
+
+ /** A class representing `hash_key_symbol` tokens. */
+ class HashKeySymbol extends @ruby_token_hash_key_symbol, Token {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "HashKeySymbol" }
+ }
+
+ /** A class representing `hash_splat_argument` nodes. */
+ class HashSplatArgument extends @ruby_hash_splat_argument, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "HashSplatArgument" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_hash_splat_argument_def(this, _, result) }
+
+ /** Gets the child of this node. */
+ UnderscoreArg getChild() { ruby_hash_splat_argument_def(this, result, _) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_hash_splat_argument_def(this, result, _) }
+ }
+
+ /** A class representing `hash_splat_parameter` nodes. */
+ class HashSplatParameter extends @ruby_hash_splat_parameter, AstNode {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "HashSplatParameter" }
+
+ /** Gets the location of this element. */
+ override Location getLocation() { ruby_hash_splat_parameter_def(this, result) }
+
+ /** Gets the node corresponding to the field `name`. */
+ Identifier getName() { ruby_hash_splat_parameter_name(this, result) }
+
+ /** Gets a field or child node of this node. */
+ override AstNode getAFieldOrChild() { ruby_hash_splat_parameter_name(this, result) }
+ }
+
+ /** A class representing `heredoc_beginning` tokens. */
+ class HeredocBeginning extends @ruby_token_heredoc_beginning, Token {
+ /** Gets the name of the primary QL class for this element. */
+ override string getAPrimaryQlClass() { result = "HeredocBeginning" }
+ }
+
+ /** Models `heredoc_body` nodes (database type `@ruby_heredoc_body`). */
+ class HeredocBody extends @ruby_heredoc_body, AstNode {
+ /** Gets the child stored at position `i` for this node in `ruby_heredoc_body_child`. */
+ AstNode getChild(int i) { ruby_heredoc_body_child(this, i, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_heredoc_body_child(this, _, result) }
+
+ /** Gets the location recorded for this element in `ruby_heredoc_body_def`. */
+ override Location getLocation() { ruby_heredoc_body_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"HeredocBody"`. */
+ override string getAPrimaryQlClass() { result = "HeredocBody" }
+ }
+
+ /** A class representing `heredoc_content` tokens (database type `@ruby_token_heredoc_content`). */
+ class HeredocContent extends @ruby_token_heredoc_content, Token {
+ /** Gets the name of the primary QL class for this element, which is `"HeredocContent"`. */
+ override string getAPrimaryQlClass() { result = "HeredocContent" }
+ }
+
+ /** A class representing `heredoc_end` tokens (database type `@ruby_token_heredoc_end`). */
+ class HeredocEnd extends @ruby_token_heredoc_end, Token {
+ /** Gets the name of the primary QL class for this element, which is `"HeredocEnd"`. */
+ override string getAPrimaryQlClass() { result = "HeredocEnd" }
+ }
+
+ /** A class representing `identifier` tokens (database type `@ruby_token_identifier`). */
+ class Identifier extends @ruby_token_identifier, Token {
+ /** Gets the name of the primary QL class for this element, which is `"Identifier"`. */
+ override string getAPrimaryQlClass() { result = "Identifier" }
+ }
+
+ /** Models `if` nodes (database type `@ruby_if`). */
+ class If extends @ruby_if, AstNode {
+ /** Gets the `alternative` field of this node, read from `ruby_if_alternative`. */
+ AstNode getAlternative() { ruby_if_alternative(this, result) }
+
+ /** Gets the `condition` field of this node, read from `ruby_if_def`. */
+ UnderscoreStatement getCondition() { ruby_if_def(this, result, _) }
+
+ /** Gets the `consequence` field of this node, read from `ruby_if_consequence`. */
+ Then getConsequence() { ruby_if_consequence(this, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_if_def(this, result, _) or
+ ruby_if_consequence(this, result) or
+ ruby_if_alternative(this, result)
+ }
+
+ /** Gets the location recorded for this element in `ruby_if_def`. */
+ override Location getLocation() { ruby_if_def(this, _, result) }
+
+ /** Gets the primary QL class name of this element, `"If"`. */
+ override string getAPrimaryQlClass() { result = "If" }
+ }
+
+ /** Models `if_modifier` nodes (database type `@ruby_if_modifier`). */
+ class IfModifier extends @ruby_if_modifier, AstNode {
+ /** Gets the `body` field of this node, read from `ruby_if_modifier_def`. */
+ UnderscoreStatement getBody() { ruby_if_modifier_def(this, result, _, _) }
+
+ /** Gets the `condition` field of this node, read from `ruby_if_modifier_def`. */
+ AstNode getCondition() { ruby_if_modifier_def(this, _, result, _) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_if_modifier_def(this, _, result, _) or ruby_if_modifier_def(this, result, _, _)
+ }
+
+ /** Gets the location recorded for this element in `ruby_if_modifier_def`. */
+ override Location getLocation() { ruby_if_modifier_def(this, _, _, result) }
+
+ /** Gets the primary QL class name of this element, `"IfModifier"`. */
+ override string getAPrimaryQlClass() { result = "IfModifier" }
+ }
+
+ /** Models `in` nodes (database type `@ruby_in`). */
+ class In extends @ruby_in, AstNode {
+ /** Gets the child of this node, read from `ruby_in_def`. */
+ UnderscoreArg getChild() { ruby_in_def(this, result, _) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_in_def(this, result, _) }
+
+ /** Gets the location recorded for this element in `ruby_in_def`. */
+ override Location getLocation() { ruby_in_def(this, _, result) }
+
+ /** Gets the primary QL class name of this element, `"In"`. */
+ override string getAPrimaryQlClass() { result = "In" }
+ }
+
+ /** A class representing `instance_variable` tokens (database type `@ruby_token_instance_variable`). */
+ class InstanceVariable extends @ruby_token_instance_variable, Token {
+ /** Gets the name of the primary QL class for this element, which is `"InstanceVariable"`. */
+ override string getAPrimaryQlClass() { result = "InstanceVariable" }
+ }
+
+ /** A class representing `integer` tokens (database type `@ruby_token_integer`). */
+ class Integer extends @ruby_token_integer, Token {
+ /** Gets the name of the primary QL class for this element, which is `"Integer"`. */
+ override string getAPrimaryQlClass() { result = "Integer" }
+ }
+
+ /** Models `interpolation` nodes (database type `@ruby_interpolation`). */
+ class Interpolation extends @ruby_interpolation, AstNode {
+ /** Gets the child stored at position `i` for this node in `ruby_interpolation_child`. */
+ AstNode getChild(int i) { ruby_interpolation_child(this, i, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_interpolation_child(this, _, result) }
+
+ /** Gets the location recorded for this element in `ruby_interpolation_def`. */
+ override Location getLocation() { ruby_interpolation_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"Interpolation"`. */
+ override string getAPrimaryQlClass() { result = "Interpolation" }
+ }
+
+ /** Models `keyword_parameter` nodes (database type `@ruby_keyword_parameter`). */
+ class KeywordParameter extends @ruby_keyword_parameter, AstNode {
+ /** Gets the `name` field of this node, read from `ruby_keyword_parameter_def`. */
+ Identifier getName() { ruby_keyword_parameter_def(this, result, _) }
+
+ /** Gets the `value` field of this node, read from `ruby_keyword_parameter_value`. */
+ UnderscoreArg getValue() { ruby_keyword_parameter_value(this, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_keyword_parameter_value(this, result) or ruby_keyword_parameter_def(this, result, _)
+ }
+
+ /** Gets the location recorded for this element in `ruby_keyword_parameter_def`. */
+ override Location getLocation() { ruby_keyword_parameter_def(this, _, result) }
+
+ /** Gets the primary QL class name of this element, `"KeywordParameter"`. */
+ override string getAPrimaryQlClass() { result = "KeywordParameter" }
+ }
+
+ /** Models `lambda` nodes (database type `@ruby_lambda`). */
+ class Lambda extends @ruby_lambda, AstNode {
+ /** Gets the `body` field of this node, read from `ruby_lambda_def`. */
+ AstNode getBody() { ruby_lambda_def(this, result, _) }
+
+ /** Gets the `parameters` field of this node, read from `ruby_lambda_parameters`. */
+ LambdaParameters getParameters() { ruby_lambda_parameters(this, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_lambda_parameters(this, result) or ruby_lambda_def(this, result, _)
+ }
+
+ /** Gets the location recorded for this element in `ruby_lambda_def`. */
+ override Location getLocation() { ruby_lambda_def(this, _, result) }
+
+ /** Gets the primary QL class name of this element, `"Lambda"`. */
+ override string getAPrimaryQlClass() { result = "Lambda" }
+ }
+
+ /** Models `lambda_parameters` nodes (database type `@ruby_lambda_parameters`). */
+ class LambdaParameters extends @ruby_lambda_parameters, AstNode {
+ /** Gets the child stored at position `i` for this node in `ruby_lambda_parameters_child`. */
+ AstNode getChild(int i) { ruby_lambda_parameters_child(this, i, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_lambda_parameters_child(this, _, result) }
+
+ /** Gets the location recorded for this element in `ruby_lambda_parameters_def`. */
+ override Location getLocation() { ruby_lambda_parameters_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"LambdaParameters"`. */
+ override string getAPrimaryQlClass() { result = "LambdaParameters" }
+ }
+
+ /** Models `left_assignment_list` nodes (database type `@ruby_left_assignment_list`). */
+ class LeftAssignmentList extends @ruby_left_assignment_list, AstNode {
+ /** Gets the child stored at position `i` for this node in `ruby_left_assignment_list_child`. */
+ AstNode getChild(int i) { ruby_left_assignment_list_child(this, i, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_left_assignment_list_child(this, _, result) }
+
+ /** Gets the location recorded for this element in `ruby_left_assignment_list_def`. */
+ override Location getLocation() { ruby_left_assignment_list_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"LeftAssignmentList"`. */
+ override string getAPrimaryQlClass() { result = "LeftAssignmentList" }
+ }
+
+ /** Models `method` nodes (database type `@ruby_method`). */
+ class Method extends @ruby_method, AstNode {
+ /** Gets the `name` field of this node, read from `ruby_method_def`. */
+ UnderscoreMethodName getName() { ruby_method_def(this, result, _) }
+
+ /** Gets the `parameters` field of this node, read from `ruby_method_parameters`. */
+ MethodParameters getParameters() { ruby_method_parameters(this, result) }
+
+ /** Gets the child stored at position `i` for this node in `ruby_method_child`. */
+ AstNode getChild(int i) { ruby_method_child(this, i, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_method_child(this, _, result) or
+ ruby_method_parameters(this, result) or
+ ruby_method_def(this, result, _)
+ }
+
+ /** Gets the location recorded for this element in `ruby_method_def`. */
+ override Location getLocation() { ruby_method_def(this, _, result) }
+
+ /** Gets the primary QL class name of this element, `"Method"`. */
+ override string getAPrimaryQlClass() { result = "Method" }
+ }
+
+ /** Models `method_parameters` nodes (database type `@ruby_method_parameters`). */
+ class MethodParameters extends @ruby_method_parameters, AstNode {
+ /** Gets the child stored at position `i` for this node in `ruby_method_parameters_child`. */
+ AstNode getChild(int i) { ruby_method_parameters_child(this, i, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_method_parameters_child(this, _, result) }
+
+ /** Gets the location recorded for this element in `ruby_method_parameters_def`. */
+ override Location getLocation() { ruby_method_parameters_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"MethodParameters"`. */
+ override string getAPrimaryQlClass() { result = "MethodParameters" }
+ }
+
+ /** Models `module` nodes (database type `@ruby_module`). */
+ class Module extends @ruby_module, AstNode {
+ /** Gets the `name` field of this node, read from `ruby_module_def`. */
+ AstNode getName() { ruby_module_def(this, result, _) }
+
+ /** Gets the child stored at position `i` for this node in `ruby_module_child`. */
+ AstNode getChild(int i) { ruby_module_child(this, i, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_module_child(this, _, result) or ruby_module_def(this, result, _)
+ }
+
+ /** Gets the location recorded for this element in `ruby_module_def`. */
+ override Location getLocation() { ruby_module_def(this, _, result) }
+
+ /** Gets the primary QL class name of this element, `"Module"`. */
+ override string getAPrimaryQlClass() { result = "Module" }
+ }
+
+ /** Models `next` nodes (database type `@ruby_next`). */
+ class Next extends @ruby_next, AstNode {
+ /** Gets the child of this node, read from `ruby_next_child`. */
+ ArgumentList getChild() { ruby_next_child(this, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_next_child(this, result) }
+
+ /** Gets the location recorded for this element in `ruby_next_def`. */
+ override Location getLocation() { ruby_next_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"Next"`. */
+ override string getAPrimaryQlClass() { result = "Next" }
+ }
+
+ /** A class representing `nil` tokens (database type `@ruby_token_nil`). */
+ class Nil extends @ruby_token_nil, Token {
+ /** Gets the name of the primary QL class for this element, which is `"Nil"`. */
+ override string getAPrimaryQlClass() { result = "Nil" }
+ }
+
+ /** A class representing `operator` tokens (database type `@ruby_token_operator`). */
+ class Operator extends @ruby_token_operator, Token {
+ /** Gets the name of the primary QL class for this element, which is `"Operator"`. */
+ override string getAPrimaryQlClass() { result = "Operator" }
+ }
+
+ /** Models `operator_assignment` nodes (database type `@ruby_operator_assignment`). */
+ class OperatorAssignment extends @ruby_operator_assignment, AstNode {
+ /** Gets the `left` field of this node, read from `ruby_operator_assignment_def`. */
+ UnderscoreLhs getLeft() { ruby_operator_assignment_def(this, result, _, _, _) }
+
+ /** Gets the `operator` field as a string, decoded from the integer code in `ruby_operator_assignment_def`. */
+ string getOperator() {
+ exists(int code | ruby_operator_assignment_def(this, _, code, _, _) |
+ code = 0 and result = "%="
+ or
+ code = 1 and result = "&&="
+ or
+ code = 2 and result = "&="
+ or
+ code = 3 and result = "**="
+ or
+ code = 4 and result = "*="
+ or
+ code = 5 and result = "+="
+ or
+ code = 6 and result = "-="
+ or
+ code = 7 and result = "/="
+ or
+ code = 8 and result = "<<="
+ or
+ code = 9 and result = ">>="
+ or
+ code = 10 and result = "^="
+ or
+ code = 11 and result = "|="
+ or
+ code = 12 and result = "||="
+ )
+ }
+
+ /** Gets the `right` field of this node, read from `ruby_operator_assignment_def`. */
+ AstNode getRight() { ruby_operator_assignment_def(this, _, _, result, _) }
+
+ /** Gets any node stored as a field or child of this node (the string-valued operator is not included). */
+ override AstNode getAFieldOrChild() {
+ ruby_operator_assignment_def(this, _, _, result, _) or
+ ruby_operator_assignment_def(this, result, _, _, _)
+ }
+
+ /** Gets the location recorded for this element in `ruby_operator_assignment_def`. */
+ override Location getLocation() { ruby_operator_assignment_def(this, _, _, _, result) }
+
+ /** Gets the primary QL class name of this element, `"OperatorAssignment"`. */
+ override string getAPrimaryQlClass() { result = "OperatorAssignment" }
+ }
+
+ /** Models `optional_parameter` nodes (database type `@ruby_optional_parameter`). */
+ class OptionalParameter extends @ruby_optional_parameter, AstNode {
+ /** Gets the `name` field of this node, read from `ruby_optional_parameter_def`. */
+ Identifier getName() { ruby_optional_parameter_def(this, result, _, _) }
+
+ /** Gets the `value` field of this node, read from `ruby_optional_parameter_def`. */
+ UnderscoreArg getValue() { ruby_optional_parameter_def(this, _, result, _) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_optional_parameter_def(this, _, result, _) or
+ ruby_optional_parameter_def(this, result, _, _)
+ }
+
+ /** Gets the location recorded for this element in `ruby_optional_parameter_def`. */
+ override Location getLocation() { ruby_optional_parameter_def(this, _, _, result) }
+
+ /** Gets the primary QL class name of this element, `"OptionalParameter"`. */
+ override string getAPrimaryQlClass() { result = "OptionalParameter" }
+ }
+
+ /** Models `pair` nodes (database type `@ruby_pair`). */
+ class Pair extends @ruby_pair, AstNode {
+ /** Gets the `key` field of this node, read from `ruby_pair_def`. */
+ AstNode getKey() { ruby_pair_def(this, result, _, _) }
+
+ /** Gets the `value` field of this node, read from `ruby_pair_def`. */
+ UnderscoreArg getValue() { ruby_pair_def(this, _, result, _) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_pair_def(this, _, result, _) or ruby_pair_def(this, result, _, _)
+ }
+
+ /** Gets the location recorded for this element in `ruby_pair_def`. */
+ override Location getLocation() { ruby_pair_def(this, _, _, result) }
+
+ /** Gets the primary QL class name of this element, `"Pair"`. */
+ override string getAPrimaryQlClass() { result = "Pair" }
+ }
+
+ /** Models `parenthesized_statements` nodes (database type `@ruby_parenthesized_statements`). */
+ class ParenthesizedStatements extends @ruby_parenthesized_statements, AstNode {
+ /** Gets the child stored at position `i` for this node in `ruby_parenthesized_statements_child`. */
+ AstNode getChild(int i) { ruby_parenthesized_statements_child(this, i, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_parenthesized_statements_child(this, _, result) }
+
+ /** Gets the location recorded for this element in `ruby_parenthesized_statements_def`. */
+ override Location getLocation() { ruby_parenthesized_statements_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"ParenthesizedStatements"`. */
+ override string getAPrimaryQlClass() { result = "ParenthesizedStatements" }
+ }
+
+ /** Models `pattern` nodes (database type `@ruby_pattern`). */
+ class Pattern extends @ruby_pattern, AstNode {
+ /** Gets the child of this node, read from `ruby_pattern_def`. */
+ AstNode getChild() { ruby_pattern_def(this, result, _) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_pattern_def(this, result, _) }
+
+ /** Gets the location recorded for this element in `ruby_pattern_def`. */
+ override Location getLocation() { ruby_pattern_def(this, _, result) }
+
+ /** Gets the primary QL class name of this element, `"Pattern"`. */
+ override string getAPrimaryQlClass() { result = "Pattern" }
+ }
+
+ /** Models `program` nodes (database type `@ruby_program`). */
+ class Program extends @ruby_program, AstNode {
+ /** Gets the child stored at position `i` for this node in `ruby_program_child`. */
+ AstNode getChild(int i) { ruby_program_child(this, i, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_program_child(this, _, result) }
+
+ /** Gets the location recorded for this element in `ruby_program_def`. */
+ override Location getLocation() { ruby_program_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"Program"`. */
+ override string getAPrimaryQlClass() { result = "Program" }
+ }
+
+ /** Models `range` nodes (database type `@ruby_range`). */
+ class Range extends @ruby_range, AstNode {
+ /** Gets the `begin` field of this node, read from `ruby_range_begin`. */
+ UnderscoreArg getBegin() { ruby_range_begin(this, result) }
+
+ /** Gets the `end` field of this node, read from `ruby_range_end`. */
+ UnderscoreArg getEnd() { ruby_range_end(this, result) }
+
+ /** Gets the `operator` field as a string, decoded from the integer code in `ruby_range_def`. */
+ string getOperator() {
+ exists(int code | ruby_range_def(this, code, _) |
+ code = 0 and result = ".."
+ or
+ code = 1 and result = "..."
+ )
+ }
+
+ /** Gets any node stored as a field or child of this node (the string-valued operator is not included). */
+ override AstNode getAFieldOrChild() {
+ ruby_range_end(this, result) or ruby_range_begin(this, result)
+ }
+
+ /** Gets the location recorded for this element in `ruby_range_def`. */
+ override Location getLocation() { ruby_range_def(this, _, result) }
+
+ /** Gets the primary QL class name of this element, `"Range"`. */
+ override string getAPrimaryQlClass() { result = "Range" }
+ }
+
+ /** Models `rational` nodes (database type `@ruby_rational`). */
+ class Rational extends @ruby_rational, AstNode {
+ /** Gets the child of this node, read from `ruby_rational_def`. */
+ AstNode getChild() { ruby_rational_def(this, result, _) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_rational_def(this, result, _) }
+
+ /** Gets the location recorded for this element in `ruby_rational_def`. */
+ override Location getLocation() { ruby_rational_def(this, _, result) }
+
+ /** Gets the primary QL class name of this element, `"Rational"`. */
+ override string getAPrimaryQlClass() { result = "Rational" }
+ }
+
+ /** Models `redo` nodes (database type `@ruby_redo`). */
+ class Redo extends @ruby_redo, AstNode {
+ /** Gets the child of this node, read from `ruby_redo_child`. */
+ ArgumentList getChild() { ruby_redo_child(this, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_redo_child(this, result) }
+
+ /** Gets the location recorded for this element in `ruby_redo_def`. */
+ override Location getLocation() { ruby_redo_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"Redo"`. */
+ override string getAPrimaryQlClass() { result = "Redo" }
+ }
+
+ /** Models `regex` nodes (database type `@ruby_regex`). */
+ class Regex extends @ruby_regex, AstNode {
+ /** Gets the child stored at position `i` for this node in `ruby_regex_child`. */
+ AstNode getChild(int i) { ruby_regex_child(this, i, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_regex_child(this, _, result) }
+
+ /** Gets the location recorded for this element in `ruby_regex_def`. */
+ override Location getLocation() { ruby_regex_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"Regex"`. */
+ override string getAPrimaryQlClass() { result = "Regex" }
+ }
+
+ /** Models `rescue` nodes (database type `@ruby_rescue`). */
+ class Rescue extends @ruby_rescue, AstNode {
+ /** Gets the `body` field of this node, read from `ruby_rescue_body`. */
+ Then getBody() { ruby_rescue_body(this, result) }
+
+ /** Gets the `exceptions` field of this node, read from `ruby_rescue_exceptions`. */
+ Exceptions getExceptions() { ruby_rescue_exceptions(this, result) }
+
+ /** Gets the `variable` field of this node, read from `ruby_rescue_variable`. */
+ ExceptionVariable getVariable() { ruby_rescue_variable(this, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_rescue_variable(this, result) or
+ ruby_rescue_exceptions(this, result) or
+ ruby_rescue_body(this, result)
+ }
+
+ /** Gets the location recorded for this element in `ruby_rescue_def`. */
+ override Location getLocation() { ruby_rescue_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"Rescue"`. */
+ override string getAPrimaryQlClass() { result = "Rescue" }
+ }
+
+ /** Models `rescue_modifier` nodes (database type `@ruby_rescue_modifier`). */
+ class RescueModifier extends @ruby_rescue_modifier, AstNode {
+ /** Gets the `body` field of this node, read from `ruby_rescue_modifier_def`. */
+ UnderscoreStatement getBody() { ruby_rescue_modifier_def(this, result, _, _) }
+
+ /** Gets the `handler` field of this node, read from `ruby_rescue_modifier_def`. */
+ AstNode getHandler() { ruby_rescue_modifier_def(this, _, result, _) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_rescue_modifier_def(this, _, result, _) or ruby_rescue_modifier_def(this, result, _, _)
+ }
+
+ /** Gets the location recorded for this element in `ruby_rescue_modifier_def`. */
+ override Location getLocation() { ruby_rescue_modifier_def(this, _, _, result) }
+
+ /** Gets the primary QL class name of this element, `"RescueModifier"`. */
+ override string getAPrimaryQlClass() { result = "RescueModifier" }
+ }
+
+ /** Models `rest_assignment` nodes (database type `@ruby_rest_assignment`). */
+ class RestAssignment extends @ruby_rest_assignment, AstNode {
+ /** Gets the child of this node, read from `ruby_rest_assignment_child`. */
+ UnderscoreLhs getChild() { ruby_rest_assignment_child(this, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_rest_assignment_child(this, result) }
+
+ /** Gets the location recorded for this element in `ruby_rest_assignment_def`. */
+ override Location getLocation() { ruby_rest_assignment_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"RestAssignment"`. */
+ override string getAPrimaryQlClass() { result = "RestAssignment" }
+ }
+
+ /** Models `retry` nodes (database type `@ruby_retry`). */
+ class Retry extends @ruby_retry, AstNode {
+ /** Gets the child of this node, read from `ruby_retry_child`. */
+ ArgumentList getChild() { ruby_retry_child(this, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_retry_child(this, result) }
+
+ /** Gets the location recorded for this element in `ruby_retry_def`. */
+ override Location getLocation() { ruby_retry_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"Retry"`. */
+ override string getAPrimaryQlClass() { result = "Retry" }
+ }
+
+ /** Models `return` nodes (database type `@ruby_return`). */
+ class Return extends @ruby_return, AstNode {
+ /** Gets the child of this node, read from `ruby_return_child`. */
+ ArgumentList getChild() { ruby_return_child(this, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_return_child(this, result) }
+
+ /** Gets the location recorded for this element in `ruby_return_def`. */
+ override Location getLocation() { ruby_return_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"Return"`. */
+ override string getAPrimaryQlClass() { result = "Return" }
+ }
+
+ /** Models `right_assignment_list` nodes (database type `@ruby_right_assignment_list`). */
+ class RightAssignmentList extends @ruby_right_assignment_list, AstNode {
+ /** Gets the child stored at position `i` for this node in `ruby_right_assignment_list_child`. */
+ AstNode getChild(int i) { ruby_right_assignment_list_child(this, i, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_right_assignment_list_child(this, _, result) }
+
+ /** Gets the location recorded for this element in `ruby_right_assignment_list_def`. */
+ override Location getLocation() { ruby_right_assignment_list_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"RightAssignmentList"`. */
+ override string getAPrimaryQlClass() { result = "RightAssignmentList" }
+ }
+
+ /** Models `scope_resolution` nodes (database type `@ruby_scope_resolution`). */
+ class ScopeResolution extends @ruby_scope_resolution, AstNode {
+ /** Gets the `name` field of this node, read from `ruby_scope_resolution_def`. */
+ AstNode getName() { ruby_scope_resolution_def(this, result, _) }
+
+ /** Gets the `scope` field of this node, read from `ruby_scope_resolution_scope`. */
+ UnderscorePrimary getScope() { ruby_scope_resolution_scope(this, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_scope_resolution_scope(this, result) or ruby_scope_resolution_def(this, result, _)
+ }
+
+ /** Gets the location recorded for this element in `ruby_scope_resolution_def`. */
+ override Location getLocation() { ruby_scope_resolution_def(this, _, result) }
+
+ /** Gets the primary QL class name of this element, `"ScopeResolution"`. */
+ override string getAPrimaryQlClass() { result = "ScopeResolution" }
+ }
+
+ /** A class representing `self` tokens (database type `@ruby_token_self`). */
+ class Self extends @ruby_token_self, Token {
+ /** Gets the name of the primary QL class for this element, which is `"Self"`. */
+ override string getAPrimaryQlClass() { result = "Self" }
+ }
+
+ /** Models `setter` nodes (database type `@ruby_setter`). */
+ class Setter extends @ruby_setter, AstNode {
+ /** Gets the `name` field of this node, read from `ruby_setter_def`. */
+ Identifier getName() { ruby_setter_def(this, result, _) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_setter_def(this, result, _) }
+
+ /** Gets the location recorded for this element in `ruby_setter_def`. */
+ override Location getLocation() { ruby_setter_def(this, _, result) }
+
+ /** Gets the primary QL class name of this element, `"Setter"`. */
+ override string getAPrimaryQlClass() { result = "Setter" }
+ }
+
+ /** A class representing `simple_symbol` tokens (database type `@ruby_token_simple_symbol`). */
+ class SimpleSymbol extends @ruby_token_simple_symbol, Token {
+ /** Gets the name of the primary QL class for this element, which is `"SimpleSymbol"`. */
+ override string getAPrimaryQlClass() { result = "SimpleSymbol" }
+ }
+
+ /** Models `singleton_class` nodes (database type `@ruby_singleton_class`). */
+ class SingletonClass extends @ruby_singleton_class, AstNode {
+ /** Gets the `value` field of this node, read from `ruby_singleton_class_def`. */
+ UnderscoreArg getValue() { ruby_singleton_class_def(this, result, _) }
+
+ /** Gets the child stored at position `i` for this node in `ruby_singleton_class_child`. */
+ AstNode getChild(int i) { ruby_singleton_class_child(this, i, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_singleton_class_child(this, _, result) or ruby_singleton_class_def(this, result, _)
+ }
+
+ /** Gets the location recorded for this element in `ruby_singleton_class_def`. */
+ override Location getLocation() { ruby_singleton_class_def(this, _, result) }
+
+ /** Gets the primary QL class name of this element, `"SingletonClass"`. */
+ override string getAPrimaryQlClass() { result = "SingletonClass" }
+ }
+
+ /** Models `singleton_method` nodes (database type `@ruby_singleton_method`). */
+ class SingletonMethod extends @ruby_singleton_method, AstNode {
+ /** Gets the `name` field of this node, read from `ruby_singleton_method_def`. */
+ UnderscoreMethodName getName() { ruby_singleton_method_def(this, result, _, _) }
+
+ /** Gets the `object` field of this node, read from `ruby_singleton_method_def`. */
+ AstNode getObject() { ruby_singleton_method_def(this, _, result, _) }
+
+ /** Gets the `parameters` field of this node, read from `ruby_singleton_method_parameters`. */
+ MethodParameters getParameters() { ruby_singleton_method_parameters(this, result) }
+
+ /** Gets the child stored at position `i` for this node in `ruby_singleton_method_child`. */
+ AstNode getChild(int i) { ruby_singleton_method_child(this, i, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() {
+ ruby_singleton_method_child(this, _, result) or
+ ruby_singleton_method_parameters(this, result) or
+ ruby_singleton_method_def(this, _, result, _) or
+ ruby_singleton_method_def(this, result, _, _)
+ }
+
+ /** Gets the location recorded for this element in `ruby_singleton_method_def`. */
+ override Location getLocation() { ruby_singleton_method_def(this, _, _, result) }
+
+ /** Gets the primary QL class name of this element, `"SingletonMethod"`. */
+ override string getAPrimaryQlClass() { result = "SingletonMethod" }
+ }
+
+ /** Models `splat_argument` nodes (database type `@ruby_splat_argument`). */
+ class SplatArgument extends @ruby_splat_argument, AstNode {
+ /** Gets the child of this node, read from `ruby_splat_argument_def`. */
+ UnderscoreArg getChild() { ruby_splat_argument_def(this, result, _) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_splat_argument_def(this, result, _) }
+
+ /** Gets the location recorded for this element in `ruby_splat_argument_def`. */
+ override Location getLocation() { ruby_splat_argument_def(this, _, result) }
+
+ /** Gets the primary QL class name of this element, `"SplatArgument"`. */
+ override string getAPrimaryQlClass() { result = "SplatArgument" }
+ }
+
+ /** Models `splat_parameter` nodes (database type `@ruby_splat_parameter`). */
+ class SplatParameter extends @ruby_splat_parameter, AstNode {
+ /** Gets the `name` field of this node, read from `ruby_splat_parameter_name`. */
+ Identifier getName() { ruby_splat_parameter_name(this, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_splat_parameter_name(this, result) }
+
+ /** Gets the location recorded for this element in `ruby_splat_parameter_def`. */
+ override Location getLocation() { ruby_splat_parameter_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"SplatParameter"`. */
+ override string getAPrimaryQlClass() { result = "SplatParameter" }
+ }
+
+ /** Models `string` nodes (database type `@ruby_string__`). */
+ class String extends @ruby_string__, AstNode {
+ /** Gets the child stored at position `i` for this node in `ruby_string_child`. */
+ AstNode getChild(int i) { ruby_string_child(this, i, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_string_child(this, _, result) }
+
+ /** Gets the location recorded for this element in `ruby_string_def`. */
+ override Location getLocation() { ruby_string_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"String"`. */
+ override string getAPrimaryQlClass() { result = "String" }
+ }
+
+ /** Models `string_array` nodes (database type `@ruby_string_array`). */
+ class StringArray extends @ruby_string_array, AstNode {
+ /** Gets the child stored at position `i` for this node in `ruby_string_array_child`. */
+ BareString getChild(int i) { ruby_string_array_child(this, i, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_string_array_child(this, _, result) }
+
+ /** Gets the location recorded for this element in `ruby_string_array_def`. */
+ override Location getLocation() { ruby_string_array_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"StringArray"`. */
+ override string getAPrimaryQlClass() { result = "StringArray" }
+ }
+
+ /** A class representing `string_content` tokens (database type `@ruby_token_string_content`). */
+ class StringContent extends @ruby_token_string_content, Token {
+ /** Gets the name of the primary QL class for this element, which is `"StringContent"`. */
+ override string getAPrimaryQlClass() { result = "StringContent" }
+ }
+
+ /** Models `subshell` nodes (database type `@ruby_subshell`). */
+ class Subshell extends @ruby_subshell, AstNode {
+ /** Gets the child stored at position `i` for this node in `ruby_subshell_child`. */
+ AstNode getChild(int i) { ruby_subshell_child(this, i, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_subshell_child(this, _, result) }
+
+ /** Gets the location recorded for this element in `ruby_subshell_def`. */
+ override Location getLocation() { ruby_subshell_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"Subshell"`. */
+ override string getAPrimaryQlClass() { result = "Subshell" }
+ }
+
+ /** A class representing `super` tokens (database type `@ruby_token_super`). */
+ class Super extends @ruby_token_super, Token {
+ /** Gets the name of the primary QL class for this element, which is `"Super"`. */
+ override string getAPrimaryQlClass() { result = "Super" }
+ }
+
+ /** Models `superclass` nodes (database type `@ruby_superclass`). */
+ class Superclass extends @ruby_superclass, AstNode {
+ /** Gets the child of this node, read from `ruby_superclass_def`. */
+ AstNode getChild() { ruby_superclass_def(this, result, _) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_superclass_def(this, result, _) }
+
+ /** Gets the location recorded for this element in `ruby_superclass_def`. */
+ override Location getLocation() { ruby_superclass_def(this, _, result) }
+
+ /** Gets the primary QL class name of this element, `"Superclass"`. */
+ override string getAPrimaryQlClass() { result = "Superclass" }
+ }
+
+ /** Models `symbol_array` nodes (database type `@ruby_symbol_array`). */
+ class SymbolArray extends @ruby_symbol_array, AstNode {
+ /** Gets the child stored at position `i` for this node in `ruby_symbol_array_child`. */
+ BareSymbol getChild(int i) { ruby_symbol_array_child(this, i, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_symbol_array_child(this, _, result) }
+
+ /** Gets the location recorded for this element in `ruby_symbol_array_def`. */
+ override Location getLocation() { ruby_symbol_array_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"SymbolArray"`. */
+ override string getAPrimaryQlClass() { result = "SymbolArray" }
+ }
+
+ /** Models `then` nodes (database type `@ruby_then`). */
+ class Then extends @ruby_then, AstNode {
+ /** Gets the child stored at position `i` for this node in `ruby_then_child`. */
+ AstNode getChild(int i) { ruby_then_child(this, i, result) }
+
+ /** Gets any node stored as a field or child of this node. */
+ override AstNode getAFieldOrChild() { ruby_then_child(this, _, result) }
+
+ /** Gets the location recorded for this element in `ruby_then_def`. */
+ override Location getLocation() { ruby_then_def(this, result) }
+
+ /** Gets the primary QL class name of this element, `"Then"`. */
+ override string getAPrimaryQlClass() { result = "Then" }
+ }
+
+ /** A class representing `true` tokens (database type `@ruby_token_true`). */
+ class True extends @ruby_token_true, Token {
+ /** Gets the name of the primary QL class for this element, which is `"True"`. */
+ override string getAPrimaryQlClass() { result = "True" }
+ }
+
+ /** Models `unary` nodes (database type `@ruby_unary`). */
+ class Unary extends @ruby_unary, AstNode {
+ /** Gets the `operand` field of this node, read from `ruby_unary_def`. */
+ AstNode getOperand() { ruby_unary_def(this, result, _, _) }
+
+ /** Gets the `operator` field as a string, decoded from the integer code in `ruby_unary_def`. */
+ string getOperator() {
+ exists(int code | ruby_unary_def(this, _, code, _) |
+ code = 0 and result = "!"
+ or
+ code = 1 and result = "+"
+ or
+ code = 2 and result = "-"
+ or
+ code = 3 and result = "defined?"
+ or
+ code = 4 and result = "not"
+ or
+ code = 5 and result = "~"
+ )
+ }
+
+ /** Gets any node stored as a field or child of this node (the string-valued operator is not included). */
+ override AstNode getAFieldOrChild() { ruby_unary_def(this, result, _, _) }
+
+ /** Gets the location recorded for this element in `ruby_unary_def`. */
+ override Location getLocation() { ruby_unary_def(this, _, _, result) }
+
+ /** Gets the primary QL class name of this element, `"Unary"`. */
+ override string getAPrimaryQlClass() { result = "Unary" }
+ }
+
+ /** A class representing `undef` nodes, i.e. database entities of type `@ruby_undef`. */
+ class Undef extends @ruby_undef, AstNode {
+ /** Gets the name of the primary QL class for this element, here `"Undef"`. */
+ override string getAPrimaryQlClass() { result = "Undef" }
+
+ /** Gets the location of this element (last column of `ruby_undef_def`). */
+ override Location getLocation() { ruby_undef_def(this, result) }
+
+ /** Gets the `i`th child of this node, as recorded in `ruby_undef_child`; children are typed `UnderscoreMethodName`. */
+ UnderscoreMethodName getChild(int i) { ruby_undef_child(this, i, result) }
+
+ /** Gets a field or child node of this node, i.e. a child at any index. */
+ override AstNode getAFieldOrChild() { ruby_undef_child(this, _, result) }
+ }
+
+ /** A class representing `uninterpreted` tokens, i.e. tokens of database type `@ruby_token_uninterpreted`. */
+ class Uninterpreted extends @ruby_token_uninterpreted, Token {
+ /** Gets the name of the primary QL class for this element, here `"Uninterpreted"`. */
+ override string getAPrimaryQlClass() { result = "Uninterpreted" }
+ }
+
+ /** A class representing `unless` nodes, i.e. database entities of type `@ruby_unless`. */
+ class Unless extends @ruby_unless, AstNode {
+ /** Gets the name of the primary QL class for this element, here `"Unless"`. */
+ override string getAPrimaryQlClass() { result = "Unless" }
+
+ /** Gets the location of this element (last column of `ruby_unless_def`). */
+ override Location getLocation() { ruby_unless_def(this, _, result) }
+
+ /** Gets the node corresponding to the field `alternative`, stored in the separate table `ruby_unless_alternative`. */
+ AstNode getAlternative() { ruby_unless_alternative(this, result) }
+
+ /** Gets the node corresponding to the field `condition`, stored in `ruby_unless_def`. */
+ UnderscoreStatement getCondition() { ruby_unless_def(this, result, _) }
+
+ /** Gets the node corresponding to the field `consequence`, stored in the separate table `ruby_unless_consequence`. */
+ Then getConsequence() { ruby_unless_consequence(this, result) }
+
+ /** Gets a field or child node of this node: the alternative, the condition, or the consequence. */
+ override AstNode getAFieldOrChild() {
+ ruby_unless_alternative(this, result) or
+ ruby_unless_def(this, result, _) or
+ ruby_unless_consequence(this, result)
+ }
+ }
+
+ /** A class representing `unless_modifier` nodes, i.e. database entities of type `@ruby_unless_modifier`. */
+ class UnlessModifier extends @ruby_unless_modifier, AstNode {
+ /** Gets the name of the primary QL class for this element, here `"UnlessModifier"`. */
+ override string getAPrimaryQlClass() { result = "UnlessModifier" }
+
+ /** Gets the location of this element (last column of `ruby_unless_modifier_def`). */
+ override Location getLocation() { ruby_unless_modifier_def(this, _, _, result) }
+
+ /** Gets the node corresponding to the field `body` (second column of `ruby_unless_modifier_def`). */
+ UnderscoreStatement getBody() { ruby_unless_modifier_def(this, result, _, _) }
+
+ /** Gets the node corresponding to the field `condition` (third column of `ruby_unless_modifier_def`). */
+ AstNode getCondition() { ruby_unless_modifier_def(this, _, result, _) }
+
+ /** Gets a field or child node of this node: the body or the condition. */
+ override AstNode getAFieldOrChild() {
+ ruby_unless_modifier_def(this, result, _, _) or ruby_unless_modifier_def(this, _, result, _)
+ }
+ }
+
+ /** A class representing `until` nodes, i.e. database entities of type `@ruby_until`. */
+ class Until extends @ruby_until, AstNode {
+ /** Gets the name of the primary QL class for this element, here `"Until"`. */
+ override string getAPrimaryQlClass() { result = "Until" }
+
+ /** Gets the location of this element (last column of `ruby_until_def`). */
+ override Location getLocation() { ruby_until_def(this, _, _, result) }
+
+ /** Gets the node corresponding to the field `body` (second column of `ruby_until_def`), typed `Do`. */
+ Do getBody() { ruby_until_def(this, result, _, _) }
+
+ /** Gets the node corresponding to the field `condition` (third column of `ruby_until_def`). */
+ UnderscoreStatement getCondition() { ruby_until_def(this, _, result, _) }
+
+ /** Gets a field or child node of this node: the body or the condition. */
+ override AstNode getAFieldOrChild() {
+ ruby_until_def(this, result, _, _) or ruby_until_def(this, _, result, _)
+ }
+ }
+
+ /** A class representing `until_modifier` nodes, i.e. database entities of type `@ruby_until_modifier`. */
+ class UntilModifier extends @ruby_until_modifier, AstNode {
+ /** Gets the name of the primary QL class for this element, here `"UntilModifier"`. */
+ override string getAPrimaryQlClass() { result = "UntilModifier" }
+
+ /** Gets the location of this element (last column of `ruby_until_modifier_def`). */
+ override Location getLocation() { ruby_until_modifier_def(this, _, _, result) }
+
+ /** Gets the node corresponding to the field `body` (second column of `ruby_until_modifier_def`). */
+ UnderscoreStatement getBody() { ruby_until_modifier_def(this, result, _, _) }
+
+ /** Gets the node corresponding to the field `condition` (third column of `ruby_until_modifier_def`). */
+ AstNode getCondition() { ruby_until_modifier_def(this, _, result, _) }
+
+ /** Gets a field or child node of this node: the body or the condition. */
+ override AstNode getAFieldOrChild() {
+ ruby_until_modifier_def(this, result, _, _) or ruby_until_modifier_def(this, _, result, _)
+ }
+ }
+
+ /** A class representing `when` nodes, i.e. database entities of type `@ruby_when`. */
+ class When extends @ruby_when, AstNode {
+ /** Gets the name of the primary QL class for this element, here `"When"`. */
+ override string getAPrimaryQlClass() { result = "When" }
+
+ /** Gets the location of this element (last column of `ruby_when_def`). */
+ override Location getLocation() { ruby_when_def(this, result) }
+
+ /** Gets the node corresponding to the field `body`, stored in the separate table `ruby_when_body`. */
+ Then getBody() { ruby_when_body(this, result) }
+
+ /** Gets the `i`th node corresponding to the field `pattern`; the field is indexed, so a `when` may have several patterns. */
+ Pattern getPattern(int i) { ruby_when_pattern(this, i, result) }
+
+ /** Gets a field or child node of this node: the body or a pattern at any index. */
+ override AstNode getAFieldOrChild() {
+ ruby_when_body(this, result) or ruby_when_pattern(this, _, result)
+ }
+ }
+
+ /** A class representing `while` nodes, i.e. database entities of type `@ruby_while`. */
+ class While extends @ruby_while, AstNode {
+ /** Gets the name of the primary QL class for this element, here `"While"`. */
+ override string getAPrimaryQlClass() { result = "While" }
+
+ /** Gets the location of this element (last column of `ruby_while_def`). */
+ override Location getLocation() { ruby_while_def(this, _, _, result) }
+
+ /** Gets the node corresponding to the field `body` (second column of `ruby_while_def`), typed `Do`. */
+ Do getBody() { ruby_while_def(this, result, _, _) }
+
+ /** Gets the node corresponding to the field `condition` (third column of `ruby_while_def`). */
+ UnderscoreStatement getCondition() { ruby_while_def(this, _, result, _) }
+
+ /** Gets a field or child node of this node: the body or the condition. */
+ override AstNode getAFieldOrChild() {
+ ruby_while_def(this, result, _, _) or ruby_while_def(this, _, result, _)
+ }
+ }
+
+ /** A class representing `while_modifier` nodes, i.e. database entities of type `@ruby_while_modifier`. */
+ class WhileModifier extends @ruby_while_modifier, AstNode {
+ /** Gets the name of the primary QL class for this element, here `"WhileModifier"`. */
+ override string getAPrimaryQlClass() { result = "WhileModifier" }
+
+ /** Gets the location of this element (last column of `ruby_while_modifier_def`). */
+ override Location getLocation() { ruby_while_modifier_def(this, _, _, result) }
+
+ /** Gets the node corresponding to the field `body` (second column of `ruby_while_modifier_def`). */
+ UnderscoreStatement getBody() { ruby_while_modifier_def(this, result, _, _) }
+
+ /** Gets the node corresponding to the field `condition` (third column of `ruby_while_modifier_def`). */
+ AstNode getCondition() { ruby_while_modifier_def(this, _, result, _) }
+
+ /** Gets a field or child node of this node: the body or the condition. */
+ override AstNode getAFieldOrChild() {
+ ruby_while_modifier_def(this, result, _, _) or ruby_while_modifier_def(this, _, result, _)
+ }
+ }
+
+ /** A class representing `yield` nodes, i.e. database entities of type `@ruby_yield`. */
+ class Yield extends @ruby_yield, AstNode {
+ /** Gets the name of the primary QL class for this element, here `"Yield"`. */
+ override string getAPrimaryQlClass() { result = "Yield" }
+
+ /** Gets the location of this element (last column of `ruby_yield_def`). */
+ override Location getLocation() { ruby_yield_def(this, result) }
+
+ /** Gets the child of this node, stored in the separate table `ruby_yield_child` and typed `ArgumentList`. */
+ ArgumentList getChild() { ruby_yield_child(this, result) }
+
+ /** Gets a field or child node of this node; this reads the same table that `getChild()` reads. */
+ override AstNode getAFieldOrChild() { ruby_yield_child(this, result) }
+ }
+}
+
+module Erb {
+ /** The base class for all ERB AST nodes, i.e. database entities of type `@erb_ast_node`. */
+ class AstNode extends @erb_ast_node {
+ /** Gets a string representation of this element; by default the name of a primary QL class. */
+ string toString() { result = this.getAPrimaryQlClass() }
+
+ /** Gets the location of this element. The base class has none; subclasses override this. */
+ Location getLocation() { none() }
+
+ /** Gets the parent of this element, as recorded in `erb_ast_node_parent`. */
+ AstNode getParent() { erb_ast_node_parent(this, result, _) }
+
+ /** Gets the index of this node among the children of its parent. */
+ int getParentIndex() { erb_ast_node_parent(this, _, result) }
+
+ /** Gets a field or child node of this node. The base class has none; subclasses override this. */
+ AstNode getAFieldOrChild() { none() }
+
+ /** Gets the name of the primary QL class for this element; `"???"` unless a subclass overrides it. */
+ string getAPrimaryQlClass() { result = "???" }
+
+ /** Gets a comma-separated list of the names of the primary CodeQL classes to which this element belongs. */
+ string getPrimaryQlClasses() { result = concat(this.getAPrimaryQlClass(), ",") }
+ }
+
+ /** A token, i.e. a node whose value and location are stored in `erb_tokeninfo`. */
+ class Token extends @erb_token, AstNode {
+ /** Gets the value of this token (third column of `erb_tokeninfo`). */
+ string getValue() { erb_tokeninfo(this, _, result, _) }
+
+ /** Gets the location of this token (last column of `erb_tokeninfo`). */
+ override Location getLocation() { erb_tokeninfo(this, _, _, result) }
+
+ /** Gets a string representation of this element, which for tokens is their value. */
+ override string toString() { result = this.getValue() }
+
+ /** Gets the name of the primary QL class for this element, here `"Token"`. */
+ override string getAPrimaryQlClass() { result = "Token" }
+ }
+
+ /** A reserved word, i.e. a token of database type `@erb_reserved_word`. */
+ class ReservedWord extends @erb_reserved_word, Token {
+ /** Gets the name of the primary QL class for this element, here `"ReservedWord"`. */
+ override string getAPrimaryQlClass() { result = "ReservedWord" }
+ }
+
+ /** A class representing `code` tokens, i.e. tokens of database type `@erb_token_code`. */
+ class Code extends @erb_token_code, Token {
+ /** Gets the name of the primary QL class for this element, here `"Code"`. */
+ override string getAPrimaryQlClass() { result = "Code" }
+ }
+
+ /** A class representing `comment` tokens, i.e. tokens of database type `@erb_token_comment`. */
+ class Comment extends @erb_token_comment, Token {
+ /** Gets the name of the primary QL class for this element, here `"Comment"`. */
+ override string getAPrimaryQlClass() { result = "Comment" }
+ }
+
+ /** A class representing `comment_directive` nodes, i.e. database entities of type `@erb_comment_directive`. */
+ class CommentDirective extends @erb_comment_directive, AstNode {
+ /** Gets the name of the primary QL class for this element, here `"CommentDirective"`. */
+ override string getAPrimaryQlClass() { result = "CommentDirective" }
+
+ /** Gets the location of this element (last column of `erb_comment_directive_def`). */
+ override Location getLocation() { erb_comment_directive_def(this, _, result) }
+
+ /** Gets the child of this node, a `Comment` token. */
+ Comment getChild() { erb_comment_directive_def(this, result, _) }
+
+ /** Gets a field or child node of this node; this reads the same column that `getChild()` reads. */
+ override AstNode getAFieldOrChild() { erb_comment_directive_def(this, result, _) }
+ }
+
+ /** A class representing `content` tokens, i.e. tokens of database type `@erb_token_content`. */
+ class Content extends @erb_token_content, Token {
+ /** Gets the name of the primary QL class for this element, here `"Content"`. */
+ override string getAPrimaryQlClass() { result = "Content" }
+ }
+
+ /** A class representing `directive` nodes, i.e. database entities of type `@erb_directive`. */
+ class Directive extends @erb_directive, AstNode {
+ /** Gets the name of the primary QL class for this element, here `"Directive"`. */
+ override string getAPrimaryQlClass() { result = "Directive" }
+
+ /** Gets the location of this element (last column of `erb_directive_def`). */
+ override Location getLocation() { erb_directive_def(this, _, result) }
+
+ /** Gets the child of this node, a `Code` token. */
+ Code getChild() { erb_directive_def(this, result, _) }
+
+ /** Gets a field or child node of this node; this reads the same column that `getChild()` reads. */
+ override AstNode getAFieldOrChild() { erb_directive_def(this, result, _) }
+ }
+
+ /** A class representing `graphql_directive` nodes, i.e. database entities of type `@erb_graphql_directive`. */
+ class GraphqlDirective extends @erb_graphql_directive, AstNode {
+ /** Gets the name of the primary QL class for this element, here `"GraphqlDirective"`. */
+ override string getAPrimaryQlClass() { result = "GraphqlDirective" }
+
+ /** Gets the location of this element (last column of `erb_graphql_directive_def`). */
+ override Location getLocation() { erb_graphql_directive_def(this, _, result) }
+
+ /** Gets the child of this node, a `Code` token. */
+ Code getChild() { erb_graphql_directive_def(this, result, _) }
+
+ /** Gets a field or child node of this node; this reads the same column that `getChild()` reads. */
+ override AstNode getAFieldOrChild() { erb_graphql_directive_def(this, result, _) }
+ }
+
+ /** A class representing `output_directive` nodes, i.e. database entities of type `@erb_output_directive`. */
+ class OutputDirective extends @erb_output_directive, AstNode {
+ /** Gets the name of the primary QL class for this element, here `"OutputDirective"`. */
+ override string getAPrimaryQlClass() { result = "OutputDirective" }
+
+ /** Gets the location of this element (last column of `erb_output_directive_def`). */
+ override Location getLocation() { erb_output_directive_def(this, _, result) }
+
+ /** Gets the child of this node, a `Code` token. */
+ Code getChild() { erb_output_directive_def(this, result, _) }
+
+ /** Gets a field or child node of this node; this reads the same column that `getChild()` reads. */
+ override AstNode getAFieldOrChild() { erb_output_directive_def(this, result, _) }
+ }
+
+ /** A class representing `template` nodes, i.e. database entities of type `@erb_template`. */
+ class Template extends @erb_template, AstNode {
+ /** Gets the name of the primary QL class for this element, here `"Template"`. */
+ override string getAPrimaryQlClass() { result = "Template" }
+
+ /** Gets the location of this element (last column of `erb_template_def`). */
+ override Location getLocation() { erb_template_def(this, result) }
+
+ /** Gets the `i`th child of this node, as recorded in `erb_template_child`. */
+ AstNode getChild(int i) { erb_template_child(this, i, result) }
+
+ /** Gets a field or child node of this node, i.e. a child at any index. */
+ override AstNode getAFieldOrChild() { erb_template_child(this, _, result) }
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Variable.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Variable.qll
new file mode 100644
index 00000000000..3394ef0665a
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/ast/internal/Variable.qll
@@ -0,0 +1,604 @@
+private import TreeSitter
+private import codeql.Locations
+private import codeql.ruby.AST
+private import codeql.ruby.ast.internal.AST
+private import codeql.ruby.ast.internal.Parameter
+private import codeql.ruby.ast.internal.Scope
+private import codeql.ruby.ast.internal.Synthesis
+
+/**
+ * Holds if `n` is in the left-hand-side of an explicit assignment `assignment`: `n` is the left operand of an `Assignment` or `OperatorAssignment`, or it is nested in such an operand below a `DestructuredLeftAssignment`, `LeftAssignmentList`, or `RestAssignment` parent.
+ */
+predicate explicitAssignmentNode(Ruby::AstNode n, Ruby::AstNode assignment) {
+ n = assignment.(Ruby::Assignment).getLeft()
+ or
+ n = assignment.(Ruby::OperatorAssignment).getLeft()
+ or
+ exists(Ruby::AstNode parent |
+ parent = n.getParent() and
+ explicitAssignmentNode(parent, assignment) // recursive step: the parent is itself on the left-hand side
+ |
+ parent instanceof Ruby::DestructuredLeftAssignment
+ or
+ parent instanceof Ruby::LeftAssignmentList
+ or
+ parent instanceof Ruby::RestAssignment
+ )
+}
+
+/** Holds if `n` is inside an implicit assignment: `n` is the child of an `ExceptionVariable`, the pattern of a `For`, or any descendant of such a node. */
+predicate implicitAssignmentNode(Ruby::AstNode n) {
+ n = any(Ruby::ExceptionVariable ev).getChild()
+ or
+ n = any(Ruby::For for).getPattern()
+ or
+ implicitAssignmentNode(n.getParent()) // recursive step: everything below an implicit assignment node counts too
+}
+
+/** Holds if `n` is inside a parameter of callable `c`: `n` is a parameter of `c`, or the parent of `n` is a `DestructuredParameter` for which this predicate holds. */
+predicate implicitParameterAssignmentNode(Ruby::AstNode n, Callable::Range c) {
+ n = c.getParameter(_)
+ or
+ implicitParameterAssignmentNode(n.getParent().(Ruby::DestructuredParameter), c) // only destructured parameters are descended into
+}
+
+/** Holds if instance-variable token `var` has text `name` and enclosing module or class `scope`; `instance` is `true` when `hasEnclosingMethod(var)` holds and `false` otherwise. */
+private predicate instanceVariableAccess(
+ Ruby::InstanceVariable var, string name, Scope::Range scope, boolean instance
+) {
+ var.getValue() = name and enclosingModuleOrClass(var) = scope and
+ (hasEnclosingMethod(var) and instance = true or not hasEnclosingMethod(var) and instance = false)
+}
+
+/** Holds if class-variable token `var` has text `name` and enclosing module or class `scope`. */
+private predicate classVariableAccess(Ruby::ClassVariable var, string name, Scope::Range scope) {
+ var.getValue() = name and enclosingModuleOrClass(var) = scope
+}
+
+private predicate hasEnclosingMethod(Ruby::AstNode node) {
+ exists(scopeOf(node).(Scope::Range).getEnclosingMethod()) // holds when `getEnclosingMethod()` has a result for the scope of `node`
+}
+
+private ModuleBase::Range enclosingModuleOrClass(Ruby::AstNode node) {
+ result = scopeOf(node).(Scope::Range).getEnclosingModule() // the enclosing module reported by the scope of `node`
+}
+
+/** Holds if identifier `i`, with text `name`, occurs inside a parameter of callable `scope`. */
+private predicate parameterAssignment(Callable::Range scope, string name, Ruby::Identifier i) {
+ i.getValue() = name and implicitParameterAssignmentNode(i, scope)
+}
+
+/** Holds if `scope` defines `name` in its parameter declaration at `i`: `i` is the first identifier (by start line, then start column) named `name` inside a parameter of `scope`, or `i` is the name of a block, hash-splat, keyword, optional, or splat parameter of `scope`. */
+private predicate scopeDefinesParameterVariable(
+ Callable::Range scope, string name, Ruby::Identifier i
+) {
+ // In case of overlapping parameter names (e.g. `_`), only the first
+ // parameter will give rise to a variable
+ i =
+ min(Ruby::Identifier other |
+ parameterAssignment(scope, name, other)
+ |
+ other order by other.getLocation().getStartLine(), other.getLocation().getStartColumn()
+ )
+ or
+ exists(Parameter::Range p | // parameter kinds whose name is reached through `getName()`
+ p = scope.getParameter(_) and
+ name = i.getValue()
+ |
+ i = p.(Ruby::BlockParameter).getName() or
+ i = p.(Ruby::HashSplatParameter).getName() or
+ i = p.(Ruby::KeywordParameter).getName() or
+ i = p.(Ruby::OptionalParameter).getName() or
+ i = p.(Ruby::SplatParameter).getName()
+ )
+}
+
+/** Holds if `name` is assigned in `scope` at `i`: identifier `i` has text `name`, its scope is `scope`, and it is part of an explicit or implicit assignment. */
+private predicate scopeAssigns(Scope::Range scope, string name, Ruby::Identifier i) {
+ (explicitAssignmentNode(i, _) or implicitAssignmentNode(i)) and
+ name = i.getValue() and
+ scope = scopeOf(i)
+}
+
+cached
+private module Cached { // cached stage holding the variable newtype and the access predicates built on it
+ cached
+ newtype TVariable = // one branch per kind of variable
+ TGlobalVariable(string name) { name = any(Ruby::GlobalVariable var).getValue() } or
+ TClassVariable(Scope::Range scope, string name, Ruby::AstNode decl) {
+ decl = // the first access (by start line, then start column) acts as the declaration
+ min(Ruby::ClassVariable other |
+ classVariableAccess(other, name, scope)
+ |
+ other order by other.getLocation().getStartLine(), other.getLocation().getStartColumn()
+ )
+ } or
+ TInstanceVariable(Scope::Range scope, string name, boolean instance, Ruby::AstNode decl) {
+ decl = // the first access (by start line, then start column) acts as the declaration
+ min(Ruby::InstanceVariable other |
+ instanceVariableAccess(other, name, scope, instance)
+ |
+ other order by other.getLocation().getStartLine(), other.getLocation().getStartColumn()
+ )
+ } or
+ TLocalVariableReal(Scope::Range scope, string name, Ruby::Identifier i) {
+ scopeDefinesParameterVariable(scope, name, i) // either: `i` defines a parameter of `scope`
+ or
+ i = // or: `i` is the first assignment to `name` in `scope` ...
+ min(Ruby::Identifier other |
+ scopeAssigns(scope, name, other)
+ |
+ other order by other.getLocation().getStartLine(), other.getLocation().getStartColumn()
+ ) and
+ not scopeDefinesParameterVariable(scope, name, _) and // ... and `name` is not already a parameter of `scope`
+ not inherits(scope, name, _) // ... nor inherited from an outer scope
+ } or
+ TLocalVariableSynth(AstNode n, int i) { any(Synthesis s).localVariable(n, i) }
+
+ // Db types that can be vcalls
+ private class VcallToken =
+ @ruby_scope_resolution or @ruby_token_constant or @ruby_token_identifier or @ruby_token_super;
+
+ /**
+ * Holds if `i` is an `identifier` node occurring in the context where it
+ * should be considered a VCALL. VCALL is the term that MRI/Ripper uses
+ * internally when there's an identifier without arguments or parentheses,
+ * i.e. it *might* be a method call, but it might also be a variable access,
+ * depending on the bindings in the current scope.
+ * ```rb
+ * foo # in MRI this is a VCALL, and the predicate should hold for this
+ * bar() # in MRI this would be an FCALL. Tree-sitter gives us a `call` node,
+ * # and the `method` field will be an `identifier`, but this predicate
+ * # will not hold for that identifier.
+ * ```
+ */
+ cached
+ predicate vcall(VcallToken i) { // one disjunct per parent field in which `i` may occur
+ i = any(Ruby::ArgumentList x).getChild(_)
+ or
+ i = any(Ruby::Array x).getChild(_)
+ or
+ i = any(Ruby::Assignment x).getRight()
+ or
+ i = any(Ruby::Begin x).getChild(_)
+ or
+ i = any(Ruby::BeginBlock x).getChild(_)
+ or
+ i = any(Ruby::Binary x).getLeft()
+ or
+ i = any(Ruby::Binary x).getRight()
+ or
+ i = any(Ruby::Block x).getChild(_)
+ or
+ i = any(Ruby::BlockArgument x).getChild()
+ or
+ i = any(Ruby::Call x).getReceiver()
+ or
+ i = any(Ruby::Case x).getValue()
+ or
+ i = any(Ruby::Class x).getChild(_)
+ or
+ i = any(Ruby::Conditional x).getCondition()
+ or
+ i = any(Ruby::Conditional x).getConsequence()
+ or
+ i = any(Ruby::Conditional x).getAlternative()
+ or
+ i = any(Ruby::Do x).getChild(_)
+ or
+ i = any(Ruby::DoBlock x).getChild(_)
+ or
+ i = any(Ruby::ElementReference x).getChild(_)
+ or
+ i = any(Ruby::ElementReference x).getObject()
+ or
+ i = any(Ruby::Else x).getChild(_)
+ or
+ i = any(Ruby::Elsif x).getCondition()
+ or
+ i = any(Ruby::EndBlock x).getChild(_)
+ or
+ i = any(Ruby::Ensure x).getChild(_)
+ or
+ i = any(Ruby::Exceptions x).getChild(_)
+ or
+ i = any(Ruby::HashSplatArgument x).getChild()
+ or
+ i = any(Ruby::If x).getCondition()
+ or
+ i = any(Ruby::IfModifier x).getCondition()
+ or
+ i = any(Ruby::IfModifier x).getBody()
+ or
+ i = any(Ruby::In x).getChild()
+ or
+ i = any(Ruby::Interpolation x).getChild(_)
+ or
+ i = any(Ruby::KeywordParameter x).getValue()
+ or
+ i = any(Ruby::Method x).getChild(_)
+ or
+ i = any(Ruby::Module x).getChild(_)
+ or
+ i = any(Ruby::OperatorAssignment x).getRight()
+ or
+ i = any(Ruby::OptionalParameter x).getValue()
+ or
+ i = any(Ruby::Pair x).getKey()
+ or
+ i = any(Ruby::Pair x).getValue()
+ or
+ i = any(Ruby::ParenthesizedStatements x).getChild(_)
+ or
+ i = any(Ruby::Pattern x).getChild()
+ or
+ i = any(Ruby::Program x).getChild(_)
+ or
+ i = any(Ruby::Range x).getBegin()
+ or
+ i = any(Ruby::Range x).getEnd()
+ or
+ i = any(Ruby::RescueModifier x).getBody()
+ or
+ i = any(Ruby::RescueModifier x).getHandler()
+ or
+ i = any(Ruby::RightAssignmentList x).getChild(_)
+ or
+ i = any(Ruby::ScopeResolution x).getScope()
+ or
+ i = any(Ruby::SingletonClass x).getValue()
+ or
+ i = any(Ruby::SingletonClass x).getChild(_)
+ or
+ i = any(Ruby::SingletonMethod x).getChild(_)
+ or
+ i = any(Ruby::SingletonMethod x).getObject()
+ or
+ i = any(Ruby::SplatArgument x).getChild()
+ or
+ i = any(Ruby::Superclass x).getChild()
+ or
+ i = any(Ruby::Then x).getChild(_)
+ or
+ i = any(Ruby::Unary x).getOperand()
+ or
+ i = any(Ruby::Unless x).getCondition()
+ or
+ i = any(Ruby::UnlessModifier x).getCondition()
+ or
+ i = any(Ruby::UnlessModifier x).getBody()
+ or
+ i = any(Ruby::Until x).getCondition()
+ or
+ i = any(Ruby::UntilModifier x).getCondition()
+ or
+ i = any(Ruby::UntilModifier x).getBody()
+ or
+ i = any(Ruby::While x).getCondition()
+ or
+ i = any(Ruby::WhileModifier x).getCondition()
+ or
+ i = any(Ruby::WhileModifier x).getBody()
+ }
+
+ cached
+ predicate access(Ruby::Identifier access, VariableReal variable) { // holds if identifier `access` has the name of `variable` and can see it
+ exists(string name |
+ variable.getNameImpl() = name and
+ name = access.getValue()
+ |
+ variable.getDeclaringScopeImpl() = scopeOf(access) and // case 1: declared in the same scope as the access ...
+ not access.getLocation().strictlyBefore(variable.getLocationImpl()) and // ... and the access is not strictly before the variable's location
+ // In case of overlapping parameter names, later parameters should not
+ // be considered accesses to the first parameter
+ if parameterAssignment(_, _, access)
+ then scopeDefinesParameterVariable(_, _, access)
+ else any()
+ or
+ exists(Scope::Range declScope | // case 2: the scope of the access inherits `name` from the declaring scope
+ variable.getDeclaringScopeImpl() = declScope and
+ inherits(scopeOf(access), name, declScope)
+ )
+ )
+ }
+
+ private class Access extends Ruby::Token { // tokens that are variable accesses: local-variable accesses per `access`, or global/instance/class variable tokens
+ Access() {
+ access(this, _) or
+ this instanceof Ruby::GlobalVariable or
+ this instanceof Ruby::InstanceVariable or
+ this instanceof Ruby::ClassVariable
+ }
+ }
+
+ cached
+ predicate explicitWriteAccess(Access access, Ruby::AstNode assignment) { // `access` is on the left-hand side of explicit assignment `assignment`
+ explicitAssignmentNode(access, assignment)
+ }
+
+ cached
+ predicate implicitWriteAccess(Access access) { // `access` is inside an implicit assignment or defines a parameter variable
+ implicitAssignmentNode(access)
+ or
+ scopeDefinesParameterVariable(_, _, access)
+ }
+
+ cached
+ predicate isCapturedAccess(LocalVariableAccess access) { // the accessed variable's declaring scope differs from the scope of the access
+ toGenerated(access.getVariable().getDeclaringScope()) != scopeOf(toGenerated(access))
+ }
+
+ cached
+ predicate instanceVariableAccess(Ruby::InstanceVariable var, InstanceVariable v) { // links token `var` to the variable with the same scope, name and `instance` flag
+ exists(string name, Scope::Range scope, boolean instance |
+ v = TInstanceVariable(scope, name, instance, _) and
+ instanceVariableAccess(var, name, scope, instance)
+ )
+ }
+
+ cached
+ predicate classVariableAccess(Ruby::ClassVariable var, ClassVariable variable) { // links token `var` to the variable with the same scope and name
+ exists(Scope::Range scope, string name |
+ variable = TClassVariable(scope, name, _) and
+ classVariableAccess(var, name, scope)
+ )
+ }
+}
+
+import Cached
+
+/** Holds if this scope inherits `name` from an outer scope `outer`. Only `Block` and `DoBlock` scopes that do not define `name` as a parameter inherit; the search walks outwards through `getOuterScope()`. */
+private predicate inherits(Scope::Range scope, string name, Scope::Range outer) {
+ (scope instanceof Ruby::Block or scope instanceof Ruby::DoBlock) and
+ not scopeDefinesParameterVariable(scope, name, _) and
+ (
+ outer = scope.getOuterScope() and // base case: the immediately enclosing scope provides `name`
+ (
+ scopeDefinesParameterVariable(outer, name, _)
+ or
+ exists(Ruby::Identifier i |
+ scopeAssigns(outer, name, i) and
+ i.getLocation().strictlyBefore(scope.getLocation()) // only assignments located strictly before this scope count
+ )
+ )
+ or
+ inherits(scope.getOuterScope(), name, outer) // recursive case: the enclosing scope itself inherits `name` from `outer`
+ )
+}
+
+abstract class VariableImpl extends TVariable { // abstract base over `TVariable`; the concrete subclasses in this file are `VariableRealAdapter` and `LocalVariableSynth`
+ abstract string getNameImpl();
+
+ final string toString() { result = this.getNameImpl() } // a variable prints as its name
+
+ abstract Location getLocationImpl();
+}
+
+class TVariableReal = TGlobalVariable or TClassVariable or TInstanceVariable or TLocalVariableReal; // every `TVariable` branch except `TLocalVariableSynth`
+
+class TLocalVariable = TLocalVariableReal or TLocalVariableSynth; // local variables, whether real or synthesized
+
+/**
+ * This class only exists to avoid negative recursion warnings. Ideally,
+ * we would use `VariableImpl` directly, but that results in incorrect
+ * negative recursion warnings. Adding new root-defs for the predicates
+ * below works around this. It ranges over `TVariableReal`, i.e. the non-synthesized variables.
+ */
+abstract class VariableReal extends TVariableReal {
+ abstract string getNameImpl();
+
+ abstract Location getLocationImpl();
+
+ abstract Scope::Range getDeclaringScopeImpl(); // not declared on `VariableImpl`
+
+ final string toString() { result = this.getNameImpl() } // a variable prints as its name
+}
+
+// Convert extensions of `VariableReal` into extensions of `VariableImpl` by forwarding name and location
+private class VariableRealAdapter extends VariableImpl, TVariableReal instanceof VariableReal {
+ final override string getNameImpl() { result = VariableReal.super.getNameImpl() }
+
+ final override Location getLocationImpl() { result = VariableReal.super.getLocationImpl() }
+}
+
+class LocalVariableReal extends VariableReal, TLocalVariableReal { // a non-synthesized local variable; unpacks the `TLocalVariableReal` branch into fields
+ private Scope::Range scope;
+ private string name;
+ private Ruby::Identifier i; // the identifier that defines the variable
+
+ LocalVariableReal() { this = TLocalVariableReal(scope, name, i) }
+
+ final override string getNameImpl() { result = name }
+
+ final override Location getLocationImpl() { result = i.getLocation() } // located at the defining identifier
+
+ final override Scope::Range getDeclaringScopeImpl() { result = scope }
+
+ final VariableAccess getDefiningAccessImpl() { toGenerated(result) = i } // the access whose `toGenerated` node is the defining identifier
+}
+
+class LocalVariableSynth extends VariableImpl, TLocalVariableSynth { // a synthesized local variable, identified by AST node `n` and index `i`
+ private AstNode n;
+ private int i;
+
+ LocalVariableSynth() { this = TLocalVariableSynth(n, i) }
+
+ final override string getNameImpl() { // `__synth__<i>`, with `__<level>` appended when the desugar level of `n` is positive
+ exists(int level | level = desugarLevel(n) |
+ if level > 0 then result = "__synth__" + i + "__" + level else result = "__synth__" + i
+ )
+ }
+
+ final override Location getLocationImpl() { result = n.getLocation() } // located at the node `n`
+}
+
+class GlobalVariableImpl extends VariableReal, TGlobalVariable { // a global variable, identified by its name alone
+ private string name;
+
+ GlobalVariableImpl() { this = TGlobalVariable(name) }
+
+ final override string getNameImpl() { result = name }
+
+ final override Location getLocationImpl() { none() } // global variables have no location
+
+ final override Scope::Range getDeclaringScopeImpl() { none() } // and no declaring scope
+}
+
+class InstanceVariableImpl extends VariableReal, TInstanceVariable { // an instance variable; unpacks the `TInstanceVariable` branch into fields
+ private ModuleBase::Range scope;
+ private boolean instance;
+ private string name;
+ private Ruby::AstNode decl; // the node chosen as declaration by `TInstanceVariable`
+
+ InstanceVariableImpl() { this = TInstanceVariable(scope, name, instance, decl) }
+
+ final override string getNameImpl() { result = name }
+
+ final predicate isClassInstanceVariable() { instance = false } // holds when the `instance` flag is `false`
+
+ final override Location getLocationImpl() { result = decl.getLocation() }
+
+ final override Scope::Range getDeclaringScopeImpl() { result = scope }
+}
+
+class ClassVariableImpl extends VariableReal, TClassVariable { // a class variable; unpacks the `TClassVariable` branch into fields
+ private ModuleBase::Range scope;
+ private string name;
+ private Ruby::AstNode decl; // the node chosen as declaration by `TClassVariable`
+
+ ClassVariableImpl() { this = TClassVariable(scope, name, decl) }
+
+ final override string getNameImpl() { result = name }
+
+ final override Location getLocationImpl() { result = decl.getLocation() }
+
+ final override Scope::Range getDeclaringScopeImpl() { result = scope }
+}
+
+abstract class VariableAccessImpl extends Expr, TVariableAccess { // abstract base for variable-access expressions
+ abstract VariableImpl getVariableImpl(); // the variable being accessed
+}
+
+module LocalVariableAccess {
+ predicate range(Ruby::Identifier id, LocalVariable v) { // holds if `id` accesses `v` and is an explicit write, an implicit write, or a vcall
+ access(id, v) and
+ (
+ explicitWriteAccess(id, _)
+ or
+ implicitWriteAccess(id)
+ or
+ vcall(id)
+ )
+ }
+}
+
+class TVariableAccessReal = // union of four access branches: real local accesses plus global, instance and class variable accesses
+ TLocalVariableAccessReal or TGlobalVariableAccess or TInstanceVariableAccess or
+ TClassVariableAccess;
+
+abstract class LocalVariableAccessImpl extends VariableAccessImpl, TLocalVariableAccess { } // abstract base for local-variable accesses; adds no members
+
+private class LocalVariableAccessReal extends LocalVariableAccessImpl, TLocalVariableAccessReal { // a local-variable access backed by identifier `g`
+ private Ruby::Identifier g;
+ private LocalVariable v;
+
+ LocalVariableAccessReal() { this = TLocalVariableAccessReal(g, v) }
+
+ final override LocalVariable getVariableImpl() { result = v }
+
+ final override string toString() { result = g.getValue() } // prints as the identifier's text
+}
+
+private class LocalVariableAccessSynth extends LocalVariableAccessImpl, TLocalVariableAccessSynth { // a local-variable access from the `TLocalVariableAccessSynth` branch
+ private LocalVariable v;
+
+ LocalVariableAccessSynth() { this = TLocalVariableAccessSynth(_, _, v) }
+
+ final override LocalVariable getVariableImpl() { result = v }
+
+ final override string toString() { result = v.getName() } // no backing token, so prints as the variable's name
+}
+
+module GlobalVariableAccess {
+ predicate range(Ruby::GlobalVariable n, GlobalVariableImpl v) { n.getValue() = v.getNameImpl() } // token `n` accesses `v` when its text equals the variable's name
+}
+
+abstract class GlobalVariableAccessImpl extends VariableAccessImpl, TGlobalVariableAccess { } // abstract base for global-variable accesses; adds no members
+
+private class GlobalVariableAccessReal extends GlobalVariableAccessImpl, TGlobalVariableAccessReal { // a global-variable access backed by token `g`
+ private Ruby::GlobalVariable g;
+ private GlobalVariable v;
+
+ GlobalVariableAccessReal() { this = TGlobalVariableAccessReal(g, v) }
+
+ final override GlobalVariable getVariableImpl() { result = v }
+
+ final override string toString() { result = g.getValue() } // prints as the token's text
+}
+
+private class GlobalVariableAccessSynth extends GlobalVariableAccessImpl, TGlobalVariableAccessSynth { // a global-variable access from the `TGlobalVariableAccessSynth` branch
+ private GlobalVariable v;
+
+ GlobalVariableAccessSynth() { this = TGlobalVariableAccessSynth(_, _, v) }
+
+ final override GlobalVariable getVariableImpl() { result = v }
+
+ final override string toString() { result = v.getName() } // no backing token, so prints as the variable's name
+}
+
+module InstanceVariableAccess {
+ predicate range(Ruby::InstanceVariable n, InstanceVariable v) { instanceVariableAccess(n, v) } // delegates to the cached two-argument `instanceVariableAccess`
+}
+
+abstract class InstanceVariableAccessImpl extends VariableAccessImpl, TInstanceVariableAccess { } // abstract base for instance-variable accesses; adds no members
+
+private class InstanceVariableAccessReal extends InstanceVariableAccessImpl,
+ TInstanceVariableAccessReal { // an instance-variable access backed by token `g`
+ private Ruby::InstanceVariable g;
+ private InstanceVariable v;
+
+ InstanceVariableAccessReal() { this = TInstanceVariableAccessReal(g, v) }
+
+ final override InstanceVariable getVariableImpl() { result = v }
+
+ final override string toString() { result = g.getValue() } // prints as the token's text
+}
+
+private class InstanceVariableAccessSynth extends InstanceVariableAccessImpl,
+ TInstanceVariableAccessSynth { // an instance-variable access from the `TInstanceVariableAccessSynth` branch
+ private InstanceVariable v;
+
+ InstanceVariableAccessSynth() { this = TInstanceVariableAccessSynth(_, _, v) }
+
+ final override InstanceVariable getVariableImpl() { result = v }
+
+ final override string toString() { result = v.getName() } // no backing token, so prints as the variable's name
+}
+
+module ClassVariableAccess {
+ predicate range(Ruby::ClassVariable n, ClassVariable v) { classVariableAccess(n, v) } // delegates to the cached two-argument `classVariableAccess`
+}
+
+abstract class ClassVariableAccessRealImpl extends VariableAccessImpl, TClassVariableAccess { } // abstract base for class-variable accesses, real and synthesized (both subclasses below extend it); adds no members
+
+private class ClassVariableAccessReal extends ClassVariableAccessRealImpl, TClassVariableAccessReal { // a class-variable access backed by token `g`
+ private Ruby::ClassVariable g;
+ private ClassVariable v;
+
+ ClassVariableAccessReal() { this = TClassVariableAccessReal(g, v) }
+
+ final override ClassVariable getVariableImpl() { result = v }
+
+ final override string toString() { result = g.getValue() } // prints as the token's text
+}
+
+private class ClassVariableAccessSynth extends ClassVariableAccessRealImpl,
+ TClassVariableAccessSynth { // a class-variable access from the `TClassVariableAccessSynth` branch
+ private ClassVariable v;
+
+ ClassVariableAccessSynth() { this = TClassVariableAccessSynth(_, _, v) }
+
+ final override ClassVariable getVariableImpl() { result = v }
+
+ final override string toString() { result = v.getName() } // no backing token, so prints as the variable's name
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/BasicBlocks.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/BasicBlocks.qll
new file mode 100644
index 00000000000..42aef4b794b
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/BasicBlocks.qll
@@ -0,0 +1,414 @@
+/** Provides classes representing basic blocks. */
+
+private import codeql.Locations
+private import codeql.ruby.AST
+private import codeql.ruby.ast.internal.AST
+private import codeql.ruby.ast.internal.TreeSitter
+private import codeql.ruby.controlflow.ControlFlowGraph
+private import internal.ControlFlowGraphImpl
+private import CfgNodes
+private import SuccessorTypes
+
+/**
+ * A basic block, that is, a maximal straight-line sequence of control flow nodes
+ * without branches or joins.
+ */
+class BasicBlock extends TBasicBlockStart {
+ /** Gets the scope of this basic block. */
+ CfgScope getScope() { result = this.getAPredecessor().getScope() }
+
+ /** Gets an immediate successor of this basic block, if any. */
+ BasicBlock getASuccessor() { result = this.getASuccessor(_) }
+
+ /** Gets an immediate successor of this basic block of a given type, if any. */
+ BasicBlock getASuccessor(SuccessorType t) {
+ result.getFirstNode() = this.getLastNode().getASuccessor(t)
+ }
+
+ /** Gets an immediate predecessor of this basic block, if any. */
+ BasicBlock getAPredecessor() { result.getASuccessor() = this }
+
+ /** Gets an immediate predecessor of this basic block of a given type, if any. */
+ BasicBlock getAPredecessor(SuccessorType t) { result.getASuccessor(t) = this }
+
+ /** Gets the control flow node at a specific (zero-indexed) position in this basic block. */
+ CfgNode getNode(int pos) { bbIndex(this.getFirstNode(), result, pos) }
+
+ /** Gets a control flow node in this basic block. */
+ CfgNode getANode() { result = this.getNode(_) }
+
+ /** Gets the first control flow node in this basic block. */
+ CfgNode getFirstNode() { this = TBasicBlockStart(result) }
+
+ /** Gets the last control flow node in this basic block. */
+ CfgNode getLastNode() { result = this.getNode(this.length() - 1) }
+
+ /** Gets the length of this basic block. */
+ int length() { result = strictcount(this.getANode()) }
+
+ /**
+ * Holds if this basic block immediately dominates basic block `bb`.
+ *
+ * That is, all paths reaching basic block `bb` from some entry point
+ * basic block must go through this basic block (which is an immediate
+ * predecessor of `bb`).
+ *
+ * Example:
+ *
+ * ```rb
+ * def m b
+ * if b
+ * return 0
+ * end
+ * return 1
+ * end
+ * ```
+ *
+ * The basic block starting on line 2 immediately dominates the
+ * basic block on line 5 (all paths from the entry point of `m`
+ * to `return 1` must go through the `if` block).
+ */
+ predicate immediatelyDominates(BasicBlock bb) { bbIDominates(this, bb) }
+
+ /**
+ * Holds if this basic block strictly dominates basic block `bb`.
+ *
+ * That is, all paths reaching basic block `bb` from some entry point
+ * basic block must go through this basic block (which must be different
+ * from `bb`).
+ *
+ * Example:
+ *
+ * ```rb
+ * def m b
+ * if b
+ * return 0
+ * end
+ * return 1
+ * end
+ * ```
+ *
+ * The basic block starting on line 2 strictly dominates the
+ * basic block on line 5 (all paths from the entry point of `m`
+ * to `return 1` must go through the `if` block).
+ */
+ predicate strictlyDominates(BasicBlock bb) { bbIDominates+(this, bb) }
+
+ /**
+ * Holds if this basic block dominates basic block `bb`.
+ *
+ * That is, all paths reaching basic block `bb` from some entry point
+ * basic block must go through this basic block.
+ *
+ * Example:
+ *
+ * ```rb
+ * def m b
+ * if b
+ * return 0
+ * end
+ * return 1
+ * end
+ * ```
+ *
+ * The basic block starting on line 2 dominates the
+ * basic block on line 5 (all paths from the entry point of `m`
+ * to `return 1` must go through the `if` block).
+ */
+ predicate dominates(BasicBlock bb) {
+ bb = this or
+ this.strictlyDominates(bb)
+ }
+
+ /**
+ * Holds if `df` is in the dominance frontier of this basic block.
+ * That is, this basic block dominates a predecessor of `df`, but
+ * does not dominate `df` itself.
+ *
+ * Example:
+ *
+ * ```rb
+ * def m x
+ * if x < 0
+ * x = -x
+ * if x > 10
+ * x = x - 1
+ * end
+ * end
+ * puts x
+ * end
+ * ```
+ *
+ * The basic block on line 8 is in the dominance frontier
+ * of the basic block starting on line 3 because that block
+ * dominates the basic block on line 4, which is a predecessor of
+ * `puts x`. Also, the basic block starting on line 3 does not
+ * dominate the basic block on line 8.
+ */
+ predicate inDominanceFrontier(BasicBlock df) {
+ this.dominatesPredecessor(df) and
+ not this.strictlyDominates(df)
+ }
+
+ /**
+ * Holds if this basic block dominates a predecessor of `df`.
+ */
+ private predicate dominatesPredecessor(BasicBlock df) { this.dominates(df.getAPredecessor()) }
+
+ /**
+ * Gets the basic block that immediately dominates this basic block, if any.
+ *
+ * That is, all paths reaching this basic block from some entry point
+ * basic block must go through the result, which is an immediate basic block
+ * predecessor of this basic block.
+ *
+ * Example:
+ *
+ * ```rb
+ * def m b
+ * if b
+ * return 0
+ * end
+ * return 1
+ * end
+ * ```
+ *
+ * The basic block starting on line 2 is an immediate dominator of
+ * the basic block on line 5 (all paths from the entry point of `m`
+ * to `return 1` must go through the `if` block, and the `if` block
+ * is an immediate predecessor of `return 1`).
+ */
+ BasicBlock getImmediateDominator() { bbIDominates(result, this) }
+
+ /**
+ * Holds if this basic block strictly post-dominates basic block `bb`.
+ *
+ * That is, all paths reaching a normal exit point basic block from basic
+ * block `bb` must go through this basic block (which must be different
+ * from `bb`).
+ *
+ * Example:
+ *
+ * ```rb
+ * def m b
+ * if b
+ * puts "b"
+ * end
+ * puts "m"
+ * end
+ * ```
+ *
+ * The basic block on line 5 strictly post-dominates the basic block on
+ * line 3 (all paths to the exit point of `m` from `puts "b"` must go
+ * through `puts "m"`).
+ */
+ predicate strictlyPostDominates(BasicBlock bb) { bbIPostDominates+(this, bb) }
+
+ /**
+ * Holds if this basic block post-dominates basic block `bb`.
+ *
+ * That is, all paths reaching a normal exit point basic block from basic
+ * block `bb` must go through this basic block.
+ *
+ * Example:
+ *
+ * ```rb
+ * def m b
+ * if b
+ * puts "b"
+ * end
+ * puts "m"
+ * end
+ * ```
+ *
+ * The basic block on line 5 post-dominates the basic block on line 3
+ * (all paths to the exit point of `m` from `puts "b"` must go through
+ * `puts "m"`).
+ */
+ predicate postDominates(BasicBlock bb) {
+ this.strictlyPostDominates(bb) or
+ this = bb
+ }
+
+ /** Holds if this basic block is in a loop in the control flow graph. */
+ predicate inLoop() { this.getASuccessor+() = this }
+
+ /** Gets a textual representation of this basic block. */
+ string toString() { result = this.getFirstNode().toString() }
+
+ /** Gets the location of this basic block. */
+ Location getLocation() { result = this.getFirstNode().getLocation() }
+}
+
+cached
+private module Cached {
+ /** Internal representation of basic blocks. */
+ cached
+ newtype TBasicBlock = TBasicBlockStart(CfgNode cfn) { startsBB(cfn) }
+
+ /** Holds if `cfn` starts a new basic block. */
+ private predicate startsBB(CfgNode cfn) {
+ not exists(cfn.getAPredecessor()) and exists(cfn.getASuccessor())
+ or
+ cfn.isJoin()
+ or
+ cfn.getAPredecessor().isBranch()
+ }
+
+ /**
+ * Holds if `succ` is a control flow successor of `pred` within
+ * the same basic block.
+ */
+ private predicate intraBBSucc(CfgNode pred, CfgNode succ) {
+ succ = pred.getASuccessor() and
+ not startsBB(succ)
+ }
+
+ /**
+ * Holds if `cfn` is the `i`th node in the basic block starting at `bbStart`.
+ *
+ * In other words, `i` is the shortest distance from the node `bbStart`
+ * that starts a basic block to `cfn` along the `intraBBSucc` relation.
+ */
+ cached
+ predicate bbIndex(CfgNode bbStart, CfgNode cfn, int i) =
+ shortestDistances(startsBB/1, intraBBSucc/2)(bbStart, cfn, i)
+
+ /**
+ * Holds if the first node of basic block `succ` is a control flow
+ * successor of the last node of basic block `pred`.
+ */
+ private predicate succBB(BasicBlock pred, BasicBlock succ) { succ = pred.getASuccessor() }
+
+ /** Holds if `dom` is an immediate dominator of `bb`. */
+ cached
+ predicate bbIDominates(BasicBlock dom, BasicBlock bb) =
+ idominance(entryBB/1, succBB/2)(_, dom, bb)
+
+ /** Holds if `pred` is a basic block predecessor of `succ`. */
+ private predicate predBB(BasicBlock succ, BasicBlock pred) { succBB(pred, succ) }
+
+ /** Holds if `bb` is an exit basic block that represents normal exit. */
+ private predicate normalExitBB(BasicBlock bb) { bb.getANode().(AnnotatedExitNode).isNormal() }
+
+ /** Holds if `dom` is an immediate post-dominator of `bb`. */
+ cached
+ predicate bbIPostDominates(BasicBlock dom, BasicBlock bb) =
+ idominance(normalExitBB/1, predBB/2)(_, dom, bb)
+
+ /**
+ * Gets the `i`th predecessor of join block `jb`, with respect to some
+ * arbitrary order.
+ */
+ cached
+ JoinBlockPredecessor getJoinBlockPredecessor(JoinBlock jb, int i) {
+ result =
+ rank[i + 1](JoinBlockPredecessor jbp |
+ jbp = jb.getAPredecessor()
+ |
+ jbp order by JoinBlockPredecessors::getId(jbp), JoinBlockPredecessors::getSplitString(jbp)
+ )
+ }
+}
+
+private import Cached
+
+/** Holds if the first node of basic block `bb` is an entry node. */
+private predicate entryBB(BasicBlock bb) { bb.getFirstNode() = any(EntryNode entry) }
+
+/**
+ * A basic block that begins with an entry node; the scope of such a block
+ * is the scope that its entry node was created for.
+ */
+class EntryBasicBlock extends BasicBlock {
+ EntryBasicBlock() { entryBB(this) }
+
+ override CfgScope getScope() { TEntryNode(result) = this.getFirstNode() }
+}
+
+/**
+ * An annotated exit basic block, that is, a basic block that contains
+ * an annotated exit node.
+ */
+class AnnotatedExitBasicBlock extends BasicBlock {
+ private boolean normal;
+
+ AnnotatedExitBasicBlock() {
+ exists(AnnotatedExitNode exit |
+ this.getANode() = exit and
+ if exit.isNormal() then normal = true else normal = false
+ )
+ }
+
+ /** Holds if this block represents a normal exit. */
+ final predicate isNormal() { normal = true }
+}
+
+/**
+ * A basic block that ends in an exit node, that is, a basic block
+ * whose last node is an `ExitNode`.
+ */
+class ExitBasicBlock extends BasicBlock {
+ ExitBasicBlock() { this.getLastNode() = any(ExitNode exit) }
+}
+
+private module JoinBlockPredecessors {
+ private predicate id(Ruby::AstNode a, Ruby::AstNode b) { a = b }
+
+ private predicate idOf(Ruby::AstNode node, int key) = equivalenceRelation(id/2)(node, key)
+
+ int getId(JoinBlockPredecessor jbp) {
+ idOf(toGeneratedInclSynth(jbp.(EntryBasicBlock).getScope()), result)
+ or
+ idOf(toGeneratedInclSynth(jbp.getFirstNode().(AstCfgNode).getNode()), result)
+ }
+
+ string getSplitString(JoinBlockPredecessor jbp) {
+ // Predecessors whose first node has no splits string are keyed by "".
+ if exists(jbp.getFirstNode().(AstCfgNode).getSplitsString())
+ then result = jbp.getFirstNode().(AstCfgNode).getSplitsString()
+ else result = ""
+ }
+}
+
+/** A basic block with more than one predecessor. */
+class JoinBlock extends BasicBlock {
+ JoinBlock() { this.getFirstNode().isJoin() }
+
+ /**
+ * Gets the `i`th predecessor of this join block, with respect to some
+ * arbitrary order.
+ */
+ JoinBlockPredecessor getJoinBlockPredecessor(int i) { result = getJoinBlockPredecessor(this, i) }
+}
+
+/** A basic block that has a join block among its immediate successors. */
+class JoinBlockPredecessor extends BasicBlock {
+ JoinBlockPredecessor() { exists(JoinBlock join | join = this.getASuccessor()) }
+}
+
+/** A basic block whose last node is a condition, so that control flow branches after it. */
+class ConditionBlock extends BasicBlock {
+ ConditionBlock() { this.getLastNode().isCondition() }
+
+ /**
+ * Holds if basic block `succ` is immediately controlled by this basic block with
+ * conditional value `s`. That is, `succ` is an `s`-successor of this block that can
+ * only be reached from the callable entry point via that edge: every predecessor
+ * of `succ` other than this block is dominated by `succ` itself.
+ */
+ pragma[nomagic]
+ predicate immediatelyControls(BasicBlock succ, BooleanSuccessor s) {
+ this.getASuccessor(s) = succ and
+ forall(BasicBlock other | other = succ.getAPredecessor() and other != this | succ.dominates(other))
+ }
+
+ /**
+ * Holds if this basic block controls basic block `controlled` with conditional
+ * value `s`. That is, the only way to reach `controlled` from the callable
+ * entry point is to leave this basic block along its `s` edge.
+ */
+ predicate controls(BasicBlock controlled, BooleanSuccessor s) {
+ exists(BasicBlock succ | this.immediatelyControls(succ, s) and succ.dominates(controlled))
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/CfgNodes.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/CfgNodes.qll
new file mode 100644
index 00000000000..dcc1dd6352b
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/CfgNodes.qll
@@ -0,0 +1,484 @@
+/** Provides classes representing nodes in a control flow graph. */
+
+private import codeql.ruby.AST
+private import codeql.ruby.controlflow.BasicBlocks
+private import codeql.ruby.dataflow.SSA
+private import ControlFlowGraph
+private import internal.ControlFlowGraphImpl
+private import internal.Splitting
+
+/** The entry node of a given CFG scope. */
+class EntryNode extends CfgNode, TEntryNode {
+ private CfgScope entered;
+
+ EntryNode() { this = TEntryNode(entered) }
+
+ final override EntryBasicBlock getBasicBlock() { CfgNode.super.getBasicBlock() = result }
+
+ final override Location getLocation() { entered.getLocation() = result }
+
+ final override string toString() { result = "enter " + entered }
+}
+
+/** An exit node of a given CFG scope that records whether the exit is normal or abnormal. */
+class AnnotatedExitNode extends CfgNode, TAnnotatedExitNode {
+ private CfgScope exited;
+ private boolean normal;
+
+ AnnotatedExitNode() { this = TAnnotatedExitNode(exited, normal) }
+
+ /** Holds if this node represents a normal exit. */
+ final predicate isNormal() { normal = true }
+
+ final override AnnotatedExitBasicBlock getBasicBlock() { CfgNode.super.getBasicBlock() = result }
+
+ final override Location getLocation() { exited.getLocation() = result }
+
+ final override string toString() {
+ exists(string kind |
+ normal = true and kind = "normal"
+ or
+ normal = false and kind = "abnormal"
+ |
+ result = "exit " + exited + " (" + kind + ")"
+ )
+ }
+}
+
+/** The exit node of a given CFG scope. */
+class ExitNode extends CfgNode, TExitNode {
+ private CfgScope exited;
+
+ ExitNode() { this = TExitNode(exited) }
+
+ final override Location getLocation() { exited.getLocation() = result }
+
+ final override string toString() { result = "exit " + exited }
+}
+
+/**
+ * A node for an AST node.
+ *
+ * Each AST node maps to zero or more `AstCfgNode`s: zero when the node is in unreachable
+ * (dead) code or not important for control flow, and multiple when there are different
+ * splits for the AST node.
+ */
+class AstCfgNode extends CfgNode, TElementNode {
+ private Splits splits;
+ private AstNode n;
+
+ AstCfgNode() { this = TElementNode(n, splits) }
+
+ final override AstNode getNode() { result = n }
+
+ override Location getLocation() { result = n.getLocation() }
+
+ final override string toString() {
+ exists(string s | s = n.toString() |
+ result = "[" + this.getSplitsString() + "] " + s
+ or
+ not exists(this.getSplitsString()) and result = s
+ )
+ }
+
+ /** Gets a comma-separated list of strings for each split in this node, if any. */
+ final string getSplitsString() {
+ result = splits.toString() and
+ result != ""
+ }
+
+ /** Gets a split for this control flow node, if any. */
+ final Split getASplit() { result = splits.getASplit() }
+}
+
+/** A control-flow node whose underlying AST node is an expression. */
+class ExprCfgNode extends AstCfgNode {
+ Expr e;
+
+ ExprCfgNode() { this.getNode() = e }
+
+ /** Gets the expression wrapped by this node. */
+ Expr getExpr() { e = result }
+
+ private ExprCfgNode getSource() {
+ exists(Ssa::WriteDefinition write |
+ this = write.getARead() and
+ write.assigns(result)
+ )
+ }
+
+ /** Gets the constant value of this expression rendered as text, if it has one. */
+ string getValueText() { this.getSource().getValueText() = result }
+}
+
+/** A control-flow node whose underlying AST node is a return-like statement. */
+class ReturningCfgNode extends AstCfgNode {
+ ReturningStmt s;
+
+ ReturningCfgNode() { this.getNode() = s }
+
+ /** Gets the control-flow node for the value being returned, if any. */
+ ExprCfgNode getReturnedValueNode() {
+ s.getValue() = result.getNode() and
+ this.getAPredecessor() = result
+ }
+}
+
+/** A control-flow node whose underlying AST node is a `StringComponent`. */
+class StringComponentCfgNode extends AstCfgNode {
+ StringComponentCfgNode() { this.getNode() = any(StringComponent component) }
+}
+
+private Expr desugar(Expr n) {
+ // Prefer the desugared form; an expression without one stands for itself.
+ if exists(n.getDesugared())
+ then result = n.getDesugared()
+ else result = n
+}
+
+/**
+ * A helper class that relates parent-child AST expressions to parent-child CFG nodes.
+ */
+abstract private class ExprChildMapping extends Expr {
+ /**
+ * Holds if `child` is a (possibly nested) child of this expression
+ * whose matching CFG child node should be looked up.
+ */
+ abstract predicate relevantChild(Expr child);
+
+ pragma[nomagic]
+ private predicate reachesBasicBlock(Expr child, CfgNode cfn, BasicBlock bb) {
+ this.relevantChild(child) and
+ this.getAControlFlowNode() = cfn and
+ cfn = bb.getANode()
+ or
+ exists(BasicBlock later |
+ this.reachesBasicBlock(child, cfn, later) and
+ later.getAPredecessor() = bb and
+ not child = later.getANode().getNode()
+ )
+ }
+
+ /**
+ * Holds if `cfn` is a control-flow node for this expression, `cfnChild` is a
+ * control-flow node for `child` (or for its desugared form, when it has one),
+ * and the two are connected by a control-flow path.
+ *
+ * The path never escapes the syntactic scope of this expression.
+ */
+ cached
+ predicate hasCfgChild(Expr child, CfgNode cfn, CfgNode cfnChild) {
+ desugar(child).getAControlFlowNode() = cfnChild and
+ this.reachesBasicBlock(child, cfn, cfnChild.getBasicBlock())
+ }
+}
+
+/** Provides classes for control-flow nodes that wrap AST expressions. */
+module ExprNodes {
+ private class LiteralChildMapping extends ExprChildMapping, Literal {
+ override predicate relevantChild(Expr e) { none() }
+ }
+
+ /** A control-flow node that wraps a `Literal` AST expression. */
+ class LiteralCfgNode extends ExprCfgNode {
+ override LiteralChildMapping e;
+
+ override Literal getExpr() { result = super.getExpr() }
+
+ override string getValueText() { result = e.getValueText() }
+ }
+
+ private class AssignExprChildMapping extends ExprChildMapping, AssignExpr {
+ override predicate relevantChild(Expr e) { e = this.getAnOperand() }
+ }
+
+ /** A control-flow node that wraps an `AssignExpr` AST expression. */
+ class AssignExprCfgNode extends ExprCfgNode {
+ override AssignExprChildMapping e;
+
+ final override AssignExpr getExpr() { result = ExprCfgNode.super.getExpr() }
+
+ /** Gets the LHS of this assignment. */
+ final ExprCfgNode getLhs() { e.hasCfgChild(e.getLeftOperand(), this, result) }
+
+ /** Gets the RHS of this assignment. */
+ final ExprCfgNode getRhs() { e.hasCfgChild(e.getRightOperand(), this, result) }
+ }
+
+ private class OperationExprChildMapping extends ExprChildMapping, Operation {
+ override predicate relevantChild(Expr e) { e = this.getAnOperand() }
+ }
+
+ /** A control-flow node that wraps an `Operation` AST expression. */
+ class OperationCfgNode extends ExprCfgNode {
+ override OperationExprChildMapping e;
+
+ override Operation getExpr() { result = super.getExpr() }
+
+ /** Gets an operand of this operation. */
+ final ExprCfgNode getAnOperand() { e.hasCfgChild(e.getAnOperand(), this, result) }
+ }
+
+ /** A control-flow node that wraps a `BinaryOperation` AST expression. */
+ class BinaryOperationCfgNode extends OperationCfgNode {
+ private BinaryOperation bo;
+
+ BinaryOperationCfgNode() { e = bo }
+
+ override BinaryOperation getExpr() { result = super.getExpr() }
+
+ /** Gets the left operand of this binary operation. */
+ final ExprCfgNode getLeftOperand() { e.hasCfgChild(bo.getLeftOperand(), this, result) }
+
+ /** Gets the right operand of this binary operation. */
+ final ExprCfgNode getRightOperand() { e.hasCfgChild(bo.getRightOperand(), this, result) }
+
+ final override string getValueText() {
+ exists(string left, string right, string op |
+ left = this.getLeftOperand().getValueText() and
+ right = this.getRightOperand().getValueText() and
+ op = this.getExpr().getOperator()
+ |
+ op = "+" and
+ (
+ result = (left.toInt() + right.toInt()).toString()
+ or
+ not (exists(left.toInt()) and exists(right.toInt())) and
+ result = (left.toFloat() + right.toFloat()).toString()
+ or
+ not (exists(left.toFloat()) and exists(right.toFloat())) and
+ result = left + right
+ )
+ or
+ op = "-" and
+ (
+ result = (left.toInt() - right.toInt()).toString()
+ or
+ not (exists(left.toInt()) and exists(right.toInt())) and
+ result = (left.toFloat() - right.toFloat()).toString()
+ )
+ or
+ op = "*" and
+ (
+ result = (left.toInt() * right.toInt()).toString()
+ or
+ not (exists(left.toInt()) and exists(right.toInt())) and
+ result = (left.toFloat() * right.toFloat()).toString()
+ )
+ or
+ op = "/" and
+ (
+ result = (left.toInt() / right.toInt()).toString()
+ or
+ not (exists(left.toInt()) and exists(right.toInt())) and
+ result = (left.toFloat() / right.toFloat()).toString()
+ )
+ )
+ }
+ }
+
+ private class BlockArgumentChildMapping extends ExprChildMapping, BlockArgument {
+ override predicate relevantChild(Expr e) { e = this.getValue() }
+ }
+
+ /** A control-flow node that wraps a `BlockArgument` AST expression. */
+ class BlockArgumentCfgNode extends ExprCfgNode {
+ override BlockArgumentChildMapping e;
+
+ final override BlockArgument getExpr() { result = ExprCfgNode.super.getExpr() }
+
+ /** Gets the value of this block argument. */
+ final ExprCfgNode getValue() { e.hasCfgChild(e.getValue(), this, result) }
+ }
+
+ private class CallExprChildMapping extends ExprChildMapping, Call {
+ override predicate relevantChild(Expr e) {
+ e = [this.getAnArgument(), this.(MethodCall).getReceiver(), this.(MethodCall).getBlock()]
+ }
+ }
+
+ /** A control-flow node that wraps a `Call` AST expression. */
+ class CallCfgNode extends ExprCfgNode {
+ override CallExprChildMapping e;
+
+ override Call getExpr() { result = super.getExpr() }
+
+ /** Gets the `n`th argument of this call. */
+ final ExprCfgNode getArgument(int n) { e.hasCfgChild(e.getArgument(n), this, result) }
+
+ /** Gets the keyword argument whose key is `keyword` of this call. */
+ final ExprCfgNode getKeywordArgument(string keyword) {
+ e.hasCfgChild(e.getKeywordArgument(keyword), this, result)
+ }
+
+ /** Gets the number of arguments of this call. */
+ final int getNumberOfArguments() { result = e.getNumberOfArguments() }
+
+ /** Gets the receiver of this call. */
+ final ExprCfgNode getReceiver() { e.hasCfgChild(e.(MethodCall).getReceiver(), this, result) }
+
+ /** Gets the block of this call. */
+ final ExprCfgNode getBlock() { e.hasCfgChild(e.(MethodCall).getBlock(), this, result) }
+ }
+
+ private class CaseExprChildMapping extends ExprChildMapping, CaseExpr {
+ override predicate relevantChild(Expr e) { e = this.getValue() or e = this.getBranch(_) }
+ }
+
+ /** A control-flow node that wraps a `MethodCall` AST expression. */
+ class MethodCallCfgNode extends CallCfgNode {
+ MethodCallCfgNode() { super.getExpr() instanceof MethodCall }
+
+ override MethodCall getExpr() { result = super.getExpr() }
+ }
+
+ /** A control-flow node that wraps a `CaseExpr` AST expression. */
+ class CaseExprCfgNode extends ExprCfgNode {
+ override CaseExprChildMapping e;
+
+ final override CaseExpr getExpr() { result = ExprCfgNode.super.getExpr() }
+
+ /** Gets the expression being compared, if any. */
+ final ExprCfgNode getValue() { e.hasCfgChild(e.getValue(), this, result) }
+
+ /**
+ * Gets the `n`th branch of this case expression.
+ */
+ final ExprCfgNode getBranch(int n) { e.hasCfgChild(e.getBranch(n), this, result) }
+ }
+
+ private class ConditionalExprChildMapping extends ExprChildMapping, ConditionalExpr {
+ override predicate relevantChild(Expr e) { e = this.getCondition() or e = this.getBranch(_) }
+ }
+
+ /** A control-flow node that wraps a `ConditionalExpr` AST expression. */
+ class ConditionalExprCfgNode extends ExprCfgNode {
+ override ConditionalExprChildMapping e;
+
+ final override ConditionalExpr getExpr() { result = ExprCfgNode.super.getExpr() }
+
+ /** Gets the condition expression. */
+ final ExprCfgNode getCondition() { e.hasCfgChild(e.getCondition(), this, result) }
+
+ /**
+ * Gets the branch of this conditional expression that is taken when the condition
+ * evaluates to `cond`, if any.
+ */
+ final ExprCfgNode getBranch(boolean cond) { e.hasCfgChild(e.getBranch(cond), this, result) }
+ }
+
+ private class ConstantAccessChildMapping extends ExprChildMapping, ConstantAccess {
+ override predicate relevantChild(Expr e) { e = this.getScopeExpr() }
+ }
+
+ /** A control-flow node that wraps a `ConstantAccess` AST expression. */
+ class ConstantAccessCfgNode extends ExprCfgNode {
+ override ConstantAccessChildMapping e;
+
+ final override ConstantAccess getExpr() { result = super.getExpr() }
+
+ /** Gets the scope expression. */
+ final ExprCfgNode getScopeExpr() { e.hasCfgChild(e.getScopeExpr(), this, result) }
+ }
+
+ private class StmtSequenceChildMapping extends ExprChildMapping, StmtSequence {
+ override predicate relevantChild(Expr e) { e = this.getLastStmt() }
+ }
+
+ /** A control-flow node that wraps a `StmtSequence` AST expression. */
+ class StmtSequenceCfgNode extends ExprCfgNode {
+ override StmtSequenceChildMapping e;
+
+ final override StmtSequence getExpr() { result = ExprCfgNode.super.getExpr() }
+
+ /** Gets the last statement in this sequence, if any. */
+ final ExprCfgNode getLastStmt() { e.hasCfgChild(e.getLastStmt(), this, result) }
+ }
+
+ private class ForExprChildMapping extends ExprChildMapping, ForExpr {
+ override predicate relevantChild(Expr e) { e = this.getValue() }
+ }
+
+ /** A control-flow node that wraps a `ForExpr` AST expression. */
+ class ForExprCfgNode extends ExprCfgNode {
+ override ForExprChildMapping e;
+
+ final override ForExpr getExpr() { result = ExprCfgNode.super.getExpr() }
+
+ /** Gets the value being iterated over. */
+ final ExprCfgNode getValue() { e.hasCfgChild(e.getValue(), this, result) }
+ }
+
+ /** A control-flow node that wraps a `ParenthesizedExpr` AST expression. */
+ class ParenthesizedExprCfgNode extends StmtSequenceCfgNode {
+ ParenthesizedExprCfgNode() { this.getExpr() instanceof ParenthesizedExpr }
+ }
+
+ /** A control-flow node that wraps a `VariableReadAccess` AST expression. */
+ class VariableReadAccessCfgNode extends ExprCfgNode {
+ override VariableReadAccess e;
+
+ final override VariableReadAccess getExpr() { result = ExprCfgNode.super.getExpr() }
+ }
+
+ /** A control-flow node that wraps an `InstanceVariableWriteAccess` AST expression. */
+ class InstanceVariableWriteAccessCfgNode extends ExprCfgNode {
+ override InstanceVariableWriteAccess e;
+
+ final override InstanceVariableWriteAccess getExpr() { result = ExprCfgNode.super.getExpr() }
+ }
+
+ /** A control-flow node that wraps a `StringInterpolationComponent` AST expression. */
+ class StringInterpolationComponentCfgNode extends StmtSequenceCfgNode {
+ StringInterpolationComponentCfgNode() { this.getNode() instanceof StringInterpolationComponent }
+ }
+
+ private class StringlikeLiteralChildMapping extends ExprChildMapping, StringlikeLiteral {
+ override predicate relevantChild(Expr e) { e = this.getComponent(_) }
+ }
+
+ /** A control-flow node that wraps a `StringlikeLiteral` AST expression. */
+ class StringlikeLiteralCfgNode extends ExprCfgNode {
+ override StringlikeLiteralChildMapping e;
+
+ final override StringlikeLiteral getExpr() { result = super.getExpr() }
+
+ /** Gets a component of this `StringlikeLiteral`. */
+ StringComponentCfgNode getAComponent() { e.hasCfgChild(e.getComponent(_), this, result) }
+ }
+
+ /** A control-flow node that wraps a `StringLiteral` AST expression. */
+ class StringLiteralCfgNode extends ExprCfgNode {
+ override StringLiteral e;
+
+ final override StringLiteral getExpr() { result = super.getExpr() }
+ }
+
+ /** A control-flow node that wraps a `RegExpLiteral` AST expression. */
+ class RegExpLiteralCfgNode extends ExprCfgNode {
+ override RegExpLiteral e;
+
+ final override RegExpLiteral getExpr() { result = super.getExpr() }
+ }
+
+ /** A control-flow node that wraps a `ComparisonOperation` AST expression. */
+ class ComparisonOperationCfgNode extends BinaryOperationCfgNode {
+ ComparisonOperationCfgNode() { e instanceof ComparisonOperation }
+
+ override ComparisonOperation getExpr() { result = super.getExpr() }
+ }
+
+ /** A control-flow node that wraps a `RelationalOperation` AST expression. */
+ class RelationalOperationCfgNode extends ComparisonOperationCfgNode {
+ RelationalOperationCfgNode() { e instanceof RelationalOperation }
+
+ final override RelationalOperation getExpr() { result = super.getExpr() }
+ }
+
+ /** A control-flow node that wraps an `ElementReference` AST expression. */
+ class ElementReferenceCfgNode extends MethodCallCfgNode {
+ ElementReferenceCfgNode() { e instanceof ElementReference }
+
+ final override ElementReference getExpr() { result = super.getExpr() }
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/ControlFlowGraph.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/ControlFlowGraph.qll
new file mode 100644
index 00000000000..aad9daa4827
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/ControlFlowGraph.qll
@@ -0,0 +1,341 @@
+/** Provides classes representing the control flow graph. */
+
+private import codeql.Locations
+private import codeql.ruby.AST
+private import codeql.ruby.controlflow.BasicBlocks
+private import SuccessorTypes
+private import internal.ControlFlowGraphImpl
+private import internal.Splitting
+private import internal.Completion
+
+/**
+ * An AST node with an associated control-flow graph.
+ */
+class CfgScope extends Scope instanceof CfgScope::Range_ {
+  /**
+   * Gets the CFG scope that this scope is nested under, if any. It is computed
+   * as `getCfgScope` of the parent AST node of this scope.
+   */
+  final CfgScope getOuterCfgScope() { result = getCfgScope(this.getParent()) }
+}
+
+/**
+ * A node in the control flow graph (CFG).
+ *
+ * Several CFG nodes may correspond to the same AST node, that is, the
+ * relationship between CFG nodes and AST nodes is many-to-one.
+ *
+ * The CFG only contains nodes that are reachable from an entry point.
+ */
+class CfgNode extends TNode {
+  /** Gets a textual representation of this control flow node. */
+  string toString() { none() }
+
+  /** Gets the AST node that this node corresponds to, if any. */
+  AstNode getNode() { none() }
+
+  /** Gets the location of this control flow node. */
+  Location getLocation() { none() }
+
+  /** Gets the file that contains the location of this control flow node. */
+  final File getFile() { this.getLocation().getFile() = result }
+
+  /** Holds if this control flow node has at least one Boolean successor. */
+  final predicate isCondition() { exists(BooleanSuccessor bs | exists(this.getASuccessor(bs))) }
+
+  /** Gets the scope of the basic block that this node belongs to. */
+  final CfgScope getScope() { this.getBasicBlock().getScope() = result }
+
+  /** Gets the basic block that contains this control flow node. */
+  BasicBlock getBasicBlock() { this = result.getANode() }
+
+  /** Gets an immediate successor of this node of type `t`, if any. */
+  final CfgNode getASuccessor(SuccessorType t) { getASuccessor(this, t) = result }
+
+  /** Gets an immediate successor of this node of any type, if any. */
+  final CfgNode getASuccessor() { this.getASuccessor(_) = result }
+
+  /** Gets an immediate predecessor of this node via an edge of type `t`, if any. */
+  final CfgNode getAPredecessor(SuccessorType t) { this = result.getASuccessor(t) }
+
+  /** Gets an immediate predecessor of this node via an edge of any type, if any. */
+  final CfgNode getAPredecessor() { this.getAPredecessor(_) = result }
+
+  /** Holds if this node has two or more distinct predecessors. */
+  final predicate isJoin() { strictcount(CfgNode p | p = this.getAPredecessor()) > 1 }
+
+  /** Holds if this node has two or more distinct successors. */
+  final predicate isBranch() { strictcount(CfgNode s | s = this.getASuccessor()) > 1 }
+}
+
+/** The type of a control flow successor. */
+class SuccessorType extends TSuccessorType {
+ /** Gets a textual representation of this successor type. */
+ string toString() { none() }
+}
+
+/** Provides different types of control flow successor types. */
+module SuccessorTypes {
+ /** A normal control flow successor, rendered as `successor`. */
+ class NormalSuccessor extends SuccessorType, TSuccessorSuccessor {
+ final override string toString() { result = "successor" }
+ }
+
+  /**
+   * A conditional control flow successor. Either a Boolean successor (`BooleanSuccessor`),
+   * an emptiness successor (`EmptinessSuccessor`), or a matching successor
+   * (`MatchingSuccessor`).
+   */
+  class ConditionalSuccessor extends SuccessorType {
+    boolean value;
+
+    ConditionalSuccessor() {
+      this = TBooleanSuccessor(value) or
+      this = TEmptinessSuccessor(value) or
+      this = TMatchingSuccessor(value)
+    }
+
+    /** Gets the Boolean value of this successor. */
+    final boolean getValue() { result = value }
+
+    override string toString() { result = this.getValue().toString() }
+  }
+
+ /**
+ * A Boolean control flow successor.
+ *
+ * For example, in
+ *
+ * ```rb
+ * if x >= 0
+ *   puts "non-negative"
+ * else
+ *   puts "negative"
+ * end
+ * ```
+ *
+ * `x >= 0` has both a `true` successor and a `false` successor.
+ */
+ class BooleanSuccessor extends ConditionalSuccessor, TBooleanSuccessor { }
+
+  /**
+   * An emptiness control flow successor.
+   *
+   * For example, the loop in this program fragment:
+   *
+   * ```rb
+   * for arg in args do
+   *   puts arg
+   * end
+   * puts "done";
+   * ```
+   *
+   * gives rise to a control flow graph with emptiness successors:
+   *
+   * ```
+   *           args
+   *            |
+   *           for------<-----
+   *           / \            \
+   *          /   \            |
+   *         /     \           |
+   *        /       \          |
+   *     empty    non-empty    |
+   *       |          \        |
+   * puts "done"       \       |
+   *                  arg      |
+   *                    |      |
+   *                puts arg   |
+   *                     \___/
+   * ```
+   */
+  class EmptinessSuccessor extends ConditionalSuccessor, TEmptinessSuccessor {
+    override string toString() { if value = false then result = "non-empty" else result = "empty" }
+  }
+
+  /**
+   * A matching control flow successor.
+   *
+   * For example, the `case` expression in this program fragment:
+   *
+   * ```rb
+   * case x
+   *   when 1 then puts "one"
+   *   else puts "not one"
+   * end
+   * ```
+   *
+   * gives rise to a control flow graph with matching successors:
+   *
+   * ```
+   *            x
+   *            |
+   *            1
+   *           / \
+   *          /   \
+   *         /     \
+   *        /       \
+   *     match    no-match
+   *       |          |
+   * puts "one"  puts "not one"
+   * ```
+   */
+  class MatchingSuccessor extends ConditionalSuccessor, TMatchingSuccessor {
+    override string toString() { if value = false then result = "no-match" else result = "match" }
+  }
+
+ /**
+ * A `return` control flow successor.
+ *
+ * Example:
+ *
+ * ```rb
+ * def sum(x, y)
+ *   return x + y
+ * end
+ * ```
+ *
+ * The exit node of `sum` is a `return` successor of the `return x + y`
+ * statement.
+ */
+ class ReturnSuccessor extends SuccessorType, TReturnSuccessor {
+ final override string toString() { result = "return" }
+ }
+
+ /**
+ * A `break` control flow successor.
+ *
+ * Example:
+ *
+ * ```rb
+ * def m
+ *   while x >= 0
+ *     x -= 1
+ *     if num > 100
+ *       break
+ *     end
+ *   end
+ *   puts "done"
+ * end
+ * ```
+ *
+ * The node `puts "done"` is a `break` successor of the node `break`.
+ */
+ class BreakSuccessor extends SuccessorType, TBreakSuccessor {
+ final override string toString() { result = "break" }
+ }
+
+ /**
+ * A `next` control flow successor.
+ *
+ * Example:
+ *
+ * ```rb
+ * def m
+ *   while x >= 0
+ *     x -= 1
+ *     if num > 100
+ *       next
+ *     end
+ *   end
+ *   puts "done"
+ * end
+ * ```
+ *
+ * The node `x >= 0` is a `next` successor of the node `next`.
+ */
+ class NextSuccessor extends SuccessorType, TNextSuccessor {
+ final override string toString() { result = "next" }
+ }
+
+ /**
+ * A `redo` control flow successor.
+ *
+ * It connects a `redo` statement to the node at which the loop body is re-entered.
+ *
+ * Example:
+ *
+ * ```rb
+ * def m
+ *   while x >= 0
+ *     x -= 1
+ *     if num > 100
+ *       redo
+ *     end
+ *   end
+ *   puts "done"
+ * end
+ * ```
+ *
+ * The node `x -= 1` is a `redo` successor of the node `redo`.
+ */
+ class RedoSuccessor extends SuccessorType, TRedoSuccessor {
+ final override string toString() { result = "redo" }
+ }
+
+ /**
+ * A `retry` control flow successor.
+ *
+ * It connects a `retry` statement to the node at which the retried body starts.
+ *
+ * Example:
+ *
+ * ```rb
+ * def m
+ *   begin
+ *     puts "Retry"
+ *     raise
+ *   rescue
+ *     retry
+ *   end
+ * end
+ * ```
+ *
+ * The node `puts "Retry"` is a `retry` successor of the node `retry`.
+ */
+ class RetrySuccessor extends SuccessorType, TRetrySuccessor {
+ final override string toString() { result = "retry" }
+ }
+
+ /**
+ * An exceptional (`raise`) control flow successor.
+ *
+ * Example:
+ *
+ * ```rb
+ * def m x
+ *   if x > 2
+ *     raise "x > 2"
+ *   end
+ *   puts "x <= 2"
+ * end
+ * ```
+ *
+ * The exit node of `m` is an exceptional successor of the node
+ * `raise "x > 2"`.
+ */
+ class RaiseSuccessor extends SuccessorType, TRaiseSuccessor {
+ final override string toString() { result = "raise" }
+ }
+
+ /**
+ * An `exit` control flow successor.
+ *
+ * Example:
+ *
+ * ```rb
+ * def m x
+ *   if x > 2
+ *     exit 1
+ *   end
+ *   puts "x <= 2"
+ * end
+ * ```
+ *
+ * The exit node of `m` is an exit successor of the node
+ * `exit 1`.
+ */
+ class ExitSuccessor extends SuccessorType, TExitSuccessor {
+ final override string toString() { result = "exit" }
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/Completion.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/Completion.qll
new file mode 100644
index 00000000000..e7f64d1318e
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/Completion.qll
@@ -0,0 +1,507 @@
+/**
+ * Provides classes representing control flow completions.
+ *
+ * A completion represents how a statement or expression terminates.
+ */
+
+private import codeql.ruby.AST
+private import codeql.ruby.ast.internal.AST
+private import codeql.ruby.controlflow.ControlFlowGraph
+private import ControlFlowGraphImpl
+private import NonReturning
+private import SuccessorTypes
+
+private newtype TCompletion = // algebraic data type underlying `Completion`
+ TSimpleCompletion() or
+ TBooleanCompletion(boolean b) { b in [false, true] } or
+ TEmptinessCompletion(boolean isEmpty) { isEmpty in [false, true] } or
+ TMatchingCompletion(boolean isMatch) { isMatch in [false, true] } or
+ TReturnCompletion() or
+ TBreakCompletion() or
+ TNextCompletion() or
+ TRedoCompletion() or
+ TRetryCompletion() or
+ TRaiseCompletion() or // TODO: Add exception type?
+ TExitCompletion() or
+ TNestedCompletion(Completion inner, Completion outer, int nestLevel) {
+ inner = TBreakCompletion() and
+ outer instanceof NonNestedNormalCompletion and
+ nestLevel = 0 // this shape is the one characterized by `NestedBreakCompletion`
+ or
+ inner instanceof NormalCompletion and
+ nestedEnsureCompletion(outer, nestLevel) // shape characterized by `NestedEnsureCompletion`
+ }
+
+/**
+ * Holds if `outer` is one of the non-nested `return`, `break`, `next`, `redo`,
+ * `retry`, `raise`, or `exit` completions, and `nestLevel` is the
+ * `ensure`-nesting level of some `BodyStmtTree`.
+ *
+ * Used to restrict the `TNestedCompletion` branch of `TCompletion`.
+ */
+pragma[noinline]
+private predicate nestedEnsureCompletion(Completion outer, int nestLevel) {
+  exists(Trees::BodyStmtTree t | nestLevel = t.getNestLevel()) and
+  (
+    outer = TExitCompletion() or
+    outer = TRaiseCompletion() or
+    outer = TRetryCompletion() or
+    outer = TRedoCompletion() or
+    outer = TNextCompletion() or
+    outer = TBreakCompletion() or
+    outer = TReturnCompletion()
+  )
+}
+
+/**
+ * Holds if `c` is a completion dictated by the kind of node `n`: an emptiness
+ * completion for a `TForIn` node, or the corresponding jump completion for a
+ * `break`, `next`, `redo`, or `return` statement.
+ */
+pragma[noinline]
+private predicate completionIsValidForStmt(AstNode n, Completion c) {
+  c = TReturnCompletion() and n instanceof ReturnStmt
+  or
+  c = TRedoCompletion() and n instanceof RedoStmt
+  or
+  c = TNextCompletion() and n instanceof NextStmt
+  or
+  c = TBreakCompletion() and n instanceof BreakStmt
+  or
+  c instanceof EmptinessCompletion and n = TForIn(_)
+}
+
+/**
+ * Holds if `c` happens in an exception-aware context, that is, `c` is (a
+ * descendant of) a body child of a `BodyStmtTree` that has a `rescue` or an
+ * `ensure` block. In such cases, we assume that the target of `c` may raise
+ * an exception (in addition to evaluating normally).
+ */
+private predicate mayRaise(Call c) {
+  exists(Trees::BodyStmtTree bst |
+    (exists(bst.getEnsure()) or exists(bst.getARescue())) and
+    c = bst.getBodyChild(_, true).getAChild*()
+  )
+}
+
+/** A completion of a statement or an expression, that is, a way in which it may terminate. */
+abstract class Completion extends TCompletion {
+  /** Holds if node `n` may terminate with this completion. */
+  predicate isValidFor(AstNode n) {
+    // Fallback: nodes without any special requirement complete simply
+    this = TSimpleCompletion() and
+    not mustHaveMatchingCompletion(n) and
+    not mustHaveBooleanCompletion(n) and
+    not completionIsValidForStmt(n, _) and
+    not n instanceof NonReturningCall
+    or
+    // Calls in an exception-aware context and `rescue`-modifier bodies may raise
+    this = TRaiseCompletion() and
+    (mayRaise(n) or n = any(RescueModifierExpr parent).getBody())
+    or
+    this = TMatchingCompletion(_) and
+    mustHaveMatchingCompletion(n)
+    or
+    // Boolean constants only get the completion that agrees with their value
+    mustHaveBooleanCompletion(n) and
+    (
+      not isBooleanConstant(n, _) and this = TBooleanCompletion(_)
+      or
+      exists(boolean b | isBooleanConstant(n, b) and this = TBooleanCompletion(b))
+    )
+    or
+    completionIsValidForStmt(n, this)
+    or
+    this = n.(NonReturningCall).getACompletion()
+  }
+
+  /**
+   * Holds if a loop keeps iterating when its body finishes with this
+   * completion, that is, this is a normal or a `next` completion.
+   */
+  predicate continuesLoop() {
+    this instanceof NextCompletion or
+    this instanceof NormalCompletion
+  }
+
+  /**
+   * Gets the inner completion. For a nested completion this is its inner
+   * part; otherwise it is this completion itself.
+   */
+  Completion getInnerCompletion() { this = result }
+
+  /**
+   * Gets the outer completion. For a nested completion this is its outer
+   * part; otherwise it is this completion itself.
+   */
+  Completion getOuterCompletion() { this = result }
+
+  /** Gets a successor type that matches this completion. */
+  abstract SuccessorType getAMatchingSuccessorType();
+
+  /** Gets a textual representation of this completion. */
+  abstract string toString();
+}
+
+/**
+ * Holds if `n` is a Boolean literal that must have a Boolean completion,
+ * and `value` is the truth value of that literal.
+ */
+private predicate isBooleanConstant(AstNode n, boolean value) {
+  exists(BooleanLiteral lit | lit = n and mustHaveBooleanCompletion(lit) |
+    lit.isFalse() and value = false
+    or
+    lit.isTrue() and value = true
+  )
+}
+
+/**
+ * Holds if `n` is in a Boolean context and is not a non-returning call, in
+ * which case a normal completion of `n` must be a Boolean completion.
+ */
+private predicate mustHaveBooleanCompletion(AstNode n) {
+  not n instanceof NonReturningCall and inBooleanContext(n)
+}
+
+/**
+ * Holds if `n` is used in a Boolean context. That is, the value that `n`
+ * evaluates to selects between a true and a false branch successor.
+ *
+ * A node is in a Boolean context either directly, because of its own role,
+ * or because it inherits the context from a parent that is in one.
+ */
+private predicate inBooleanContext(AstNode n) {
+  // Conditions of conditionals and of conditional loops
+  n = any(ConditionalExpr cond).getCondition()
+  or
+  n = any(ConditionalLoop loop).getCondition()
+  or
+  // Left operands of logical `and`/`or` always decide a branch
+  n = any(LogicalAndExpr conj).getLeftOperand()
+  or
+  n = any(LogicalOrExpr disj).getLeftOperand()
+  or
+  // `when` patterns of a `case` expression that has no value
+  exists(CaseExpr c, WhenExpr w |
+    w = c.getAWhenBranch() and
+    n = w.getPattern(_) and
+    not exists(c.getValue())
+  )
+  or
+  // Positions that inherit the Boolean context of their parent
+  exists(AstNode parent | inBooleanContext(parent) |
+    n = parent.(ConditionalExpr).getBranch(_)
+    or
+    n = parent.(LogicalAndExpr).getRightOperand()
+    or
+    n = parent.(LogicalOrExpr).getRightOperand()
+    or
+    n = parent.(NotExpr).getOperand()
+    or
+    n = parent.(StmtSequence).getLastStmt()
+  )
+}
+
+/**
+ * Holds if `n` is in a matching context and is not a non-returning call, in
+ * which case a normal completion of `n` must be a matching completion.
+ */
+private predicate mustHaveMatchingCompletion(AstNode n) {
+  not n instanceof NonReturningCall and inMatchingContext(n)
+}
+
+/**
+ * Holds if `n` is used in a matching context. That is, the successor of `n`
+ * is determined by whether or not the value of `n` matches.
+ */
+private predicate inMatchingContext(AstNode n) {
+  n.(Trees::DefaultValueParameterTree).hasDefaultValue()
+  or
+  exists(CaseExpr c, WhenExpr w |
+    n = w.getPattern(_) and
+    w = c.getAWhenBranch() and
+    exists(c.getValue())
+  )
+  or
+  exists(RescueClause r | n = r.getException(_))
+}
+
+/**
+ * A completion that represents normal evaluation of a statement or an
+ * expression, as opposed to, for example, a `break` or a `raise`.
+ */
+abstract class NormalCompletion extends Completion { }
+
+abstract private class NonNestedNormalCompletion extends NormalCompletion { } // simple and conditional completions
+
+/** A simple (normal) completion; it is matched by any `NormalSuccessor`. */
+class SimpleCompletion extends NonNestedNormalCompletion, TSimpleCompletion {
+ override NormalSuccessor getAMatchingSuccessorType() { any() }
+
+ override string toString() { result = "simple" }
+}
+
+/**
+ * A completion that represents evaluation of an expression, whose value determines
+ * the successor. Either a Boolean completion (`BooleanCompletion`), an emptiness
+ * completion (`EmptinessCompletion`), or a matching completion (`MatchingCompletion`).
+ */
+abstract class ConditionalCompletion extends NonNestedNormalCompletion {
+ boolean value;
+ // `value` is bound by the characteristic predicate of each concrete subclass
+ bindingset[value]
+ ConditionalCompletion() { any() }
+
+ /** Gets the Boolean value of this conditional completion. */
+ final boolean getValue() { result = value }
+}
+
+/**
+ * A completion that represents evaluation of an expression
+ * with a Boolean value.
+ */
+class BooleanCompletion extends ConditionalCompletion, TBooleanCompletion {
+ BooleanCompletion() { this = TBooleanCompletion(value) }
+
+ /** Gets the dual Boolean completion, that is, the one with the negated value. */
+ BooleanCompletion getDual() { result = TBooleanCompletion(value.booleanNot()) }
+
+ override BooleanSuccessor getAMatchingSuccessorType() { result.getValue() = value }
+
+ override string toString() { result = value.toString() }
+}
+
+/** A Boolean completion whose value is `true`. */
+class TrueCompletion extends BooleanCompletion {
+ TrueCompletion() { this.getValue() = true }
+}
+
+/** A Boolean completion whose value is `false`. */
+class FalseCompletion extends BooleanCompletion {
+ FalseCompletion() { this.getValue() = false }
+}
+
+/**
+ * A completion that represents the outcome of an emptiness test, such as
+ * the test in a `for in` statement.
+ */
+class EmptinessCompletion extends ConditionalCompletion, TEmptinessCompletion {
+  EmptinessCompletion() { TEmptinessCompletion(value) = this }
+
+  override EmptinessSuccessor getAMatchingSuccessorType() { value = result.getValue() }
+
+  override string toString() { if value = false then result = "non-empty" else result = "empty" }
+}
+
+/**
+ * A completion that represents the outcome of a matching test, such as
+ * the test in a `rescue` statement.
+ */
+class MatchingCompletion extends ConditionalCompletion, TMatchingCompletion {
+  MatchingCompletion() { TMatchingCompletion(value) = this }
+
+  override MatchingSuccessor getAMatchingSuccessorType() { value = result.getValue() }
+
+  override string toString() { if value = false then result = "no-match" else result = "match" }
+}
+
+/**
+ * A completion that represents evaluation of a statement or an expression
+ * resulting in a return, either directly or as the outer part of a nested completion.
+ */
+class ReturnCompletion extends Completion {
+  ReturnCompletion() {
+    this = TNestedCompletion(_, TReturnCompletion(), _) or
+    this = TReturnCompletion()
+  }
+
+  override ReturnSuccessor getAMatchingSuccessorType() { result instanceof ReturnSuccessor }
+
+  override string toString() {
+    // Only the non-nested case is covered here; `NestedCompletion` defines the other one
+    result = "return" and this = TReturnCompletion()
+  }
+}
+
+/**
+ * A completion that represents evaluation of a statement or an expression
+ * resulting in a break from a loop, either directly or as the outer part of a nested completion.
+ */
+class BreakCompletion extends Completion {
+  BreakCompletion() {
+    this = TNestedCompletion(_, TBreakCompletion(), _) or
+    this = TBreakCompletion()
+  }
+
+  override BreakSuccessor getAMatchingSuccessorType() { result instanceof BreakSuccessor }
+
+  override string toString() {
+    // Only the non-nested case is covered here; `NestedCompletion` defines the other one
+    result = "break" and this = TBreakCompletion()
+  }
+}
+
+/**
+ * A completion that represents evaluation of a statement or an expression
+ * resulting in a continuation of a loop, either directly or as the outer part of a nested completion.
+ */
+class NextCompletion extends Completion {
+  NextCompletion() {
+    this = TNestedCompletion(_, TNextCompletion(), _) or
+    this = TNextCompletion()
+  }
+
+  override NextSuccessor getAMatchingSuccessorType() { result instanceof NextSuccessor }
+
+  override string toString() {
+    // Only the non-nested case is covered here; `NestedCompletion` defines the other one
+    result = "next" and this = TNextCompletion()
+  }
+}
+
+/**
+ * A completion that represents evaluation of a statement or an expression
+ * resulting in a redo of a loop iteration, either directly or as the outer part of a nested completion.
+ */
+class RedoCompletion extends Completion {
+  RedoCompletion() {
+    this = TNestedCompletion(_, TRedoCompletion(), _) or
+    this = TRedoCompletion()
+  }
+
+  override RedoSuccessor getAMatchingSuccessorType() { result instanceof RedoSuccessor }
+
+  override string toString() {
+    // Only the non-nested case is covered here; `NestedCompletion` defines the other one
+    result = "redo" and this = TRedoCompletion()
+  }
+}
+
+/**
+ * A completion that represents evaluation of a statement or an expression
+ * resulting in a retry, either directly or as the outer part of a nested completion.
+ */
+class RetryCompletion extends Completion {
+  RetryCompletion() {
+    this = TNestedCompletion(_, TRetryCompletion(), _) or
+    this = TRetryCompletion()
+  }
+
+  override RetrySuccessor getAMatchingSuccessorType() { result instanceof RetrySuccessor }
+
+  override string toString() {
+    // Only the non-nested case is covered here; `NestedCompletion` defines the other one
+    result = "retry" and this = TRetryCompletion()
+  }
+}
+
+/**
+ * A completion that represents evaluation of a statement or an expression
+ * resulting in a thrown exception, either directly or as the outer part of a nested completion.
+ */
+class RaiseCompletion extends Completion {
+  RaiseCompletion() {
+    this = TNestedCompletion(_, TRaiseCompletion(), _) or
+    this = TRaiseCompletion()
+  }
+
+  override RaiseSuccessor getAMatchingSuccessorType() { result instanceof RaiseSuccessor }
+
+  override string toString() {
+    // Only the non-nested case is covered here; `NestedCompletion` defines the other one
+    result = "raise" and this = TRaiseCompletion()
+  }
+}
+
+/**
+ * A completion that represents evaluation of a statement or an expression
+ * resulting in an abort/exit, either directly or as the outer part of a nested completion.
+ */
+class ExitCompletion extends Completion {
+  ExitCompletion() {
+    this = TNestedCompletion(_, TExitCompletion(), _) or
+    this = TExitCompletion()
+  }
+
+  override ExitSuccessor getAMatchingSuccessorType() { result instanceof ExitSuccessor }
+
+  override string toString() {
+    // Only the non-nested case is covered here; `NestedCompletion` defines the other one
+    result = "exit" and this = TExitCompletion()
+  }
+}
+
+/**
+ * A nested completion. For example, in
+ *
+ * ```rb
+ * def m
+ *   while x >= 0
+ *     x -= 1
+ *     if num > 100
+ *       break
+ *     end
+ *   end
+ *   puts "done"
+ * end
+ * ```
+ *
+ * the `while` loop can have a nested completion where the inner completion
+ * is a `break` and the outer completion is a simple completion.
+ */
+abstract class NestedCompletion extends Completion, TNestedCompletion {
+ Completion inner;
+ Completion outer;
+ int nestLevel;
+
+ NestedCompletion() { this = TNestedCompletion(inner, outer, nestLevel) }
+
+ /** Gets a completion that is compatible with the inner completion. */
+ abstract Completion getAnInnerCompatibleCompletion();
+
+ /** Gets the level of this nested completion. */
+ final int getNestLevel() { result = nestLevel }
+ /** Gets a string of the form `outer [inner] (nestLevel)`. */
+ override string toString() { result = outer + " [" + inner + "] (" + nestLevel + ")" }
+}
+
+/**
+ * A normal completion that wraps a `break` (inner) in a non-nested normal completion (outer).
+ */
+class NestedBreakCompletion extends NormalCompletion, NestedCompletion {
+  NestedBreakCompletion() {
+    outer instanceof NonNestedNormalCompletion and inner = TBreakCompletion()
+  }
+
+  override BreakCompletion getInnerCompletion() { inner = result }
+
+  override NonNestedNormalCompletion getOuterCompletion() { outer = result }
+
+  override Completion getAnInnerCompatibleCompletion() {
+    result = TNestedCompletion(outer, inner, _)
+    or
+    outer = TSimpleCompletion() and result = inner
+  }
+
+  override SuccessorType getAMatchingSuccessorType() {
+    result = outer.(ConditionalCompletion).getAMatchingSuccessorType()
+    or
+    outer instanceof SimpleCompletion and result instanceof BreakSuccessor
+  }
+}
+
+/** A nested completion pairing a normal inner completion with an abnormal outer completion. */
+class NestedEnsureCompletion extends NestedCompletion {
+  NestedEnsureCompletion() {
+    nestedEnsureCompletion(outer, nestLevel) and inner instanceof NormalCompletion
+  }
+
+  override NormalCompletion getInnerCompletion() { inner = result }
+
+  override Completion getOuterCompletion() { outer = result }
+
+  override Completion getAnInnerCompatibleCompletion() {
+    this.getInnerCompletion() = result.getOuterCompletion()
+  }
+
+  override SuccessorType getAMatchingSuccessorType() { none() }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImpl.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImpl.qll
new file mode 100644
index 00000000000..5bfea3aca7b
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImpl.qll
@@ -0,0 +1,1164 @@
+/**
+ * Provides auxiliary classes and predicates used to construct the basic successor
+ * relation on control flow elements.
+ *
+ * The implementation is centered around the concept of a _completion_, which
+ * models how the execution of a statement or expression terminates.
+ * Completions are represented as an algebraic data type `Completion` defined in
+ * `Completion.qll`.
+ *
+ * The CFG is built by structural recursion over the AST. To achieve this the
+ * CFG edges related to a given AST node, `n`, are divided into three categories:
+ *
+ * 1. The in-going edge that points to the first CFG node to execute when
+ * `n` is going to be executed.
+ * 2. The out-going edges for control flow leaving `n` that are going to some
+ * other node in the surrounding context of `n`.
+ * 3. The edges that have both of their end-points entirely within the AST
+ * node and its children.
+ *
+ * The edges in (1) and (2) are inherently non-local and are therefore
+ * initially calculated as half-edges, that is, the single node, `k`, of the
+ * edge contained within `n`, by the predicates `k = first(n)` and `k = last(n, _)`,
+ * respectively. The edges in (3) can then be enumerated directly by the predicate
+ * `succ` by calling `first` and `last` recursively on the children of `n` and
+ * connecting the end-points. This yields the entire CFG, since all edges are in
+ * (3) for _some_ AST node.
+ *
+ * The second parameter of `last` is the completion, which is necessary to distinguish
+ * the out-going edges from `n`. Note that the completion changes as the calculation of
+ * `last` proceeds outward through the AST; for example, a `BreakCompletion` is
+ * caught up by its surrounding loop and turned into a `NormalCompletion`.
+ */
+
+private import codeql.ruby.AST
+private import codeql.ruby.ast.internal.AST as ASTInternal
+private import codeql.ruby.ast.internal.Scope
+private import codeql.ruby.ast.Scope
+private import codeql.ruby.ast.internal.TreeSitter
+private import codeql.ruby.ast.internal.Variable
+private import codeql.ruby.controlflow.ControlFlowGraph
+private import Completion
+import ControlFlowGraphImplShared
+
+module CfgScope {
+ abstract class Range_ extends AstNode { // an AST node that defines a CFG scope
+ abstract predicate entry(AstNode first); // holds if `first` is executed first on entry
+ /** Holds if `last` with completion `c` can exit this scope. */
+ abstract predicate exit(AstNode last, Completion c);
+ }
+ /** A top-level scope; entry and exit are the `first`/`last` nodes of the `Toplevel` itself. */
+ private class ToplevelScope extends Range_, Toplevel {
+ final override predicate entry(AstNode first) { first(this, first) }
+
+ final override predicate exit(AstNode last, Completion c) { last(this, last, c) }
+ }
+ /** An `END` block scope; entered at body child 0 and exited at the last body child. */
+ private class EndBlockScope extends Range_, EndBlock {
+ final override predicate entry(AstNode first) {
+ first(this.(Trees::EndBlockTree).getBodyChild(0, _), first)
+ }
+
+ final override predicate exit(AstNode last, Completion c) {
+ last(this.(Trees::EndBlockTree).getLastBodyChild(), last, c)
+ }
+ }
+ /** A callable with a body statement; uses the `firstInner`/`lastInner` nodes of its tree. */
+ private class BodyStmtCallableScope extends Range_, ASTInternal::TBodyStmt, Callable {
+ final override predicate entry(AstNode first) { this.(Trees::BodyStmtTree).firstInner(first) }
+
+ final override predicate exit(AstNode last, Completion c) {
+ this.(Trees::BodyStmtTree).lastInner(last, c)
+ }
+ }
+ /** A brace block scope; entered at body child 0 and exited at the last body child. */
+ private class BraceBlockScope extends Range_, BraceBlock {
+ final override predicate entry(AstNode first) {
+ first(this.(Trees::BraceBlockTree).getBodyChild(0, _), first)
+ }
+
+ final override predicate exit(AstNode last, Completion c) {
+ last(this.(Trees::BraceBlockTree).getLastBodyChild(), last, c)
+ }
+ }
+}
+
+/** Holds if `first` is the node that is executed first when entering `scope`. */
+pragma[nomagic]
+predicate succEntry(CfgScope::Range_ scope, AstNode first) { scope.entry(first) }
+
+/** Holds if `last` with completion `c` can exit `scope`, as defined by `scope.exit`. */
+pragma[nomagic]
+predicate succExit(CfgScope::Range_ scope, AstNode last, Completion c) { scope.exit(last, c) }
+
+// TODO: remove this class; it should be replaced with an implicit non-AST node
+private class ForIn extends AstNode, ASTInternal::TForIn {
+ final override string toString() { result = "In" } // fixed label for every `ForIn` node
+}
+
+// TODO: remove this class; it should be replaced with an implicit non-AST node
+private class ForRange extends ForExpr {
+ override AstNode getAChild(string pred) {
+ result = super.getAChild(pred)
+ or
+ pred = "" and // additionally expose the `ForIn` node as a child with an empty label
+ result = this.getIn()
+ }
+ /** Gets the `ForIn` node built from the value of the underlying `Ruby::For` node. */
+ ForIn getIn() {
+ result = ASTInternal::TForIn(ASTInternal::toGenerated(this).(Ruby::For).getValue())
+ }
+}
+
+/** Defines the CFG by dispatch on the various AST types. */
+module Trees {
+  /** An `alias` statement; child 0 is the new name and child 1 is the old name. */
+  private class AliasStmtTree extends StandardPreOrderTree, AliasStmt {
+    final override ControlFlowTree getChildElement(int i) {
+      i = 0 and result = this.getNewName() or
+      i = 1 and result = this.getOldName()
+    }
+  }
+
+ private class ArgumentListTree extends StandardTree, ArgumentList {
+ final override ControlFlowTree getChildElement(int i) { result = this.getElement(i) }
+ /** Holds if `first` is a first node of the first child element. */
+ final override predicate first(AstNode first) { first(this.getFirstChildElement(), first) }
+ /** Holds if `last` is a last node, with completion `c`, of the last child element. */
+ final override predicate last(AstNode last, Completion c) {
+ last(this.getLastChildElement(), last, c)
+ }
+ }
+
+  /** An assignment whose left operand is a variable access or a constant access. */
+  private class AssignExprTree extends StandardPostOrderTree, AssignExpr {
+    AssignExprTree() {
+      this.getLeftOperand() instanceof ConstantAccess or
+      this.getLeftOperand() instanceof VariableAccess
+    }
+
+    // The left operand is child 0 and the right operand is child 1
+    final override ControlFlowTree getChildElement(int i) {
+      i = 0 and result = this.getLeftOperand()
+      or
+      i = 1 and result = this.getRightOperand()
+    }
+  }
+
+ private class BeginTree extends BodyStmtTree, BeginExpr {
+ final override predicate first(AstNode first) { this.firstInner(first) }
+ // Unlike the inherited `first = this`, entry and exit go straight to the inner nodes
+ final override predicate last(AstNode last, Completion c) { this.lastInner(last, c) }
+
+ final override predicate propagatesAbnormal(AstNode child) { none() }
+ }
+
+ private class BlockArgumentTree extends StandardPostOrderTree, BlockArgument {
+ final override ControlFlowTree getChildElement(int i) { result = this.getValue() and i = 0 }
+ } // the value of the block argument is the only child element
+
+ abstract private class NonDefaultValueParameterTree extends ControlFlowTree, NamedParameter {
+ final override predicate first(AstNode first) {
+ this.getDefiningAccess().(ControlFlowTree).first(first)
+ }
+
+ final override predicate last(AstNode last, Completion c) {
+ this.getDefiningAccess().(ControlFlowTree).last(last, c)
+ }
+
+ override predicate propagatesAbnormal(AstNode child) {
+ this.getDefiningAccess().(ControlFlowTree).propagatesAbnormal(child)
+ }
+ // The parameter adds no edges of its own; everything is delegated to its defining access
+ final override predicate succ(AstNode pred, AstNode succ, Completion c) { none() }
+ }
+
+ private class BlockParameterTree extends NonDefaultValueParameterTree, BlockParameter { } // no extra members
+
+ abstract class BodyStmtTree extends StmtSequenceTree, BodyStmt {
+ override predicate first(AstNode first) { first = this } // the block itself is its first node
+
+    /** Holds if `first` is a first node among the body, `rescue`, and `ensure` parts of this block. */
+    predicate firstInner(AstNode first) {
+      not exists(this.getBodyChild(_, _)) and
+      (
+        not exists(this.getRescue(_)) and first(this.getEnsure(), first)
+        or
+        first(this.getRescue(_), first)
+      )
+      or
+      first(this.getBodyChild(0, _), first)
+    }
+
+ predicate lastInner(AstNode last, Completion c) { // inner counterpart of `last`; see `firstInner`
+ exists(boolean ensurable | last = this.getAnEnsurePredecessor(c, ensurable) |
+ not this.hasEnsure() // there is no `ensure` block to continue into
+ or
+ ensurable = false // control is not transferred to the `ensure` block
+ )
+ or
+ // If the body completes normally, take the completion from the `ensure` block
+ this.lastEnsure(last, c, any(NormalCompletion nc), _)
+ or
+ // If the `ensure` block completes normally, it inherits any non-normal
+ // completion from the body
+ c =
+ any(NestedEnsureCompletion nec |
+ this.lastEnsure(last, nec.getAnInnerCompatibleCompletion(), nec.getOuterCompletion(),
+ nec.getNestLevel())
+ )
+ or
+ not exists(this.getBodyChild(_, _)) and // neither body children nor `rescue` clauses
+ not exists(this.getRescue(_)) and
+ this.lastEnsure0(last, c)
+ or
+ last([this.getEnsure(), this.getBodyChild(_, false)], last, c) and // NOTE(review): meaning of `false` flag not visible here
+ not c instanceof NormalCompletion
+ }
+
+    /** Holds if `succ` is a successor of `pred` with completion `c` inside this block. */
+    override predicate succ(AstNode pred, AstNode succ, Completion c) {
+      // Normal left-to-right evaluation in the body
+      exists(int i |
+        last(this.getBodyChild(i, _), pred, c) and
+        first(this.getBodyChild(i + 1, _), succ) and
+        c instanceof NormalCompletion
+      )
+      or
+      // Exceptional flow from body to first `rescue`
+      this.lastBody(pred, c, true) and
+      first(this.getRescue(0), succ) and
+      c instanceof RaiseCompletion
+      or
+      // Flow from one `rescue` clause to the next when there is no match
+      exists(RescueTree rescue, int i | rescue = this.getRescue(i) |
+        rescue.lastNoMatch(pred, c) and
+        first(this.getRescue(i + 1), succ)
+      )
+      or
+      // Flow from body to `else` block when no exception
+      this.lastBody(pred, c, _) and
+      first(this.getElse(), succ) and
+      c instanceof NormalCompletion
+      or
+      // Flow into `ensure` block
+      pred = this.getAnEnsurePredecessor(c, true) and first(this.getEnsure(), succ)
+    }
+
+ /**
+ * Gets a last element from this block that may finish with completion `c`, such
+ * that control may be transferred to the `ensure` block (if it exists), but only
+ * if `ensurable = true`.
+ */
+ pragma[nomagic]
+ private AstNode getAnEnsurePredecessor(Completion c, boolean ensurable) {
+ this.lastBody(result, c, ensurable) and
+ (
+ // Any non-raise completion will always continue directly to the `ensure` block,
+ // unless there is an `else` block
+ not c instanceof RaiseCompletion and
+ not exists(this.getElse())
+ or
+ // Any completion will continue to the `ensure` block when there are no `rescue`
+ // blocks
+ not exists(this.getRescue(_))
+ )
+ or
+ // Last element from any matching `rescue` block continues to the `ensure` block
+ this.getRescue(_).(RescueTree).lastMatch(result, c) and
+ ensurable = true
+ or
+ // If the last `rescue` block does not match, continue to the `ensure` block
+ exists(int lst, MatchingCompletion mc |
+ this.getRescue(lst).(RescueTree).lastNoMatch(result, mc) and
+ mc.getValue() = false and
+ not exists(this.getRescue(lst + 1)) and // `lst` is the index of the last `rescue` clause
+ c =
+ any(NestedEnsureCompletion nec |
+ nec.getOuterCompletion() instanceof RaiseCompletion and // the exception is still pending
+ nec.getInnerCompletion() = mc and
+ nec.getNestLevel() = 0
+ ) and
+ ensurable = true
+ )
+ or
+ // Last element of `else` block continues to the `ensure` block
+ last(this.getElse(), result, c) and
+ ensurable = true
+ }
+
+ /** Holds if `last` is a last element of the `ensure` block of this block, with completion `c`. */
+ pragma[nomagic]
+ private predicate lastEnsure0(AstNode last, Completion c) { last(this.getEnsure(), last, c) }
+
+ /**
+ * Gets a node inside the `ensure` block of this block (the `ensure` block itself
+ * included), if any. Descent stays within the same CFG scope and does not enter
+ * the `ensure` block of any other block.
+ */
+ pragma[nomagic]
+ AstNode getAnEnsureDescendant() {
+ // Base case: the `ensure` block itself
+ this.getEnsure() = result
+ or
+ // Recursive case: a child, in the same CFG scope, of a descendant found so far ...
+ exists(AstNode ancestor |
+ ancestor = this.getAnEnsureDescendant() and
+ ancestor.getAChild() = result and
+ getCfgScope(ancestor) = getCfgScope(result)
+ ) and
+ // ... unless that child is the `ensure` block of a different block
+ not exists(BodyStmt other |
+ other != this and
+ other.getEnsure() = result
+ )
+ }
+
+ /**
+ * Holds if the `ensure` block of `innerBlock` is a child of a descendant of the
+ * `ensure` block of this block, within the CFG scope of this block. That is,
+ * `innerBlock` is immediately nested inside the `ensure` block of this block.
+ */
+ private predicate nestedEnsure(BodyStmtTree innerBlock) {
+ exists(StmtSequence nested |
+ innerBlock.(BodyStmt).getEnsure() = nested and
+ this.getAnEnsureDescendant().getAChild() = nested and
+ getCfgScope(this) = getCfgScope(nested)
+ )
+ }
+
+ /**
+ * Gets the `ensure`-nesting level of this block. That is, the number of `ensure`
+ * blocks that this block is nested under.
+ *
+ * Computed as the number of blocks `outer` from which this block is reachable via
+ * one or more `nestedEnsure` steps.
+ */
+ int getNestLevel() { result = count(BodyStmtTree outer | outer.nestedEnsure+(this)) }
+
+ /**
+ * Holds if `last` is a last element of the `ensure` block of this block, finishing
+ * with normal completion `ensure`, while some element that may enter the `ensure`
+ * block does so with a completion whose outer completion is `outer`. `nestLevel`
+ * is the `ensure`-nesting level of this block.
+ */
+ pragma[nomagic]
+ private predicate lastEnsure(
+ AstNode last, NormalCompletion ensure, Completion outer, int nestLevel
+ ) {
+ nestLevel = this.getNestLevel() and
+ this.lastEnsure0(last, ensure) and
+ exists(Completion entry |
+ entry.getOuterCompletion() = outer and
+ exists(this.getAnEnsurePredecessor(entry, true))
+ )
+ }
+
+ /**
+ * Holds if `last` is a last element in the body of this block. `ensurable`
+ * indicates whether `last` may be a predecessor of an `ensure` block.
+ *
+ * For a `RaiseCompletion`, `ensurable` equals the `rescuable` flag of the body
+ * child that `last` belongs to; for every other completion it is `true`.
+ */
+ pragma[nomagic]
+ private predicate lastBody(AstNode last, Completion c, boolean ensurable) {
+ exists(boolean rescuable |
+ if c instanceof RaiseCompletion then ensurable = rescuable else ensurable = true
+ |
+ // Any body child may end the body with a non-normal completion
+ last(this.getBodyChild(_, rescuable), last, c) and
+ not c instanceof NormalCompletion
+ or
+ // The final body child may end the body with any completion
+ exists(int lst |
+ last(this.getBodyChild(lst, rescuable), last, c) and
+ not exists(this.getBodyChild(lst + 1, _))
+ )
+ )
+ }
+ }
+
+ /** A boolean literal, modelled as a `LeafTree`. */
+ private class BooleanLiteralTree extends LeafTree, BooleanLiteral { }
+
+ /**
+ * A brace block. Its body children are the block parameters followed by the
+ * statements of the block, and `propagatesAbnormal` holds for no child.
+ */
+ class BraceBlockTree extends StmtSequenceTree, BraceBlock {
+ final override predicate propagatesAbnormal(AstNode child) { none() }
+
+ /** Gets the `i`th body child: parameters first (with `rescuable = false`), then the statements. */
+ final override AstNode getBodyChild(int i, boolean rescuable) {
+ rescuable = false and
+ result = this.getParameter(i)
+ or
+ result = StmtSequenceTree.super.getBodyChild(i - this.getNumberOfParameters(), rescuable)
+ }
+
+ override predicate first(AstNode first) { this = first }
+
+ override predicate succ(AstNode pred, AstNode succ, Completion c) {
+ // A body child that completes normally is followed by the next body child
+ c instanceof NormalCompletion and
+ exists(int idx |
+ first(this.getBodyChild(idx + 1, _), succ) and
+ last(this.getBodyChild(idx, _), pred, c)
+ )
+ }
+ }
+
+ /**
+ * A call that is neither a unary nor a binary logical operation. Its child
+ * elements are the call arguments, in index order.
+ */
+ private class CallTree extends StandardPostOrderTree, Call {
+ CallTree() {
+ // Logical operations are excluded here; they are handled separately
+ not (this instanceof BinaryLogicalOperation or this instanceof UnaryLogicalOperation)
+ }
+
+ override ControlFlowTree getChildElement(int i) { this.getArgument(i) = result }
+ }
+
+ /**
+ * A `case` expression, evaluated in pre-order: the `case` node, then the value
+ * (if any), then the branches in order.
+ */
+ private class CaseTree extends PreOrderTree, CaseExpr {
+ final override predicate propagatesAbnormal(AstNode child) {
+ child = this.getABranch() or child = this.getValue()
+ }
+
+ final override predicate last(AstNode last, Completion c) {
+ // Without any branch, the value is the last thing evaluated
+ not exists(this.getABranch()) and
+ last(this.getValue(), last, c)
+ or
+ // The body of any `when` branch
+ last(this.getAWhenBranch().getBody(), last, c)
+ or
+ // The final branch
+ exists(int idx |
+ not exists(this.getBranch(idx + 1)) and
+ last(this.getBranch(idx).(ControlFlowTree), last, c)
+ )
+ }
+
+ final override predicate succ(AstNode pred, AstNode succ, Completion c) {
+ // From the `case` node itself: the value or, when there is no value, the first branch
+ pred = this and
+ c instanceof SimpleCompletion and
+ (
+ first(this.getValue(), succ)
+ or
+ not exists(this.getValue()) and
+ first(this.getBranch(0), succ)
+ )
+ or
+ // From the value to the first branch
+ c instanceof SimpleCompletion and
+ first(this.getBranch(0), succ) and
+ last(this.getValue(), pred, c)
+ or
+ // From the last pattern of a `when` branch that evaluates to false to the next branch
+ exists(int idx |
+ c.(ConditionalCompletion).getValue() = false and
+ last(this.getBranch(idx).(WhenTree).getLastPattern(), pred, c) and
+ first(this.getBranch(idx + 1), succ)
+ )
+ }
+ }
+
+ /** A character literal, modelled as a `LeafTree`. */
+ private class CharacterTree extends LeafTree, CharacterLiteral { }
+
+ /**
+ * A class declaration. The scope expression and the superclass expression (when
+ * present) are placed before the inherited body children.
+ */
+ private class ClassDeclarationTree extends NamespaceTree, ClassDeclaration {
+ /**
+ * Gets the `i`th child in the body of this block: the scope expression (if any,
+ * not rescuable), then the superclass expression (if any, rescuable), then the
+ * inherited body children shifted accordingly.
+ */
+ final override AstNode getBodyChild(int i, boolean rescuable) {
+ i = 0 and
+ rescuable = false and
+ result = this.getScopeExpr()
+ or
+ i = count(this.getScopeExpr()) and
+ rescuable = true and
+ result = this.getSuperclassExpr()
+ or
+ result =
+ super
+ .getBodyChild(i - count(this.getScopeExpr()) - count(this.getSuperclassExpr()),
+ rescuable)
+ }
+ }
+
+ /** A class variable access, modelled as a `LeafTree`. */
+ private class ClassVariableTree extends LeafTree, ClassVariableAccess { }
+
+ /**
+ * A conditional expression, evaluated in post-order: the condition, then the
+ * branch selected by the condition's boolean outcome (if there is one), then the
+ * expression itself.
+ */
+ private class ConditionalExprTree extends PostOrderTree, ConditionalExpr {
+ final override predicate propagatesAbnormal(AstNode child) {
+ child = this.getBranch(_) or child = this.getCondition()
+ }
+
+ final override predicate first(AstNode first) { first(this.getCondition(), first) }
+
+ final override predicate succ(AstNode pred, AstNode succ, Completion c) {
+ // After the condition: the branch selected by the outcome or, when no such
+ // branch exists, this node directly
+ exists(boolean outcome |
+ outcome = c.(BooleanCompletion).getValue() and
+ last(this.getCondition(), pred, c) and
+ (
+ first(this.getBranch(outcome), succ)
+ or
+ succ = this and
+ not exists(this.getBranch(outcome))
+ )
+ )
+ or
+ // After a branch that completes normally: this node
+ c instanceof NormalCompletion and
+ succ = this and
+ last(this.getBranch(_), pred, c)
+ }
+ }
+
+ /**
+ * A conditional loop, evaluated in post-order: the condition is evaluated first,
+ * and the loop node itself is reached when the loop is left.
+ */
+ private class ConditionalLoopTree extends PostOrderTree, ConditionalLoop {
+ final override predicate propagatesAbnormal(AstNode child) { this.getCondition() = child }
+
+ final override predicate first(AstNode first) { first(this.getCondition(), first) }
+
+ final override predicate succ(AstNode pred, AstNode succ, Completion c) {
+ // Condition -> body, when the outcome of the condition enters the loop
+ this.entersLoopWhenConditionIs(c.(BooleanCompletion).getValue()) and
+ first(this.getBody(), succ) and
+ last(this.getCondition(), pred, c)
+ or
+ last(this.getBody(), pred, c) and
+ (
+ // Body -> condition, when the completion continues the loop
+ c.continuesLoop() and
+ first(this.getCondition(), succ)
+ or
+ // Body -> body, on `redo`
+ c instanceof RedoCompletion and
+ first(this.getBody(), succ)
+ )
+ or
+ // Edges into the loop node itself
+ succ = this and
+ (
+ // The outcome of the condition does not enter the loop
+ this.entersLoopWhenConditionIs(c.(BooleanCompletion).getValue().booleanNot()) and
+ last(this.getCondition(), pred, c)
+ or
+ // The body finishes with a completion that neither continues, breaks, nor redoes
+ last(this.getBody(), pred, c) and
+ not (
+ c.continuesLoop() or
+ c instanceof BreakCompletion or
+ c instanceof RedoCompletion
+ )
+ or
+ // The body finishes with a completion compatible with the nested break completion `c`
+ last(this.getBody(), pred, c.(NestedBreakCompletion).getAnInnerCompatibleCompletion())
+ )
+ }
+ }
+
+ /**
+ * A constant access that is neither a class nor a module declaration, evaluated
+ * in post-order: the scope expression (if any) comes before the access itself.
+ */
+ private class ConstantAccessTree extends PostOrderTree, ConstantAccess {
+ ConstantAccessTree() {
+ not (this instanceof ModuleDeclaration or this instanceof ClassDeclaration)
+ }
+
+ final override predicate propagatesAbnormal(AstNode child) { this.getScopeExpr() = child }
+
+ final override predicate first(AstNode first) {
+ // Without a scope expression the access itself is the first element
+ if exists(this.getScopeExpr()) then first(this.getScopeExpr(), first) else first = this
+ }
+
+ final override predicate succ(AstNode pred, AstNode succ, Completion c) {
+ // Scope expression -> the access, on normal completion
+ c instanceof NormalCompletion and
+ succ = this and
+ last(this.getScopeExpr(), pred, c)
+ }
+ }
+
+ /**
+ * A parameter that may have a default value.
+ *
+ * The access node is evaluated first. When a default value exists, a `false`
+ * matching completion on the access node leads into the default value
+ * expression, while a `true` matching completion ends the parameter.
+ */
+ abstract class DefaultValueParameterTree extends ControlFlowTree {
+ /** Gets the default value expression of this parameter, if any. */
+ abstract Expr getDefaultValueExpr();
+
+ /** Gets the node that is used as the first element of this parameter. */
+ abstract AstNode getAccessNode();
+
+ /** Holds if this parameter has a default value expression. */
+ predicate hasDefaultValue() { exists(this.getDefaultValueExpr()) }
+
+ final override predicate propagatesAbnormal(AstNode child) {
+ child = this.getAccessNode() or child = this.getDefaultValueExpr()
+ }
+
+ final override predicate first(AstNode first) { this.getAccessNode() = first }
+
+ final override predicate last(AstNode last, Completion c) {
+ // The default value expression, completing normally
+ c instanceof NormalCompletion and
+ last(this.getDefaultValueExpr(), last, c)
+ or
+ // The access node: a `true` match when there is a default value, and a simple
+ // completion otherwise
+ last = this.getAccessNode() and
+ if this.hasDefaultValue()
+ then c.(MatchingCompletion).getValue() = true
+ else c instanceof SimpleCompletion
+ }
+
+ final override predicate succ(AstNode pred, AstNode succ, Completion c) {
+ // Access node -> default value expression, on a `false` match
+ c.(MatchingCompletion).getValue() = false and
+ pred = this.getAccessNode() and
+ first(this.getDefaultValueExpr(), succ)
+ }
+ }
+
+ /**
+ * An element for which `getDesugared()` yields a control flow tree. Its first and
+ * last elements and its abnormal propagation are delegated to that tree, and it
+ * contributes no successor edges of its own.
+ */
+ private class DesugaredTree extends ControlFlowTree {
+ /** The control flow tree obtained from `this.getDesugared()`. */
+ ControlFlowTree desugared;
+
+ DesugaredTree() { desugared = this.getDesugared() }
+
+ final override predicate propagatesAbnormal(AstNode child) {
+ desugared.propagatesAbnormal(child)
+ }
+
+ final override predicate first(AstNode first) { desugared.first(first) }
+
+ final override predicate last(AstNode last, Completion c) { desugared.last(last, c) }
+
+ // No edges are added here; see the class comment
+ final override predicate succ(AstNode pred, AstNode succ, Completion c) { none() }
+ }
+
+ /**
+ * A `do` block. Its body children are the block parameters followed by the
+ * inherited body children, and `propagatesAbnormal` holds for no child.
+ */
+ private class DoBlockTree extends BodyStmtTree, DoBlock {
+ override predicate propagatesAbnormal(AstNode child) { none() }
+
+ /** Gets the `i`th child in the body of this block: parameters (not rescuable) come first. */
+ final override AstNode getBodyChild(int i, boolean rescuable) {
+ rescuable = false and
+ result = this.getParameter(i)
+ or
+ result = BodyStmtTree.super.getBodyChild(i - this.getNumberOfParameters(), rescuable)
+ }
+ }
+
+ /** An empty statement, modelled as a `LeafTree`. */
+ private class EmptyStatementTree extends LeafTree, EmptyStmt { }
+
+ /**
+ * An `END` block. The block node itself is its first element, and only the
+ * left-to-right edges between body children are contributed as successors.
+ */
+ class EndBlockTree extends StmtSequenceTree, EndBlock {
+ override predicate first(AstNode first) { this = first }
+
+ override predicate succ(AstNode pred, AstNode succ, Completion c) {
+ // A body child that completes normally is followed by the next body child
+ c instanceof NormalCompletion and
+ exists(int idx |
+ first(this.getBodyChild(idx + 1, _), succ) and
+ last(this.getBodyChild(idx, _), pred, c)
+ )
+ }
+ }
+
+ /** The `in` part of a `for` loop (`ForIn`), modelled as a `LeafTree`. */
+ private class ForInTree extends LeafTree, ForIn { }
+
+ /**
+ * Control flow of a for-in loop
+ *
+ * For example, this program fragment:
+ *
+ * ```rb
+ * for arg in args do
+ *   puts arg
+ * end
+ * puts "done";
+ * ```
+ *
+ * has the following control flow graph:
+ *
+ * ```
+ *           args
+ *            |
+ *            in------<-----
+ *           / \            \
+ *          /   \            |
+ *         /     \           |
+ *        /       \          |
+ *     empty    non-empty    |
+ *       |          \        |
+ *      for          \       |
+ *       |          arg      |
+ *       |           |       |
+ *  puts "done"   puts arg   |
+ *                      \___/
+ * ```
+ */
+ private class ForTree extends PostOrderTree, ForRange {
+ final override predicate propagatesAbnormal(AstNode child) {
+ child = this.getPattern() or child = this.getValue()
+ }
+
+ final override predicate first(AstNode first) { first(this.getValue(), first) }
+
+ /**
+ * Holds if `succ` is a successor of `pred` inside this loop, for a loop of the
+ * form `for pattern in array do body end`:
+ *
+ * ```
+ * array +-> in +--[non empty]--> pattern -> body -> in
+ *              |--[empty]--> for
+ * ```
+ */
+ final override predicate succ(AstNode pred, AstNode succ, Completion c) {
+ // Iterated value -> `in`
+ last(this.getValue(), pred, c) and
+ first(this.getIn(), succ) and
+ c instanceof SimpleCompletion
+ or
+ // `in` -> pattern, when the emptiness completion is `false` (non-empty)
+ last(this.getIn(), pred, c) and
+ first(this.getPattern(), succ) and
+ c.(EmptinessCompletion).getValue() = false
+ or
+ // Pattern -> body
+ last(this.getPattern(), pred, c) and
+ first(this.getBody(), succ) and
+ c instanceof NormalCompletion
+ or
+ // Body -> `in`, when the completion continues the loop
+ last(this.getBody(), pred, c) and
+ first(this.getIn(), succ) and
+ c.continuesLoop()
+ or
+ // Body -> body, on `redo`
+ last(this.getBody(), pred, c) and
+ first(this.getBody(), succ) and
+ c instanceof RedoCompletion
+ or
+ // Edges into the `for` node itself
+ succ = this and
+ (
+ // `in` reports emptiness
+ last(this.getIn(), pred, c) and
+ c.(EmptinessCompletion).getValue() = true
+ or
+ // The body finishes with a completion that neither continues, breaks, nor redoes
+ last(this.getBody(), pred, c) and
+ not c.continuesLoop() and
+ not c instanceof BreakCompletion and
+ not c instanceof RedoCompletion
+ or
+ // The body finishes with a completion compatible with the nested break completion `c`
+ last(this.getBody(), pred, c.(NestedBreakCompletion).getAnInnerCompatibleCompletion())
+ )
+ }
+ }
+
+ /** A global variable access, modelled as a `LeafTree`. */
+ private class GlobalVariableTree extends LeafTree, GlobalVariableAccess { }
+
+ /** A hash literal: a `StandardPostOrderTree` whose child elements are the hash elements, by index. */
+ private class HashLiteralTree extends StandardPostOrderTree, HashLiteral {
+ final override ControlFlowTree getChildElement(int i) { result = this.getElement(i) }
+ }
+
+ /** A hash-splat parameter, modelled as a `NonDefaultValueParameterTree`. */
+ private class HashSplatParameterTree extends NonDefaultValueParameterTree, HashSplatParameter { }
+
+ /** A here-doc: a `StandardPreOrderTree` whose child elements are its components, by index. */
+ private class HereDocTree extends StandardPreOrderTree, HereDoc {
+ final override ControlFlowTree getChildElement(int i) { result = this.getComponent(i) }
+ }
+
+ /** An instance variable access, modelled as a `LeafTree`. */
+ private class InstanceVariableTree extends LeafTree, InstanceVariableAccess { }
+
+ /**
+ * A keyword parameter, modelled as a `DefaultValueParameterTree` whose access
+ * node is the defining access and whose default value expression is the
+ * parameter's default value.
+ */
+ private class KeywordParameterTree extends DefaultValueParameterTree, KeywordParameter {
+ final override Expr getDefaultValueExpr() { result = this.getDefaultValue() }
+
+ final override AstNode getAccessNode() { result = this.getDefiningAccess() }
+ }
+
+ /**
+ * A lambda. Its body children are the parameters followed by the inherited body
+ * children, and `propagatesAbnormal` holds for no child.
+ */
+ private class LambdaTree extends BodyStmtTree, Lambda {
+ final override predicate propagatesAbnormal(AstNode child) { none() }
+
+ /** Gets the `i`th child in the body of this block: parameters (not rescuable) come first. */
+ final override AstNode getBodyChild(int i, boolean rescuable) {
+ rescuable = false and
+ result = this.getParameter(i)
+ or
+ result = BodyStmtTree.super.getBodyChild(i - this.getNumberOfParameters(), rescuable)
+ }
+ }
+
+ /** A local variable access, modelled as a `LeafTree`. */
+ private class LocalVariableAccessTree extends LeafTree, LocalVariableAccess { }
+
+ /**
+ * A logical `and` expression, evaluated in post-order. The right operand is only
+ * reached when the left operand finishes with a `TrueCompletion`.
+ */
+ private class LogicalAndTree extends PostOrderTree, LogicalAndExpr {
+ final override predicate propagatesAbnormal(AstNode child) { this.getAnOperand() = child }
+
+ final override predicate first(AstNode first) { first(this.getLeftOperand(), first) }
+
+ final override predicate succ(AstNode pred, AstNode succ, Completion c) {
+ last(this.getLeftOperand(), pred, c) and
+ (
+ // Left operand is true: continue with the right operand
+ c instanceof TrueCompletion and
+ first(this.getRightOperand(), succ)
+ or
+ // Left operand is false: go straight to the expression itself
+ c instanceof FalseCompletion and
+ succ = this
+ )
+ or
+ // Right operand completes normally: the expression itself
+ succ = this and
+ c instanceof NormalCompletion and
+ last(this.getRightOperand(), pred, c)
+ }
+ }
+
+ /** A logical `not` expression, evaluated in post-order: the operand, then the expression itself. */
+ private class LogicalNotTree extends PostOrderTree, NotExpr {
+ final override predicate propagatesAbnormal(AstNode child) { this.getOperand() = child }
+
+ final override predicate first(AstNode first) { first(this.getOperand(), first) }
+
+ final override predicate succ(AstNode pred, AstNode succ, Completion c) {
+ // Operand completes normally: the expression itself
+ c instanceof NormalCompletion and
+ last(this.getOperand(), pred, c) and
+ this = succ
+ }
+ }
+
+ /**
+ * A logical `or` expression, evaluated in post-order. The right operand is only
+ * reached when the left operand finishes with a `FalseCompletion`.
+ */
+ private class LogicalOrTree extends PostOrderTree, LogicalOrExpr {
+ final override predicate propagatesAbnormal(AstNode child) { this.getAnOperand() = child }
+
+ final override predicate first(AstNode first) { first(this.getLeftOperand(), first) }
+
+ final override predicate succ(AstNode pred, AstNode succ, Completion c) {
+ last(this.getLeftOperand(), pred, c) and
+ (
+ // Left operand is false: continue with the right operand
+ c instanceof FalseCompletion and
+ first(this.getRightOperand(), succ)
+ or
+ // Left operand is true: go straight to the expression itself
+ c instanceof TrueCompletion and
+ succ = this
+ )
+ or
+ // Right operand completes normally: the expression itself
+ succ = this and
+ c instanceof NormalCompletion and
+ last(this.getRightOperand(), pred, c)
+ }
+ }
+
+ /**
+ * A method call. Child elements are the receiver (index 0), the arguments
+ * (indices 1 and up), and finally the block.
+ */
+ private class MethodCallTree extends CallTree, MethodCall {
+ final override ControlFlowTree getChildElement(int i) {
+ // Receiver first
+ i = 0 and
+ result = this.getReceiver()
+ or
+ // Arguments, shifted by one to make room for the receiver
+ result = this.getArgument(i - 1)
+ or
+ // The block comes after all arguments
+ i = 1 + this.getNumberOfArguments() and
+ result = this.getBlock()
+ }
+ }
+
+ /** A method name that is an `ASTInternal::TTokenMethodName`, modelled as a `LeafTree`. */
+ private class MethodNameTree extends LeafTree, MethodName, ASTInternal::TTokenMethodName { }
+
+ /**
+ * A method. Its body children are the parameters followed by the inherited body
+ * children, and `propagatesAbnormal` holds for no child.
+ */
+ private class MethodTree extends BodyStmtTree, Method {
+ final override predicate propagatesAbnormal(AstNode child) { none() }
+
+ /** Gets the `i`th child in the body of this block: parameters (not rescuable) come first. */
+ final override AstNode getBodyChild(int i, boolean rescuable) {
+ rescuable = false and
+ result = this.getParameter(i)
+ or
+ result = BodyStmtTree.super.getBodyChild(i - this.getNumberOfParameters(), rescuable)
+ }
+ }
+
+ /** A module declaration. The scope expression (when present) precedes the inherited body children. */
+ private class ModuleDeclarationTree extends NamespaceTree, ModuleDeclaration {
+ /**
+ * Gets the `i`th child in the body of this block: the scope expression (if any,
+ * not rescuable), then the inherited body children shifted accordingly.
+ */
+ final override AstNode getBodyChild(int i, boolean rescuable) {
+ i = 0 and
+ rescuable = false and
+ result = this.getScopeExpr()
+ or
+ result = NamespaceTree.super.getBodyChild(i - count(this.getScopeExpr()), rescuable)
+ }
+ }
+
+ /**
+ * A namespace. On top of the `BodyStmtTree` edges, the namespace node itself
+ * follows the last inner element.
+ */
+ private class NamespaceTree extends BodyStmtTree, Namespace {
+ final override predicate first(AstNode first) {
+ // Without any child, the namespace node itself comes first
+ first = this and
+ not exists(this.getAChild(_))
+ or
+ this.firstInner(first)
+ }
+
+ final override predicate succ(AstNode pred, AstNode succ, Completion c) {
+ // Last inner element -> the namespace node itself
+ this.lastInner(pred, c) and
+ succ = this
+ or
+ // Edges inherited from `BodyStmtTree`
+ BodyStmtTree.super.succ(pred, succ, c)
+ }
+ }
+
+ /** A `nil` literal, modelled as a `LeafTree`. */
+ private class NilTree extends LeafTree, NilLiteral { }
+
+ /** A numeric literal, modelled as a `LeafTree`. */
+ private class NumericLiteralTree extends LeafTree, NumericLiteral { }
+
+ /**
+ * An optional parameter, modelled as a `DefaultValueParameterTree` whose access
+ * node is the defining access and whose default value expression is the
+ * parameter's default value.
+ */
+ private class OptionalParameterTree extends DefaultValueParameterTree, OptionalParameter {
+ final override Expr getDefaultValueExpr() { result = this.getDefaultValue() }
+
+ final override AstNode getAccessNode() { result = this.getDefiningAccess() }
+ }
+
+ /** A key-value pair: a `StandardPostOrderTree` with the key as child 0 and the value as child 1. */
+ private class PairTree extends StandardPostOrderTree, Pair {
+ final override ControlFlowTree getChildElement(int i) {
+ i = 0 and result = this.getKey()
+ or
+ i = 1 and result = this.getValue()
+ }
+ }
+
+ /** A range literal: a `StandardPostOrderTree` with the begin as child 0 and the end as child 1. */
+ private class RangeLiteralTree extends StandardPostOrderTree, RangeLiteral {
+ final override ControlFlowTree getChildElement(int i) {
+ i = 0 and result = this.getBegin()
+ or
+ i = 1 and result = this.getEnd()
+ }
+ }
+
+ /** A `redo` statement, modelled as a `LeafTree`. */
+ private class RedoStmtTree extends LeafTree, RedoStmt { }
+
+ /**
+ * A `rescue` modifier expression, evaluated in pre-order: the expression node,
+ * then the body, and on a raise from the body, the handler.
+ */
+ private class RescueModifierTree extends PreOrderTree, RescueModifierExpr {
+ final override predicate propagatesAbnormal(AstNode child) { this.getHandler() = child }
+
+ final override predicate last(AstNode last, Completion c) {
+ // The handler, with any completion
+ last(this.getHandler(), last, c)
+ or
+ // The body, unless it raises
+ not c instanceof RaiseCompletion and
+ last(this.getBody(), last, c)
+ }
+
+ final override predicate succ(AstNode pred, AstNode succ, Completion c) {
+ // The expression node -> body
+ c instanceof SimpleCompletion and
+ pred = this and
+ first(this.getBody(), succ)
+ or
+ // Body raises -> handler
+ c instanceof RaiseCompletion and
+ first(this.getHandler(), succ) and
+ last(this.getBody(), pred, c)
+ }
+ }
+
+ /**
+ * A `rescue` clause, evaluated in pre-order: the clause node, then the exception
+ * expressions in order, then the variable expression (if any), then the body
+ * (if any).
+ */
+ private class RescueTree extends PreOrderTree, RescueClause {
+ final override predicate propagatesAbnormal(AstNode child) { child = this.getAnException() }
+
+ /** Gets the exception expression with the highest index, if any. */
+ private Expr getLastException() {
+ exists(int i | result = this.getException(i) and not exists(this.getException(i + 1)))
+ }
+
+ /**
+ * Holds if `last` is a last element of this clause, with completion `c`, in the
+ * case where the clause matches.
+ */
+ predicate lastMatch(AstNode last, Completion c) {
+ last(this.getBody(), last, c)
+ or
+ // Without a body, fall back to the variable expression, then to a matching
+ // exception expression, and finally to the clause node itself
+ not exists(this.getBody()) and
+ (
+ last(this.getVariableExpr(), last, c)
+ or
+ not exists(this.getVariableExpr()) and
+ (
+ last(this.getAnException(), last, c) and
+ c.(MatchingCompletion).getValue() = true
+ or
+ not exists(this.getAnException()) and
+ last = this and
+ c.isValidFor(this)
+ )
+ )
+ }
+
+ /**
+ * Holds if `last` is a last element of the final exception expression of this
+ * clause, finishing with a `false` matching completion `c`.
+ */
+ predicate lastNoMatch(AstNode last, Completion c) {
+ last(this.getLastException(), last, c) and
+ c.(MatchingCompletion).getValue() = false
+ }
+
+ final override predicate last(AstNode last, Completion c) {
+ this.lastNoMatch(last, c)
+ or
+ this.lastMatch(last, c)
+ }
+
+ final override predicate succ(AstNode pred, AstNode succ, Completion c) {
+ // From the clause node: the first exception expression, else the variable
+ // expression, else the body
+ exists(AstNode next |
+ pred = this and
+ first(next, succ) and
+ c instanceof SimpleCompletion
+ |
+ next = this.getException(0)
+ or
+ not exists(this.getException(0)) and
+ (
+ next = this.getVariableExpr()
+ or
+ not exists(this.getVariableExpr()) and
+ next = this.getBody()
+ )
+ )
+ or
+ // From a matching exception expression: the variable expression, else the body
+ exists(AstNode next |
+ last(this.getAnException(), pred, c) and
+ first(next, succ) and
+ c.(MatchingCompletion).getValue() = true
+ |
+ next = this.getVariableExpr()
+ or
+ not exists(this.getVariableExpr()) and
+ next = this.getBody()
+ )
+ or
+ // From a non-matching exception expression: the next exception expression
+ exists(int i |
+ last(this.getException(i), pred, c) and
+ c.(MatchingCompletion).getValue() = false and
+ first(this.getException(i + 1), succ)
+ )
+ or
+ // From the variable expression: the body
+ last(this.getVariableExpr(), pred, c) and
+ first(this.getBody(), succ) and
+ c instanceof NormalCompletion
+ }
+ }
+
+ /** A `retry` statement, modelled as a `LeafTree`. */
+ private class RetryStmtTree extends LeafTree, RetryStmt { }
+
+ /** A returning statement: a `StandardPostOrderTree` whose only child element is its value (if any). */
+ private class ReturningStmtTree extends StandardPostOrderTree, ReturningStmt {
+ final override ControlFlowTree getChildElement(int i) { result = this.getValue() and i = 0 }
+ }
+
+ /** A `self` expression, modelled as a `LeafTree`. */
+ private class SelfTree extends LeafTree, Self { }
+
+ /** A simple parameter, modelled as a `NonDefaultValueParameterTree`. */
+ private class SimpleParameterTree extends NonDefaultValueParameterTree, SimpleParameter { }
+
+ // Corner case: For duplicated '_' parameters, only the first occurrence has a defining
+ // access. For subsequent parameters we simply include the parameter itself in the CFG
+ private class SimpleParameterTreeDupUnderscore extends LeafTree, SimpleParameter {
+ // Restricts this class to simple parameters that have no defining access
+ SimpleParameterTreeDupUnderscore() { not exists(this.getDefiningAccess()) }
+ }
+
+ /**
+ * A singleton class. Its value is body child 0, followed by the inherited body
+ * children; the class node itself follows the last inner element.
+ */
+ private class SingletonClassTree extends BodyStmtTree, SingletonClass {
+ final override predicate first(AstNode first) {
+ // Without any child, the class node itself comes first
+ first = this and
+ not exists(this.getAChild(_))
+ or
+ this.firstInner(first)
+ }
+
+ final override predicate succ(AstNode pred, AstNode succ, Completion c) {
+ // Last inner element -> the class node itself
+ this.lastInner(pred, c) and
+ succ = this
+ or
+ // Edges inherited from `BodyStmtTree`
+ BodyStmtTree.super.succ(pred, succ, c)
+ }
+
+ /** Gets the `i`th child in the body of this block: the value (not rescuable) comes first. */
+ final override AstNode getBodyChild(int i, boolean rescuable) {
+ i = 0 and
+ rescuable = false and
+ result = this.getValue()
+ or
+ result = BodyStmtTree.super.getBodyChild(i - 1, rescuable)
+ }
+ }
+
+ /**
+ * A singleton method. The object expression is evaluated first and is followed
+ * by the method node itself; body children are the parameters followed by the
+ * inherited body children.
+ */
+ private class SingletonMethodTree extends BodyStmtTree, SingletonMethod {
+ final override predicate propagatesAbnormal(AstNode child) { none() }
+
+ /** Gets the `i`th child in the body of this block: parameters (not rescuable) come first. */
+ final override AstNode getBodyChild(int i, boolean rescuable) {
+ rescuable = false and
+ result = this.getParameter(i)
+ or
+ result = BodyStmtTree.super.getBodyChild(i - this.getNumberOfParameters(), rescuable)
+ }
+
+ override predicate first(AstNode first) { first(this.getObject(), first) }
+
+ override predicate succ(AstNode pred, AstNode succ, Completion c) {
+ // Object expression completes normally -> the method node itself
+ c instanceof NormalCompletion and
+ succ = this and
+ last(this.getObject(), pred, c)
+ or
+ // Edges inherited from `BodyStmtTree`
+ BodyStmtTree.super.succ(pred, succ, c)
+ }
+ }
+
+ /** A splat parameter, modelled as a `NonDefaultValueParameterTree`. */
+ private class SplatParameterTree extends NonDefaultValueParameterTree, SplatParameter { }
+
+ /**
+ * A statement sequence, evaluated in post-order: the body children from left to
+ * right, then the sequence node itself.
+ */
+ class StmtSequenceTree extends PostOrderTree, StmtSequence {
+ override predicate propagatesAbnormal(AstNode child) { this.getAStmt() = child }
+
+ override predicate first(AstNode first) { first(this.getStmt(0), first) }
+
+ /**
+ * Gets the `i`th child in the body of this body statement. By default these are
+ * the statements of the sequence, all with `rescuable = true`.
+ */
+ AstNode getBodyChild(int i, boolean rescuable) {
+ rescuable = true and
+ result = this.getStmt(i)
+ }
+
+ /** Gets the body child with the highest index, if any. */
+ final AstNode getLastBodyChild() {
+ exists(int idx |
+ not exists(this.getBodyChild(idx + 1, _)) and
+ result = this.getBodyChild(idx, _)
+ )
+ }
+
+ override predicate succ(AstNode pred, AstNode succ, Completion c) {
+ c instanceof NormalCompletion and
+ (
+ // A body child is followed by the next body child
+ exists(int idx |
+ first(this.getBodyChild(idx + 1, _), succ) and
+ last(this.getBodyChild(idx, _), pred, c)
+ )
+ or
+ // The last body child is followed by the sequence node itself
+ succ = this and
+ last(this.getLastBodyChild(), pred, c)
+ )
+ }
+ }
+
+ /**
+ * A string concatenation: a `StandardTree` whose child elements are the
+ * concatenated strings, by index. The concatenation node is neither first nor
+ * last itself; both are taken from its first and last child element.
+ */
+ private class StringConcatenationTree extends StandardTree, StringConcatenation {
+ final override ControlFlowTree getChildElement(int i) { result = this.getString(i) }
+
+ final override predicate first(AstNode first) { first(this.getFirstChildElement(), first) }
+
+ final override predicate last(AstNode last, Completion c) {
+ last(this.getLastChildElement(), last, c)
+ }
+ }
+
+ /**
+ * A string-like literal other than a here-doc (here-docs are covered by
+ * `HereDocTree`): a `StandardPostOrderTree` whose child elements are its
+ * components, by index.
+ */
+ private class StringlikeLiteralTree extends StandardPostOrderTree, StringlikeLiteral {
+ StringlikeLiteralTree() { not this instanceof HereDoc }
+
+ final override ControlFlowTree getChildElement(int i) { result = this.getComponent(i) }
+ }
+
+ /**
+ * A top-level program. Its body children are the begin blocks followed by the
+ * inherited body children; first and last elements are the inner ones, and only
+ * the `BodyStmtTree` edges are contributed.
+ */
+ private class ToplevelTree extends BodyStmtTree, Toplevel {
+ /** Gets the `i`th child in the body of this block: begin blocks (rescuable) come first. */
+ final override AstNode getBodyChild(int i, boolean rescuable) {
+ rescuable = true and
+ result = this.getBeginBlock(i)
+ or
+ result = BodyStmtTree.super.getBodyChild(i - count(this.getABeginBlock()), rescuable)
+ }
+
+ final override predicate first(AstNode first) { this.firstInner(first) }
+
+ final override predicate last(AstNode last, Completion c) { this.lastInner(last, c) }
+
+ final override predicate succ(AstNode pred, AstNode succ, Completion c) {
+ // Nothing beyond the edges of `BodyStmtTree`
+ BodyStmtTree.super.succ(pred, succ, c)
+ }
+ }
+
+ /** A tuple pattern: a `StandardPostOrderTree` whose child elements are the pattern elements, by index. */
+ private class TuplePatternTree extends StandardPostOrderTree, TuplePattern {
+ final override ControlFlowTree getChildElement(int i) { result = this.getElement(i) }
+ }
+
+ /** An `undef` statement: a `StandardPreOrderTree` whose child elements are the method names, by index. */
+ private class UndefStmtTree extends StandardPreOrderTree, UndefStmt {
+ final override ControlFlowTree getChildElement(int i) { result = this.getMethodName(i) }
+ }
+
+ /**
+ * A `when` branch, evaluated in pre-order: the branch node, then the patterns in
+ * order until one evaluates to true, then the body.
+ */
+ private class WhenTree extends PreOrderTree, WhenExpr {
+ final override predicate propagatesAbnormal(AstNode child) { this.getAPattern() = child }
+
+ /** Gets the pattern with the highest index, if any. */
+ final Expr getLastPattern() {
+ exists(int idx |
+ not exists(this.getPattern(idx + 1)) and
+ result = this.getPattern(idx)
+ )
+ }
+
+ final override predicate last(AstNode last, Completion c) {
+ // The body, with any completion
+ last(this.getBody(), last, c)
+ or
+ // The last pattern, when it evaluates to false
+ c.(ConditionalCompletion).getValue() = false and
+ last(this.getLastPattern(), last, c)
+ }
+
+ final override predicate succ(AstNode pred, AstNode succ, Completion c) {
+ // The branch node -> first pattern
+ c instanceof SimpleCompletion and
+ pred = this and
+ first(this.getPattern(0), succ)
+ or
+ exists(int idx, Expr pattern |
+ pattern = this.getPattern(idx) and
+ last(pattern, pred, c)
+ |
+ // Pattern evaluates to true -> body
+ c.(ConditionalCompletion).getValue() = true and
+ first(this.getBody(), succ)
+ or
+ // Pattern evaluates to false -> next pattern
+ c.(ConditionalCompletion).getValue() = false and
+ first(this.getPattern(idx + 1), succ)
+ )
+ }
+ }
+}
+
+/** Gets the outer scope of `n`, provided that `n` is not itself a `CfgScope::Range_`. */
+private Scope parent(Scope n) {
+ not n instanceof CfgScope::Range_ and
+ n.getOuterScope() = result
+}
+
+/**
+ * Gets the CFG scope of node `n`.
+ *
+ * This is an inlined wrapper around the cached `getCfgScopeImpl`.
+ * NOTE(review): the `only_bind_into` annotations look like join-order hints for
+ * the inlined call sites rather than a change in results; confirm against the QL
+ * annotation documentation before removing them.
+ */
+pragma[inline]
+CfgScope getCfgScope(AstNode n) {
+ exists(AstNode n0 |
+ pragma[only_bind_into](n0) = n and
+ pragma[only_bind_into](result) = getCfgScopeImpl(n0)
+ )
+}
+
+/** Cached predicates and types of this library; re-exported via `import Cached`. */
+cached
+private module Cached {
+ /**
+ * Gets the CFG scope of node `n`.
+ *
+ * Starting from the scope of `n`, this follows zero or more `parent` steps; the
+ * result is restricted to `CfgScope` by the return type.
+ */
+ cached
+ CfgScope getCfgScopeImpl(AstNode n) {
+ // NOTE(review): presumably ties the caching of this predicate to a shared stage;
+ // see the definition of `forceCachingInSameStage`
+ forceCachingInSameStage() and
+ result = parent*(ASTInternal::fromGenerated(scopeOf(ASTInternal::toGeneratedInclSynth(n))))
+ }
+
+ /**
+ * The successor types: one branch per kind of successor, where the boolean,
+ * emptiness, and matching branches each carry a boolean value.
+ */
+ cached
+ newtype TSuccessorType =
+ TSuccessorSuccessor() or
+ TBooleanSuccessor(boolean b) { b in [false, true] } or
+ TEmptinessSuccessor(boolean isEmpty) { isEmpty in [false, true] } or
+ TMatchingSuccessor(boolean isMatch) { isMatch in [false, true] } or
+ TReturnSuccessor() or
+ TBreakSuccessor() or
+ TNextSuccessor() or
+ TRedoSuccessor() or
+ TRetrySuccessor() or
+ TRaiseSuccessor() or // TODO: Add exception type?
+ TExitSuccessor()
+}
+
+import Cached
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImplShared.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImplShared.qll
new file mode 100644
index 00000000000..050a9384729
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImplShared.qll
@@ -0,0 +1,945 @@
+/** Provides language-independent definitions for AST-to-CFG construction. */
+
+private import ControlFlowGraphImplSpecific
+
+/** An element with associated control flow. */
+abstract class ControlFlowTree extends ControlFlowTreeBase {
+ /** Holds if `first` is the first element executed within this element. */
+ pragma[nomagic]
+ abstract predicate first(ControlFlowElement first);
+
+ /**
+ * Holds if `last` with completion `c` is a potential last element executed
+ * within this element.
+ */
+ pragma[nomagic]
+ abstract predicate last(ControlFlowElement last, Completion c);
+
+ /** Holds if abnormal execution of `child` should propagate upwards. */
+ abstract predicate propagatesAbnormal(ControlFlowElement child);
+
+ /**
+ * Holds if `succ` is a control flow successor for `pred`, given that `pred`
+ * finishes with completion `c`.
+ */
+ pragma[nomagic]
+ abstract predicate succ(ControlFlowElement pred, ControlFlowElement succ, Completion c);
+}
+
+/**
+ * Holds if `first` is the first element executed within control flow
+ * element `cft`.
+ */
+predicate first(ControlFlowTree cft, ControlFlowElement first) { cft.first(first) }
+
+/**
+ * Holds if control flow element `cft` may finish at `last` with completion `c`:
+ * either directly, or via an abnormal completion propagated from a child.
+ */
+predicate last(ControlFlowTree cft, ControlFlowElement last, Completion c) {
+ exists(ControlFlowElement child |
+ not completionIsNormal(c) and
+ last(child, last, c) and
+ cft.propagatesAbnormal(child)
+ )
+ or
+ cft.last(last, c)
+}
+
+/**
+ * Holds if `succ` is a control flow successor for `pred`, given that `pred`
+ * finishes with completion `c`.
+ */
+pragma[nomagic]
+predicate succ(ControlFlowElement pred, ControlFlowElement succ, Completion c) {
+ any(ControlFlowTree cft).succ(pred, succ, c)
+}
+
+/** An element that is executed in pre-order. */
+abstract class PreOrderTree extends ControlFlowTree {
+ final override predicate first(ControlFlowElement first) { first = this }
+}
+
+/** An element that is executed in post-order, that is, the element itself comes last. */
+abstract class PostOrderTree extends ControlFlowTree {
+ override predicate last(ControlFlowElement last, Completion c) {
+ completionIsValidFor(c, this) and
+ this = last
+ }
+}
+
+/**
+ * An element whose children are evaluated one after another in a standard
+ * left-to-right fashion. The concrete evaluation order is supplied by the
+ * predicate `getChildElement()`.
+ */
+abstract class StandardTree extends ControlFlowTree {
+ /** Gets the `i`th child element, in order of evaluation. */
+ abstract ControlFlowElement getChildElement(int i);
+
+ private ControlFlowElement getChildElementRanked(int i) {
+ result =
+ rank[i + 1](ControlFlowElement elem, int pos |
+ this.getChildElement(pos) = elem
+ |
+ elem order by pos
+ )
+ }
+
+ /** Gets the first child element of this element, if any. */
+ final ControlFlowElement getFirstChildElement() { this.getChildElementRanked(0) = result }
+
+ /** Gets the last child element of this element, if any. */
+ final ControlFlowElement getLastChildElement() {
+ exists(int idx |
+ not exists(this.getChildElementRanked(idx + 1)) and
+ this.getChildElementRanked(idx) = result
+ )
+ }
+
+ /** Holds if this element has no children. */
+ predicate isLeafElement() { not exists(this.getFirstChildElement()) }
+
+ override predicate propagatesAbnormal(ControlFlowElement child) {
+ this.getChildElement(_) = child
+ }
+
+ pragma[nomagic]
+ override predicate succ(ControlFlowElement pred, ControlFlowElement succ, Completion c) {
+ exists(int idx |
+ completionIsNormal(c) and
+ first(this.getChildElementRanked(idx + 1), succ) and
+ last(this.getChildElementRanked(idx), pred, c)
+ )
+ }
+}
+
+/** A standard element that is executed in pre-order: the element first, then its children. */
+abstract class StandardPreOrderTree extends StandardTree, PreOrderTree {
+ override predicate last(ControlFlowElement last, Completion c) {
+ this = last and // a leaf element is its own last element
+ this.isLeafElement() and
+ completionIsValidFor(c, this)
+ or
+ last(this.getLastChildElement(), last, c)
+ }
+
+ override predicate succ(ControlFlowElement pred, ControlFlowElement succ, Completion c) {
+ completionIsSimple(c) and // step from this element into its first child
+ this = pred and
+ first(this.getFirstChildElement(), succ)
+ or
+ StandardTree.super.succ(pred, succ, c)
+ }
+}
+
+/** A standard element that is executed in post-order: its children first, then the element. */
+abstract class StandardPostOrderTree extends StandardTree, PostOrderTree {
+ override predicate first(ControlFlowElement first) {
+ this = first and // without children, the element itself comes first
+ not exists(this.getFirstChildElement())
+ or
+ first(this.getFirstChildElement(), first)
+ }
+
+ override predicate succ(ControlFlowElement pred, ControlFlowElement succ, Completion c) {
+ completionIsNormal(c) and // step from the last child to this element
+ this = succ and
+ last(this.getLastChildElement(), pred, c)
+ or
+ StandardTree.super.succ(pred, succ, c)
+ }
+}
+
+/** An element that is a leaf in the control flow graph. */
+abstract class LeafTree extends PreOrderTree, PostOrderTree {
+ override predicate propagatesAbnormal(ControlFlowElement child) { none() }
+
+ override predicate succ(ControlFlowElement pred, ControlFlowElement succ, Completion c) { none() }
+}
+
+/**
+ * Holds if split kinds `sk1` and `sk2` may overlap, meaning that both apply
+ * to some common AST node whose CFG scope is `scope`.
+ */
+private predicate overlapping(CfgScope scope, SplitKind sk1, SplitKind sk2) {
+ exists(ControlFlowElement node |
+ getCfgScope(node) = scope and
+ sk2.appliesTo(node) and
+ sk1.appliesTo(node)
+ )
+}
+
+/**
+ * A split kind. Each control flow node can have at most one split of a
+ * given kind.
+ */
+abstract class SplitKind extends SplitKindBase {
+ /** Gets a split of this kind. */
+ SplitImpl getASplit() { result.getKind() = this }
+
+ /** Holds if some split of this kind applies to AST node `n`. */
+ predicate appliesTo(ControlFlowElement n) { this.getASplit().appliesTo(n) }
+
+ /**
+ * Gets a unique integer representing this split kind. The integer is used
+ * to represent sets of splits as ordered lists.
+ */
+ abstract int getListOrder();
+
+ /** Gets the rank of this split kind among all overlapping kinds for `scope`. */
+ private int getRank(CfgScope scope) {
+ this = rank[result](SplitKind sk | overlapping(scope, this, sk) | sk order by sk.getListOrder())
+ }
+
+ /**
+ * Holds if this split kind is enabled for AST node `n`. For performance reasons,
+ * the number of splits is restricted by the `maxSplits()` predicate.
+ */
+ predicate isEnabled(ControlFlowElement n) {
+ this.appliesTo(n) and
+ this.getRank(getCfgScope(n)) <= maxSplits()
+ }
+
+ /**
+ * Gets the rank of this split kind among all the split kinds that apply to
+ * AST node `n`. The rank is based on the order defined by `getListOrder()`.
+ */
+ int getListRank(ControlFlowElement n) {
+ this.isEnabled(n) and
+ this = rank[result](SplitKind sk | sk.appliesTo(n) | sk order by sk.getListOrder())
+ }
+
+ /** Gets a textual representation of this split kind. */
+ abstract string toString();
+}
+
+/** Provides the interface for implementing an entity to split on. */
+abstract class SplitImpl extends Split {
+ /** Gets the kind of this split. */
+ abstract SplitKind getKind();
+
+ /**
+ * Holds if this split is entered when control passes from `pred` to `succ` with
+ * completion `c`.
+ *
+ * Invariant: `hasEntry(pred, succ, c) implies succ(pred, succ, c)`.
+ */
+ abstract predicate hasEntry(ControlFlowElement pred, ControlFlowElement succ, Completion c);
+
+ /**
+ * Holds if this split is entered when control passes from `scope` to the entry point
+ * `first`.
+ *
+ * Invariant: `hasEntryScope(scope, first) implies scopeFirst(scope, first)`.
+ */
+ abstract predicate hasEntryScope(CfgScope scope, ControlFlowElement first);
+
+ /**
+ * Holds if this split is left when control passes from `pred` to `succ` with
+ * completion `c`.
+ *
+ * Invariant: `hasExit(pred, succ, c) implies succ(pred, succ, c)`.
+ */
+ abstract predicate hasExit(ControlFlowElement pred, ControlFlowElement succ, Completion c);
+
+ /**
+ * Holds if this split is left when control passes from `last` out of the enclosing
+ * scope `scope` with completion `c`.
+ *
+ * Invariant: `hasExitScope(scope, last, c) implies scopeLast(scope, last, c)`
+ */
+ abstract predicate hasExitScope(CfgScope scope, ControlFlowElement last, Completion c);
+
+ /**
+ * Holds if this split is maintained when control passes from `pred` to `succ` with
+ * completion `c`.
+ *
+ * Invariant: `hasSuccessor(pred, succ, c) implies succ(pred, succ, c)`
+ */
+ abstract predicate hasSuccessor(ControlFlowElement pred, ControlFlowElement succ, Completion c);
+
+ /** Holds if this split applies to control flow element `cfe`. */
+ final predicate appliesTo(ControlFlowElement cfe) {
+ this.hasEntry(_, cfe, _)
+ or
+ this.hasEntryScope(_, cfe)
+ or
+ exists(ControlFlowElement pred | this.appliesTo(pred) | this.hasSuccessor(pred, cfe, _))
+ }
+
+ /** The `succ` relation restricted to predecessors `pred` that this split applies to. */
+ pragma[noinline]
+ final predicate appliesSucc(ControlFlowElement pred, ControlFlowElement succ, Completion c) {
+ this.appliesTo(pred) and
+ succ(pred, succ, c)
+ }
+}
+
+/**
+ * A set of control flow node splits, represented as a list of splits that is
+ * ordered by ascending rank.
+ */
+class Splits extends TSplits {
+ /** Gets a textual representation of this set of splits. */
+ string toString() { splitsToString(this) = result }
+
+ /** Gets a split belonging to this set of splits. */
+ SplitImpl getASplit() {
+ exists(SplitImpl hd, Splits tl | TSplitsCons(hd, tl) = this |
+ tl.getASplit() = result
+ or
+ hd = result
+ )
+ }
+}
+
+private predicate succEntrySplitsFromRank(
+ CfgScope pred, ControlFlowElement succ, Splits splits, int rnk
+) {
+ splits = TSplitsNil() and
+ scopeFirst(pred, succ) and
+ rnk = 0
+ or
+ exists(SplitImpl head, Splits tail | succEntrySplitsCons(pred, succ, head, tail, rnk) |
+ splits = TSplitsCons(head, tail)
+ )
+}
+
+private predicate succEntrySplitsCons(
+ CfgScope pred, ControlFlowElement succ, SplitImpl head, Splits tail, int rnk
+) {
+ succEntrySplitsFromRank(pred, succ, tail, rnk - 1) and
+ head.hasEntryScope(pred, succ) and
+ rnk = head.getKind().getListRank(succ)
+}
+
+/**
+ * Holds if `succ` with splits `succSplits` is the first element that is executed
+ * when entering scope `pred`, where `t` is the (simple) successor type of the edge.
+ */
+pragma[noinline]
+private predicate succEntrySplits(
+ CfgScope pred, ControlFlowElement succ, Splits succSplits, SuccessorType t
+) {
+ exists(int rnk |
+ scopeFirst(pred, succ) and
+ successorTypeIsSimple(t) and
+ succEntrySplitsFromRank(pred, succ, succSplits, rnk)
+ |
+ rnk = 0 and
+ not any(SplitImpl split).hasEntryScope(pred, succ)
+ or
+ rnk = max(SplitImpl split | split.hasEntryScope(pred, succ) | split.getKind().getListRank(succ))
+ )
+}
+
+/**
+ * Holds if `pred` with splits `predSplits` can exit the enclosing scope
+ * `succ` with type `t`.
+ */
+private predicate succExitSplits(
+ ControlFlowElement pred, Splits predSplits, CfgScope succ, SuccessorType t
+) {
+ exists(Reachability::SameSplitsBlock b, Completion c | pred = b.getAnElement() |
+ b.isReachable(predSplits) and
+ t = getAMatchingSuccessorType(c) and
+ scopeLast(succ, pred, c) and
+ forall(SplitImpl predSplit | predSplit = predSplits.getASplit() |
+ predSplit.hasExitScope(succ, pred, c)
+ )
+ )
+}
+
+/**
+ * Provides a predicate for the successor relation with split information,
+ * as well as logic used to construct the type `TSplits` representing sets
+ * of splits. Only sets of splits that can be reached are constructed, hence
+ * the predicates are mutually recursive.
+ *
+ * For the successor relation
+ *
+ * ```ql
+ * succSplits(ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits, Completion c)
+ * ```
+ *
+ * the following invariants are maintained:
+ *
+ * 1. `pred` is reachable with split set `predSplits`.
+ * 2. For all `split` in `predSplits`:
+ * - If `split.hasSuccessor(pred, succ, c)` then `split` in `succSplits`.
+ * 3. For all `split` in `predSplits`:
+ * - If `split.hasExit(pred, succ, c)` and not `split.hasEntry(pred, succ, c)` then
+ * `split` not in `succSplits`.
+ * 4. For all `split` with kind not in `predSplits`:
+ * - If `split.hasEntry(pred, succ, c)` then `split` in `succSplits`.
+ * 5. For all `split` in `succSplits`:
+ * - `split.hasSuccessor(pred, succ, c)` and `split` in `predSplits`, or
+ * - `split.hasEntry(pred, succ, c)`.
+ *
+ * The algorithm divides into four cases:
+ *
+ * 1. The set of splits for the successor is the same as the set of splits
+ * for the predecessor:
+ * a) The successor is in the same `SameSplitsBlock` as the predecessor.
+ * b) The successor is *not* in the same `SameSplitsBlock` as the predecessor.
+ * 2. The set of splits for the successor is different from the set of splits
+ * for the predecessor:
+ * a) The set of splits for the successor is *maybe* non-empty.
+ * b) The set of splits for the successor is *always* empty.
+ *
+ * Only case 2a may introduce new sets of splits, so only predicates from
+ * this case are used in the definition of `TSplits`.
+ *
+ * The predicates in this module are named after the cases above.
+ */
+private module SuccSplits {
+ private predicate succInvariant1(
+ Reachability::SameSplitsBlock b, ControlFlowElement pred, Splits predSplits,
+ ControlFlowElement succ, Completion c
+ ) {
+ pred = b.getAnElement() and
+ b.isReachable(predSplits) and
+ succ(pred, succ, c)
+ }
+
+ private predicate case1b0(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c
+ ) {
+ exists(Reachability::SameSplitsBlock b |
+ // Invariant 1
+ succInvariant1(b, pred, predSplits, succ, c)
+ |
+ (succ = b.getAnElement() implies succ = b) and
+ // Invariant 4
+ not exists(SplitImpl split | split.hasEntry(pred, succ, c))
+ )
+ }
+
+ /**
+ * Case 1b.
+ *
+ * Invariants 1 and 4 hold in the base case, and invariants 2, 3, and 5 are
+ * maintained for all splits in `predSplits` (= `succSplits`), except
+ * possibly for the splits in `except`.
+ *
+ * The predicate is written using explicit recursion, as opposed to a `forall`,
+ * to avoid negative recursion.
+ */
+ private predicate case1bForall(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, Splits except
+ ) {
+ case1b0(pred, predSplits, succ, c) and
+ except = predSplits
+ or
+ exists(SplitImpl split |
+ case1bForallCons(pred, predSplits, succ, c, split, except) and
+ split.hasSuccessor(pred, succ, c)
+ )
+ }
+
+ pragma[noinline]
+ private predicate case1bForallCons(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c,
+ SplitImpl exceptHead, Splits exceptTail
+ ) {
+ case1bForall(pred, predSplits, succ, c, TSplitsCons(exceptHead, exceptTail))
+ }
+
+ private predicate case1(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c
+ ) {
+ // Case 1a
+ exists(Reachability::SameSplitsBlock b | succInvariant1(b, pred, predSplits, succ, c) |
+ succ = b.getAnElement() and
+ not succ = b
+ )
+ or
+ // Case 1b
+ case1bForall(pred, predSplits, succ, c, TSplitsNil())
+ }
+
+ pragma[noinline]
+ private SplitImpl succInvariant1GetASplit(
+ Reachability::SameSplitsBlock b, ControlFlowElement pred, Splits predSplits,
+ ControlFlowElement succ, Completion c
+ ) {
+ succInvariant1(b, pred, predSplits, succ, c) and
+ result = predSplits.getASplit()
+ }
+
+ private predicate case2aux(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c
+ ) {
+ exists(Reachability::SameSplitsBlock b |
+ succInvariant1(b, pred, predSplits, succ, c) and
+ (succ = b.getAnElement() implies succ = b)
+ |
+ succInvariant1GetASplit(b, pred, predSplits, succ, c).hasExit(pred, succ, c)
+ or
+ any(SplitImpl split).hasEntry(pred, succ, c)
+ )
+ }
+
+ /**
+ * Holds if `succSplits` should not inherit a split of kind `sk` from
+ * `predSplits`, except possibly because of a split in `except`.
+ *
+ * The predicate is written using explicit recursion, as opposed to a `forall`,
+ * to avoid negative recursion.
+ */
+ private predicate case2aNoneInheritedOfKindForall(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, SplitKind sk,
+ Splits except
+ ) {
+ case2aux(pred, predSplits, succ, c) and
+ sk.appliesTo(succ) and
+ except = predSplits
+ or
+ exists(Splits mid, SplitImpl split |
+ case2aNoneInheritedOfKindForall(pred, predSplits, succ, c, sk, mid) and
+ mid = TSplitsCons(split, except)
+ |
+ split.getKind() = any(SplitKind sk0 | sk0 != sk and sk0.appliesTo(succ))
+ or
+ split.hasExit(pred, succ, c)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate entryOfKind(
+ ControlFlowElement pred, ControlFlowElement succ, Completion c, SplitImpl split, SplitKind sk
+ ) {
+ split.hasEntry(pred, succ, c) and
+ sk = split.getKind()
+ }
+
+ /** Holds if `succSplits` should not have a split of kind `sk`. */
+ pragma[nomagic]
+ private predicate case2aNoneOfKind(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, SplitKind sk
+ ) {
+ // None inherited from predecessor
+ case2aNoneInheritedOfKindForall(pred, predSplits, succ, c, sk, TSplitsNil()) and
+ // None newly entered into
+ not entryOfKind(pred, succ, c, _, sk)
+ }
+
+ /** Holds if `succSplits` should not have a split of kind `sk` at rank `rnk`. */
+ pragma[nomagic]
+ private predicate case2aNoneAtRank(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, int rnk
+ ) {
+ exists(SplitKind sk | case2aNoneOfKind(pred, predSplits, succ, c, sk) |
+ rnk = sk.getListRank(succ)
+ )
+ }
+
+ pragma[nomagic]
+ private SplitImpl case2auxGetAPredecessorSplit(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c
+ ) {
+ case2aux(pred, predSplits, succ, c) and
+ result = predSplits.getASplit()
+ }
+
+ /** Gets a split that should be in `succSplits`. */
+ pragma[nomagic]
+ private SplitImpl case2aSome(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, SplitKind sk
+ ) {
+ (
+ // Inherited from predecessor
+ result = case2auxGetAPredecessorSplit(pred, predSplits, succ, c) and
+ result.hasSuccessor(pred, succ, c)
+ or
+ // Newly entered into
+ exists(SplitKind sk0 |
+ case2aNoneInheritedOfKindForall(pred, predSplits, succ, c, sk0, TSplitsNil())
+ |
+ entryOfKind(pred, succ, c, result, sk0)
+ )
+ ) and
+ sk = result.getKind()
+ }
+
+ /** Gets a split that should be in `succSplits` at rank `rnk`. */
+ pragma[nomagic]
+ SplitImpl case2aSomeAtRank(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, int rnk
+ ) {
+ exists(SplitKind sk | result = case2aSome(pred, predSplits, succ, c, sk) |
+ rnk = sk.getListRank(succ)
+ )
+ }
+
+ /**
+ * Case 2a.
+ *
+ * As opposed to the other cases, in this case we need to construct a new set
+ * of splits `succSplits`. Since this involves constructing the very IPA type,
+ * we cannot recurse directly over the structure of `succSplits`. Instead, we
+ * recurse over the ranks of all splits that *might* be in `succSplits`.
+ *
+ * - Invariant 1 holds in the base case,
+ * - invariant 2 holds for all splits with rank at least `rnk`,
+ * - invariant 3 holds for all splits in `predSplits`,
+ * - invariant 4 holds for all splits in `succSplits` with rank at least `rnk`,
+ * and
+ * - invariant 5 holds for all splits in `succSplits` with rank at least `rnk`.
+ */
+ predicate case2aFromRank(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits,
+ Completion c, int rnk
+ ) {
+ case2aux(pred, predSplits, succ, c) and
+ succSplits = TSplitsNil() and
+ rnk = max(any(SplitKind sk).getListRank(succ)) + 1
+ or
+ case2aFromRank(pred, predSplits, succ, succSplits, c, rnk + 1) and
+ case2aNoneAtRank(pred, predSplits, succ, c, rnk)
+ or
+ exists(Splits mid, SplitImpl split | split = case2aCons(pred, predSplits, succ, mid, c, rnk) |
+ succSplits = TSplitsCons(split, mid)
+ )
+ }
+
+ pragma[noinline]
+ private SplitImpl case2aCons(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits,
+ Completion c, int rnk
+ ) {
+ case2aFromRank(pred, predSplits, succ, succSplits, c, rnk + 1) and
+ result = case2aSomeAtRank(pred, predSplits, succ, c, rnk)
+ }
+
+ /**
+ * Case 2b.
+ *
+ * Invariants 1, 4, and 5 hold in the base case, and invariants 2 and 3 are
+ * maintained for all splits in `predSplits`, except possibly for the splits
+ * in `except`.
+ *
+ * The predicate is written using explicit recursion, as opposed to a `forall`,
+ * to avoid negative recursion.
+ */
+ private predicate case2bForall(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, Splits except
+ ) {
+ // Invariant 1
+ case2aux(pred, predSplits, succ, c) and
+ // Invariants 4 and 5
+ not any(SplitKind sk).appliesTo(succ) and
+ except = predSplits
+ or
+ exists(SplitImpl split | case2bForallCons(pred, predSplits, succ, c, split, except) |
+ // Invariants 2 and 3
+ split.hasExit(pred, succ, c)
+ )
+ }
+
+ pragma[noinline]
+ private predicate case2bForallCons(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c,
+ SplitImpl exceptHead, Splits exceptTail
+ ) {
+ case2bForall(pred, predSplits, succ, c, TSplitsCons(exceptHead, exceptTail))
+ }
+
+ private predicate case2(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits,
+ Completion c
+ ) {
+ case2aFromRank(pred, predSplits, succ, succSplits, c, 1)
+ or
+ case2bForall(pred, predSplits, succ, c, TSplitsNil()) and
+ succSplits = TSplitsNil()
+ }
+
+ /**
+ * Holds if `succ` with splits `succSplits` is a successor for `pred` with splits
+ * `predSplits`, given that `pred` finishes with completion `c`.
+ */
+ predicate succSplits(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits,
+ Completion c
+ ) {
+ case1(pred, predSplits, succ, c) and
+ succSplits = predSplits
+ or
+ case2(pred, predSplits, succ, succSplits, c)
+ }
+}
+
+import SuccSplits
+
+/** Provides logic for calculating reachable control flow nodes. */
+private module Reachability {
+ /**
+ * Holds if `cfe` is a control flow element where the set of possible splits may
+ * be different from the set of possible splits for one of `cfe`'s predecessors.
+ * That is, `cfe` starts a new block of elements with the same set of splits.
+ */
+ private predicate startsSplits(ControlFlowElement cfe) {
+ scopeFirst(_, cfe)
+ or
+ exists(SplitImpl s |
+ s.hasEntry(_, cfe, _)
+ or
+ s.hasExit(_, cfe, _)
+ )
+ or
+ exists(ControlFlowElement pred, SplitImpl split, Completion c | succ(pred, cfe, c) |
+ split.appliesTo(pred) and
+ not split.hasSuccessor(pred, cfe, c)
+ )
+ }
+
+ private predicate intraSplitsSucc(ControlFlowElement pred, ControlFlowElement succ) {
+ succ(pred, succ, _) and
+ not startsSplits(succ)
+ }
+
+ private predicate splitsBlockContains(ControlFlowElement start, ControlFlowElement cfe) =
+ fastTC(intraSplitsSucc/2)(start, cfe)
+
+ /**
+ * A block of control flow elements where the set of splits is guaranteed
+ * to remain unchanged, represented by the first element in the block.
+ */
+ class SameSplitsBlock extends ControlFlowElement {
+ SameSplitsBlock() { startsSplits(this) }
+
+ /** Gets a control flow element in this block. */
+ ControlFlowElement getAnElement() {
+ splitsBlockContains(this, result)
+ or
+ result = this
+ }
+
+ pragma[noinline]
+ private SameSplitsBlock getASuccessor(Splits predSplits, Splits succSplits) {
+ exists(ControlFlowElement pred | pred = this.getAnElement() |
+ succSplits(pred, predSplits, result, succSplits, _)
+ )
+ }
+
+ /**
+ * Holds if the elements of this block are reachable from a callable entry
+ * point, with the splits `splits`.
+ */
+ predicate isReachable(Splits splits) {
+ // Base case
+ succEntrySplits(_, this, splits, _)
+ or
+ // Recursive case
+ exists(SameSplitsBlock pred, Splits predSplits | pred.isReachable(predSplits) |
+ this = pred.getASuccessor(predSplits, splits)
+ )
+ }
+ }
+}
+
+cached
+private module Cached {
+ /**
+ * If needed, call this predicate from `ControlFlowGraphImplSpecific.qll` in order to
+ * force a stage-dependency on the `ControlFlowGraphImplShared.qll` stage, thereby
+ * collapsing the two stages.
+ */
+ cached
+ predicate forceCachingInSameStage() { any() }
+
+ cached
+ newtype TSplits =
+ TSplitsNil() or
+ TSplitsCons(SplitImpl head, Splits tail) {
+ exists(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, int rnk
+ |
+ case2aFromRank(pred, predSplits, succ, tail, c, rnk + 1) and
+ head = case2aSomeAtRank(pred, predSplits, succ, c, rnk)
+ )
+ or
+ succEntrySplitsCons(_, _, head, tail, _)
+ }
+
+ cached
+ string splitsToString(Splits splits) {
+ splits = TSplitsNil() and
+ result = ""
+ or
+ exists(SplitImpl head, Splits tail, string headString, string tailString |
+ splits = TSplitsCons(head, tail)
+ |
+ headString = head.toString() and
+ tailString = tail.toString() and
+ if tailString = ""
+ then result = headString
+ else
+ if headString = ""
+ then result = tailString
+ else result = headString + ", " + tailString
+ )
+ }
+
+ /**
+ * Internal representation of control flow nodes in the control flow graph.
+ * The control flow graph is pruned for unreachable nodes.
+ */
+ cached
+ newtype TNode =
+ TEntryNode(CfgScope scope) { succEntrySplits(scope, _, _, _) } or
+ TAnnotatedExitNode(CfgScope scope, boolean normal) {
+ exists(Reachability::SameSplitsBlock b, SuccessorType t | b.isReachable(_) |
+ succExitSplits(b.getAnElement(), _, scope, t) and
+ if isAbnormalExitType(t) then normal = false else normal = true
+ )
+ } or
+ TExitNode(CfgScope scope) {
+ exists(Reachability::SameSplitsBlock b | b.isReachable(_) |
+ succExitSplits(b.getAnElement(), _, scope, _)
+ )
+ } or
+ TElementNode(ControlFlowElement cfe, Splits splits) {
+ exists(Reachability::SameSplitsBlock b | b.isReachable(splits) | cfe = b.getAnElement())
+ }
+
+ /** Gets a successor node of a given flow type, if any. */
+ cached
+ TNode getASuccessor(TNode pred, SuccessorType t) {
+ // Callable entry node -> callable body
+ exists(ControlFlowElement succElement, Splits succSplits, CfgScope scope |
+ result = TElementNode(succElement, succSplits) and
+ pred = TEntryNode(scope) and
+ succEntrySplits(scope, succElement, succSplits, t)
+ )
+ or
+ exists(ControlFlowElement predElement, Splits predSplits |
+ pred = TElementNode(predElement, predSplits)
+ |
+ // Element node -> callable exit (annotated)
+ exists(CfgScope scope, boolean normal |
+ result = TAnnotatedExitNode(scope, normal) and
+ succExitSplits(predElement, predSplits, scope, t) and
+ if isAbnormalExitType(t) then normal = false else normal = true
+ )
+ or
+ // Element node -> element node
+ exists(ControlFlowElement succElement, Splits succSplits, Completion c |
+ result = TElementNode(succElement, succSplits)
+ |
+ succSplits(predElement, predSplits, succElement, succSplits, c) and
+ t = getAMatchingSuccessorType(c)
+ )
+ )
+ or
+ // Callable exit (annotated) -> callable exit
+ exists(CfgScope scope |
+ pred = TAnnotatedExitNode(scope, _) and
+ result = TExitNode(scope) and
+ successorTypeIsSimple(t)
+ )
+ }
+
+ /**
+ * Gets a first control flow element executed within `cfe`.
+ */
+ cached
+ ControlFlowElement getAControlFlowEntryNode(ControlFlowElement cfe) { first(cfe, result) }
+
+ /**
+ * Gets a potential last control flow element executed within `cfe`.
+ */
+ cached
+ ControlFlowElement getAControlFlowExitNode(ControlFlowElement cfe) { last(cfe, result, _) }
+}
+
+import Cached
+
+/**
+ * Import this module into a `.ql` file of `@kind graph` to render a CFG. The
+ * graph is restricted to nodes from `RelevantNode`.
+ */
+module TestOutput {
+ abstract class RelevantNode extends Node { }
+
+ query predicate nodes(RelevantNode n, string attr, string val) {
+ attr = "semmle.order" and
+ val =
+ any(int i |
+ n =
+ rank[i](RelevantNode p, Location l |
+ l = p.getLocation()
+ |
+ p
+ order by
+ l.getFile().getBaseName(), l.getFile().getAbsolutePath(), l.getStartLine(),
+ l.getStartColumn()
+ )
+ ).toString()
+ }
+
+ query predicate edges(RelevantNode pred, RelevantNode succ, string attr, string val) {
+ exists(SuccessorType t | succ = getASuccessor(pred, t) |
+ attr = "semmle.label" and
+ if successorTypeIsSimple(t) then val = "" else val = t.toString()
+ )
+ }
+}
+
+/** Provides a set of splitting-related consistency queries. */
+module Consistency {
+ query predicate nonUniqueSetRepresentation(Splits s1, Splits s2) {
+ forex(Split s | s = s1.getASplit() | s = s2.getASplit()) and
+ forex(Split s | s = s2.getASplit() | s = s1.getASplit()) and
+ s1 != s2
+ }
+
+ query predicate breakInvariant2(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits,
+ SplitImpl split, Completion c
+ ) {
+ succSplits(pred, predSplits, succ, succSplits, c) and
+ split = predSplits.getASplit() and
+ split.hasSuccessor(pred, succ, c) and
+ not split = succSplits.getASplit()
+ }
+
+ query predicate breakInvariant3(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits,
+ SplitImpl split, Completion c
+ ) {
+ succSplits(pred, predSplits, succ, succSplits, c) and
+ split = predSplits.getASplit() and
+ split.hasExit(pred, succ, c) and
+ not split.hasEntry(pred, succ, c) and
+ split = succSplits.getASplit()
+ }
+
+ query predicate breakInvariant4(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits,
+ SplitImpl split, Completion c
+ ) {
+ succSplits(pred, predSplits, succ, succSplits, c) and
+ split.hasEntry(pred, succ, c) and
+ not split.getKind() = predSplits.getASplit().getKind() and
+ not split = succSplits.getASplit()
+ }
+
+ query predicate breakInvariant5(
+ ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits,
+ SplitImpl split, Completion c
+ ) {
+ succSplits(pred, predSplits, succ, succSplits, c) and
+ split = succSplits.getASplit() and
+ not (split.hasSuccessor(pred, succ, c) and split = predSplits.getASplit()) and
+ not split.hasEntry(pred, succ, c)
+ }
+
+ query predicate multipleSuccessors(Node node, SuccessorType t, Node successor) {
+ not node instanceof TEntryNode and
+ strictcount(getASuccessor(node, t)) > 1 and
+ successor = getASuccessor(node, t)
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImplSpecific.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImplSpecific.qll
new file mode 100644
index 00000000000..2d018ff616a
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/ControlFlowGraphImplSpecific.qll
@@ -0,0 +1,74 @@
+private import ruby as rb
+private import ControlFlowGraphImpl as Impl
+private import Completion as Comp
+private import codeql.ruby.ast.internal.Synthesis
+private import Splitting as Splitting
+private import codeql.ruby.CFG as CFG
+
+/** The base class for `ControlFlowTree`. */
+class ControlFlowTreeBase extends rb::AstNode {
+ ControlFlowTreeBase() { not any(Synthesis s).excludeFromControlFlowTree(this) }
+}
+
+class ControlFlowElement = rb::AstNode;
+
+class Completion = Comp::Completion;
+
+/**
+ * Holds if `c` represents normal evaluation of a statement or an
+ * expression.
+ */
+predicate completionIsNormal(Completion c) { c instanceof Comp::NormalCompletion }
+
+/**
+ * Holds if `c` represents simple (normal) evaluation of a statement or an
+ * expression.
+ */
+predicate completionIsSimple(Completion c) { c instanceof Comp::SimpleCompletion }
+
+/** Holds if `c` is a valid completion for `e`. */
+predicate completionIsValidFor(Completion c, ControlFlowElement e) { c.isValidFor(e) }
+
+class CfgScope = CFG::CfgScope;
+
+predicate getCfgScope = Impl::getCfgScope/1;
+
+/** Holds if `first` is first executed when entering `scope`. */
+predicate scopeFirst(CfgScope scope, ControlFlowElement first) {
+ scope.(Impl::CfgScope::Range_).entry(first)
+}
+
+/** Holds if `scope` is exited when `last` finishes with completion `c`. */
+predicate scopeLast(CfgScope scope, ControlFlowElement last, Completion c) {
+ scope.(Impl::CfgScope::Range_).exit(last, c)
+}
+
+/** The maximum number of splits allowed for a given node. */
+int maxSplits() { result = 5 }
+
+class SplitKindBase = Splitting::TSplitKind;
+
+class Split = Splitting::Split;
+
+class SuccessorType = CFG::SuccessorType;
+
+/** Gets a successor type that matches completion `c`. */
+SuccessorType getAMatchingSuccessorType(Completion c) { result = c.getAMatchingSuccessorType() }
+
+/**
+ * Holds if `t` represents a simple (normal) successor type, that is, an
+ * instance of `NormalSuccessor`.
+ */
+predicate successorTypeIsSimple(SuccessorType t) {
+ t instanceof CFG::SuccessorTypes::NormalSuccessor
+}
+
+/** Holds if leaving a CFG scope with successor type `t` counts as an abnormal exit. */
+predicate isAbnormalExitType(SuccessorType t) {
+ t instanceof CFG::SuccessorTypes::ExitSuccessor or
+ t instanceof CFG::SuccessorTypes::RaiseSuccessor
+}
+
+class Location = rb::Location;
+
+class Node = CFG::CfgNode;
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/NonReturning.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/NonReturning.qll
new file mode 100644
index 00000000000..e1927a0b1c9
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/NonReturning.qll
@@ -0,0 +1,22 @@
+/** Provides a simple analysis for identifying calls that will not return. */
+
+private import codeql.ruby.AST
+private import Completion
+
+/** A call that definitely does not return (conservative analysis). */
+abstract class NonReturningCall extends MethodCall {
+ /** Gets a valid completion for this non-returning call. */
+ abstract Completion getACompletion();
+}
+
+private class RaiseCall extends NonReturningCall {
+ RaiseCall() { this.getMethodName() = "raise" }
+
+ override RaiseCompletion getACompletion() { not result instanceof NestedCompletion }
+}
+
+private class ExitCall extends NonReturningCall {
+ ExitCall() { this.getMethodName() in ["abort", "exit"] }
+
+ override ExitCompletion getACompletion() { not result instanceof NestedCompletion }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/Splitting.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/Splitting.qll
new file mode 100644
index 00000000000..dd360fe8371
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/controlflow/internal/Splitting.qll
@@ -0,0 +1,336 @@
+/**
+ * Provides classes and predicates relevant for splitting the control flow graph.
+ */
+
+private import codeql.ruby.AST
+private import Completion
+private import ControlFlowGraphImpl
+private import SuccessorTypes
+private import codeql.ruby.controlflow.ControlFlowGraph
+
+cached
+private module Cached {
+ cached
+ newtype TSplitKind =
+ TConditionalCompletionSplitKind() { forceCachingInSameStage() } or
+ TEnsureSplitKind(int nestLevel) { nestLevel = any(Trees::BodyStmtTree t).getNestLevel() }
+
+ cached
+ newtype TSplit =
+ TConditionalCompletionSplit(ConditionalCompletion c) or
+ TEnsureSplit(EnsureSplitting::EnsureSplitType type, int nestLevel) {
+ nestLevel = any(Trees::BodyStmtTree t).getNestLevel()
+ }
+}
+
+import Cached
+
+/** A split for a control flow node. */
+class Split extends TSplit {
+ /** Gets a textual representation of this split. */
+ string toString() { none() }
+}
+
+private module ConditionalCompletionSplitting {
+ /**
+ * A split for conditional completions. For example, in
+ *
+ * ```rb
+ * def method x
+ * if x < 2 and x > 0
+ * puts "x is 1"
+ * end
+ * end
+ * ```
+ *
+ * we record whether `x < 2` and `x > 0` evaluate to `true` or `false`, and
+ * restrict the edges out of `x < 2 and x > 0` accordingly.
+ */
+ class ConditionalCompletionSplit extends Split, TConditionalCompletionSplit {
+ ConditionalCompletion completion;
+
+ ConditionalCompletionSplit() { this = TConditionalCompletionSplit(completion) }
+
+ override string toString() { result = completion.toString() }
+ }
+
+ private class ConditionalCompletionSplitKind extends SplitKind, TConditionalCompletionSplitKind {
+ override int getListOrder() { result = 0 }
+
+ override predicate isEnabled(AstNode n) { this.appliesTo(n) }
+
+ override string toString() { result = "ConditionalCompletion" }
+ }
+
+ int getNextListOrder() { result = 1 }
+
+ private class ConditionalCompletionSplitImpl extends SplitImpl, ConditionalCompletionSplit {
+ override ConditionalCompletionSplitKind getKind() { any() }
+
+ override predicate hasEntry(AstNode pred, AstNode succ, Completion c) {
+ succ(pred, succ, c) and
+ last(succ, _, completion) and
+ (
+ last(succ.(NotExpr).getOperand(), pred, c) and
+ completion.(BooleanCompletion).getDual() = c
+ or
+ last(succ.(LogicalAndExpr).getAnOperand(), pred, c) and
+ completion = c
+ or
+ last(succ.(LogicalOrExpr).getAnOperand(), pred, c) and
+ completion = c
+ or
+ last(succ.(StmtSequence).getLastStmt(), pred, c) and
+ completion = c
+ or
+ last(succ.(ConditionalExpr).getBranch(_), pred, c) and
+ completion = c
+ )
+ }
+
+ override predicate hasEntryScope(CfgScope scope, AstNode succ) { none() }
+
+ override predicate hasExit(AstNode pred, AstNode succ, Completion c) {
+ this.appliesTo(pred) and
+ succ(pred, succ, c) and
+ if c instanceof ConditionalCompletion then completion = c else any()
+ }
+
+ override predicate hasExitScope(CfgScope scope, AstNode last, Completion c) {
+ this.appliesTo(last) and
+ succExit(scope, last, c) and
+ if c instanceof ConditionalCompletion then completion = c else any()
+ }
+
+ override predicate hasSuccessor(AstNode pred, AstNode succ, Completion c) { none() }
+ }
+}
+
+module EnsureSplitting {
+ /**
+ * The type of a split `ensure` node.
+ *
+ * The type represents one of the possible ways of entering an `ensure`
+ * block. For example, if a block ends with a `return` statement, then
+ * the `ensure` block must end with a `return` as well (provided that
+ * the `ensure` block executes normally).
+ */
+ class EnsureSplitType extends SuccessorType {
+ EnsureSplitType() { not this instanceof ConditionalSuccessor }
+
+ /** Holds if this split type matches entry into an `ensure` block with completion `c`. */
+ predicate isSplitForEntryCompletion(Completion c) {
+ if c instanceof NormalCompletion
+ then
+ // If the entry into the `ensure` block completes with any normal completion,
+ // it simply means normal execution after the `ensure` block
+ this instanceof NormalSuccessor
+ else this = c.getAMatchingSuccessorType()
+ }
+ }
+
+ /** A node that belongs to an `ensure` block. */
+ private class EnsureNode extends AstNode {
+ private Trees::BodyStmtTree block;
+
+ EnsureNode() { this = block.getAnEnsureDescendant() }
+
+ int getNestLevel() { result = block.getNestLevel() }
+
+ /** Holds if this node is the entry node in the `ensure` block it belongs to. */
+ predicate isEntryNode() { first(block.getEnsure(), this) }
+ }
+
+ /**
+ * A split for nodes belonging to an `ensure` block, which determines how to
+ * continue execution after leaving the `ensure` block. For example, in
+ *
+ * ```rb
+ * begin
+ * if x
+ * raise "Exception"
+ * end
+ * ensure
+ * puts "Ensure"
+ * end
+ * ```
+ *
+ * all control flow nodes in the `ensure` block have two splits: one representing
+ * normal execution of the body (when `x` evaluates to `false`), and one representing
+ * exceptional execution of the body (when `x` evaluates to `true`).
+ */
+ class EnsureSplit extends Split, TEnsureSplit {
+ private EnsureSplitType type;
+ private int nestLevel;
+
+ EnsureSplit() { this = TEnsureSplit(type, nestLevel) }
+
+ /**
+ * Gets the type of this `ensure` split, that is, how to continue execution after the
+ * `ensure` block.
+ */
+ EnsureSplitType getType() { result = type }
+
+ /** Gets the nesting level. */
+ int getNestLevel() { result = nestLevel }
+
+ override string toString() {
+ if type instanceof NormalSuccessor
+ then result = ""
+ else
+ if nestLevel > 0
+ then result = "ensure(" + nestLevel + "): " + type.toString()
+ else result = "ensure: " + type.toString()
+ }
+ }
+
+ private int getListOrder(EnsureSplitKind kind) {
+ result = ConditionalCompletionSplitting::getNextListOrder() + kind.getNestLevel()
+ }
+
+ int getNextListOrder() {
+ result = max([getListOrder(_) + 1, ConditionalCompletionSplitting::getNextListOrder()])
+ }
+
+ private class EnsureSplitKind extends SplitKind, TEnsureSplitKind {
+ private int nestLevel;
+
+ EnsureSplitKind() { this = TEnsureSplitKind(nestLevel) }
+
+ /** Gets the nesting level. */
+ int getNestLevel() { result = nestLevel }
+
+ override int getListOrder() { result = getListOrder(this) }
+
+ override string toString() { result = "ensure (" + nestLevel + ")" }
+ }
+
+ pragma[noinline]
+ private predicate hasEntry0(AstNode pred, EnsureNode succ, int nestLevel, Completion c) {
+ succ.isEntryNode() and
+ nestLevel = succ.getNestLevel() and
+ succ(pred, succ, c)
+ }
+
+ private class EnsureSplitImpl extends SplitImpl, EnsureSplit {
+ override EnsureSplitKind getKind() { result.getNestLevel() = this.getNestLevel() }
+
+ override predicate hasEntry(AstNode pred, AstNode succ, Completion c) {
+ hasEntry0(pred, succ, this.getNestLevel(), c) and
+ this.getType().isSplitForEntryCompletion(c)
+ }
+
+ override predicate hasEntryScope(CfgScope scope, AstNode first) { none() }
+
+ /**
+ * Holds if this split applies to `pred`, where `pred` is a valid predecessor.
+ */
+ private predicate appliesToPredecessor(AstNode pred) {
+ this.appliesTo(pred) and
+ (succ(pred, _, _) or succExit(_, pred, _))
+ }
+
+ pragma[noinline]
+ private predicate exit0(AstNode pred, Trees::BodyStmtTree block, int nestLevel, Completion c) {
+ this.appliesToPredecessor(pred) and
+ nestLevel = block.getNestLevel() and
+ block.lastInner(pred, c)
+ }
+
+ /**
+ * Holds if `pred` may exit this split with completion `c`. The Boolean
+ * `inherited` indicates whether `c` is an inherited completion from the
+ * body.
+ */
+ private predicate exit(Trees::BodyStmtTree block, AstNode pred, Completion c, boolean inherited) {
+ exists(EnsureSplitType type |
+ exit0(pred, block, this.getNestLevel(), c) and
+ type = this.getType()
+ |
+ if last(block.getEnsure(), pred, c)
+ then
+ // `ensure` block can itself exit with completion `c`: either `c` must
+ // match this split, `c` must be an abnormal completion, or this split
+ // does not require another completion to be recovered
+ inherited = false and
+ (
+ type = c.getAMatchingSuccessorType()
+ or
+ not c instanceof NormalCompletion
+ or
+ type instanceof NormalSuccessor
+ )
+ else (
+ // `ensure` block can exit with inherited completion `c`, which must
+ // match this split
+ inherited = true and
+ type = c.getAMatchingSuccessorType() and
+ not type instanceof NormalSuccessor
+ )
+ )
+ or
+ // If this split is normal, and an outer split can exit based on an inherited
+ // completion, we need to exit this split as well. For example, in
+ //
+ // ```rb
+ // def m(b1, b2)
+ // if b1
+ // return
+ // end
+ // ensure
+ // begin
+ // if b2
+ // raise "Exception"
+ // end
+ // ensure
+ // puts "inner ensure"
+ // end
+ // end
+ // ```
+ //
+ // if the outer split for `puts "inner ensure"` is `return` and the inner split
+ // is "normal" (corresponding to `b1 = true` and `b2 = false`), then the inner
+ // split must be able to exit with a `return` completion.
+ this.appliesToPredecessor(pred) and
+ exists(EnsureSplitImpl outer |
+ outer.getNestLevel() = this.getNestLevel() - 1 and
+ outer.exit(_, pred, c, inherited) and
+ this.getType() instanceof NormalSuccessor and
+ inherited = true
+ )
+ }
+
+ override predicate hasExit(AstNode pred, AstNode succ, Completion c) {
+ succ(pred, succ, c) and
+ (
+ exit(_, pred, c, _)
+ or
+ exit(_, pred, c.(NestedBreakCompletion).getAnInnerCompatibleCompletion(), _)
+ )
+ }
+
+ override predicate hasExitScope(CfgScope scope, AstNode last, Completion c) {
+ succExit(scope, last, c) and
+ (
+ exit(_, last, c, _)
+ or
+ exit(_, last, c.(NestedBreakCompletion).getAnInnerCompatibleCompletion(), _)
+ )
+ }
+
+ override predicate hasSuccessor(AstNode pred, AstNode succ, Completion c) {
+ this.appliesToPredecessor(pred) and
+ succ(pred, succ, c) and
+ succ =
+ any(EnsureNode en |
+ if en.isEntryNode()
+ then
+ // entering a nested `ensure` block
+ en.getNestLevel() > this.getNestLevel()
+ else
+ // staying in the same (possibly nested) `ensure` block as `pred`
+ en.getNestLevel() >= this.getNestLevel()
+ )
+ }
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/BarrierGuards.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/BarrierGuards.qll
new file mode 100644
index 00000000000..0c0ca749eac
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/BarrierGuards.qll
@@ -0,0 +1,75 @@
+/** Provides commonly used barriers to dataflow. */
+
+private import ruby
+private import codeql.ruby.DataFlow
+private import codeql.ruby.CFG
+
+/**
+ * A validation of a value by comparing with a constant string value, for example
+ * in:
+ *
+ * ```rb
+ * dir = params[:order]
+ * dir = "DESC" unless dir == "ASC"
+ * User.order("name #{dir}")
+ * ```
+ *
+ * the equality operation guards against `dir` taking arbitrary values when used
+ * in the `order` call.
+ */
+class StringConstCompare extends DataFlow::BarrierGuard,
+ CfgNodes::ExprNodes::ComparisonOperationCfgNode {
+ private CfgNode checkedNode;
+ // The value of the condition that results in the node being validated.
+ private boolean checkedBranch;
+
+ StringConstCompare() {
+ exists(CfgNodes::ExprNodes::StringLiteralCfgNode strLitNode |
+ this.getExpr() instanceof EqExpr and checkedBranch = true
+ or
+ this.getExpr() instanceof CaseEqExpr and checkedBranch = true
+ or
+ this.getExpr() instanceof NEExpr and checkedBranch = false
+ |
+ this.getLeftOperand() = strLitNode and this.getRightOperand() = checkedNode
+ or
+ this.getLeftOperand() = checkedNode and this.getRightOperand() = strLitNode
+ )
+ }
+
+ override predicate checks(CfgNode expr, boolean branch) {
+ expr = checkedNode and branch = checkedBranch
+ }
+}
+
+/**
+ * A validation of a value by checking for inclusion in an array of string
+ * literal values, for example in:
+ *
+ * ```rb
+ * name = params[:user_name]
+ * if %w(alice bob charlie).include? name
+ * User.find_by("username = #{name}")
+ * end
+ * ```
+ *
+ * the `include?` call guards against `name` taking arbitrary values when used
+ * in the `find_by` call.
+ */
+//
+class StringConstArrayInclusionCall extends DataFlow::BarrierGuard,
+ CfgNodes::ExprNodes::MethodCallCfgNode {
+ private CfgNode checkedNode;
+
+ StringConstArrayInclusionCall() {
+ exists(ArrayLiteral aLit |
+ this.getExpr().getMethodName() = "include?" and
+ this.getExpr().getReceiver() = aLit
+ |
+ forall(Expr elem | elem = aLit.getAnElement() | elem instanceof StringLiteral) and
+ this.getArgument(0) = checkedNode
+ )
+ }
+
+ override predicate checks(CfgNode expr, boolean branch) { expr = checkedNode and branch = true }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/FlowSummary.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/FlowSummary.qll
new file mode 100644
index 00000000000..ddd44329317
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/FlowSummary.qll
@@ -0,0 +1,125 @@
+/** Provides classes and predicates for defining flow summaries. */
+
+import ruby
+import codeql.ruby.DataFlow
+private import internal.FlowSummaryImpl as Impl
+private import internal.DataFlowDispatch
+
+// import all instances below
+private module Summaries { }
+
+class SummaryComponent = Impl::Public::SummaryComponent;
+
+/** Provides predicates for constructing summary components. */
+module SummaryComponent {
+ private import Impl::Public::SummaryComponent as SC
+
+ predicate parameter = SC::parameter/1;
+
+ predicate argument = SC::argument/1;
+
+ predicate content = SC::content/1;
+
+ /** Gets a summary component that represents a qualifier. */
+ SummaryComponent qualifier() { result = argument(-1) }
+
+ /** Gets a summary component that represents a block argument. */
+ SummaryComponent block() { result = argument(-2) }
+
+ /** Gets a summary component that represents the return value of a call. */
+ SummaryComponent return() { result = SC::return(any(NormalReturnKind rk)) }
+}
+
+class SummaryComponentStack = Impl::Public::SummaryComponentStack;
+
+/** Provides predicates for constructing stacks of summary components. */
+module SummaryComponentStack {
+ private import Impl::Public::SummaryComponentStack as SCS
+
+ predicate singleton = SCS::singleton/1;
+
+ predicate push = SCS::push/2;
+
+ predicate argument = SCS::argument/1;
+
+ /** Gets a singleton stack representing a qualifier. */
+ SummaryComponentStack qualifier() { result = singleton(SummaryComponent::qualifier()) }
+
+ /** Gets a singleton stack representing a block argument. */
+ SummaryComponentStack block() { result = singleton(SummaryComponent::block()) }
+
+ /** Gets a singleton stack representing the return value of a call. */
+ SummaryComponentStack return() { result = singleton(SummaryComponent::return()) }
+}
+
+/** A callable with a flow summary, identified by a unique string. */
+abstract class SummarizedCallable extends LibraryCallable {
+ bindingset[this]
+ SummarizedCallable() { any() }
+
+ /**
+ * Holds if data may flow from `input` to `output` through this callable.
+ *
+ * `preservesValue` indicates whether this is a value-preserving step
+ * or a taint-step.
+ *
+ * Input specifications are restricted to stacks that end with
+ * `SummaryComponent::argument(_)`, preceded by zero or more
+ * `SummaryComponent::return()` or `SummaryComponent::content(_)` components.
+ *
+ * Output specifications are restricted to stacks that end with
+ * `SummaryComponent::return()` or `SummaryComponent::argument(_)`.
+ *
+ * Output stacks ending with `SummaryComponent::return()` can be preceded by zero
+ * or more `SummaryComponent::content(_)` components.
+ *
+ * Output stacks ending with `SummaryComponent::argument(_)` can be preceded by an
+ * optional `SummaryComponent::parameter(_)` component, which in turn can be preceded
+ * by zero or more `SummaryComponent::content(_)` components.
+ */
+ pragma[nomagic]
+ predicate propagatesFlow(
+ SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
+ ) {
+ none()
+ }
+
+ /**
+ * Same as
+ *
+ * ```ql
+ * propagatesFlow(
+ * SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
+ * )
+ * ```
+ *
+ * but uses an external (string) representation of the input and output stacks.
+ */
+ pragma[nomagic]
+ predicate propagatesFlowExt(string input, string output, boolean preservesValue) { none() }
+
+ /**
+ * Holds if values stored inside `content` are cleared on objects passed as
+ * the `i`th argument to this callable.
+ */
+ pragma[nomagic]
+ predicate clearsContent(int i, DataFlow::Content content) { none() }
+}
+
+private class SummarizedCallableAdapter extends Impl::Public::SummarizedCallable {
+ private SummarizedCallable sc;
+
+ SummarizedCallableAdapter() { this = TLibraryCallable(sc) }
+
+ final override predicate propagatesFlow(
+ SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
+ ) {
+ sc.propagatesFlow(input, output, preservesValue)
+ }
+
+ final override predicate clearsContent(int i, DataFlow::Content content) {
+ sc.clearsContent(i, content)
+ }
+}
+
+class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack;
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/RemoteFlowSources.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/RemoteFlowSources.qll
new file mode 100644
index 00000000000..617bfd8678e
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/RemoteFlowSources.qll
@@ -0,0 +1,37 @@
+/**
+ * Provides an extension point for modeling user-controlled data.
+ * Such data is often used as data-flow sources in security queries.
+ */
+
+private import codeql.ruby.dataflow.internal.DataFlowPublic as DataFlow
+// Need to import since frameworks can extend `RemoteFlowSource::Range`
+private import codeql.ruby.Frameworks
+
+/**
+ * A data flow source of remote user input.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `RemoteFlowSource::Range` instead.
+ */
+class RemoteFlowSource extends DataFlow::Node {
+ RemoteFlowSource::Range self;
+
+ RemoteFlowSource() { this = self }
+
+ /** Gets a string that describes the type of this remote flow source. */
+ string getSourceType() { result = self.getSourceType() }
+}
+
+/** Provides a class for modeling new sources of remote user input. */
+module RemoteFlowSource {
+ /**
+ * A data flow source of remote user input.
+ *
+ * Extend this class to model new APIs. If you want to refine existing API models,
+ * extend `RemoteFlowSource` instead.
+ */
+ abstract class Range extends DataFlow::Node {
+ /** Gets a string that describes the type of this remote flow source. */
+ abstract string getSourceType();
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/SSA.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/SSA.qll
new file mode 100644
index 00000000000..dedfcd4e3de
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/SSA.qll
@@ -0,0 +1,385 @@
+/**
+ * Provides the module `Ssa` for working with static single assignment (SSA) form.
+ */
+
+/**
+ * Provides classes for working with static single assignment (SSA) form.
+ */
+module Ssa {
+ private import codeql.Locations
+ private import codeql.ruby.CFG
+ private import codeql.ruby.ast.Variable
+ private import internal.SsaImplCommon as SsaImplCommon
+ private import internal.SsaImpl as SsaImpl
+ private import CfgNodes::ExprNodes
+
+ /** A static single assignment (SSA) definition. */
+ class Definition extends SsaImplCommon::Definition {
+ /**
+ * Gets the control flow node of this SSA definition, if any. Phi nodes are
+ * examples of SSA definitions without a control flow node, as they are
+ * modelled at index `-1` in the relevant basic block.
+ */
+ final CfgNode getControlFlowNode() {
+ exists(BasicBlock bb, int i | this.definesAt(_, bb, i) | result = bb.getNode(i))
+ }
+
+ /**
+ * Gets a control-flow node that reads the value of this SSA definition.
+ *
+ * Example:
+ *
+ * ```rb
+ * def m b # defines b_0
+ * i = 0 # defines i_0
+ * puts i # reads i_0
+ * puts i + 1 # reads i_0
+ * if b # reads b_0
+ * i = 1 # defines i_1
+ * puts i # reads i_1
+ * puts i + 1 # reads i_1
+ * else
+ * i = 2 # defines i_2
+ * puts i # reads i_2
+ * puts i + 1 # reads i_2
+ * end
+ * # defines i_3 = phi(i_1, i_2)
+ * puts i # reads i_3
+ * end
+ * ```
+ */
+ final VariableReadAccessCfgNode getARead() { result = SsaImpl::getARead(this) }
+
+ /**
+ * Gets a first control-flow node that reads the value of this SSA definition.
+ * That is, a read that can be reached from this definition without passing
+ * through other reads.
+ *
+ * Example:
+ *
+ * ```rb
+ * def m b # defines b_0
+ * i = 0 # defines i_0
+ * puts i # first read of i_0
+ * puts i + 1
+ * if b # first read of b_0
+ * i = 1 # defines i_1
+ * puts i # first read of i_1
+ * puts i + 1
+ * else
+ * i = 2 # defines i_2
+ * puts i # first read of i_2
+ * puts i + 1
+ * end
+ * # defines i_3 = phi(i_1, i_2)
+ * puts i # first read of i_3
+ * end
+ * ```
+ */
+ final VariableReadAccessCfgNode getAFirstRead() { SsaImpl::firstRead(this, result) }
+
+ /**
+ * Gets a last control-flow node that reads the value of this SSA definition.
+ * That is, a read that can reach the end of the enclosing CFG scope, or another
+ * SSA definition for the source variable, without passing through any other read.
+ *
+ * Example:
+ *
+ * ```rb
+ * def m b # defines b_0
+ * i = 0 # defines i_0
+ * puts i
+ * puts i + 1 # last read of i_0
+ * if b # last read of b_0
+ * i = 1 # defines i_1
+ * puts i
+ * puts i + 1 # last read of i_1
+ * else
+ * i = 2 # defines i_2
+ * puts i
+ * puts i + 1 # last read of i_2
+ * end
+ * # defines i_3 = phi(i_1, i_2)
+ * puts i # last read of i_3
+ * end
+ * ```
+ */
+ final VariableReadAccessCfgNode getALastRead() { SsaImpl::lastRead(this, result) }
+
+ /**
+ * Holds if `read1` and `read2` are adjacent reads of this SSA definition.
+ * That is, `read2` can be reached from `read1` without passing through
+ * another read.
+ *
+ * Example:
+ *
+ * ```rb
+ * def m b
+ * i = 0 # defines i_0
+ * puts i # reads i_0 (read1)
+ * puts i + 1 # reads i_0 (read2)
+ * if b
+ * i = 1 # defines i_1
+ * puts i # reads i_1 (read1)
+ * puts i + 1 # reads i_1 (read2)
+ * else
+ * i = 2 # defines i_2
+ * puts i # reads i_2 (read1)
+ * puts i + 1 # reads i_2 (read2)
+ * end
+ * puts i
+ * end
+ * ```
+ */
+ final predicate hasAdjacentReads(
+ VariableReadAccessCfgNode read1, VariableReadAccessCfgNode read2
+ ) {
+ SsaImpl::adjacentReadPair(this, read1, read2)
+ }
+
+ /**
+ * Gets an SSA definition whose value can flow to this one in one step. This
+ * includes inputs to phi nodes and the prior definitions of uncertain writes.
+ */
+ private Definition getAPhiInputOrPriorDefinition() {
+ result = this.(PhiNode).getAnInput() or
+ result = this.(CapturedCallDefinition).getPriorDefinition()
+ }
+
+ /**
+ * Gets a definition that ultimately defines this SSA definition and is
+ * not itself a phi node.
+ *
+ * Example:
+ *
+ * ```rb
+ * def m b
+ * i = 0 # defines i_0
+ * puts i
+ * puts i + 1
+ * if b
+ * i = 1 # defines i_1
+ * puts i
+ * puts i + 1
+ * else
+ * i = 2 # defines i_2
+ * puts i
+ * puts i + 1
+ * end
+ * # defines i_3 = phi(i_1, i_2); ultimate definitions are i_1 and i_2
+ * puts i
+ * end
+ * ```
+ */
+ final Definition getAnUltimateDefinition() {
+ result = this.getAPhiInputOrPriorDefinition*() and
+ not result instanceof PhiNode
+ }
+
+ override string toString() { result = this.getControlFlowNode().toString() }
+
+ /** Gets the location of this SSA definition. */
+ Location getLocation() { result = this.getControlFlowNode().getLocation() }
+ }
+
+ /**
+ * An SSA definition that corresponds to a write. For example `x = 10` in
+ *
+ * ```rb
+ * x = 10
+ * puts x
+ * ```
+ */
+ class WriteDefinition extends Definition, SsaImplCommon::WriteDefinition {
+ private VariableWriteAccess write;
+
+ WriteDefinition() {
+ exists(BasicBlock bb, int i, Variable v |
+ this.definesAt(v, bb, i) and
+ SsaImpl::variableWriteActual(bb, i, v, write)
+ )
+ }
+
+ /** Gets the underlying write access. */
+ final VariableWriteAccess getWriteAccess() { result = write }
+
+ /**
+ * Holds if this SSA definition represents a direct assignment of `value`
+ * to the underlying variable.
+ */
+ predicate assigns(CfgNodes::ExprCfgNode value) {
+ exists(CfgNodes::ExprNodes::AssignExprCfgNode a, BasicBlock bb, int i |
+ this.definesAt(_, bb, i) and
+ a = bb.getNode(i) and
+ value = a.getRhs()
+ )
+ }
+
+ final override string toString() { result = Definition.super.toString() }
+
+ final override Location getLocation() { result = this.getControlFlowNode().getLocation() }
+ }
+
+ /**
+ * An SSA definition inserted at the beginning of a scope to represent an
+ * uninitialized local variable. For example, in
+ *
+ * ```rb
+ * def m
+ * x = 10 if b
+ * puts x
+ * end
+ * ```
+ *
+ * since the assignment to `x` is conditional, an uninitialized definition for
+ * `x` is inserted at the start of `m`.
+ */
+ class UninitializedDefinition extends Definition, SsaImplCommon::WriteDefinition {
+ UninitializedDefinition() {
+ exists(BasicBlock bb, int i, Variable v |
+ this.definesAt(v, bb, i) and
+ SsaImpl::uninitializedWrite(bb, i, v)
+ )
+ }
+
+ final override string toString() { result = "" }
+
+ final override Location getLocation() { result = this.getBasicBlock().getLocation() }
+ }
+
+ /**
+ * An SSA definition inserted at the beginning of a scope to represent a
+ * captured local variable. For example, in
+ *
+ * ```rb
+ * def m x
+ * y = 0
+ * x.times do |x|
+ * y += x
+ * end
+ * return y
+ * end
+ * ```
+ *
+ * an entry definition for `y` is inserted at the start of the `do` block.
+ */
+ class CapturedEntryDefinition extends Definition, SsaImplCommon::WriteDefinition {
+ CapturedEntryDefinition() {
+ exists(BasicBlock bb, int i, Variable v |
+ this.definesAt(v, bb, i) and
+ SsaImpl::capturedEntryWrite(bb, i, v)
+ )
+ }
+
+ final override string toString() { result = "" }
+
+ override Location getLocation() { result = this.getBasicBlock().getLocation() }
+ }
+
+ /**
+ * An SSA definition inserted at a call that may update the value of a captured
+ * variable. For example, in
+ *
+ * ```rb
+ * def m x
+ * y = 0
+ * x.times do |x|
+ * y += x
+ * end
+ * return y
+ * end
+ * ```
+ *
+ * a definition for `y` is inserted at the call to `times`.
+ */
+ class CapturedCallDefinition extends Definition, SsaImplCommon::UncertainWriteDefinition {
+ CapturedCallDefinition() {
+ exists(Variable v, BasicBlock bb, int i |
+ this.definesAt(v, bb, i) and
+ SsaImpl::capturedCallWrite(bb, i, v)
+ )
+ }
+
+ /**
+ * Gets the immediately preceding definition. Since this update is uncertain,
+ * the value from the preceding definition might still be valid.
+ */
+ final Definition getPriorDefinition() { result = SsaImpl::uncertainWriteDefinitionInput(this) }
+
+ override string toString() { result = this.getControlFlowNode().toString() }
+ }
+
+ /**
+ * A phi node. For example, in
+ *
+ * ```rb
+ * if b
+ * x = 0
+ * else
+ * x = 1
+ * end
+ * puts x
+ * ```
+ *
+ * a phi node for `x` is inserted just before the call `puts x`.
+ */
+ class PhiNode extends Definition, SsaImplCommon::PhiNode {
+ /**
+ * Gets an input of this phi node.
+ *
+ * Example:
+ *
+ * ```rb
+ * def m b
+ * i = 0 # defines i_0
+ * puts i
+ * puts i + 1
+ * if b
+ * i = 1 # defines i_1
+ * puts i
+ * puts i + 1
+ * else
+ * i = 2 # defines i_2
+ * puts i
+ * puts i + 1
+ * end
+ * # defines i_3 = phi(i_1, i_2); inputs are i_1 and i_2
+ * puts i
+ * end
+ * ```
+ */
+ final Definition getAnInput() { this.hasInputFromBlock(result, _) }
+
+ /** Holds if `inp` is an input to this phi node along the edge originating in `bb`. */
+ predicate hasInputFromBlock(Definition inp, BasicBlock bb) {
+ inp = SsaImpl::phiHasInputFromBlock(this, bb)
+ }
+
+ private string getSplitString() {
+ result = this.getBasicBlock().getFirstNode().(CfgNodes::AstCfgNode).getSplitsString()
+ }
+
+ override string toString() {
+ exists(string prefix |
+ prefix = "[" + this.getSplitString() + "] "
+ or
+ not exists(this.getSplitString()) and
+ prefix = ""
+ |
+ result = prefix + "phi"
+ )
+ }
+
+ /*
+ * The location of a phi node is the same as the location of the first node
+ * in the basic block in which it is defined.
+ *
+ * Strictly speaking, the node is *before* the first node, but such a location
+ * does not exist in the source program.
+ */
+
+ final override Location getLocation() {
+ result = this.getBasicBlock().getFirstNode().getLocation()
+ }
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll
new file mode 100644
index 00000000000..d3cddf8a3a0
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll
@@ -0,0 +1,459 @@
+private import ruby
+private import codeql.ruby.CFG
+private import DataFlowPrivate
+private import codeql.ruby.typetracking.TypeTracker
+private import codeql.ruby.ast.internal.Module
+private import FlowSummaryImpl as FlowSummaryImpl
+private import codeql.ruby.dataflow.FlowSummary
+
+newtype TReturnKind =
+ TNormalReturnKind() or // the branch wrapped by `NormalReturnKind`
+ TBreakReturnKind() // the branch wrapped by `BreakReturnKind`
+
+/**
+ * Gets an out-node at which the value that `call` returns with return kind
+ * `kind` can be read.
+ */
+OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { result.getCall(kind) = call }
+
+/**
+ * A return kind. A return kind describes how a value can be returned
+ * from a callable.
+ */
+abstract class ReturnKind extends TReturnKind {
+ /** Gets a textual representation of this return kind. */
+ abstract string toString();
+}
+
+/**
+ * A value returned from a callable using a `return` statement or an expression
+ * body, that is, a "normal" return. Its textual representation is `"return"`.
+ */
+class NormalReturnKind extends ReturnKind, TNormalReturnKind {
+ override string toString() { result = "return" }
+}
+
+/**
+ * A value returned from a callable using a `break` statement; rendered as `"break"`.
+ */
+class BreakReturnKind extends ReturnKind, TBreakReturnKind {
+ override string toString() { result = "break" }
+}
+
+/** A callable defined in library code, identified by a unique string. */
+abstract class LibraryCallable extends string {
+ bindingset[this]
+ LibraryCallable() { any() } // no restriction at this level: any bound string is accepted
+
+ /** Gets a call to this library callable. */
+ abstract Call getACall();
+}
+
+/**
+ * A callable: either a callable found in source code, or a callable that is
+ * defined in library code.
+ */
+class DataFlowCallable extends TDataFlowCallable {
+ /** Gets the source code callable wrapped by this callable, if any. */
+ Callable asCallable() { TCfgScope(result) = this }
+
+ /** Gets the library callable wrapped by this callable, if any. */
+ LibraryCallable asLibraryCallable() { TLibraryCallable(result) = this }
+
+ /** Gets a textual representation of this callable. */
+ string toString() { result = this.asCallable().toString() or result = this.asLibraryCallable() }
+
+ /** Gets the location of this callable, taken from the underlying source code callable. */
+ Location getLocation() { exists(Callable c | c = this.asCallable() | result = c.getLocation()) }
+}
+
+/**
+ * A call. This includes calls from source code, as well as call(back)s
+ * inside library callables with a flow summary.
+ */
+class DataFlowCall extends TDataFlowCall {
+ /** Gets the enclosing callable. */
+ DataFlowCallable getEnclosingCallable() { none() }
+
+ /** Gets the underlying source code call, if any. */
+ CfgNodes::ExprNodes::CallCfgNode asCall() { none() }
+
+ /** Gets a textual representation of this call. */
+ string toString() { none() }
+
+ /** Gets the location of this call. */
+ Location getLocation() { none() }
+
+ /**
+ * Holds if this element is at the specified location.
+ * The location spans column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in file `filepath`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ }
+}
+
+/**
+ * A call synthesized inside a callable that has a flow summary.
+ *
+ * For example, in
+ * ```rb
+ * ints.each do |i|
+ * puts i
+ * end
+ * ```
+ *
+ * the block argument is called from inside `each`.
+ */
+class SummaryCall extends DataFlowCall, TSummaryCall {
+ private FlowSummaryImpl::Public::SummarizedCallable summarized;
+ private DataFlow::Node target;
+
+ SummaryCall() { this = TSummaryCall(summarized, target) }
+
+ /** Gets the data flow node that is the target of this call. */
+ DataFlow::Node getReceiver() { result = target }
+
+ override DataFlowCallable getEnclosingCallable() { result = summarized }
+
+ override string toString() { result = "[summary] call to " + target + " in " + summarized }
+
+ override Location getLocation() { result = summarized.getLocation() }
+}
+
+private class NormalCall extends DataFlowCall, TNormalCall {
+ private CfgNodes::ExprNodes::CallCfgNode callNode;
+
+ NormalCall() { this = TNormalCall(callNode) }
+
+ override CfgNodes::ExprNodes::CallCfgNode asCall() { result = callNode }
+
+ override DataFlowCallable getEnclosingCallable() { result = TCfgScope(callNode.getScope()) }
+
+ override string toString() { result = callNode.toString() }
+
+ override Location getLocation() { result = callNode.getLocation() }
+}
+
+pragma[nomagic]
+private predicate methodCall(
+ CfgNodes::ExprNodes::CallCfgNode call, DataFlow::LocalSourceNode sourceNode, string method
+) {
+ exists(DataFlow::Node receiverNode |
+ receiverNode.asExpr() = call.getReceiver() and
+ sourceNode.flowsTo(receiverNode) and
+ method = call.getExpr().(MethodCall).getMethodName()
+ )
+}
+
+private Block yieldCall(CfgNodes::ExprNodes::CallCfgNode call) {
+ exists(BlockParameterNode blockParam |
+ call.getExpr() instanceof YieldCall and
+ call.getExpr().getEnclosingMethod() = blockParam.getMethod() and
+ blockParam = trackBlock(result)
+ )
+}
+
+pragma[nomagic]
+private predicate superCall(CfgNodes::ExprNodes::CallCfgNode call, Module superClass, string method) {
+ exists(Module enclosingModule |
+ call.getExpr() instanceof SuperCall and
+ enclosingModule = call.getExpr().getEnclosingModule().getModule() and
+ method = call.getExpr().getEnclosingMethod().getName() and
+ superClass = enclosingModule.getSuperClass()
+ )
+}
+
+pragma[nomagic]
+private predicate instanceMethodCall(CfgNodes::ExprNodes::CallCfgNode call, Module tp, string method) {
+ exists(DataFlow::LocalSourceNode receiverSource |
+ receiverSource = trackInstance(tp) and
+ methodCall(call, receiverSource, method)
+ )
+}
+
+cached
+private module Cached {
+ cached
+ newtype TDataFlowCallable =
+ TCfgScope(CfgScope scope) or // a callable from source code
+ TLibraryCallable(LibraryCallable callable) // a callable defined in library code
+
+ cached
+ newtype TDataFlowCall =
+ TNormalCall(CfgNodes::ExprNodes::CallCfgNode c) or // a call from source code
+ TSummaryCall(FlowSummaryImpl::Public::SummarizedCallable c, DataFlow::Node receiver) {
+ FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
+ }
+
+ cached
+ CfgScope getTarget(CfgNodes::ExprNodes::CallCfgNode call) {
+ // Temporarily disable operation resolution (due to bad performance)
+ not call.getExpr() instanceof Operation and
+ (
+ exists(string method |
+ exists(Module tp | // instance method, looked up on the tracked class `tp` of the receiver
+ instanceMethodCall(call, tp, method) and
+ result = lookupMethod(tp, method) and
+ if result.(Method).isPrivate() // private targets additionally require a `self` receiver
+ then
+ exists(Self self |
+ self = call.getReceiver().getExpr() and
+ pragma[only_bind_out](self.getEnclosingModule().getModule().getSuperClass*()) =
+ pragma[only_bind_out](result.getEnclosingModule().getModule())
+ ) and
+ // For now, we restrict the scope of top-level declarations to their file.
+ // This may remove some plausible targets, but also removes a lot of
+ // implausible targets
+ if result.getEnclosingModule() instanceof Toplevel
+ then result.getFile() = call.getFile()
+ else any()
+ else any()
+ )
+ or
+ exists(DataFlow::LocalSourceNode sourceNode | // singleton method tracked to the receiver
+ methodCall(call, sourceNode, method) and
+ sourceNode = trackSingletonMethod(result, method)
+ )
+ )
+ or
+ exists(Module superClass, string method | // `super`: same-named method from the superclass
+ superCall(call, superClass, method) and
+ result = lookupMethod(superClass, method)
+ )
+ or
+ result = yieldCall(call) // `yield`: a block tracked to the enclosing method's block parameter
+ )
+ }
+}
+
+import Cached
+
+private DataFlow::LocalSourceNode trackInstance(Module tp, TypeTracker t) {
+ t.start() and // base case: nodes whose class `tp` is known without any tracking step
+ (
+ result.asExpr().getExpr() instanceof NilLiteral and tp = TResolved("NilClass")
+ or
+ result.asExpr().getExpr().(BooleanLiteral).isFalse() and tp = TResolved("FalseClass")
+ or
+ result.asExpr().getExpr().(BooleanLiteral).isTrue() and tp = TResolved("TrueClass")
+ or
+ result.asExpr().getExpr() instanceof IntegerLiteral and tp = TResolved("Integer")
+ or
+ result.asExpr().getExpr() instanceof FloatLiteral and tp = TResolved("Float")
+ or
+ result.asExpr().getExpr() instanceof RationalLiteral and tp = TResolved("Rational")
+ or
+ result.asExpr().getExpr() instanceof ComplexLiteral and tp = TResolved("Complex")
+ or
+ result.asExpr().getExpr() instanceof StringlikeLiteral and tp = TResolved("String")
+ or
+ exists(ConstantReadAccess array, MethodCall mc | // a `[]` call on the global `Array` constant
+ result.asExpr().getExpr() = mc and
+ mc.getMethodName() = "[]" and
+ mc.getReceiver() = array and
+ array.getName() = "Array" and
+ array.hasGlobalScope() and
+ tp = TResolved("Array")
+ )
+ or
+ result.asExpr().getExpr() instanceof HashLiteral and tp = TResolved("Hash")
+ or
+ // NOTE(review): presumably because a method definition evaluates to a symbol - confirm
+ result.asExpr().getExpr() instanceof MethodBase and tp = TResolved("Symbol")
+ or
+ result.asParameter() instanceof BlockParameter and tp = TResolved("Proc")
+ or
+ result.asExpr().getExpr() instanceof Lambda and tp = TResolved("Proc")
+ or
+ exists(CfgNodes::ExprNodes::CallCfgNode call, DataFlow::Node nodeTo | // `new` on module `tp`
+ call.getExpr().(MethodCall).getMethodName() = "new" and
+ nodeTo.asExpr() = call.getReceiver() and
+ trackModule(tp).flowsTo(nodeTo) and
+ result.asExpr() = call
+ )
+ or
+ // `self` in method
+ exists(Self self, Method enclosing |
+ self = result.asExpr().getExpr() and
+ enclosing = self.getEnclosingMethod() and
+ tp = enclosing.getEnclosingModule().getModule() and
+ not self.getEnclosingModule().getEnclosingMethod() = enclosing
+ )
+ or
+ // `self` in singleton method
+ exists(Self self, MethodBase enclosing |
+ self = result.asExpr().getExpr() and
+ flowsToSingletonMethodObject(trackInstance(tp), enclosing) and
+ enclosing = self.getEnclosingMethod() and
+ not self.getEnclosingModule().getEnclosingMethod() = enclosing
+ )
+ or
+ // `self` in top-level
+ exists(Self self, Toplevel enclosing |
+ self = result.asExpr().getExpr() and
+ enclosing = self.getEnclosingModule() and
+ tp = TResolved("Object") and
+ not self.getEnclosingMethod().getEnclosingModule() = enclosing
+ )
+ or
+ // a module or class
+ exists(Module m |
+ result = trackModule(m) and
+ if m.isClass() then tp = TResolved("Class") else tp = TResolved("Module")
+ )
+ )
+ or
+ exists(TypeTracker t2, StepSummary summary | // recursive case: one more tracking step
+ result = trackInstanceRec(tp, t2, summary) and t = t2.append(summary)
+ )
+}
+
+pragma[nomagic]
+private DataFlow::LocalSourceNode trackInstanceRec(Module tp, TypeTracker t, StepSummary summary) {
+ StepSummary::step(trackInstance(tp, t), result, summary) // one step on from a tracked node
+}
+
+private DataFlow::LocalSourceNode trackInstance(Module tp) {
+ exists(TypeTracker done | done = TypeTracker::end() | result = trackInstance(tp, done))
+}
+
+private DataFlow::LocalSourceNode trackBlock(Block block, TypeTracker t) {
+ block = result.asExpr().getExpr() and t.start()
+ or
+ exists(TypeTracker prev, StepSummary stepSummary |
+ t = prev.append(stepSummary) and result = trackBlockRec(block, prev, stepSummary)
+ )
+}
+
+pragma[nomagic]
+private DataFlow::LocalSourceNode trackBlockRec(Block block, TypeTracker t, StepSummary summary) {
+ StepSummary::step(trackBlock(block, t), result, summary) // one step on from a tracked node
+}
+
+private DataFlow::LocalSourceNode trackBlock(Block block) {
+ exists(TypeTracker done | done = TypeTracker::end() | result = trackBlock(block, done))
+}
+
+private predicate singletonMethod(MethodBase method, Expr object) {
+ exists(SingletonClass sclass |
+ method = sclass.getAMethod() and method instanceof Method and object = sclass.getValue()
+ )
+ or
+ object = method.(SingletonMethod).getObject()
+}
+
+pragma[nomagic]
+private predicate flowsToSingletonMethodObject(DataFlow::LocalSourceNode nodeFrom, MethodBase method) {
+ exists(DataFlow::Node nodeTo | // any node reached by local flow, as in `methodCall`
+ nodeFrom.flowsTo(nodeTo) and
+ singletonMethod(method, nodeTo.asExpr().getExpr())
+ )
+}
+
+pragma[nomagic]
+private predicate moduleFlowsToSingletonMethodObject(Module m, MethodBase method) {
+ flowsToSingletonMethodObject(trackModule(m), method) // a reference to `m` is the object
+}
+
+pragma[nomagic]
+private DataFlow::LocalSourceNode trackSingletonMethod0(MethodBase method, TypeTracker t) {
+ t.start() and
+ (
+ exists(Module m | moduleFlowsToSingletonMethodObject(m, method) and result = trackModule(m))
+ or
+ flowsToSingletonMethodObject(result, method)
+ )
+ or
+ exists(TypeTracker prev, StepSummary stepSummary |
+ t = prev.append(stepSummary) and result = trackSingletonMethod0Rec(method, prev, stepSummary)
+ )
+}
+
+pragma[nomagic]
+private DataFlow::LocalSourceNode trackSingletonMethod0Rec(
+ MethodBase method, TypeTracker t, StepSummary summary
+) {
+ StepSummary::step(trackSingletonMethod0(method, t), result, summary) // one step further
+}
+
+pragma[nomagic]
+private DataFlow::LocalSourceNode trackSingletonMethod(MethodBase m, string name) {
+ name = m.getName() and
+ result = trackSingletonMethod0(m, TypeTracker::end())
+}
+
+private DataFlow::Node selfInModule(Module tp) {
+ exists(Self selfExpr, ModuleBase scope |
+ scope = selfExpr.getEnclosingModule() and
+ not selfExpr.getEnclosingMethod().getEnclosingModule() = scope and
+ tp = scope.getModule() and
+ selfExpr = result.asExpr().getExpr()
+ )
+}
+
+private DataFlow::LocalSourceNode trackModule(Module tp, TypeTracker t) {
+ t.start() and
+ (
+ // a `self` expression that refers to the module
+ result = selfInModule(tp)
+ or
+ // an expression that resolves to the module
+ tp = resolveScopeExpr(result.asExpr().getExpr())
+ )
+ or
+ exists(TypeTracker prev, StepSummary stepSummary |
+ t = prev.append(stepSummary) and result = trackModuleRec(tp, prev, stepSummary)
+ )
+}
+
+pragma[nomagic]
+private DataFlow::LocalSourceNode trackModuleRec(Module tp, TypeTracker t, StepSummary summary) {
+ StepSummary::step(trackModule(tp, t), result, summary) // one step on from a tracked node
+}
+
+private DataFlow::LocalSourceNode trackModule(Module tp) {
+ exists(TypeTracker done | done = TypeTracker::end() | result = trackModule(tp, done))
+}
+
+/** Gets a callable that the call `call` may target at run time. */
+DataFlowCallable viableCallable(DataFlowCall call) {
+ exists(LibraryCallable lib |
+ lib.getACall() = call.asCall().getExpr() and
+ result = TLibraryCallable(lib)
+ )
+ or
+ not call.asCall().getExpr() instanceof YieldCall and // handled by `lambdaCreation`/`lambdaCall`
+ result = TCfgScope(getTarget(call.asCall()))
+}
+
+/**
+ * Holds if the set of viable implementations that can be called by `call`
+ * might be improved by knowing the call context. This is the case if the
+ * qualifier accesses a parameter of the enclosing callable `c` (including
+ * the implicit `self` parameter). Note: this implementation never holds (`none()`).
+ */
+predicate mayBenefitFromCallContext(DataFlowCall call, DataFlowCallable c) { none() }
+
+/**
+ * Gets a viable dispatch target of `call` in the context `ctx`, restricted to `call`s for
+ * which a context might make a difference. This implementation has no results (`none()`).
+ */
+DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) { none() }
+
+/**
+ * Holds if the `ExprNode` `e` may be the value returned by a call to `c`.
+ */
+predicate exprNodeReturnedFrom(DataFlow::ExprNode e, Callable c) {
+ exists(ReturningNode ret |
+ (
+ ret.(ExprReturnNode) = e or
+ ret.(ExplicitReturnNode).getReturningNode().getReturnedValueNode() = e.asExpr()
+ ) and
+ c = ret.getEnclosingCallable().asCallable()
+ )
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl.qll
new file mode 100644
index 00000000000..4ca06c93362
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl.qll
@@ -0,0 +1,4559 @@
+/**
+ * Provides an implementation of global (interprocedural) data flow. This file
+ * re-exports the local (intraprocedural) data flow analysis from
+ * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed
+ * through the `Configuration` class. This file exists in several identical
+ * copies, allowing queries to use multiple `Configuration` classes that depend
+ * on each other without introducing mutual recursion among those configurations.
+ */
+
+private import DataFlowImplCommon
+private import DataFlowImplSpecific::Private
+import DataFlowImplSpecific::Public
+
+/**
+ * A configuration of interprocedural data flow analysis. This defines
+ * sources, sinks, and any other configurable aspect of the analysis. Each
+ * use of the global data flow library must define its own unique extension
+ * of this abstract class. To create a configuration, extend this class with
+ * a subclass whose characteristic predicate is a unique singleton string.
+ * For example, write
+ *
+ * ```ql
+ * class MyAnalysisConfiguration extends DataFlow::Configuration {
+ * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
+ * // Override `isSource` and `isSink`.
+ * // Optionally override `isBarrier`.
+ * // Optionally override `isAdditionalFlowStep`.
+ * }
+ * ```
+ * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and
+ * the edges are those data-flow steps that preserve the value of the node
+ * along with any additional edges defined by `isAdditionalFlowStep`.
+ * Specifying nodes in `isBarrier` will remove those nodes from the graph, and
+ * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going
+ * and/or out-going edges from those nodes, respectively.
+ *
+ * Then, to query whether there is flow between some `source` and `sink`,
+ * write
+ *
+ * ```ql
+ * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
+ * ```
+ *
+ * Multiple configurations can coexist, but two classes extending
+ * `DataFlow::Configuration` should never depend on each other. One of them
+ * should instead depend on a `DataFlow2::Configuration`, a
+ * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`.
+ */
+abstract class Configuration extends string {
+ bindingset[this]
+ Configuration() { any() }
+
+ /**
+ * Holds if `source` is a relevant data flow source.
+ */
+ abstract predicate isSource(Node source);
+
+ /**
+ * Holds if `sink` is a relevant data flow sink.
+ */
+ abstract predicate isSink(Node sink);
+
+ /**
+ * Holds if data flow through `node` is prohibited. This completely removes
+ * `node` from the data flow graph.
+ */
+ predicate isBarrier(Node node) { none() }
+
+ /** Holds if data flow into `node` is prohibited. */
+ predicate isBarrierIn(Node node) { none() }
+
+ /** Holds if data flow out of `node` is prohibited. */
+ predicate isBarrierOut(Node node) { none() }
+
+ /** Holds if data flow through nodes guarded by `guard` is prohibited. */
+ predicate isBarrierGuard(BarrierGuard guard) { none() }
+
+ /**
+ * Holds if the additional flow step from `node1` to `node2` must be taken
+ * into account in the analysis.
+ */
+ predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
+
+ /**
+ * Holds if an arbitrary number of implicit read steps of content `c` may be
+ * taken at `node`.
+ */
+ predicate allowImplicitRead(Node node, Content c) { none() }
+
+ /**
+ * Gets the virtual dispatch branching limit when calculating field flow.
+ * This can be overridden to a smaller value to improve performance (a
+ * value of 0 disables field flow), or a larger value to get more results.
+ */
+ int fieldFlowBranchLimit() { result = 2 }
+
+ /**
+ * Holds if data may flow from `source` to `sink` for this configuration.
+ */
+ predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) }
+
+ /**
+ * Holds if data may flow from `source` to `sink` for this configuration.
+ *
+ * The corresponding paths are generated from the end-points and the graph
+ * included in the module `PathGraph`.
+ */
+ predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) }
+
+ /**
+ * Holds if data may flow from some source to `sink` for this configuration.
+ */
+ predicate hasFlowTo(Node sink) { this.hasFlow(_, sink) }
+
+ /**
+ * Holds if data may flow from some source to `sink` for this configuration.
+ */
+ predicate hasFlowToExpr(DataFlowExpr sink) { this.hasFlowTo(exprNode(sink)) }
+
+ /**
+ * Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev`
+ * measured in approximate number of interprocedural steps.
+ */
+ int explorationLimit() { none() }
+
+ /**
+ * Holds if there is a partial data flow path from `source` to `node`. The
+ * approximate distance between `node` and the closest source is `dist` and
+ * is restricted to be less than or equal to `explorationLimit()`. This
+ * predicate completely disregards sink definitions.
+ *
+ * This predicate is intended for data-flow exploration and debugging and may
+ * perform poorly if the number of sources is too big and/or the exploration
+ * limit is set too high without using barriers.
+ *
+ * This predicate is disabled (has no results) by default. Override
+ * `explorationLimit()` with a suitable number to enable this predicate.
+ *
+ * To use this in a `path-problem` query, import the module `PartialPathGraph`.
+ */
+ final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) {
+ partialFlow(source, node, this) and
+ dist = node.getSourceDistance()
+ }
+
+ /**
+ * Holds if there is a partial data flow path from `node` to `sink`. The
+ * approximate distance between `node` and the closest sink is `dist` and
+ * is restricted to be less than or equal to `explorationLimit()`. This
+ * predicate completely disregards source definitions.
+ *
+ * This predicate is intended for data-flow exploration and debugging and may
+ * perform poorly if the number of sinks is too big and/or the exploration
+ * limit is set too high without using barriers.
+ *
+ * This predicate is disabled (has no results) by default. Override
+ * `explorationLimit()` with a suitable number to enable this predicate.
+ *
+ * To use this in a `path-problem` query, import the module `PartialPathGraph`.
+ *
+ * Note that reverse flow has slightly lower precision than the corresponding
+ * forward flow, as reverse flow disregards type pruning among other features.
+ */
+ final predicate hasPartialFlowRev(PartialPathNode node, PartialPathNode sink, int dist) {
+ revPartialFlow(node, sink, this) and
+ dist = node.getSinkDistance()
+ }
+}
+
+/**
+ * This class exists to prevent mutual recursion between the user-overridden
+ * member predicates of `Configuration` and the rest of the data-flow library.
+ * Good performance cannot be guaranteed in the presence of such recursion, so
+ * it should be replaced by using more than one copy of the data flow library.
+ */
+abstract private class ConfigurationRecursionPrevention extends Configuration {
+ bindingset[this]
+ ConfigurationRecursionPrevention() { any() }
+
+ override predicate hasFlow(Node source, Node sink) {
+ strictcount(Node n | this.isSource(n)) < 0 // a count is never negative: cannot hold
+ or
+ strictcount(Node n | this.isSink(n)) < 0 // likewise; only references `isSink`
+ or
+ strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 // likewise
+ or
+ super.hasFlow(source, sink) // the only disjunct that can contribute results
+ }
+}
+
+private newtype TNodeEx =
+ TNodeNormal(Node n) or
+ TNodeImplicitRead(Node n, boolean hasRead) {
+ hasRead = [false, true] and exists(Configuration cfg | cfg.allowImplicitRead(n, _))
+ }
+
+private class NodeEx extends TNodeEx {
+ string toString() {
+ exists(Node wrapped | this.isImplicitReadNode(wrapped, _) | result = wrapped.toString() + " [Ext]")
+ or
+ result = this.asNode().toString()
+ }
+
+ Node asNode() { TNodeNormal(result) = this }
+
+ predicate isImplicitReadNode(Node n, boolean hasRead) { TNodeImplicitRead(n, hasRead) = this }
+
+ Node projectToNode() { TNodeNormal(result) = this or TNodeImplicitRead(result, _) = this }
+
+ pragma[nomagic]
+ private DataFlowCallable getEnclosingCallable0() {
+ nodeEnclosingCallable(this.projectToNode(), result)
+ }
+
+ pragma[inline]
+ DataFlowCallable getEnclosingCallable() {
+ pragma[only_bind_out](this).getEnclosingCallable0() = pragma[only_bind_into](result)
+ }
+
+ pragma[nomagic]
+ private DataFlowType getDataFlowType0() { nodeDataFlowType(this.asNode(), result) }
+
+ pragma[inline]
+ DataFlowType getDataFlowType() {
+ pragma[only_bind_out](this).getDataFlowType0() = pragma[only_bind_into](result)
+ }
+
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.projectToNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ }
+}
+
+private class ArgNodeEx extends NodeEx {
+ ArgNodeEx() { exists(ArgNode arg | arg = this.asNode()) }
+}
+
+private class ParamNodeEx extends NodeEx {
+ ParamNodeEx() { exists(ParamNode p | p = this.asNode()) }
+
+ predicate isParameterOf(DataFlowCallable c, int i) {
+ exists(ParamNode p | p = this.asNode() and p.isParameterOf(c, i))
+ }
+
+ int getPosition() { this.isParameterOf(_, result) }
+}
+
+private class RetNodeEx extends NodeEx {
+ RetNodeEx() { exists(ReturnNodeExt r | r = this.asNode()) }
+
+ ReturnPosition getReturnPosition() { getReturnPosition(this.asNode()) = result }
+
+ ReturnKindExt getKind() { exists(ReturnNodeExt r | r = this.asNode() | result = r.getKind()) }
+}
+
+private predicate inBarrier(NodeEx node, Configuration config) {
+ exists(Node source |
+ source = node.asNode() and
+ config.isSource(source) and
+ config.isBarrierIn(source)
+ )
+}
+
+private predicate outBarrier(NodeEx node, Configuration config) {
+ exists(Node sink |
+ sink = node.asNode() and
+ config.isSink(sink) and
+ config.isBarrierOut(sink)
+ )
+}
+
+private predicate fullBarrier(NodeEx node, Configuration config) {
+ exists(Node inner | inner = node.asNode() |
+ exists(BarrierGuard guard |
+ inner = guard.getAGuardedNode() and
+ config.isBarrierGuard(guard)
+ )
+ or
+ not config.isSink(inner) and
+ config.isBarrierOut(inner)
+ or
+ not config.isSource(inner) and
+ config.isBarrierIn(inner)
+ or
+ config.isBarrier(inner)
+ )
+}
+
+pragma[nomagic] // holds if `node` wraps an ordinary node that `config` declares as a source
+private predicate sourceNode(NodeEx node, Configuration config) { config.isSource(node.asNode()) }
+
+pragma[nomagic] // holds if `node` wraps an ordinary node that `config` declares as a sink
+private predicate sinkNode(NodeEx node, Configuration config) { config.isSink(node.asNode()) }
+
+/**
+ * Holds if data can flow from `node1` to `node2` in a single local step.
+ */
+private predicate localFlowStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node pred, Node succ |
+ pred = node1.asNode() and
+ succ = node2.asNode() and
+ simpleLocalFlowStepExt(pred, succ) and
+ not fullBarrier(node1, config) and
+ not fullBarrier(node2, config) and
+ not outBarrier(node1, config) and
+ not inBarrier(node2, config)
+ )
+ or
+ exists(Node readNode |
+ node1.asNode() = readNode and
+ node2.isImplicitReadNode(readNode, false) and
+ config.allowImplicitRead(readNode, _)
+ )
+}
+
+/**
+ * Holds if the additional step from `node1` to `node2` stays within one callable.
+ */
+private predicate additionalLocalFlowStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node pred, Node succ |
+ pred = node1.asNode() and
+ succ = node2.asNode() and
+ config.isAdditionalFlowStep(pred, succ) and
+ getNodeEnclosingCallable(pred) = getNodeEnclosingCallable(succ) and
+ not fullBarrier(node1, config) and
+ not fullBarrier(node2, config) and
+ not outBarrier(node1, config) and
+ not inBarrier(node2, config)
+ )
+ or
+ exists(Node readNode |
+ node1.isImplicitReadNode(readNode, true) and
+ node2.asNode() = readNode and
+ config.allowImplicitRead(readNode, _)
+ )
+}
+
+/**
+ * Holds if data can flow from `node1` to `node2` by a step that discards call contexts.
+ */
+private predicate jumpStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node pred, Node succ |
+ pred = node1.asNode() and
+ succ = node2.asNode() and
+ jumpStepCached(pred, succ) and
+ not fullBarrier(node1, config) and
+ not fullBarrier(node2, config) and
+ not outBarrier(node1, config) and
+ not inBarrier(node2, config)
+ )
+}
+
+/**
+ * Holds if the additional step from `node1` to `node2` crosses from one callable to another.
+ */
+private predicate additionalJumpStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node pred, Node succ |
+ pred = node1.asNode() and
+ succ = node2.asNode() and
+ config.isAdditionalFlowStep(pred, succ) and
+ getNodeEnclosingCallable(pred) != getNodeEnclosingCallable(succ) and
+ not fullBarrier(node1, config) and
+ not fullBarrier(node2, config) and
+ not outBarrier(node1, config) and
+ not inBarrier(node2, config)
+ )
+}
+
+private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) {
+ exists(Node readNode |
+ config.allowImplicitRead(readNode, c) and
+ node1.isImplicitReadNode(readNode, _) and
+ node2.isImplicitReadNode(readNode, true)
+ )
+ or
+ read(node1.asNode(), c, node2.asNode())
+}
+
+private predicate store(
+ NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config
+) {
+ read(_, tc.getContent(), _, config) and
+ store(node1.asNode(), tc, node2.asNode(), contentType)
+}
+
+pragma[nomagic]
+private predicate viableReturnPosOutEx(DataFlowCall call, ReturnPosition pos, NodeEx out) {
+ viableReturnPosOut(call, pos, out.asNode()) // `NodeEx` wrapper; only ordinary nodes qualify
+}
+
+pragma[nomagic]
+private predicate viableParamArgEx(DataFlowCall call, ParamNodeEx p, ArgNodeEx arg) {
+ viableParamArg(call, p.asNode(), arg.asNode()) // `NodeEx` wrapper around `viableParamArg`
+}
+
+/**
+ * Holds if `config` enables field flow, that is, its branch limit is positive.
+ */
+private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() > 0 }
+
+private module Stage1 {
+ class ApApprox = Unit;
+
+ class Ap = Unit;
+
+ class ApOption = Unit;
+
+ class Cc = boolean;
+
+ /* Begin: Stage 1 logic. */
+ /**
+ * Holds if `node` is reachable from a source in the configuration `config`.
+ *
+ * The Boolean `cc` records whether the node is reached through an
+ * argument in a call.
+ */
+ predicate fwdFlow(NodeEx node, Cc cc, Configuration config) {
+ not fullBarrier(node, config) and
+ (
+ sourceNode(node, config) and
+ cc = false
+ or
+ exists(NodeEx mid |
+ fwdFlow(mid, cc, config) and
+ localFlowStep(mid, node, config)
+ )
+ or
+ exists(NodeEx mid |
+ fwdFlow(mid, cc, config) and
+ additionalLocalFlowStep(mid, node, config)
+ )
+ or
+ exists(NodeEx mid |
+ fwdFlow(mid, _, config) and
+ jumpStep(mid, node, config) and
+ cc = false
+ )
+ or
+ exists(NodeEx mid |
+ fwdFlow(mid, _, config) and
+ additionalJumpStep(mid, node, config) and
+ cc = false
+ )
+ or
+ // store
+ exists(NodeEx mid |
+ useFieldFlow(config) and
+ fwdFlow(mid, cc, config) and
+ store(mid, _, node, _, config) and
+ not outBarrier(mid, config)
+ )
+ or
+ // read
+ exists(Content c |
+ fwdFlowRead(c, node, cc, config) and
+ fwdFlowConsCand(c, config) and
+ not inBarrier(node, config)
+ )
+ or
+ // flow into a callable
+ exists(NodeEx arg |
+ fwdFlow(arg, _, config) and
+ viableParamArgEx(_, node, arg) and
+ cc = true
+ )
+ or
+ // flow out of a callable
+ exists(DataFlowCall call |
+ fwdFlowOut(call, node, false, config) and
+ cc = false
+ or
+ fwdFlowOutFromArg(call, node, config) and
+ fwdFlowIsEntered(call, cc, config)
+ )
+ )
+ }
+
+ private predicate fwdFlow(NodeEx node, Configuration config) { fwdFlow(node, _, config) }
+
+ pragma[nomagic]
+ private predicate fwdFlowRead(Content c, NodeEx node, Cc cc, Configuration config) {
+ exists(NodeEx mid |
+ fwdFlow(mid, cc, config) and
+ read(mid, c, node, config)
+ )
+ }
+
+ /**
+ * Holds if `c` is the target of a store in the flow covered by `fwdFlow`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Content c, Configuration config) {
+ exists(NodeEx mid, NodeEx node, TypedContent tc |
+ not fullBarrier(node, config) and
+ useFieldFlow(config) and
+ fwdFlow(mid, _, config) and
+ store(mid, tc, node, _, config) and
+ c = tc.getContent()
+ )
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) {
+ exists(RetNodeEx ret |
+ fwdFlow(ret, cc, config) and
+ ret.getReturnPosition() = pos
+ )
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowOut(DataFlowCall call, NodeEx out, Cc cc, Configuration config) {
+ exists(ReturnPosition pos |
+ fwdFlowReturnPosition(pos, cc, config) and
+ viableReturnPosOutEx(call, pos, out)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowOutFromArg(DataFlowCall call, NodeEx out, Configuration config) {
+ fwdFlowOut(call, out, true, config)
+ }
+
+ /**
+ * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(DataFlowCall call, Cc cc, Configuration config) {
+ exists(ArgNodeEx arg |
+ fwdFlow(arg, cc, config) and
+ viableParamArgEx(call, _, arg)
+ )
+ }
+
+ /**
+ * Holds if `node` is part of a path from a source to a sink in the
+ * configuration `config`.
+ *
+ * The Boolean `toReturn` records whether the node must be returned from
+ * the enclosing callable in order to reach a sink.
+ */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, Configuration config) {
+ revFlow0(node, toReturn, config) and
+ fwdFlow(node, config)
+ }
+
+ pragma[nomagic]
+ private predicate revFlow0(NodeEx node, boolean toReturn, Configuration config) {
+ fwdFlow(node, config) and // base case: a sink that is covered by `fwdFlow`
+ sinkNode(node, config) and
+ toReturn = false
+ or
+ exists(NodeEx mid | // local step into `mid`; `toReturn` is carried over unchanged
+ localFlowStep(node, mid, config) and
+ revFlow(mid, toReturn, config)
+ )
+ or
+ exists(NodeEx mid | // additional local step into `mid`; `toReturn` is carried over unchanged
+ additionalLocalFlowStep(node, mid, config) and
+ revFlow(mid, toReturn, config)
+ )
+ or
+ exists(NodeEx mid | // jump step: the `toReturn` value of `mid` is ignored and reset to false
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, config) and
+ toReturn = false
+ )
+ or
+ exists(NodeEx mid | // additional jump step: the `toReturn` value of `mid` is ignored and reset to false
+ additionalJumpStep(node, mid, config) and
+ revFlow(mid, _, config) and
+ toReturn = false
+ )
+ or
+ // store: `node` stores `c` into a `revFlow` node, and `c` is also read in the reverse flow
+ exists(Content c |
+ revFlowStore(c, node, toReturn, config) and
+ revFlowConsCand(c, config)
+ )
+ or
+ // read: `node` is read from with content `c`, and `c` is also stored in the forward flow
+ exists(NodeEx mid, Content c |
+ read(node, c, mid, config) and
+ fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+ revFlow(mid, toReturn, pragma[only_bind_into](config))
+ )
+ or
+ // flow into a callable: `node` is an argument of `call`
+ exists(DataFlowCall call |
+ revFlowIn(call, node, false, config) and // the parameter is covered with `toReturn = false`
+ toReturn = false
+ or
+ revFlowInToReturn(call, node, config) and // the parameter is covered with `toReturn = true`; take `toReturn` from the call's output
+ revFlowIsReturned(call, toReturn, config)
+ )
+ or
+ // flow out of a callable: `node` is a return node at a position covered by `revFlowOut`
+ exists(ReturnPosition pos |
+ revFlowOut(pos, config) and
+ node.(RetNodeEx).getReturnPosition() = pos and
+ toReturn = true
+ )
+ }
+
+ /**
+  * Holds if `c`, a forward store candidate, is read from a `fwdFlow` node into a node covered by `revFlow`.
+  */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Content c, Configuration config) {
+   exists(NodeEx readTo, NodeEx readFrom |
+     fwdFlow(readFrom, pragma[only_bind_into](config)) and
+     read(readFrom, c, readTo, config) and
+     fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+     revFlow(pragma[only_bind_into](readTo), _, pragma[only_bind_into](config))
+   )
+ }
+
+ pragma[nomagic]
+ private predicate revFlowStore(Content c, NodeEx node, boolean toReturn, Configuration config) {
+   exists(NodeEx storeTarget, TypedContent typed | // `node` stores `c` into a node covered by `revFlow`
+     revFlow(storeTarget, toReturn, pragma[only_bind_into](config)) and
+     fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+     store(node, typed, storeTarget, _, config) and
+     typed.getContent() = c
+   )
+ }
+
+ /**
+  * Holds if, within the flow covered by `revFlow`, content `c` is the
+  * target of a store as well as of a read.
+  */
+ private predicate revFlowIsReadAndStored(Content c, Configuration conf) {
+   revFlowStore(c, _, _, conf) and
+   revFlowConsCand(c, conf)
+ }
+
+ pragma[nomagic]
+ predicate viableReturnPosOutNodeCandFwd1(
+   DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config
+ ) {
+   viableReturnPosOutEx(call, pos, out) and // return edge, restricted to forward-reached return positions
+   fwdFlowReturnPosition(pos, _, config)
+ }
+
+ pragma[nomagic]
+ private predicate revFlowOut(ReturnPosition pos, Configuration config) {
+   // `pos` returns, at some call, into an out-node that is covered by `revFlow`.
+   exists(DataFlowCall site, NodeEx target |
+     viableReturnPosOutNodeCandFwd1(site, pos, target, config) and revFlow(target, _, config)
+   )
+ }
+
+ pragma[nomagic]
+ predicate viableParamArgNodeCandFwd1(
+   DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config
+ ) {
+   fwdFlow(arg, config) and // only arguments covered by forward flow
+   viableParamArgEx(call, p, arg)
+ }
+
+ pragma[nomagic]
+ private predicate revFlowIn(
+   DataFlowCall call, ArgNodeEx arg, boolean toReturn, Configuration config
+ ) {
+   exists(ParamNodeEx param | // a parameter that `arg` may be passed to at `call`
+     viableParamArgNodeCandFwd1(call, param, arg, config) and
+     revFlow(param, toReturn, config)
+   )
+ }
+
+ pragma[nomagic]
+ private predicate revFlowInToReturn(DataFlowCall call, ArgNodeEx arg, Configuration config) {
+   exists(boolean mustReturn | mustReturn = true and revFlowIn(call, arg, mustReturn, config)) // `revFlowIn` with `toReturn = true`
+ }
+
+ /**
+  * Holds if `revFlow` covers, with the given `toReturn` value, some output
+  * of `call` that forward flow reaches out of the callee starting from an
+  * argument, so that reverse flow may continue to an argument of `call`.
+  */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(DataFlowCall call, boolean toReturn, Configuration config) {
+   exists(NodeEx outNode |
+     fwdFlowOutFromArg(call, outNode, config) and
+     revFlow(outNode, toReturn, config)
+   )
+ }
+
+ pragma[nomagic]
+ predicate storeStepCand(
+   NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+   Configuration config
+ ) {
+   exists(Content stored | // the content of `tc`; it must be both read and stored in the reverse flow
+     revFlowIsReadAndStored(stored, pragma[only_bind_into](config)) and
+     revFlow(node2, pragma[only_bind_into](config)) and
+     store(node1, tc, node2, contentType, config) and
+     stored = tc.getContent() and
+     exists(ap1)
+   )
+ }
+
+ pragma[nomagic]
+ predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) {
+   read(n1, c, n2, pragma[only_bind_into](config)) and // read of `c` from `n1` into `n2`
+   revFlow(n2, pragma[only_bind_into](config)) and
+   revFlowIsReadAndStored(c, pragma[only_bind_into](config))
+ }
+
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, Configuration config) { exists(boolean toReturn | revFlow(node, toReturn, config)) } // any `toReturn` value
+
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+   exists(ap) and exists(returnAp) and revFlow(node, toReturn, config) // `returnAp` and `ap` are left unconstrained
+ }
+
+ private predicate throughFlowNodeCand(NodeEx node, Configuration config) {
+   fwdFlow(node, true, config) and // forward call-context flag is `true`
+   revFlow(node, true, config) and // reverse `toReturn` flag is `true`
+   not outBarrier(node, config) and
+   not inBarrier(node, config)
+ }
+
+ /** Holds if `callable` has a through-flow candidate return node of kind `kind`. */
+ pragma[nomagic]
+ private predicate returnFlowCallableNodeCand(
+   DataFlowCallable callable, ReturnKindExt kind, Configuration config
+ ) {
+   exists(RetNodeEx retNode |
+     kind = retNode.getKind() and
+     callable = retNode.getEnclosingCallable() and
+     throughFlowNodeCand(retNode, config)
+   )
+ }
+
+ /**
+  * Holds if `p` is a through-flow candidate parameter of `c` and `c` has a
+  * through-flow candidate return node, so `p` may be the origin of a summary.
+  */
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+   exists(ReturnKindExt retKind |
+     p.getEnclosingCallable() = c and
+     throughFlowNodeCand(p, config) and
+     returnFlowCallableNodeCand(c, retKind, config) and
+     // a parameter is not expected to be returned by being stored in itself
+     not retKind.(ParamUpdateReturnKind).getPosition() = p.getPosition() and
+     exists(ap)
+   )
+ }
+
+ pragma[nomagic]
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+   exists(ArgNodeEx argument, boolean outerToReturn | // reverse flow enters and leaves the callee of `call`
+     revFlowIsReturned(call, outerToReturn, config) and
+     revFlowInToReturn(call, argument, config) and
+     revFlow(argument, outerToReturn, config)
+   )
+ }
+
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+   fwd = true and // sizes of the forward relations
+   conscand = -1 and
+   nodes = count(NodeEx n | fwdFlow(n, config)) and
+   fields = count(Content content | fwdFlowConsCand(content, config)) and
+   tuples = count(NodeEx n, boolean flag | fwdFlow(n, flag, config))
+   or
+   fwd = false and // sizes of the reverse relations
+   conscand = -1 and
+   nodes = count(NodeEx n | revFlow(n, _, config)) and
+   fields = count(Content content | revFlowConsCand(content, config)) and
+   tuples = count(NodeEx n, boolean flag | revFlow(n, flag, config))
+ }
+ /* End: Stage 1 logic. */
+}
+
+pragma[noinline]
+private predicate localFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) {
+  localFlowStep(node1, node2, config) and // local step whose target is covered by `Stage1::revFlow`
+  Stage1::revFlow(node2, config)
+}
+
+pragma[noinline]
+private predicate additionalLocalFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) {
+  additionalLocalFlowStep(node1, node2, config) and // additional step whose target is covered by `Stage1::revFlow`
+  Stage1::revFlow(node2, config)
+}
+
+pragma[nomagic]
+private predicate viableReturnPosOutNodeCand1(
+  DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config
+) {
+  Stage1::viableReturnPosOutNodeCandFwd1(call, pos, out, config) and // forward-viable return edge
+  Stage1::revFlow(out, config)
+}
+
+/**
+ * Holds if `ret` may return a value out of `call` to `out` (via a
+ * `ReturnNode` or via a mutated argument), both nodes are covered by
+ * `Stage1::revFlow`, and neither an out-barrier on `ret` nor an in-barrier on `out` applies.
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand1(
+  DataFlowCall call, RetNodeEx ret, NodeEx out, Configuration config
+) {
+  Stage1::revFlow(ret, config) and
+  viableReturnPosOutNodeCand1(call, ret.getReturnPosition(), out, config) and
+  not inBarrier(out, config) and
+  not outBarrier(ret, config)
+}
+
+pragma[nomagic]
+private predicate viableParamArgNodeCand1(
+  DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config
+) {
+  Stage1::revFlow(arg, config) and // the argument is covered by stage 1 reverse flow
+  Stage1::viableParamArgNodeCandFwd1(call, p, arg, config)
+}
+
+/**
+ * Holds if `arg` may be passed to parameter `p` at `call`, both nodes are
+ * covered by `Stage1::revFlow`, and neither an out-barrier on `arg` nor an in-barrier on `p` applies.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand1(
+  DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, Configuration config
+) {
+  Stage1::revFlow(p, config) and
+  viableParamArgNodeCand1(call, p, arg, config) and
+  not inBarrier(p, config) and
+  not outBarrier(arg, config)
+}
+
+/**
+ * Gets the number of distinct targets of cross-call edges (into or out of
+ * a call) that leave `n1`, i.e. the forward branching at `n1` in the
+ * call-context-insensitive graph of paths between sources and sinks.
+ */
+private int branch(NodeEx n1, Configuration conf) {
+  result =
+    strictcount(NodeEx target |
+      flowIntoCallNodeCand1(_, n1, target, conf) or flowOutOfCallNodeCand1(_, n1, target, conf)
+    )
+}
+
+/**
+ * Gets the number of distinct origins of cross-call edges (into or out of
+ * a call) that arrive at `n2`, i.e. the backward branching at `n2` in the
+ * call-context-insensitive graph of paths between sources and sinks.
+ */
+private int join(NodeEx n2, Configuration conf) {
+  result =
+    strictcount(NodeEx origin |
+      flowIntoCallNodeCand1(_, origin, n2, conf) or flowOutOfCallNodeCand1(_, origin, n2, conf)
+    )
+}
+
+/**
+ * Holds if `ret` may return a value out of `call` to `out`, either through
+ * a `ReturnNode` or through an argument that has been mutated, and this
+ * step is part of a path from a source to a sink. `allowsFieldFlow` is
+ * `true` when the smaller of the branching at `ret` and the joining at
+ * `out` is within `config.fieldFlowBranchLimit()`, and `false` otherwise.
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand1(
+  DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+) {
+  exists(int fanOut, int fanIn |
+    fanOut = branch(ret, config) and
+    fanIn = join(out, config) and
+    if fanIn.minimum(fanOut) <= config.fieldFlowBranchLimit()
+    then allowsFieldFlow = true
+    else allowsFieldFlow = false
+  ) and
+  flowOutOfCallNodeCand1(call, ret, out, config)
+}
+
+/**
+ * Holds if `arg` may flow into parameter `p` at `call` and this step is part
+ * of a path from a source to a sink. `allowsFieldFlow` is `true` when the smaller of the
+ * branching at `arg` and the joining at `p` is within the configured limit, else `false`.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand1(
+  DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+) {
+  exists(int fanOut, int fanIn |
+    fanOut = branch(arg, config) and
+    fanIn = join(p, config) and
+    if fanIn.minimum(fanOut) <= config.fieldFlowBranchLimit()
+    then allowsFieldFlow = true
+    else allowsFieldFlow = false
+  ) and
+  flowIntoCallNodeCand1(call, arg, p, config)
+}
+
+private module Stage2 {
+ module PrevStage = Stage1; // the stage whose results this stage is built on
+
+ class ApApprox = PrevStage::Ap; // the previous stage's access-path type
+
+ class Ap = boolean; // access paths in this stage are Booleans
+
+ class ApNil extends Ap { // the nil access path is the Boolean `false`
+   ApNil() { false = this }
+ }
+
+ bindingset[result, ap] // both the access path and its approximation must be bound by the caller
+ private ApApprox getApprox(Ap ap) { any() } // imposes no constraint between `ap` and `result`
+
+ private ApNil getApNil(NodeEx node) { exists(result) and PrevStage::revFlow(node, _) } // nil path for nodes kept by the previous stage
+
+ bindingset[tc, tail]
+ private Ap apCons(TypedContent tc, Ap tail) { exists(tail) and exists(tc) and result = true } // every cons is `true`
+
+ pragma[inline]
+ private Content getHeadContent(Ap ap) { ap = true and exists(result) } // any content may head a non-nil path
+
+ class ApOption = BooleanOption; // optional access path for this stage
+
+ ApOption apNone() { result = TBooleanNone() } // the absent access path
+
+ ApOption apSome(Ap ap) { result = TBooleanSome(ap) } // wraps `ap` as a present option
+
+ class Cc = CallContext; // call contexts used by this stage
+
+ class CcCall = CallContextCall; // call-context type used for flow that entered through a call
+
+ class CcNoCall = CallContextNoCall; // call-context type used for flow that did not
+
+ Cc ccNone() { result instanceof CallContextAny } // the unrestricted call context
+
+ private class LocalCc = Unit; // no local call-context information is tracked in this stage
+
+ bindingset[call, c, outercc]
+ private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
+   checkCallContextCall(outercc, call, c) and // `outercc` must pass the check for entering `c` at `call`
+   (recordDataFlowCallSiteDispatch(call, c) and result = TSpecificCall(call)
+    or
+    not recordDataFlowCallSiteDispatch(call, c) and result = TSomeCall())
+ }
+
+ bindingset[call, c, innercc]
+ private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
+   (if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()) and
+   checkCallContextReturn(innercc, c, call) // `innercc` must pass the check for returning from `c` to `call`
+ }
+
+ bindingset[node, cc, config]
+ private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() } // `LocalCc` is `Unit`, so nothing is derived from the inputs
+
+ private predicate localStep(
+   NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+ ) {
+   exists(lcc) and
+   exists(ap) and
+   (
+     localFlowStepNodeCand1(node1, node2, config) and
+     preservesValue = true
+     or
+     additionalLocalFlowStepNodeCand1(node1, node2, config) and
+     preservesValue = false
+   )
+ }
+
+ private predicate flowOutOfCall = flowOutOfCallNodeCand1/5; // return edges, including the `allowsFieldFlow` flag
+
+ private predicate flowIntoCall = flowIntoCallNodeCand1/5; // argument-to-parameter edges, including the `allowsFieldFlow` flag
+
+ bindingset[ap, contentType]
+ private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } // always holds: no type check in this stage
+
+ /* Begin: Stage 2 logic. */
+ private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) {
+ PrevStage::revFlow(node, _, _, apa, config) // `node` is covered by the previous stage's reverse flow with access path `apa`
+ }
+
+ pragma[nomagic]
+ private predicate flowThroughOutOfCall(
+   DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+ ) {
+   // A return edge whose call and callee were through-flow candidates in the previous stage.
+   PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and
+   PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, pragma[only_bind_into](config)) and
+   flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` is reachable with access path `ap` from a source in the
+ * configuration `config`.
+ *
+ * The call context `cc` records whether the node is reached through an
+ * argument in a call, and if so, `argAp` records the access path of that
+ * argument.
+ */
+ pragma[nomagic]
+ predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ flowCand(node, _, config) and // base case: a source that the previous stage kept
+ sourceNode(node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ or
+ exists(NodeEx mid, Ap ap0, LocalCc localCc | // local step from `mid`; `cc` and `argAp` are carried over
+ fwdFlow(mid, cc, argAp, ap0, config) and
+ localCc = getLocalCc(mid, cc, config)
+ |
+ localStep(mid, node, true, _, config, localCc) and // value-preserving step: access path unchanged
+ ap = ap0
+ or
+ localStep(mid, node, false, ap, config, localCc) and // non-value-preserving step: only from a nil access path
+ ap0 instanceof ApNil
+ )
+ or
+ exists(NodeEx mid | // jump step: access path kept, `cc` and `argAp` reset
+ fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ jumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone()
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // additional jump step: only from a nil access path; `cc` and `argAp` reset
+ fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ additionalJumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ )
+ or
+ // store: `ap` is the cons of the stored content `tc` onto the incoming path `ap0`
+ exists(TypedContent tc, Ap ap0 |
+ fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+ ap = apCons(tc, ap0)
+ )
+ or
+ // read: `ap` is a tail for which a store of `c` produced the incoming path `ap0`
+ exists(Ap ap0, Content c |
+ fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+ fwdFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // flow into a callable: `argAp` is recorded only if the previous stage allows flow through `node`
+ exists(ApApprox apa |
+ fwdFlowIn(_, node, _, cc, _, ap, config) and
+ apa = getApprox(ap) and
+ if PrevStage::parameterMayFlowThrough(node, _, apa, config)
+ then argAp = apSome(ap)
+ else argAp = apNone()
+ )
+ or
+ // flow out of a callable that was not entered through an argument
+ fwdFlowOutNotFromArg(node, cc, argAp, ap, config)
+ or
+ exists(DataFlowCall call, Ap argAp0 | // flow through `call`: restore the `cc` and `argAp` of the matching entry
+ fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+ fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowStore(
+   NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+   exists(DataFlowType storedType | // a store candidate of the previous stage whose source is reached with `ap1`
+     PrevStage::storeStepCand(node1, getApprox(ap1), tc, node2, storedType, config) and
+     fwdFlow(node1, cc, argAp, ap1, config) and
+     typecheckStore(ap1, storedType)
+   )
+ }
+
+ /**
+  * Holds if a store of `c` is reached by forward flow with access path
+  * `tail`, and `cons` is the access path that results from the store.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+   exists(TypedContent typed |
+     typed.getContent() = c and
+     cons = apCons(typed, tail) and
+     fwdFlowStore(_, tail, typed, _, _, _, config)
+   )
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowRead(
+   Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+   c = getHeadContent(ap) and // `c` must be a head content of the incoming access path
+   PrevStage::readStepCand(node1, c, node2, config) and
+   fwdFlow(node1, cc, argAp, ap, config)
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowIn(
+   DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+   Configuration config
+ ) {
+   exists(ArgNodeEx argument, boolean fieldFlowOk |
+     flowIntoCall(call, argument, p, fieldFlowOk, config) and
+     fwdFlow(argument, outercc, argAp, ap, config) and
+     innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc)
+   |
+     fieldFlowOk = true or ap instanceof ApNil // non-nil access paths require the edge to allow field flow
+   )
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowOutNotFromArg(
+   NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+ ) {
+   exists(
+     DataFlowCall site, RetNodeEx retNode, boolean fieldFlowOk, CcNoCall innercc,
+     DataFlowCallable callee
+   |
+     callee = retNode.getEnclosingCallable() and
+     flowOutOfCall(site, retNode, out, fieldFlowOk, config) and
+     fwdFlow(retNode, innercc, argAp, ap, config) and
+     ccOut = getCallContextReturn(callee, site, innercc)
+   |
+     fieldFlowOk = true or ap instanceof ApNil // non-nil access paths require the edge to allow field flow
+   )
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowOutFromArg(
+   DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config
+ ) {
+   exists(RetNodeEx retNode, boolean fieldFlowOk, CcCall calleeCc |
+     flowThroughOutOfCall(call, retNode, out, fieldFlowOk, config) and
+     fwdFlow(retNode, calleeCc, apSome(argAp), ap, config) and
+     calleeCc.matchesCall(call)
+   |
+     fieldFlowOk = true or ap instanceof ApNil // non-nil access paths require the edge to allow field flow
+   )
+ }
+
+ /**
+  * Holds if forward flow enters `call` through an argument with access path `ap`, and the
+  * previous stage considers flow through the targeted parameter (and back out) possible.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(
+   DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+ ) {
+   exists(ParamNodeEx param |
+     PrevStage::parameterMayFlowThrough(param, _, getApprox(ap), config) and
+     fwdFlowIn(call, param, cc, _, argAp, ap, config)
+   )
+ }
+
+ pragma[nomagic]
+ private predicate storeStepFwd(
+   NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config
+ ) {
+   ap2 = apCons(tc, ap1) and
+   fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+   fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) // the stored content is also read in the forward flow
+ }
+
+ private predicate readStepFwd(
+   NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config
+ ) {
+   fwdFlowConsCand(ap1, c, ap2, config) and // `ap1` results from storing `c` onto `ap2`
+   fwdFlowRead(ap1, c, n1, n2, _, _, config)
+ }
+
+ pragma[nomagic]
+ private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) {
+   exists(Ap entryAp, NodeEx outNode, Cc outerCc, ApOption outerArgAp, Ap outAp |
+     fwdFlow(outNode, pragma[only_bind_into](outerCc), pragma[only_bind_into](outerArgAp), outAp,
+       pragma[only_bind_into](config)) and
+     fwdFlowOutFromArg(call, outNode, entryAp, outAp, config) and
+     fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](outerCc),
+       pragma[only_bind_into](outerArgAp), pragma[only_bind_into](entryAp),
+       pragma[only_bind_into](config))
+   )
+ }
+
+ pragma[nomagic]
+ private predicate flowThroughIntoCall(
+   DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+ ) {
+   callMayFlowThroughFwd(call, pragma[only_bind_into](config)) and // forward flow passes through `call`
+   PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and
+   fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and
+   flowIntoCall(call, arg, p, allowsFieldFlow, config)
+ }
+
+ /**
+  * Holds if `node`, with access path `ap`, lies on a path from a source to
+  * a sink in the configuration `config`.
+  *
+  * `toReturn` is a Boolean recording whether `node` must be returned from
+  * its enclosing callable in order to reach a sink; if so, `returnAp`
+  * records the access path of the returned value.
+  */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+   fwdFlow(node, _, _, ap, config) and
+   revFlow0(node, toReturn, returnAp, ap, config)
+ }
+
+ pragma[nomagic]
+ private predicate revFlow0(
+ NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ fwdFlow(node, _, _, ap, config) and // base case: a sink reached by forward flow with a nil access path
+ sinkNode(node, config) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ or
+ exists(NodeEx mid | // value-preserving local step: all reverse-flow state is carried over
+ localStep(node, mid, true, _, config, _) and
+ revFlow(mid, toReturn, returnAp, ap, config)
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // non-value-preserving local step: nil access path on both sides
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ localStep(node, mid, false, _, config, _) and
+ revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+ ap instanceof ApNil
+ )
+ or
+ exists(NodeEx mid | // jump step: access path kept, `toReturn` and `returnAp` reset
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, _, ap, config) and
+ toReturn = false and
+ returnAp = apNone()
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // additional jump step: nil access path on both sides, return state reset
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ additionalJumpStep(node, mid, config) and
+ revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ )
+ or
+ // store: `node` stores `c`, turning `ap` into `ap0`, and a matching read of `c` occurs in the reverse flow
+ exists(Ap ap0, Content c |
+ revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+ revFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // read: a forward read step from `node` (path `ap`) into `mid` (path `ap0`)
+ exists(NodeEx mid, Ap ap0 |
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ readStepFwd(node, ap, _, mid, ap0, config)
+ )
+ or
+ // flow into a callable whose parameter is covered with `toReturn = false`
+ revFlowInNotToReturn(node, returnAp, ap, config) and
+ toReturn = false
+ or
+ exists(DataFlowCall call, Ap returnAp0 | // flow through `call`: return state is taken from the call's output
+ revFlowInToReturn(call, node, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ or
+ // flow out of a callable: `returnAp` is recorded only if forward flow reached `node` through an argument
+ revFlowOut(_, node, _, _, ap, config) and
+ toReturn = true and
+ if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config)
+ then returnAp = apSome(ap)
+ else returnAp = apNone()
+ }
+
+ pragma[nomagic]
+ private predicate revFlowStore(
+   Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn,
+   ApOption returnAp, Configuration config
+ ) {
+   c = tc.getContent() and // `c` is the content of the stored `tc`
+   storeStepFwd(node, ap, tc, mid, ap0, config) and
+   revFlow(mid, toReturn, returnAp, ap0, config)
+ }
+
+ /**
+  * Holds if a read of `c` leads into a node covered by reverse flow with
+  * access path `tail`, and `cons` is the access path before the read.
+  */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+   exists(NodeEx readTarget, Ap fwdTail |
+     revFlow(readTarget, _, _, tail, config) and
+     tail = pragma[only_bind_into](fwdTail) and
+     readStepFwd(_, cons, c, readTarget, fwdTail, config)
+   )
+ }
+
+ pragma[nomagic]
+ private predicate revFlowOut(
+   DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap,
+   Configuration config
+ ) {
+   exists(NodeEx outNode, boolean fieldFlowOk |
+     flowOutOfCall(call, ret, outNode, fieldFlowOk, config) and
+     revFlow(outNode, toReturn, returnAp, ap, config)
+   |
+     fieldFlowOk = true or ap instanceof ApNil // non-nil access paths require the edge to allow field flow
+   )
+ }
+
+ pragma[nomagic]
+ private predicate revFlowInNotToReturn(
+   ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config
+ ) {
+   exists(ParamNodeEx param, boolean fieldFlowOk |
+     flowIntoCall(_, arg, param, fieldFlowOk, config) and
+     revFlow(param, false, returnAp, ap, config)
+   |
+     fieldFlowOk = true or ap instanceof ApNil // non-nil access paths require the edge to allow field flow
+   )
+ }
+
+ pragma[nomagic]
+ private predicate revFlowInToReturn(
+   DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config
+ ) {
+   exists(ParamNodeEx param, boolean fieldFlowOk |
+     flowThroughIntoCall(call, arg, param, fieldFlowOk, config) and
+     revFlow(param, true, apSome(returnAp), ap, config)
+   |
+     fieldFlowOk = true or ap instanceof ApNil // non-nil access paths require the edge to allow field flow
+   )
+ }
+
+ /**
+  * Holds if reverse flow covers an output of `call` and steps back to a
+  * return node that forward flow reached from an argument in a call context
+  * matching `call`, so reverse flow may continue to an argument of `call`.
+  */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(
+   DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+   exists(RetNodeEx retNode, CcCall calleeCc |
+     calleeCc.matchesCall(call) and
+     fwdFlow(retNode, calleeCc, apSome(_), ap, config) and
+     revFlowOut(call, retNode, toReturn, returnAp, ap, config)
+   )
+ }
+
+ pragma[nomagic]
+ predicate storeStepCand(
+   NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+   Configuration config
+ ) {
+   exists(Ap consAp, Content content | // the store is matched by a read of the same content in the reverse flow
+     revFlowConsCand(consAp, content, ap1, config) and
+     revFlowStore(consAp, content, ap1, node1, tc, node2, _, _, config) and
+     store(node1, tc, node2, contentType, config)
+   )
+ }
+
+ predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) {
+   exists(Ap consAp, Ap tailAp | // the read is matched by a store of `c` in the reverse flow
+     revFlow(node2, _, _, pragma[only_bind_into](tailAp), pragma[only_bind_into](config)) and
+     readStepFwd(node1, consAp, c, node2, tailAp, config) and
+     revFlowStore(consAp, c, pragma[only_bind_into](tailAp), _, _, _, _, _,
+       pragma[only_bind_into](config))
+   )
+ }
+
+ predicate revFlow(NodeEx node, Configuration config) { exists(Ap anyAp | revFlow(node, _, _, anyAp, config)) } // any return state and access path
+
+ private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) {
+   exists(Ap consAp | storeStepFwd(_, ap, tc, _, consAp, config)) // `tc` is stored onto `ap` in the forward flow
+ }
+
+ predicate consCand(TypedContent tc, Ap ap, Configuration config) {
+   exists(NodeEx storeSource | storeStepCand(storeSource, ap, tc, _, _, config)) // `tc` is stored onto `ap` in the reverse flow
+ }
+
+ pragma[noinline]
+ private predicate parameterFlow(
+   ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+ ) {
+   c = p.getEnclosingCallable() and // parameter of `c` that must be returned, with return access path `ap0`
+   revFlow(p, true, apSome(ap0), ap, config)
+ }
+
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+   exists(RetNodeEx retNode, Ap retAp, ReturnKindExt retKind, int paramPos |
+     parameterFlow(p, ap, retAp, c, config) and
+     c = retNode.getEnclosingCallable() and
+     revFlow(pragma[only_bind_into](retNode), true, apSome(_), pragma[only_bind_into](retAp),
+       pragma[only_bind_into](config)) and
+     fwdFlow(retNode, any(CcCall ccc), apSome(ap), retAp, config) and
+     retKind = retNode.getKind() and
+     p.getPosition() = paramPos and
+     // a parameter is not expected to be returned by being stored in itself
+     not retKind.(ParamUpdateReturnKind).getPosition() = paramPos
+   )
+ }
+
+ pragma[nomagic]
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+   exists(Ap innerReturnAp, ArgNodeEx argument, boolean outerToReturn, ApOption outerReturnAp, Ap argAp |
+     revFlowIsReturned(call, outerToReturn, outerReturnAp, innerReturnAp, config) and
+     revFlowInToReturn(call, argument, innerReturnAp, argAp, config) and
+     revFlow(argument, outerToReturn, outerReturnAp, argAp, config)
+   )
+ }
+
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+   fwd = true and // sizes of the forward relations
+   nodes = count(NodeEx n | fwdFlow(n, _, _, _, config)) and
+   fields = count(TypedContent tc | fwdConsCand(tc, _, config)) and
+   conscand = count(TypedContent tc, Ap tail | fwdConsCand(tc, tail, config)) and
+   tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap path | fwdFlow(n, cc, argAp, path, config))
+   or
+   fwd = false and // sizes of the reverse relations
+   nodes = count(NodeEx n | revFlow(n, _, _, _, config)) and
+   fields = count(TypedContent tc | consCand(tc, _, config)) and
+   conscand = count(TypedContent tc, Ap tail | consCand(tc, tail, config)) and
+   tuples = count(NodeEx n, boolean ret, ApOption retAp, Ap path | revFlow(n, ret, retAp, path, config))
+ }
+ /* End: Stage 2 logic. */
+}
+
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand2(
+  DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config
+) {
+  Stage2::revFlow(node1, pragma[only_bind_into](config)) and // both endpoints are covered by `Stage2::revFlow`
+  Stage2::revFlow(node2, pragma[only_bind_into](config)) and
+  flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config)
+}
+
+pragma[nomagic]
+private predicate flowIntoCallNodeCand2(
+  DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow,
+  Configuration config
+) {
+  Stage2::revFlow(node1, pragma[only_bind_into](config)) and // both endpoints are covered by `Stage2::revFlow`
+  Stage2::revFlow(node2, pragma[only_bind_into](config)) and
+  flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config)
+}
+
+private module LocalFlowBigStep {
+ /**
+ * A node at which some checking is required, and which the big-step
+ * relation is therefore not allowed to step over: a cast node or a node
+ * at which content is cleared.
+ */
+ private class FlowCheckNode extends NodeEx {
+ FlowCheckNode() {
+ castNode(this.asNode()) or
+ clearsContentCached(this.asNode(), _)
+ }
+ }
+
+ /**
+ * Holds if `node` can be the first node in a maximal subsequence of local
+ * flow steps in a dataflow path.
+ */
+ predicate localFlowEntry(NodeEx node, Configuration config) {
+ Stage2::revFlow(node, config) and // only nodes covered by `Stage2::revFlow`
+ (
+ sourceNode(node, config) or
+ jumpStep(_, node, config) or
+ additionalJumpStep(_, node, config) or
+ node instanceof ParamNodeEx or
+ node.asNode() instanceof OutNodeExt or
+ store(_, _, node, _, config) or
+ read(_, _, node, config) or
+ node instanceof FlowCheckNode
+ )
+ }
+
+ /**
+ * Holds if `node` can be the last node in a maximal subsequence of local
+ * flow steps in a dataflow path.
+ */
+ private predicate localFlowExit(NodeEx node, Configuration config) {
+ exists(NodeEx next | Stage2::revFlow(next, config) | // a non-local step from `node` into a `Stage2::revFlow` node
+ jumpStep(node, next, config) or
+ additionalJumpStep(node, next, config) or
+ flowIntoCallNodeCand1(_, node, next, config) or
+ flowOutOfCallNodeCand1(_, node, next, config) or
+ store(node, _, next, _, config) or
+ read(node, _, next, config)
+ )
+ or
+ node instanceof FlowCheckNode
+ or
+ sinkNode(node, config)
+ }
+
+ pragma[noinline]
+ private predicate additionalLocalFlowStepNodeCand2(
+ NodeEx node1, NodeEx node2, Configuration config
+ ) {
+ additionalLocalFlowStepNodeCand1(node1, node2, config) and
+ Stage2::revFlow(node1, _, _, false, pragma[only_bind_into](config)) and // both endpoints with access path `false`
+ Stage2::revFlow(node2, _, _, false, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if the local path from `node1` to `node2` is a prefix of a maximal
+ * subsequence of local flow steps in a dataflow path.
+ *
+ * This is the transitive closure of `[additional]localFlowStep` beginning
+ * at `localFlowEntry`.
+ */
+ pragma[nomagic]
+ private predicate localFlowStepPlus(
+ NodeEx node1, NodeEx node2, boolean preservesValue, DataFlowType t, Configuration config,
+ LocalCallContext cc
+ ) {
+ not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
+ (
+ localFlowEntry(node1, pragma[only_bind_into](config)) and // base case: a single step out of an entry node
+ (
+ localFlowStepNodeCand1(node1, node2, config) and
+ preservesValue = true and
+ t = node1.getDataFlowType() // irrelevant dummy value
+ or
+ additionalLocalFlowStepNodeCand2(node1, node2, config) and
+ preservesValue = false and
+ t = node2.getDataFlowType() // the type of the additional step's target
+ ) and
+ node1 != node2 and
+ cc.relevantFor(node1.getEnclosingCallable()) and
+ not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
+ Stage2::revFlow(node2, pragma[only_bind_into](config))
+ or
+ exists(NodeEx mid | // extend by a value-preserving step; `preservesValue` and `t` are carried over
+ localFlowStepPlus(node1, mid, preservesValue, t, pragma[only_bind_into](config), cc) and
+ localFlowStepNodeCand1(mid, node2, config) and
+ not mid instanceof FlowCheckNode and
+ Stage2::revFlow(node2, pragma[only_bind_into](config))
+ )
+ or
+ exists(NodeEx mid | // extend by an additional step; the path becomes non-value-preserving
+ localFlowStepPlus(node1, mid, _, _, pragma[only_bind_into](config), cc) and
+ additionalLocalFlowStepNodeCand2(mid, node2, config) and
+ not mid instanceof FlowCheckNode and
+ preservesValue = false and
+ t = node2.getDataFlowType() and
+ Stage2::revFlow(node2, pragma[only_bind_into](config))
+ )
+ )
+ }
+
+ /**
+ * Holds if `node1` can step to `node2` in one or more local steps and this
+ * path can occur as a maximal subsequence of local steps in a dataflow path.
+ */
+ pragma[nomagic]
+ predicate localFlowBigStep(
+ NodeEx node1, NodeEx node2, boolean preservesValue, AccessPathFrontNil apf,
+ Configuration config, LocalCallContext callContext
+ ) {
+ localFlowStepPlus(node1, node2, preservesValue, apf.getType(), config, callContext) and
+ localFlowExit(node2, config) // the path must end at an exit node
+ }
+}
+
+private import LocalFlowBigStep
+
+private module Stage3 {
+ /** The preceding stage, whose results are used to prune this stage. */
+ module PrevStage = Stage2;
+
+ /** The access path representation used by the preceding stage. */
+ class ApApprox = PrevStage::Ap;
+
+ /** The access path representation used by this stage: access path fronts. */
+ class Ap = AccessPathFront;
+
+ /** The empty access path of this stage. */
+ class ApNil = AccessPathFrontNil;
+
+ /** Gets the previous-stage approximation of `ap`, obtained via `toBoolNonEmpty`. */
+ private ApApprox getApprox(Ap ap) { result = ap.toBoolNonEmpty() }
+
+ /**
+ * Gets the nil access path front carrying the data-flow type of `node`,
+ * provided that `node` is part of the previous stage's `revFlow`.
+ */
+ private ApNil getApNil(NodeEx node) {
+ PrevStage::revFlow(node, _) and result = TFrontNil(node.getDataFlowType())
+ }
+
+ /**
+ * Gets an access path whose head is `tc`. In this stage only the head is
+ * tracked, so `tail` merely has to exist and does not otherwise constrain
+ * the result.
+ */
+ bindingset[tc, tail]
+ private Ap apCons(TypedContent tc, Ap tail) { result.getHead() = tc and exists(tail) }
+
+ /** Gets the content of the head of `ap`, if `ap` has a head. */
+ pragma[noinline]
+ private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() }
+
+ /** An optional access path of this stage. */
+ class ApOption = AccessPathFrontOption;
+
+ /** Gets the option value that represents the absence of an access path. */
+ ApOption apNone() { result = TAccessPathFrontNone() }
+
+ /** Gets the option value that wraps `ap`. */
+ ApOption apSome(Ap ap) { result = TAccessPathFrontSome(ap) }
+
+ /** The call context of this stage, represented as a plain Boolean. */
+ class Cc = boolean;
+
+ /** The call context `true`. */
+ class CcCall extends Cc {
+ CcCall() { this = true }
+
+ /** Holds if this call context may be `call`. */
+ predicate matchesCall(DataFlowCall call) { any() }
+ }
+
+ /** The call context `false`. */
+ class CcNoCall extends Cc {
+ CcNoCall() { this = false }
+ }
+
+ /** Gets the call context used when no call context is recorded (`false`). */
+ Cc ccNone() { result = false }
+
+ /** The local call context of this stage; `Unit`, so it carries no information. */
+ private class LocalCc = Unit;
+
+ /**
+ * Gets the call context for flow into callable `c` at `call`. The arguments
+ * impose no restriction in this stage: any `CcCall` value is a result.
+ */
+ bindingset[call, c, outercc]
+ private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }
+
+ /**
+ * Gets the call context for flow out of callable `c` at `call`. The arguments
+ * impose no restriction in this stage: any `CcNoCall` value is a result.
+ */
+ bindingset[call, c, innercc]
+ private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }
+
+ /** Gets the local call context for `node`; unrestricted in this stage. */
+ bindingset[node, cc, config]
+ private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
+
+ /**
+ * Holds if there is a local big step from `node1` to `node2`. The local call
+ * context of the underlying `localFlowBigStep` is ignored (`_`); `lcc` only
+ * has to exist.
+ */
+ private predicate localStep(
+ NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+ ) {
+ localFlowBigStep(node1, node2, preservesValue, ap, config, _) and exists(lcc)
+ }
+
+ /** Flow out of a call; an alias for `flowOutOfCallNodeCand2`. */
+ private predicate flowOutOfCall = flowOutOfCallNodeCand2/5;
+
+ /** Flow into a call; an alias for `flowIntoCallNodeCand2`. */
+ private predicate flowIntoCall = flowIntoCallNodeCand2/5;
+
+ /** Holds if `ap` is cleared at the underlying node of `node`. */
+ pragma[nomagic]
+ private predicate clear(NodeEx node, Ap ap) { ap.isClearedAt(node.asNode()) }
+
+ /** Holds if the underlying node of `node` is a `CastingNode`. */
+ pragma[nomagic]
+ private predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode }
+
+ /**
+ * Holds if access path `ap` is permitted at `node`: `ap` must not be cleared
+ * at `node`, and if `node` is a casting node then its type must be compatible
+ * with the type of `ap`.
+ */
+ bindingset[node, ap]
+ private predicate filter(NodeEx node, Ap ap) {
+ not clear(node, ap) and
+ if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()
+ }
+
+ /** Holds if the type of `ap` is compatible with `contentType`. */
+ bindingset[ap, contentType]
+ private predicate typecheckStore(Ap ap, DataFlowType contentType) {
+ // We need to typecheck stores here, since reverse flow through a getter
+ // might have a different type here compared to inside the getter.
+ compatibleTypes(ap.getType(), contentType)
+ }
+
+ /* Begin: Stage 3 logic. */
+ /** Holds if the previous stage's `revFlow` includes `node` with access path `apa`. */
+ private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) {
+ PrevStage::revFlow(node, _, _, apa, config)
+ }
+
+ /**
+ * Gets `apa` itself: logically the identity, with both sides routed through
+ * `pragma[only_bind_into]`.
+ *
+ * NOTE(review): presumably this exists only to influence how the optimizer
+ * binds `apa` at call sites - confirm against the QL pragma documentation.
+ */
+ bindingset[result, apa]
+ private ApApprox unbindApa(ApApprox apa) {
+ exists(ApApprox apa0 |
+ apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0)
+ )
+ }
+
+ /**
+ * Holds if `ret` flows out of `call` to `out`, where the previous stage found
+ * that `call` may be flowed through in the reverse direction and that some
+ * parameter of the callable enclosing `ret` may flow through.
+ */
+ pragma[nomagic]
+ private predicate flowThroughOutOfCall(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and
+ PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _,
+ pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` is reachable with access path `ap` from a source in the
+ * configuration `config`.
+ *
+ * The call context `cc` records whether the node is reached through an
+ * argument in a call, and if so, `argAp` records the access path of that
+ * argument.
+ */
+ pragma[nomagic]
+ predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ fwdFlow0(node, cc, argAp, ap, config) and
+ // Prune: the approximation of `ap` must be in the previous stage's result
+ // for `node`, and `ap` must pass the stage-specific `filter`.
+ flowCand(node, unbindApa(getApprox(ap)), config) and
+ filter(node, ap)
+ }
+
+ /**
+ * The unpruned step relation behind `fwdFlow`: one disjunct per kind of step
+ * (source, local, jump, additional jump, store, read, into a callable, out of
+ * a callable).
+ */
+ pragma[nomagic]
+ private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ // source: starts with a nil access path and no call context
+ flowCand(node, _, config) and
+ sourceNode(node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ or
+ // local big step
+ exists(NodeEx mid, Ap ap0, LocalCc localCc |
+ fwdFlow(mid, cc, argAp, ap0, config) and
+ localCc = getLocalCc(mid, cc, config)
+ |
+ // value-preserving: the access path is unchanged
+ localStep(mid, node, true, _, config, localCc) and
+ ap = ap0
+ or
+ // not value-preserving: only allowed from a nil access path
+ localStep(mid, node, false, ap, config, localCc) and
+ ap0 instanceof ApNil
+ )
+ or
+ // jump step: the call context is reset
+ exists(NodeEx mid |
+ fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ jumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone()
+ )
+ or
+ // additional jump step: only from a nil access path, yielding a fresh nil
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ additionalJumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ )
+ or
+ // store
+ exists(TypedContent tc, Ap ap0 |
+ fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+ ap = apCons(tc, ap0)
+ )
+ or
+ // read
+ exists(Ap ap0, Content c |
+ fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+ fwdFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // flow into a callable
+ exists(ApApprox apa |
+ fwdFlowIn(_, node, _, cc, _, ap, config) and
+ apa = getApprox(ap) and
+ // the argument access path is only recorded if the previous stage found
+ // that the parameter may flow through
+ if PrevStage::parameterMayFlowThrough(node, _, apa, config)
+ then argAp = apSome(ap)
+ else argAp = apNone()
+ )
+ or
+ // flow out of a callable
+ fwdFlowOutNotFromArg(node, cc, argAp, ap, config)
+ or
+ // flow through a callable: out of a call that was entered via an argument,
+ // restoring the call context and argument access path from the entry
+ exists(DataFlowCall call, Ap argAp0 |
+ fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+ fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches `node1` with access path `ap1` and the
+ * previous stage has a store step candidate of `tc` from `node1` to `node2`
+ * that passes `typecheckStore`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowStore(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ exists(DataFlowType contentType |
+ fwdFlow(node1, cc, argAp, ap1, config) and
+ PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and
+ typecheckStore(ap1, contentType)
+ )
+ }
+
+ /**
+ * Holds if forward flow with access path `tail` reaches a store of `c`
+ * resulting in access path `cons`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(TypedContent tc |
+ // nodes, call context and argument access path of the store are irrelevant
+ fwdFlowStore(_, tail, tc, _, _, _, config) and
+ tc.getContent() = c and
+ cons = apCons(tc, tail)
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches `node1` with access path `ap` whose head
+ * content is `c`, and the previous stage has a read step candidate of `c`
+ * from `node1` to `node2`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowRead(
+ Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ fwdFlow(node1, cc, argAp, ap, config) and
+ PrevStage::readStepCand(node1, c, node2, config) and
+ getHeadContent(ap) = c
+ }
+
+ /**
+ * Holds if forward flow reaches an argument of `call` in call context
+ * `outercc` with access path `ap`, and that argument flows into parameter `p`
+ * under call context `innercc`. `argAp` is the argument access path recorded
+ * at the argument node.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIn(
+ DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+ Configuration config
+ ) {
+ exists(ArgNodeEx arg, boolean allowsFieldFlow |
+ fwdFlow(arg, outercc, argAp, ap, config) and
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc)
+ |
+ // non-nil access paths may only cross the call edge if it allows field flow
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches a return node in a `CcNoCall` context with
+ * access path `ap` and flows out of some call to `out`, where `ccOut` is the
+ * resulting call context.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOutNotFromArg(
+ NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(
+ DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc,
+ DataFlowCallable inner
+ |
+ fwdFlow(ret, innercc, argAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ inner = ret.getEnclosingCallable() and
+ ccOut = getCallContextReturn(inner, call, innercc)
+ |
+ // non-nil access paths may only cross the call edge if it allows field flow
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches a return node in a `CcCall` context matching
+ * `call`, with recorded argument access path `argAp` and current access path
+ * `ap`, and flows through out of `call` to `out`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOutFromArg(
+ DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc |
+ fwdFlow(ret, ccc, apSome(argAp), ap, config) and
+ flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ ccc.matchesCall(call)
+ |
+ // non-nil access paths may only cross the call edge if it allows field flow
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`
+ * and data might flow through the target callable and back out at `call`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(
+ DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p |
+ // `cc` and `argAp` describe the state at the argument, i.e. outside the call
+ fwdFlowIn(call, p, cc, _, argAp, ap, config) and
+ PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config)
+ )
+ }
+
+ /**
+ * Holds if forward flow has a store of `tc` from `node1` (access path `ap1`)
+ * to `node2` producing `ap2`, and forward flow also reaches a read of the
+ * same content with access path `ap2`.
+ */
+ pragma[nomagic]
+ private predicate storeStepFwd(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config
+ ) {
+ fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+ ap2 = apCons(tc, ap1) and
+ fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config)
+ }
+
+ /**
+ * Holds if forward flow has a read of `c` from `n1` (access path `ap1`) to
+ * `n2`, and `ap2` is a tail for which forward flow reaches a store of `c`
+ * resulting in `ap1`.
+ */
+ private predicate readStepFwd(
+ NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config
+ ) {
+ fwdFlowRead(ap1, c, n1, n2, _, _, config) and
+ fwdFlowConsCand(ap1, c, ap2, config)
+ }
+
+ /**
+ * Holds if forward flow enters `call` through an argument and flows back out
+ * of it to an `out` node that is itself reached by `fwdFlow` with the call
+ * context and argument access path that held at the entry.
+ */
+ pragma[nomagic]
+ private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) {
+ exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap |
+ fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
+ pragma[only_bind_into](config)) and
+ fwdFlowOutFromArg(call, out, argAp0, ap, config) and
+ fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
+ pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /**
+ * Holds if `arg` flows into parameter `p` at `call`, where `arg` is reached
+ * by forward flow, the previous stage found that `p` may flow through, and
+ * `call` may be flowed through in the forward direction.
+ */
+ pragma[nomagic]
+ private predicate flowThroughIntoCall(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and
+ callMayFlowThroughFwd(call, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` with access path `ap` is part of a path from a source to a
+ * sink in the configuration `config`.
+ *
+ * The Boolean `toReturn` records whether the node must be returned from the
+ * enclosing callable in order to reach a sink, and if so, `returnAp` records
+ * the access path of the returned value.
+ */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+ revFlow0(node, toReturn, returnAp, ap, config) and
+ // Prune: only keep tuples that forward flow also reaches.
+ fwdFlow(node, _, _, ap, config)
+ }
+
+ /**
+ * The unpruned step relation behind `revFlow`: one disjunct per kind of step
+ * (sink, local, jump, additional jump, store, read, into a callable, out of a
+ * callable), each followed backwards.
+ */
+ pragma[nomagic]
+ private predicate revFlow0(
+ NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ // sink: must be reached by forward flow with a nil access path
+ fwdFlow(node, _, _, ap, config) and
+ sinkNode(node, config) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ or
+ // value-preserving local big step: the access path is unchanged
+ exists(NodeEx mid |
+ localStep(node, mid, true, _, config, _) and
+ revFlow(mid, toReturn, returnAp, ap, config)
+ )
+ or
+ // non-value-preserving local big step: nil access paths on both sides
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ localStep(node, mid, false, _, config, _) and
+ revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+ ap instanceof ApNil
+ )
+ or
+ // jump step: the return state is reset
+ exists(NodeEx mid |
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, _, ap, config) and
+ toReturn = false and
+ returnAp = apNone()
+ )
+ or
+ // additional jump step: nil access paths on both sides
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ additionalJumpStep(node, mid, config) and
+ revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ )
+ or
+ // store
+ exists(Ap ap0, Content c |
+ revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+ revFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // read
+ exists(NodeEx mid, Ap ap0 |
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ readStepFwd(node, ap, _, mid, ap0, config)
+ )
+ or
+ // flow into a callable
+ revFlowInNotToReturn(node, returnAp, ap, config) and
+ toReturn = false
+ or
+ // flow through a callable: restore the return state that held at the call's output
+ exists(DataFlowCall call, Ap returnAp0 |
+ revFlowInToReturn(call, node, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ or
+ // flow out of a callable
+ revFlowOut(_, node, _, _, ap, config) and
+ toReturn = true and
+ // the returned access path is only recorded if forward flow reached `node`
+ // in a call context with a recorded argument access path
+ if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config)
+ then returnAp = apSome(ap)
+ else returnAp = apNone()
+ }
+
+ /**
+ * Holds if reverse flow reaches `mid` with access path `ap0`, and forward
+ * flow has a store of `tc` (with content `c`) from `node` with access path
+ * `ap` to `mid` producing `ap0`.
+ */
+ pragma[nomagic]
+ private predicate revFlowStore(
+ Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn,
+ ApOption returnAp, Configuration config
+ ) {
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ storeStepFwd(node, ap, tc, mid, ap0, config) and
+ tc.getContent() = c
+ }
+
+ /**
+ * Holds if reverse flow with access path `tail` reaches a read of `c`
+ * resulting in access path `cons`.
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(NodeEx mid, Ap tail0 |
+ revFlow(mid, _, _, tail, config) and
+ // `tail0` equals `tail`; the equality is stated through `only_bind_into`
+ tail = pragma[only_bind_into](tail0) and
+ readStepFwd(_, cons, c, mid, tail0, config)
+ )
+ }
+
+ /**
+ * Holds if reverse flow reaches an output node of `call` with access path
+ * `ap` and return state (`toReturn`, `returnAp`), and `ret` flows out of
+ * `call` to that node.
+ */
+ pragma[nomagic]
+ private predicate revFlowOut(
+ DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap,
+ Configuration config
+ ) {
+ exists(NodeEx out, boolean allowsFieldFlow |
+ revFlow(out, toReturn, returnAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config)
+ |
+ // non-nil access paths may only cross the call edge if it allows field flow
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if reverse flow reaches a parameter with `toReturn = false` and
+ * access path `ap`, and `arg` flows into that parameter at some call.
+ */
+ pragma[nomagic]
+ private predicate revFlowInNotToReturn(
+ ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, false, returnAp, ap, config) and
+ flowIntoCall(_, arg, p, allowsFieldFlow, config)
+ |
+ // non-nil access paths may only cross the call edge if it allows field flow
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if reverse flow reaches a parameter with `toReturn = true`, recorded
+ * return access path `returnAp` and access path `ap`, and `arg` flows through
+ * into that parameter at `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowInToReturn(
+ DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, true, apSome(returnAp), ap, config) and
+ flowThroughIntoCall(call, arg, p, allowsFieldFlow, config)
+ |
+ // non-nil access paths may only cross the call edge if it allows field flow
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if an output from `call` is reached in the flow covered by `revFlow`
+ * and data might flow through the target callable resulting in reverse flow
+ * reaching an argument of `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(
+ DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, CcCall ccc |
+ revFlowOut(call, ret, toReturn, returnAp, ap, config) and
+ // forward flow must reach `ret` via an argument, in a context matching `call`
+ fwdFlow(ret, ccc, apSome(_), ap, config) and
+ ccc.matchesCall(call)
+ )
+ }
+
+ /**
+ * Holds if the store of `tc` from `node1` (access path `ap1`) to `node2` with
+ * content type `contentType` is covered by reverse flow, and reverse flow
+ * also reaches a matching read.
+ */
+ pragma[nomagic]
+ predicate storeStepCand(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+ Configuration config
+ ) {
+ exists(Ap ap2, Content c |
+ store(node1, tc, node2, contentType, config) and
+ revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and
+ revFlowConsCand(ap2, c, ap1, config)
+ )
+ }
+
+ /**
+ * Holds if the read of `c` from `node1` to `node2` is covered by both
+ * forward and reverse flow, and reverse flow also reaches a matching store.
+ */
+ predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) {
+ exists(Ap ap1, Ap ap2 |
+ revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and
+ readStepFwd(node1, ap1, c, node2, ap2, config) and
+ revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _,
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /** Holds if `node` is part of `revFlow` for `config`, with any return state and access path. */
+ predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) }
+
+ /** Holds if forward flow (`storeStepFwd`) stores `tc` onto access path `ap`. */
+ private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepFwd(_, ap, tc, _, _, config)
+ }
+
+ /** Holds if some `storeStepCand` of this stage stores `tc` onto access path `ap`. */
+ predicate consCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepCand(_, ap, tc, _, _, config)
+ }
+
+ /**
+ * Holds if reverse flow reaches parameter `p` of callable `c` with access
+ * path `ap`, `toReturn = true`, and recorded return access path `ap0`.
+ */
+ pragma[noinline]
+ private predicate parameterFlow(
+ ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+ ) {
+ revFlow(p, true, apSome(ap0), ap, config) and
+ c = p.getEnclosingCallable()
+ }
+
+ /**
+ * Holds if parameter `p` of callable `c`, entered with access path `ap`, may
+ * flow to a return node of `c`: reverse flow links `p` to the return node's
+ * access path and forward flow reaches that return node with `ap` recorded
+ * as the argument access path. Returns that update the same parameter
+ * position are excluded.
+ */
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+ exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
+ parameterFlow(p, ap, ap0, c, config) and
+ c = ret.getEnclosingCallable() and
+ revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
+ pragma[only_bind_into](config)) and
+ fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
+ kind = ret.getKind() and
+ p.getPosition() = pos and
+ // we don't expect a parameter to return stored in itself
+ not kind.(ParamUpdateReturnKind).getPosition() = pos
+ )
+ }
+
+ /**
+ * Holds if reverse flow leaves `call` through an argument that is itself in
+ * `revFlow`, with the return state that held at the output of `call`.
+ */
+ pragma[nomagic]
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+ exists(Ap returnAp0, ArgNodeEx arg, boolean toReturn, ApOption returnAp, Ap ap |
+ revFlow(arg, toReturn, returnAp, ap, config) and
+ revFlowInToReturn(call, arg, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ }
+
+ /**
+ * Reports size statistics for this stage, for forward flow (`fwd = true`)
+ * and reverse flow (`fwd = false`):
+ * - `nodes`: the number of nodes in the flow relation,
+ * - `fields`: the number of typed contents among the cons candidates,
+ * - `conscand`: the number of (typed content, access path) cons candidates,
+ * - `tuples`: the number of tuples in the flow relation.
+ */
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+ fwd = true and
+ nodes = count(NodeEx node | fwdFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and
+ tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config))
+ or
+ fwd = false and
+ nodes = count(NodeEx node | revFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | consCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and
+ tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config))
+ }
+ /* End: Stage 3 logic. */
+}
+
+/**
+ * Holds if `argApf` is recorded as the summary context for flow reaching `node`
+ * and remains relevant for the following pruning stage.
+ */
+private predicate flowCandSummaryCtx(NodeEx node, AccessPathFront argApf, Configuration config) {
+ exists(AccessPathFront apf |
+ // `node` must be returned to reach a sink, and forward flow reaches it in a
+ // call context with `argApf` recorded as the argument access path front.
+ Stage3::revFlow(node, true, _, apf, config) and
+ Stage3::fwdFlow(node, any(Stage3::CcCall ccc), TAccessPathFrontSome(argApf), apf, config)
+ )
+}
+
+/**
+ * Holds if a length 2 access path approximation with the head `tc` is expected
+ * to be expensive.
+ */
+private predicate expensiveLen2unfolding(TypedContent tc, Configuration config) {
+ exists(int tails, int nodes, int apLimit, int tupleLimit |
+ // number of access path fronts that `tc` may be stored onto
+ tails = strictcount(AccessPathFront apf | Stage3::consCand(tc, apf, config)) and
+ // number of nodes where `tc` is the head of the access path front or of
+ // the summary context
+ nodes =
+ strictcount(NodeEx n |
+ Stage3::revFlow(n, _, _, any(AccessPathFrontHead apf | apf.getHead() = tc), config)
+ or
+ flowCandSummaryCtx(n, any(AccessPathFrontHead apf | apf.getHead() = tc), config)
+ ) and
+ accessPathApproxCostLimits(apLimit, tupleLimit) and
+ // both limits must be exceeded, and `tc` must not force high precision
+ apLimit < tails and
+ tupleLimit < (tails - 1) * nodes and
+ not tc.forceHighPrecision()
+ )
+}
+
+/**
+ * The representation of access path approximations:
+ * - `TNil(t)`: the empty access path with type `t`.
+ * - `TConsNil(tc, t)`: a single content `tc` followed by type `t`.
+ * - `TConsCons(tc1, tc2, len)`: a path of length `len` starting with `tc1`, `tc2`.
+ * - `TCons1(tc, len)`: a path of length `len` of which only the head `tc` is
+ * known; used exactly when `expensiveLen2unfolding(tc, _)` holds, in which
+ * case the two more precise forms are not created for that head.
+ */
+private newtype TAccessPathApprox =
+ TNil(DataFlowType t) or
+ TConsNil(TypedContent tc, DataFlowType t) {
+ Stage3::consCand(tc, TFrontNil(t), _) and
+ not expensiveLen2unfolding(tc, _)
+ } or
+ TConsCons(TypedContent tc1, TypedContent tc2, int len) {
+ Stage3::consCand(tc1, TFrontHead(tc2), _) and
+ len in [2 .. accessPathLimit()] and
+ not expensiveLen2unfolding(tc1, _)
+ } or
+ TCons1(TypedContent tc, int len) {
+ len in [1 .. accessPathLimit()] and
+ expensiveLen2unfolding(tc, _)
+ }
+
+/**
+ * Conceptually a list of `TypedContent`s followed by a `DataFlowType`, but only
+ * the first two elements of the list and its length are tracked. If data flows
+ * from a source to a given node with a given `AccessPathApprox`, this indicates
+ * the sequence of dereference operations needed to get from the value in the node
+ * to the tracked object. The final type indicates the type of the tracked object.
+ */
+abstract private class AccessPathApprox extends TAccessPathApprox {
+ /** Gets a textual representation of this access path approximation. */
+ abstract string toString();
+
+ /** Gets the first element of this access path, if it is non-empty. */
+ abstract TypedContent getHead();
+
+ /** Gets the length of this access path. */
+ abstract int len();
+
+ /**
+ * Gets the type of this access path: the tracked type for the empty path,
+ * and the container type of the head otherwise.
+ */
+ abstract DataFlowType getType();
+
+ /** Gets the access path front that corresponds to this access path. */
+ abstract AccessPathFront getFront();
+
+ /** Gets the access path obtained by popping `head` from this path, if any. */
+ abstract AccessPathApprox pop(TypedContent head);
+}
+
+/** The empty access path approximation, which only carries the tracked type. */
+private class AccessPathApproxNil extends AccessPathApprox, TNil {
+  private DataFlowType trackedType;
+
+  AccessPathApproxNil() { TNil(trackedType) = this }
+
+  /** Gets the tracked type of this empty path. */
+  override DataFlowType getType() { result = trackedType }
+
+  /** The empty path has length zero. */
+  override int len() { result = 0 }
+
+  /** The empty path has no head. */
+  override TypedContent getHead() { none() }
+
+  /** Nothing can be popped from the empty path. */
+  override AccessPathApprox pop(TypedContent head) { none() }
+
+  /** Gets the nil front with the same tracked type. */
+  override AccessPathFront getFront() { result = TFrontNil(trackedType) }
+
+  /** Gets the pretty-printed type prefixed by `": "`. */
+  override string toString() { result = concat(": " + ppReprType(trackedType)) }
+}
+
+/** A non-empty access path approximation; the common supertype of the `Cons` forms. */
+abstract private class AccessPathApproxCons extends AccessPathApprox { }
+
+/** An access path approximation of length 1: one content followed by the tracked type. */
+private class AccessPathApproxConsNil extends AccessPathApproxCons, TConsNil {
+  private TypedContent onlyContent;
+  private DataFlowType trackedType;
+
+  AccessPathApproxConsNil() { TConsNil(onlyContent, trackedType) = this }
+
+  /** Gets the single content of this path. */
+  override TypedContent getHead() { result = onlyContent }
+
+  /** This path always has length one. */
+  override int len() { result = 1 }
+
+  /** Gets the container type of the single content. */
+  override DataFlowType getType() { result = onlyContent.getContainerType() }
+
+  /** Gets the front headed by the single content. */
+  override AccessPathFront getFront() { result = TFrontHead(onlyContent) }
+
+  /** Popping the single content leaves the empty path with the tracked type. */
+  override AccessPathApprox pop(TypedContent head) {
+    result = TNil(trackedType) and
+    head = onlyContent
+  }
+
+  /** Gets the bracketed content followed by the pretty-printed type, if any. */
+  override string toString() {
+    exists(string bracketed, string typeSuffix |
+      bracketed = "[" + onlyContent.toString() + "]" and
+      // The `concat` becomes "" if `ppReprType` has no result.
+      typeSuffix = concat(" : " + ppReprType(trackedType)) and
+      result = bracketed + typeSuffix
+    )
+  }
+}
+
+/** An access path approximation of length at least 2 whose first two contents are known. */
+private class AccessPathApproxConsCons extends AccessPathApproxCons, TConsCons {
+  private TypedContent first;
+  private TypedContent second;
+  private int length;
+
+  AccessPathApproxConsCons() { TConsCons(first, second, length) = this }
+
+  /** Gets the first content of this path. */
+  override TypedContent getHead() { result = first }
+
+  /** Gets the recorded length of this path. */
+  override int len() { result = length }
+
+  /** Gets the container type of the first content. */
+  override DataFlowType getType() { result = first.getContainerType() }
+
+  /** Gets the front headed by the first content. */
+  override AccessPathFront getFront() { result = TFrontHead(first) }
+
+  /**
+   * Gets a path of length `length - 1` headed by the second content, in any
+   * of the representations that exist for it.
+   */
+  override AccessPathApprox pop(TypedContent head) {
+    head = first and
+    exists(int tailLength | tailLength = length - 1 |
+      result = TConsCons(second, _, tailLength)
+      or
+      result = TCons1(second, tailLength)
+      or
+      tailLength = 1 and
+      result = TConsNil(second, _)
+    )
+  }
+
+  /** Gets the first two contents in brackets, plus the length if it exceeds 2. */
+  override string toString() {
+    exists(string firstTwo | firstTwo = "[" + first.toString() + ", " + second.toString() |
+      length = 2 and
+      result = firstTwo + "]"
+      or
+      length != 2 and
+      result = firstTwo + ", ... (" + length.toString() + ")]"
+    )
+  }
+}
+
+/** A non-empty access path approximation of which only the head and the length are known. */
+private class AccessPathApproxCons1 extends AccessPathApproxCons, TCons1 {
+  private TypedContent first;
+  private int length;
+
+  AccessPathApproxCons1() { TCons1(first, length) = this }
+
+  /** Gets the only known content of this path. */
+  override TypedContent getHead() { result = first }
+
+  /** Gets the recorded length of this path. */
+  override int len() { result = length }
+
+  /** Gets the container type of the head. */
+  override DataFlowType getType() { result = first.getContainerType() }
+
+  /** Gets the front headed by the head content. */
+  override AccessPathFront getFront() { result = TFrontHead(first) }
+
+  /**
+   * Gets a possible tail of this path. The second element is not recorded, so
+   * candidates are taken from `Stage3::consCand` for the head.
+   */
+  override AccessPathApprox pop(TypedContent head) {
+    head = first and
+    (
+      // the tail is empty: only possible when this path has length 1
+      length = 1 and
+      exists(DataFlowType tailType |
+        Stage3::consCand(first, TFrontNil(tailType), _) and
+        result = TNil(tailType)
+      )
+      or
+      // the tail starts with some content `next` that `first` may be stored onto
+      exists(TypedContent next, int tailLength |
+        Stage3::consCand(first, TFrontHead(next), _) and
+        tailLength = length - 1
+      |
+        result = TConsCons(next, _, tailLength)
+        or
+        result = TCons1(next, tailLength)
+        or
+        tailLength = 1 and
+        result = TConsNil(next, _)
+      )
+    )
+  }
+
+  /** Gets the head in brackets, plus the length if it exceeds 1. */
+  override string toString() {
+    exists(string opening | opening = "[" + first.toString() |
+      length = 1 and
+      result = opening + "]"
+      or
+      length != 1 and
+      result = opening + ", ... (" + length.toString() + ")]"
+    )
+  }
+}
+
+/** Gets the access path obtained by popping `tc` from `apa`, if any. */
+private AccessPathApprox pop(TypedContent tc, AccessPathApprox apa) { result = apa.pop(tc) }
+
+/**
+ * Gets the access path obtained by pushing `tc` onto `apa`, that is, an access
+ * path from which popping `tc` yields `apa`.
+ */
+private AccessPathApprox push(TypedContent tc, AccessPathApprox apa) { apa = pop(tc, result) }
+
+/** An optional access path approximation: either none, or some `AccessPathApprox`. */
+private newtype TAccessPathApproxOption =
+ TAccessPathApproxNone() or
+ TAccessPathApproxSome(AccessPathApprox apa)
+
+/** An optional access path approximation. */
+private class AccessPathApproxOption extends TAccessPathApproxOption {
+  /** Gets `""` for the none value, and the wrapped path's text otherwise. */
+  string toString() {
+    exists(AccessPathApprox wrapped |
+      this = TAccessPathApproxSome(wrapped) and
+      result = wrapped.toString()
+    )
+    or
+    this = TAccessPathApproxNone() and
+    result = ""
+  }
+}
+
+private module Stage4 {
+ /** The preceding stage, whose results are used to prune this stage. */
+ module PrevStage = Stage3;
+
+ /** The access path representation used by the preceding stage. */
+ class ApApprox = PrevStage::Ap;
+
+ /** The access path representation used by this stage: access path approximations. */
+ class Ap = AccessPathApprox;
+
+ /** The empty access path of this stage. */
+ class ApNil = AccessPathApproxNil;
+
+ /** Gets the previous-stage approximation of `ap`: its access path front. */
+ private ApApprox getApprox(Ap ap) { result = ap.getFront() }
+
+ /**
+ * Gets the empty access path carrying the data-flow type of `node`, provided
+ * that `node` is part of the previous stage's `revFlow`.
+ */
+ private ApNil getApNil(NodeEx node) {
+ PrevStage::revFlow(node, _) and result = TNil(node.getDataFlowType())
+ }
+
+ /** Gets the access path obtained by pushing `tc` onto `tail`. */
+ bindingset[tc, tail]
+ private Ap apCons(TypedContent tc, Ap tail) { result = push(tc, tail) }
+
+ /** Gets the content of the head of `ap`, if `ap` has a head. */
+ pragma[noinline]
+ private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() }
+
+ /** An optional access path of this stage. */
+ class ApOption = AccessPathApproxOption;
+
+ /** Gets the option value that represents the absence of an access path. */
+ ApOption apNone() { result = TAccessPathApproxNone() }
+
+ /** Gets the option value that wraps `ap`. */
+ ApOption apSome(Ap ap) { result = TAccessPathApproxSome(ap) }
+
+ /** The call context of this stage. */
+ class Cc = CallContext;
+
+ /** A call context that represents flow entered through a call. */
+ class CcCall = CallContextCall;
+
+ /** A call context that does not represent flow entered through a call. */
+ class CcNoCall = CallContextNoCall;
+
+ /** Gets the call context used when no call context is recorded (`CallContextAny`). */
+ Cc ccNone() { result instanceof CallContextAny }
+
+ /** The local call context of this stage. */
+ private class LocalCc = LocalCallContext;
+
+ /**
+ * Gets the call context for flow into callable `c` at `call` from outer
+ * context `outercc`, which must pass `checkCallContextCall`. The result is
+ * `TSpecificCall(call)` if `recordDataFlowCallSite(call, c)` holds, and
+ * `TSomeCall()` otherwise.
+ */
+ bindingset[call, c, outercc]
+ private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
+ checkCallContextCall(outercc, call, c) and
+ if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall()
+ }
+
+ /**
+ * Gets the call context for flow out of callable `c` at `call` from inner
+ * context `innercc`, which must pass `checkCallContextReturn`. The result is
+ * `TReturn(c, call)` if `reducedViableImplInReturn(c, call)` holds, and
+ * `ccNone()` otherwise.
+ */
+ bindingset[call, c, innercc]
+ private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
+ checkCallContextReturn(innercc, c, call) and
+ if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
+ }
+
+ /**
+ * Gets the local call context derived from `cc` and the enclosing callable of
+ * `node`, provided that `node` is a local flow entry.
+ */
+ bindingset[node, cc, config]
+ private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
+ localFlowEntry(node, config) and
+ result =
+ getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
+ node.getEnclosingCallable())
+ }
+
+ /**
+ * Holds if there is a local big step from `node1` to `node2` in local call
+ * context `lcc`; the front of `ap` is passed to `localFlowBigStep`.
+ */
+ private predicate localStep(
+ NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+ ) {
+ localFlowBigStep(node1, node2, preservesValue, ap.getFront(), config, lcc)
+ }
+
+ /**
+ * Holds if `node1` flows out of `call` to `node2` according to
+ * `flowOutOfCallNodeCand2`, and both nodes are in the previous stage's `revFlow`.
+ */
+ pragma[nomagic]
+ private predicate flowOutOfCall(
+ DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and
+ PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node1` flows into parameter `node2` at `call` according to
+ * `flowIntoCallNodeCand2`, and both nodes are in the previous stage's `revFlow`.
+ */
+ pragma[nomagic]
+ private predicate flowIntoCall(
+ DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow,
+ Configuration config
+ ) {
+ flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and
+ PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config))
+ }
+
+ /** Holds for every `node` and `ap`: this stage applies no extra filtering. */
+ bindingset[node, ap]
+ private predicate filter(NodeEx node, Ap ap) { any() }
+
+ // Type checking is not necessary here as it has already been done in stage 3.
+ /** Holds for every `ap` and `contentType`: stores are not type checked in this stage. */
+ bindingset[ap, contentType]
+ private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() }
+
+ /* Begin: Stage 4 logic. */
+ /** Holds if the previous stage's `revFlow` includes `node` with access path `apa`. */
+ private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) {
+ PrevStage::revFlow(node, _, _, apa, config)
+ }
+
+ /**
+ * Gets `apa` itself: logically the identity, with both sides routed through
+ * `pragma[only_bind_into]`.
+ *
+ * NOTE(review): presumably this exists only to influence how the optimizer
+ * binds `apa` at call sites - confirm against the QL pragma documentation.
+ */
+ bindingset[result, apa]
+ private ApApprox unbindApa(ApApprox apa) {
+ exists(ApApprox apa0 |
+ apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0)
+ )
+ }
+
+ /**
+ * Holds if `ret` flows out of `call` to `out`, where the previous stage found
+ * that `call` may be flowed through in the reverse direction and that some
+ * parameter of the callable enclosing `ret` may flow through.
+ */
+ pragma[nomagic]
+ private predicate flowThroughOutOfCall(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and
+ PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _,
+ pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` is reachable with access path `ap` from a source in the
+ * configuration `config`.
+ *
+ * The call context `cc` records whether the node is reached through an
+ * argument in a call, and if so, `argAp` records the access path of that
+ * argument.
+ */
+ pragma[nomagic]
+ predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ fwdFlow0(node, cc, argAp, ap, config) and
+ // Prune: the approximation of `ap` must be in the previous stage's result
+ // for `node`, and `ap` must pass the stage-specific `filter`.
+ flowCand(node, unbindApa(getApprox(ap)), config) and
+ filter(node, ap)
+ }
+
+ /**
+ * The unpruned step relation behind `fwdFlow`: one disjunct per kind of step
+ * (source, local, jump, additional jump, store, read, into a callable, out of
+ * a callable).
+ */
+ pragma[nomagic]
+ private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ // source: starts with a nil access path and no call context
+ flowCand(node, _, config) and
+ sourceNode(node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ or
+ // local big step
+ exists(NodeEx mid, Ap ap0, LocalCc localCc |
+ fwdFlow(mid, cc, argAp, ap0, config) and
+ localCc = getLocalCc(mid, cc, config)
+ |
+ // value-preserving: the access path is unchanged
+ localStep(mid, node, true, _, config, localCc) and
+ ap = ap0
+ or
+ // not value-preserving: only allowed from a nil access path
+ localStep(mid, node, false, ap, config, localCc) and
+ ap0 instanceof ApNil
+ )
+ or
+ // jump step: the call context is reset
+ exists(NodeEx mid |
+ fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ jumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone()
+ )
+ or
+ // additional jump step: only from a nil access path, yielding a fresh nil
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ additionalJumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ )
+ or
+ // store
+ exists(TypedContent tc, Ap ap0 |
+ fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+ ap = apCons(tc, ap0)
+ )
+ or
+ // read
+ exists(Ap ap0, Content c |
+ fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+ fwdFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // flow into a callable
+ exists(ApApprox apa |
+ fwdFlowIn(_, node, _, cc, _, ap, config) and
+ apa = getApprox(ap) and
+ // the argument access path is only recorded if the previous stage found
+ // that the parameter may flow through
+ if PrevStage::parameterMayFlowThrough(node, _, apa, config)
+ then argAp = apSome(ap)
+ else argAp = apNone()
+ )
+ or
+ // flow out of a callable
+ fwdFlowOutNotFromArg(node, cc, argAp, ap, config)
+ or
+ // flow through a callable: out of a call that was entered via an argument,
+ // restoring the call context and argument access path from the entry
+ exists(DataFlowCall call, Ap argAp0 |
+ fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+ fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches `node1` with access path `ap1` and the
+ * previous stage has a store step candidate of `tc` from `node1` to `node2`
+ * that passes `typecheckStore`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowStore(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ exists(DataFlowType contentType |
+ fwdFlow(node1, cc, argAp, ap1, config) and
+ PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and
+ typecheckStore(ap1, contentType)
+ )
+ }
+
+ /**
+ * Holds if forward flow with access path `tail` reaches a store of `c`
+ * resulting in access path `cons`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(TypedContent tc |
+ // nodes, call context and argument access path of the store are irrelevant
+ fwdFlowStore(_, tail, tc, _, _, _, config) and
+ tc.getContent() = c and
+ cons = apCons(tc, tail)
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches `node1` with access path `ap` whose head
+ * content is `c`, and the previous stage has a read step candidate of `c`
+ * from `node1` to `node2`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowRead(
+ Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ fwdFlow(node1, cc, argAp, ap, config) and
+ PrevStage::readStepCand(node1, c, node2, config) and
+ getHeadContent(ap) = c
+ }
+
+ /**
+ * Holds if forward flow reaches an argument of `call` in call context
+ * `outercc` with access path `ap`, and that argument flows into parameter `p`
+ * under call context `innercc`. `argAp` is the argument access path recorded
+ * at the argument node.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIn(
+ DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+ Configuration config
+ ) {
+ exists(ArgNodeEx arg, boolean allowsFieldFlow |
+ fwdFlow(arg, outercc, argAp, ap, config) and
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc)
+ |
+ // non-nil access paths may only cross the call edge if it allows field flow
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches a return node in a `CcNoCall` context with
+ * access path `ap` and flows out of some call to `out`, where `ccOut` is the
+ * resulting call context.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOutNotFromArg(
+ NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(
+ DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc,
+ DataFlowCallable inner
+ |
+ fwdFlow(ret, innercc, argAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ inner = ret.getEnclosingCallable() and
+ ccOut = getCallContextReturn(inner, call, innercc)
+ |
+ // non-nil access paths may only cross the call edge if it allows field flow
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowOutFromArg(
+   DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config
+ ) {
+   exists(RetNodeEx ret, boolean fieldFlowOk, CcCall ccInside |
+     ccInside.matchesCall(call) and // the return node is reached in a context matching `call`
+     flowThroughOutOfCall(call, ret, out, fieldFlowOk, config) and
+     fwdFlow(ret, ccInside, apSome(argAp), ap, config)
+   |
+     fieldFlowOk = true or ap instanceof ApNil
+   )
+ }
+
+ /**
+  * Holds if forward flow reaches an argument of `call` with access path `ap`, and the
+  * previous stage found that the targeted parameter may flow through for that path.
+  */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(
+   DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+ ) {
+   exists(ParamNodeEx param |
+     PrevStage::parameterMayFlowThrough(param, _, unbindApa(getApprox(ap)), config) and
+     fwdFlowIn(call, param, cc, _, argAp, ap, config)
+   )
+ }
+
+ pragma[nomagic]
+ private predicate storeStepFwd(
+   NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config
+ ) {
+   apCons(tc, ap1) = ap2 and
+   fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+   fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) // the stored path is read somewhere in forward flow
+ }
+
+ private predicate readStepFwd(
+   NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config
+ ) {
+   fwdFlowConsCand(ap1, c, ap2, config) and // `ap1` arises from storing `c` onto `ap2`
+   fwdFlowRead(ap1, c, n1, n2, _, _, config)
+ }
+
+ pragma[nomagic]
+ private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) { // forward flow enters `call` via an argument and leaves it again
+ exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap |
+ fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
+ pragma[only_bind_into](config)) and
+ fwdFlowOutFromArg(call, out, argAp0, ap, config) and // `out` is reached by returning from `call`, entered with path `argAp0`
+ fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
+ pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
+ pragma[only_bind_into](config)) // the entry happened in the same (`cc`, `argAp`) in which `out` is reached
+ )
+ }
+
+ pragma[nomagic]
+ private predicate flowThroughIntoCall(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+ ) { // `flowIntoCall` restricted to edges that can take part in flow through `call`
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and // the argument is reached by forward flow
+ PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and // previous stage: `p` may flow through
+ callMayFlowThroughFwd(call, pragma[only_bind_into](config)) // forward flow both enters and leaves `call`
+ }
+
+ /**
+  * Holds if `node`, carrying access path `ap`, lies on some source-to-sink path
+  * under `config`, i.e. it is reached by both the reverse and the forward flow.
+  *
+  * `toReturn` is `true` when reaching a sink requires the value to be returned
+  * from the enclosing callable first; in that case `returnAp` holds the access
+  * path that the value has when it is returned.
+  */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+   fwdFlow(node, _, _, ap, config) and
+   revFlow0(node, toReturn, returnAp, ap, config)
+ }
+
+ pragma[nomagic]
+ private predicate revFlow0(
+ NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) { // one disjunct per kind of step, each followed backwards from a `revFlow` node
+ fwdFlow(node, _, _, ap, config) and // base case: `node` is a sink reached with an empty access path
+ sinkNode(node, config) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ or
+ exists(NodeEx mid | // local step with flag `true`: the access path is carried over unchanged
+ localStep(node, mid, true, _, config, _) and
+ revFlow(mid, toReturn, returnAp, ap, config)
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // local step with flag `false`: only between empty access paths
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ localStep(node, mid, false, _, config, _) and
+ revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+ ap instanceof ApNil
+ )
+ or
+ exists(NodeEx mid | // jump step: the return context of `mid` is discarded
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, _, ap, config) and
+ toReturn = false and
+ returnAp = apNone()
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // additional jump step: context discarded, empty access paths only
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ additionalJumpStep(node, mid, config) and
+ revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ )
+ or
+ // store: `node` (path `ap`) stores `c`, giving the path `ap0` that a matching read consumes
+ exists(Ap ap0, Content c |
+ revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+ revFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // read: a forward read step from `node` (path `ap`) to `mid` (path `ap0`)
+ exists(NodeEx mid, Ap ap0 |
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ readStepFwd(node, ap, _, mid, ap0, config)
+ )
+ or
+ // flow into a callable whose parameter does not need to be returned
+ revFlowInNotToReturn(node, returnAp, ap, config) and
+ toReturn = false
+ or
+ exists(DataFlowCall call, Ap returnAp0 | // flow into a callable that is also returned out of at `call`
+ revFlowInToReturn(call, node, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ or
+ // flow out of a callable: `node` is a return node and must be returned
+ revFlowOut(_, node, _, _, ap, config) and
+ toReturn = true and
+ if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) // reached in a call context with an argument path recorded
+ then returnAp = apSome(ap)
+ else returnAp = apNone()
+ }
+
+ pragma[nomagic]
+ private predicate revFlowStore(
+   Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn,
+   ApOption returnAp, Configuration config
+ ) {
+   c = tc.getContent() and
+   storeStepFwd(node, ap, tc, mid, ap0, config) and // store from `node` (path `ap`) into `mid` (path `ap0`)
+   revFlow(mid, toReturn, returnAp, ap0, config)
+ }
+
+ /**
+ * Holds if reverse flow with access path `tail` reaches the target of a read
+ * of `c` whose source carries access path `cons`.
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(NodeEx mid, Ap tail0 |
+ revFlow(mid, _, _, tail, config) and
+ tail = pragma[only_bind_into](tail0) and // `tail0` equals `tail`; the pragma is only a binding hint
+ readStepFwd(_, cons, c, mid, tail0, config)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revFlowOut(
+   DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap,
+   Configuration config
+ ) {
+   exists(NodeEx outNode, boolean fieldFlowOk |
+     flowOutOfCall(call, ret, outNode, fieldFlowOk, config) and
+     revFlow(outNode, toReturn, returnAp, ap, config) // reverse flow reaches the out node of `call`
+   |
+     fieldFlowOk = true or ap instanceof ApNil
+   )
+ }
+
+ pragma[nomagic]
+ private predicate revFlowInNotToReturn(
+   ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config
+ ) {
+   exists(ParamNodeEx param, boolean fieldFlowOk |
+     flowIntoCall(_, arg, param, fieldFlowOk, config) and
+     revFlow(param, false, returnAp, ap, config) // the parameter reaches a sink without being returned
+   |
+     fieldFlowOk = true or ap instanceof ApNil
+   )
+ }
+
+ pragma[nomagic]
+ private predicate revFlowInToReturn(
+   DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config
+ ) {
+   exists(ParamNodeEx param, boolean fieldFlowOk |
+     flowThroughIntoCall(call, arg, param, fieldFlowOk, config) and
+     revFlow(param, true, apSome(returnAp), ap, config) // the parameter must be returned with path `returnAp`
+   |
+     fieldFlowOk = true or ap instanceof ApNil
+   )
+ }
+
+ /**
+  * Holds if reverse flow steps from an output of `call` back to a return node
+  * (with `toReturn`, `returnAp` and access path `ap`), and forward flow reaches
+  * that return node in a call context matching `call` with an argument path recorded.
+  */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(
+   DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+   exists(RetNodeEx retNode, CcCall ccInside |
+     ccInside.matchesCall(call) and
+     fwdFlow(retNode, ccInside, apSome(_), ap, config) and
+     revFlowOut(call, retNode, toReturn, returnAp, ap, config)
+   )
+ }
+
+ pragma[nomagic]
+ predicate storeStepCand(
+   NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+   Configuration config
+ ) {
+   exists(Ap stored, Content content |
+     revFlowConsCand(stored, content, ap1, config) and // a matching read of `content` exists in reverse flow
+     revFlowStore(stored, content, ap1, node1, tc, node2, _, _, config) and
+     store(node1, tc, node2, contentType, config)
+   )
+ }
+
+ predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) { // read of `c` from `node1` to `node2` kept by this stage
+ exists(Ap ap1, Ap ap2 |
+ revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and // the read target is on a source-to-sink path
+ readStepFwd(node1, ap1, c, node2, ap2, config) and
+ revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _,
+ pragma[only_bind_into](config)) // some store of `c` turning `ap2` into `ap1` is in reverse flow
+ )
+ }
+
+ predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) } // `node` is in the 5-ary `revFlow` for some return context and access path
+
+ private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepFwd(_, ap, tc, _, _, config) // some forward store step pushes `tc` onto `ap`
+ }
+
+ predicate consCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepCand(_, ap, tc, _, _, config) // some store step kept by this stage pushes `tc` onto `ap`
+ }
+
+ pragma[noinline]
+ private predicate parameterFlow(
+   ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+ ) {
+   p.getEnclosingCallable() = c and
+   revFlow(p, true, apSome(ap0), ap, config) // `p` (path `ap`) must be returned with path `ap0`
+ }
+
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { // `p` of `c`, entered with path `ap`, may reach a return node of `c`
+ exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
+ parameterFlow(p, ap, ap0, c, config) and
+ c = ret.getEnclosingCallable() and
+ revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
+ pragma[only_bind_into](config)) and // `ret` carries the path `ap0` that `p` is returned with
+ fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and // forward flow reaches `ret` with argument path `ap`
+ kind = ret.getKind() and
+ p.getPosition() = pos and
+ // we don't expect a parameter to return stored in itself
+ not kind.(ParamUpdateReturnKind).getPosition() = pos
+ )
+ }
+
+ pragma[nomagic]
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+   exists(Ap innerRetAp, ArgNodeEx argument, boolean mustReturn, ApOption outerRetAp, Ap argPath |
+     revFlowIsReturned(call, mustReturn, outerRetAp, innerRetAp, config) and
+     revFlowInToReturn(call, argument, innerRetAp, argPath, config) and
+     revFlow(argument, mustReturn, outerRetAp, argPath, config) // the argument itself is in reverse flow
+   )
+ }
+
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { // size statistics of this stage, per direction
+ fwd = true and // forward pass
+ nodes = count(NodeEx node | fwdFlow(node, _, _, _, config)) and // distinct nodes
+ fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and // distinct typed contents stored
+ conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and // (content, tail) pairs
+ tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config)) // full `fwdFlow` tuples
+ or
+ fwd = false and // reverse pass
+ nodes = count(NodeEx node | revFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | consCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and
+ tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config)) // full `revFlow` tuples
+ }
+ /* End: Stage 4 logic. */
+}
+
+bindingset[conf, result]
+private Configuration unbindConf(Configuration conf) { // result equals `conf`; both sides must already be bound by the caller
+ exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c)) // NOTE(review): presumably a join-order device - confirm
+}
+
+private predicate nodeMayUseSummary(NodeEx n, AccessPathApprox apa, Configuration config) {
+  exists(DataFlowCallable callable, AccessPathApprox current |
+    n.getEnclosingCallable() = callable and
+    Stage4::parameterMayFlowThrough(_, callable, apa, _) and // note: configuration is unconstrained here
+    Stage4::fwdFlow(n, any(CallContextCall ccc), TAccessPathApproxSome(apa), current, config) and
+    Stage4::revFlow(n, true, _, current, config)
+  )
+}
+
+private newtype TSummaryCtx =
+ TSummaryCtxNone() or // no summary context
+ TSummaryCtxSome(ParamNodeEx p, AccessPath ap) { // entry through `p` with path `ap`
+ Stage4::parameterMayFlowThrough(p, _, ap.getApprox(), _) // only for parameters Stage 4 says may flow through
+ }
+
+/**
+ * A context for generating flow summaries. This represents flow entry through
+ * a specific parameter with an access path of a specific shape, or the absence
+ * of such an entry.
+ * Summaries are only created for parameters that may flow through.
+ */
+abstract private class SummaryCtx extends TSummaryCtx {
+ abstract string toString(); // textual representation, supplied by each concrete subclass
+}
+
+/** A summary context from which no flow summary can be generated. */
+private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone {
+ override string toString() { result = "" } // always the empty string
+}
+
+/** A summary context from which a flow summary can be generated. */
+private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome {
+ private ParamNodeEx p; // parameter through which flow entered
+ private AccessPath ap; // access path at entry
+
+ SummaryCtxSome() { this = TSummaryCtxSome(p, ap) }
+
+ int getParameterPos() { p.isParameterOf(_, result) } // position of `p` in its callable
+
+ override string toString() { result = p + ": " + ap } // parameter, then entry access path
+
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ p.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) // location of the parameter
+ }
+}
+
+/**
+ * Gets the number of distinct fronts among candidate tails of `apa`, one per length 2 unfolding.
+ */
+private int count1to2unfold(AccessPathApproxCons1 apa, Configuration config) {
+  exists(TypedContent head, int n |
+    n = apa.len() and
+    head = apa.getHead() and
+    result =
+      strictcount(AccessPathFront front |
+        Stage4::consCand(head, any(AccessPathApprox tl | tl.getFront() = front and tl.len() = n - 1),
+          config)
+      )
+  )
+}
+
+private int countNodesUsingAccessPath(AccessPathApprox apa, Configuration config) {
+  result =
+    strictcount(NodeEx node | // nodes in Stage 4 reverse flow with `apa`, or that may use a summary for it
+      nodeMayUseSummary(node, apa, config) or Stage4::revFlow(node, _, _, apa, config)
+    )
+}
+
+/**
+ * Holds if unfolding `apa` into length 2 access path approximations is
+ * expected to be expensive, i.e. both cost limits would be exceeded.
+ */
+private predicate expensiveLen1to2unfolding(AccessPathApproxCons1 apa, Configuration config) {
+  exists(int unfolded, int users, int maxAps, int maxTuples |
+    accessPathCostLimits(maxAps, maxTuples) and
+    unfolded = count1to2unfold(apa, config) and
+    users = countNodesUsingAccessPath(apa, config) and
+    unfolded > maxAps and
+    (unfolded - 1) * users > maxTuples
+  )
+}
+
+private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) {
+  exists(TypedContent popped |
+    Stage4::consCand(popped, result, config) and // keep only tails Stage 4 accepts under `popped`
+    result = apa.pop(popped)
+  )
+}
+
+/**
+ * Holds with `unfold = true` if the head of `apa` forces high precision. Otherwise holds with
+ * `unfold = false` if both cost limits would be exceeded, and with `unfold = true` if not.
+ */
+private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) {
+  if apa.getHead().forceHighPrecision()
+  then unfold = true
+  else
+    exists(int potential, int users, int maxAps, int maxTuples |
+      accessPathCostLimits(maxAps, maxTuples) and
+      potential = countPotentialAps(apa, config) and
+      users = countNodesUsingAccessPath(apa, config) and
+      if maxAps < potential and maxTuples < (potential - 1) * users then unfold = false else unfold = true
+    )
+}
+
+/**
+ * Gets the number of `AccessPath`s that correspond to `apa`.
+ */
+private int countAps(AccessPathApprox apa, Configuration config) {
+ evalUnfold(apa, false, config) and // not unfolded and no length 2 refinement: a single access path
+ result = 1 and
+ (not apa instanceof AccessPathApproxCons1 or expensiveLen1to2unfolding(apa, config))
+ or
+ evalUnfold(apa, false, config) and // not unfolded, but refined into length 2 approximations
+ result = count1to2unfold(apa, config) and
+ not expensiveLen1to2unfolding(apa, config)
+ or
+ evalUnfold(apa, true, config) and // unfolded into the precise head-tail representation
+ result = countPotentialAps(apa, config)
+}
+
+/**
+ * Gets the number of `AccessPath`s that would correspond to `apa` assuming
+ * that it is expanded to a precise head-tail representation.
+ */
+language[monotonicAggregates]
+private int countPotentialAps(AccessPathApprox apa, Configuration config) {
+ apa instanceof AccessPathApproxNil and result = 1 // the empty path has exactly one representation
+ or
+ result = strictsum(AccessPathApprox tail | tail = getATail(apa, config) | countAps(tail, config)) // sum over all candidate tails
+}
+
+private newtype TAccessPath =
+ TAccessPathNil(DataFlowType t) or // empty access path, tagged with a type
+ TAccessPathCons(TypedContent head, AccessPath tail) { // precise head-tail form
+ exists(AccessPathApproxCons apa |
+ not evalUnfold(apa, false, _) and // used unless `apa` is marked as not to be unfolded
+ head = apa.getHead() and
+ tail.getApprox() = getATail(apa, _)
+ )
+ } or
+ TAccessPathCons2(TypedContent head1, TypedContent head2, int len) { // first two contents plus total length
+ exists(AccessPathApproxCons apa |
+ evalUnfold(apa, false, _) and
+ not expensiveLen1to2unfolding(apa, _) and // the length 2 refinement is affordable
+ apa.len() = len and
+ head1 = apa.getHead() and
+ head2 = getATail(apa, _).getHead()
+ )
+ } or
+ TAccessPathCons1(TypedContent head, int len) { // first content plus total length
+ exists(AccessPathApproxCons apa |
+ evalUnfold(apa, false, _) and
+ expensiveLen1to2unfolding(apa, _) and // even the length 2 refinement is too expensive
+ apa.len() = len and
+ head = apa.getHead()
+ )
+ }
+
+private newtype TPathNode =
+ TPathNodeMid(NodeEx node, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config) {
+ // A PathNode is introduced by a source ...
+ Stage4::revFlow(node, config) and
+ sourceNode(node, config) and
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ ap = TAccessPathNil(node.getDataFlowType()) // sources start with an empty access path of the node's type
+ or
+ // ... or a step from an existing PathNode to another node.
+ exists(PathNodeMid mid |
+ pathStep(mid, node, cc, sc, ap) and
+ pragma[only_bind_into](config) = mid.getConfiguration() and
+ Stage4::revFlow(node, _, _, ap.getApprox(), pragma[only_bind_into](config)) // keep only what Stage 4 reverse flow allows
+ )
+ } or
+ TPathNodeSink(NodeEx node, Configuration config) {
+ sinkNode(node, pragma[only_bind_into](config)) and
+ Stage4::revFlow(node, pragma[only_bind_into](config)) and
+ (
+ // A sink that is also a source ...
+ sourceNode(node, config)
+ or
+ // ... or a sink that can be reached from a source
+ exists(PathNodeMid mid |
+ pathStep(mid, node, _, _, TAccessPathNil(_)) and // the sink is reached with an empty access path
+ pragma[only_bind_into](config) = mid.getConfiguration()
+ )
+ )
+ }
+
+/**
+ * A list of `TypedContent`s followed by a `DataFlowType`. If data flows from a
+ * source to a given node with a given `AccessPath`, this indicates the sequence
+ * of dereference operations needed to get from the value in the node to the
+ * tracked object. The final type indicates the type of the tracked object.
+ */
+abstract private class AccessPath extends TAccessPath {
+ /** Gets the head of this access path, if any. */
+ abstract TypedContent getHead();
+
+ /** Gets the tail of this access path, if any. */
+ abstract AccessPath getTail();
+
+ /** Gets the front of this access path. */
+ abstract AccessPathFront getFront();
+
+ /** Gets the approximation of this access path. */
+ abstract AccessPathApprox getApprox();
+
+ /** Gets the length of this access path. */
+ abstract int length();
+
+ /** Gets a textual representation of this access path. */
+ abstract string toString();
+
+ /** Gets the access path obtained by popping `tc` from this access path, if any. */
+ final AccessPath pop(TypedContent tc) {
+ result = this.getTail() and // has a result only when both a tail and a head exist
+ tc = this.getHead()
+ }
+
+ /** Gets the access path obtained by pushing `tc` onto this access path. */
+ final AccessPath push(TypedContent tc) { this = result.pop(tc) } // i.e. popping `tc` from the result gives `this`
+}
+
+private class AccessPathNil extends AccessPath, TAccessPathNil { // the empty access path, carrying only a type
+ private DataFlowType t;
+
+ AccessPathNil() { this = TAccessPathNil(t) }
+
+ DataFlowType getType() { result = t } // the type stored in this empty path
+
+ override TypedContent getHead() { none() } // an empty path has no head
+
+ override AccessPath getTail() { none() } // ... and no tail
+
+ override AccessPathFrontNil getFront() { result = TFrontNil(t) }
+
+ override AccessPathApproxNil getApprox() { result = TNil(t) }
+
+ override int length() { result = 0 }
+
+ override string toString() { result = concat(": " + ppReprType(t)) } // `concat` gives "" when `ppReprType(t)` has no result
+}
+
+private class AccessPathCons extends AccessPath, TAccessPathCons { // precise form: an explicit head and an explicit tail
+ private TypedContent head;
+ private AccessPath tail;
+
+ AccessPathCons() { this = TAccessPathCons(head, tail) }
+
+ override TypedContent getHead() { result = head }
+
+ override AccessPath getTail() { result = tail }
+
+ override AccessPathFrontHead getFront() { result = TFrontHead(head) }
+
+ override AccessPathApproxCons getApprox() {
+ result = TConsNil(head, tail.(AccessPathNil).getType()) // length 1: head plus the type of the empty tail
+ or
+ result = TConsCons(head, tail.getHead(), this.length()) // first two contents plus length
+ or
+ result = TCons1(head, this.length()) // first content plus length
+ }
+
+ override int length() { result = 1 + tail.length() }
+
+ private string toStringImpl(boolean needsSuffix) { // text after the opening "["; `needsSuffix` = caller must append the length and ")]"
+ exists(DataFlowType t |
+ tail = TAccessPathNil(t) and // last element: close the bracket and append the type, if printable
+ needsSuffix = false and
+ result = head.toString() + "]" + concat(" : " + ppReprType(t))
+ )
+ or
+ result = head + ", " + tail.(AccessPathCons).toStringImpl(needsSuffix) // precise tail: recurse
+ or
+ exists(TypedContent tc2, TypedContent tc3, int len | tail = TAccessPathCons2(tc2, tc3, len) |
+ result = head + ", " + tc2 + ", " + tc3 + ", ... (" and len > 2 and needsSuffix = true
+ or
+ result = head + ", " + tc2 + ", " + tc3 + "]" and len = 2 and needsSuffix = false
+ )
+ or
+ exists(TypedContent tc2, int len | tail = TAccessPathCons1(tc2, len) |
+ result = head + ", " + tc2 + ", ... (" and len > 1 and needsSuffix = true
+ or
+ result = head + ", " + tc2 + "]" and len = 1 and needsSuffix = false
+ )
+ }
+
+ override string toString() {
+ result = "[" + this.toStringImpl(true) + length().toString() + ")]" // elided form: ends with the total length
+ or
+ result = "[" + this.toStringImpl(false) // complete form: already closed by `toStringImpl`
+ }
+}
+
+private class AccessPathCons2 extends AccessPath, TAccessPathCons2 { // only the first two contents and the length are known
+ private TypedContent head1;
+ private TypedContent head2;
+ private int len;
+
+ AccessPathCons2() { this = TAccessPathCons2(head1, head2, len) }
+
+ override TypedContent getHead() { result = head1 }
+
+ override AccessPath getTail() { // any path of length `len - 1` headed by `head2` that Stage 4 accepts under `head1`
+ Stage4::consCand(head1, result.getApprox(), _) and
+ result.getHead() = head2 and
+ result.length() = len - 1
+ }
+
+ override AccessPathFrontHead getFront() { result = TFrontHead(head1) }
+
+ override AccessPathApproxCons getApprox() {
+ result = TConsCons(head1, head2, len) or
+ result = TCons1(head1, len)
+ }
+
+ override int length() { result = len }
+
+ override string toString() {
+ if len = 2 // both elements are known: print the path in full
+ then result = "[" + head1.toString() + ", " + head2.toString() + "]"
+ else
+ result = "[" + head1.toString() + ", " + head2.toString() + ", ... (" + len.toString() + ")]"
+ }
+}
+
+private class AccessPathCons1 extends AccessPath, TAccessPathCons1 { // only the first content and the length are known
+ private TypedContent head;
+ private int len;
+
+ AccessPathCons1() { this = TAccessPathCons1(head, len) }
+
+ override TypedContent getHead() { result = head }
+
+ override AccessPath getTail() { // any path of length `len - 1` that Stage 4 accepts under `head`
+ Stage4::consCand(head, result.getApprox(), _) and result.length() = len - 1
+ }
+
+ override AccessPathFrontHead getFront() { result = TFrontHead(head) }
+
+ override AccessPathApproxCons getApprox() { result = TCons1(head, len) }
+
+ override int length() { result = len }
+
+ override string toString() {
+ if len = 1 // the single element is known: print the path in full
+ then result = "[" + head.toString() + "]"
+ else result = "[" + head.toString() + ", ... (" + len.toString() + ")]"
+ }
+}
+
+/**
+ * A `Node` augmented with a call context (except for sinks), an access path, and a configuration.
+ * Only those `PathNode`s that are reachable from a source are generated.
+ */
+class PathNode extends TPathNode {
+ /** Gets a textual representation of this element. */
+ string toString() { none() } // placeholder; overridden in `PathNodeImpl`
+
+ /**
+ * Gets a textual representation of this element, including a textual
+ * representation of the call context.
+ */
+ string toStringWithContext() { none() } // placeholder; overridden in `PathNodeImpl`
+
+ /**
+ * Holds if this element is at the specified location.
+ * The location spans column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in file `filepath`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ none() // placeholder; overridden in `PathNodeImpl`
+ }
+
+ /** Gets the underlying `Node`. */
+ final Node getNode() { this.(PathNodeImpl).getNodeEx().projectToNode() = result }
+
+ /** Gets the associated configuration. */
+ Configuration getConfiguration() { none() } // placeholder; overridden in `PathNodeMid` and `PathNodeSink`
+
+ private PathNode getASuccessorIfHidden() { // one raw successor step taken from a hidden node
+ this.(PathNodeImpl).isHidden() and
+ result = this.(PathNodeImpl).getASuccessorImpl()
+ }
+
+ /** Gets a successor of this node, if any. */
+ final PathNode getASuccessor() {
+ result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*() and // skip over chains of hidden nodes
+ not this.(PathNodeImpl).isHidden() and // neither end point of the edge may be hidden
+ not result.(PathNodeImpl).isHidden()
+ }
+
+ /** Holds if this node is a source. */
+ predicate isSource() { none() } // placeholder; overridden in `PathNodeMid` and `PathNodeSink`
+}
+
+abstract private class PathNodeImpl extends PathNode { // internal base class supplying the printing and location logic
+ abstract PathNode getASuccessorImpl(); // raw successor relation, before hidden nodes are skipped
+
+ abstract NodeEx getNodeEx(); // the wrapped `NodeEx`
+
+ predicate isHidden() {
+ hiddenNode(this.getNodeEx().asNode()) and // hidden nodes stay visible when they are sources or sinks
+ not this.isSource() and
+ not this instanceof PathNodeSink
+ or
+ this.getNodeEx() instanceof TNodeImplicitRead // implicit read nodes are always hidden
+ }
+
+ private string ppAp() { // access path suffix for `toString`; empty for sinks and for paths that print as ""
+ this instanceof PathNodeSink and result = ""
+ or
+ exists(string s | s = this.(PathNodeMid).getAp().toString() |
+ if s = "" then result = "" else result = " " + s
+ )
+ }
+
+ private string ppCtx() { // call context suffix in angle brackets; empty for sinks
+ this instanceof PathNodeSink and result = ""
+ or
+ result = " <" + this.(PathNodeMid).getCallContext().toString() + ">"
+ }
+
+ override string toString() { result = this.getNodeEx().toString() + ppAp() }
+
+ override string toStringWithContext() { result = this.getNodeEx().toString() + ppAp() + ppCtx() }
+
+ override predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) // location of the wrapped node
+ }
+}
+
+/** Holds if `n` is a sink node or has a successor from which a sink is reachable. */
+private predicate directReach(PathNode n) {
+  directReach(n.getASuccessor()) or n instanceof PathNodeSink
+}
+
+/** Holds if `n` is used in a subpath (`Subpaths::retReach`) or can reach a sink. */
+private predicate reach(PathNode n) { Subpaths::retReach(n) or directReach(n) }
+
+/** Holds if `n2` is a successor of `n1` and a sink is reachable from `n2`. */
+private predicate pathSucc(PathNode n1, PathNode n2) { directReach(n2) and n2 = n1.getASuccessor() }
+
+private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) // transitive closure of `pathSucc`
+
+/**
+ * Provides the query predicates needed to include a graph in a path-problem query.
+ */
+module PathGraph {
+ /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
+ query predicate edges(PathNode a, PathNode b) { a.getASuccessor() = b and reach(b) } // only edges into nodes kept by `reach`
+
+ /** Holds if `n` is a node in the graph of data flow path explanations. */
+ query predicate nodes(PathNode n, string key, string val) {
+ reach(n) and key = "semmle.label" and val = n.toString() // the label is the node's `toString()`
+ }
+
+ query predicate subpaths = Subpaths::subpaths/4; // alias of `Subpaths::subpaths`
+}
+
+/**
+ * An intermediate flow graph node. This is a tuple consisting of a `NodeEx`, a
+ * `CallContext`, a `SummaryCtx`, an `AccessPath`, and a `Configuration`.
+ */
+private class PathNodeMid extends PathNodeImpl, TPathNodeMid {
+ NodeEx node;
+ CallContext cc;
+ SummaryCtx sc;
+ AccessPath ap;
+ Configuration config;
+
+ PathNodeMid() { this = TPathNodeMid(node, cc, sc, ap, config) }
+
+ override NodeEx getNodeEx() { result = node }
+
+ CallContext getCallContext() { result = cc }
+
+ SummaryCtx getSummaryCtx() { result = sc }
+
+ AccessPath getAp() { result = ap }
+
+ override Configuration getConfiguration() { result = config }
+
+ private PathNodeMid getSuccMid() { // an intermediate node one `pathStep` away, under the same configuration
+ pathStep(this, result.getNodeEx(), result.getCallContext(), result.getSummaryCtx(),
+ result.getAp()) and
+ result.getConfiguration() = unbindConf(this.getConfiguration())
+ }
+
+ override PathNodeImpl getASuccessorImpl() {
+ // an intermediate step to another intermediate node
+ result = getSuccMid()
+ or
+ // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges
+ exists(PathNodeMid mid, PathNodeSink sink |
+ mid = getSuccMid() and
+ mid.getNodeEx() = sink.getNodeEx() and // same underlying node as the sink
+ mid.getAp() instanceof AccessPathNil and // sinks are only reached with an empty access path
+ sink.getConfiguration() = unbindConf(mid.getConfiguration()) and
+ result = sink
+ )
+ }
+
+ override predicate isSource() { // same conditions as the source case of `TPathNodeMid`
+ sourceNode(node, config) and
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ ap instanceof AccessPathNil
+ }
+}
+
+/**
+ * A flow graph node corresponding to a sink. This is disjoint from the
+ * intermediate nodes in order to uniquely correspond to a given sink by
+ * excluding the `CallContext`.
+ */
+private class PathNodeSink extends PathNodeImpl, TPathNodeSink {
+ NodeEx node;
+ Configuration config;
+
+ PathNodeSink() { this = TPathNodeSink(node, config) }
+
+ override NodeEx getNodeEx() { result = node }
+
+ override Configuration getConfiguration() { result = config }
+
+ override PathNode getASuccessorImpl() { none() } // a sink node has no successors
+
+ override predicate isSource() { sourceNode(node, config) } // holds when the sink node is also a source
+}
+
+/**
+ * Holds if data may flow from `mid` to `node`, arriving with summary context `sc` and access
+ * path `ap`. The last step in or out of a callable is recorded by `cc`.
+ */
+private predicate pathStep(
+ PathNodeMid mid, NodeEx node, CallContext cc, SummaryCtx sc, AccessPath ap
+) {
+ exists(AccessPath ap0, NodeEx midnode, Configuration conf, LocalCallContext localCC | // local big-step: contexts are kept
+ midnode = mid.getNodeEx() and
+ conf = mid.getConfiguration() and
+ cc = mid.getCallContext() and
+ sc = mid.getSummaryCtx() and
+ localCC =
+ getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
+ midnode.getEnclosingCallable()) and
+ ap0 = mid.getAp()
+ |
+ localFlowBigStep(midnode, node, true, _, conf, localCC) and // flag `true`: the access path is unchanged
+ ap = ap0
+ or
+ localFlowBigStep(midnode, node, false, ap.getFront(), conf, localCC) and // flag `false`: only from an empty access path
+ ap0 instanceof AccessPathNil
+ )
+ or
+ jumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and // jump step: both contexts are reset
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ ap = mid.getAp()
+ or
+ additionalJumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and // additional jump step: empty access paths only
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ mid.getAp() instanceof AccessPathNil and
+ ap = TAccessPathNil(node.getDataFlowType())
+ or
+ exists(TypedContent tc | pathStoreStep(mid, node, ap.pop(tc), tc, cc)) and // store: `ap` is `mid`'s path with `tc` pushed
+ sc = mid.getSummaryCtx()
+ or
+ exists(TypedContent tc | pathReadStep(mid, node, ap.push(tc), tc, cc)) and // read: `ap` is `mid`'s path with `tc` popped
+ sc = mid.getSummaryCtx()
+ or
+ pathIntoCallable(mid, node, _, cc, sc, _) and ap = mid.getAp() // argument to parameter
+ or
+ pathOutOfCallable(mid, node, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone // return to out node
+ or
+ pathThroughCallable(mid, node, cc, ap) and sc = mid.getSummaryCtx() // flow through a call
+}
+
+pragma[nomagic]
+private predicate pathReadStep(
+  PathNodeMid mid, NodeEx node, AccessPath ap0, TypedContent tc, CallContext cc
+) {
+  mid.getCallContext() = cc and
+  mid.getAp() = ap0 and
+  ap0.getHead() = tc and // the content read must be the head of `mid`'s access path
+  Stage4::readStepCand(mid.getNodeEx(), tc.getContent(), node, mid.getConfiguration())
+}
+
+pragma[nomagic]
+private predicate pathStoreStep(
+  PathNodeMid mid, NodeEx node, AccessPath ap0, TypedContent tc, CallContext cc
+) {
+  mid.getCallContext() = cc and
+  mid.getAp() = ap0 and // `ap0` is the access path before `tc` is stored
+  Stage4::storeStepCand(mid.getNodeEx(), _, tc, node, _, mid.getConfiguration())
+}
+
+private predicate pathOutOfCallable0(
+  PathNodeMid mid, ReturnPosition pos, CallContext innercc, AccessPathApprox apa,
+  Configuration config
+) {
+  mid.getConfiguration() = config and
+  mid.getAp().getApprox() = apa and
+  mid.getCallContext() = innercc and
+  innercc instanceof CallContextNoCall and // only contexts that do not record an enclosing call
+  mid.getNodeEx().(RetNodeEx).getReturnPosition() = pos
+}
+
+pragma[nomagic]
+private predicate pathOutOfCallable1(
+  PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, AccessPathApprox apa,
+  Configuration config
+) {
+  exists(ReturnPosition retPos, DataFlowCallable callee, CallContext ccInside |
+    pathOutOfCallable0(mid, retPos, ccInside, apa, config) and
+    callee = retPos.getCallable() and
+    kind = retPos.getKind() and
+    resolveReturn(ccInside, callee, call)
+  |
+    if reducedViableImplInReturn(callee, call) then cc = TReturn(callee, call) else cc = TAnyCallContext()
+  )
+}
+
+pragma[noinline]
+private NodeEx getAnOutNodeFlow(
+  ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config
+) {
+  Stage4::revFlow(result, _, _, apa, config) and // only out nodes that Stage 4 reverse flow reaches with `apa`
+  kind.getAnOutNode(call) = result.asNode()
+}
+
+/**
+ * Holds if `mid` is at a return node from which data may flow to `out`, an out node
+ * of some call. The call context after returning is `cc`.
+ */
+pragma[noinline]
+private predicate pathOutOfCallable(PathNodeMid mid, NodeEx out, CallContext cc) {
+  exists(ReturnKindExt retKind, DataFlowCall site, AccessPathApprox approx, Configuration conf |
+    getAnOutNodeFlow(retKind, site, approx, conf) = out and
+    pathOutOfCallable1(mid, site, retKind, cc, approx, conf)
+  )
+}
+
+/**
+ * Holds if `mid`, in context `cc` with path `ap` (approximated by `apa`), is the `i`th argument of `call`.
+ */
+pragma[noinline]
+private predicate pathIntoArg(
+  PathNodeMid mid, int i, CallContext cc, DataFlowCall call, AccessPath ap, AccessPathApprox apa
+) {
+  exists(ArgNode argument |
+    argument.argumentOf(call, i) and
+    mid.getNodeEx().asNode() = argument and
+    mid.getCallContext() = cc and
+    mid.getAp() = ap and
+    ap.getApprox() = apa
+  )
+}
+
+pragma[noinline]
+private predicate parameterCand(
+  DataFlowCallable callable, int i, AccessPathApprox apa, Configuration config
+) {
+  exists(ParamNodeEx param |
+    param.isParameterOf(callable, i) and
+    Stage4::revFlow(param, _, _, apa, config) // the `i`th parameter is in Stage 4 reverse flow with `apa`
+  )
+}
+
+pragma[nomagic]
+private predicate pathIntoCallable0(
+ PathNodeMid mid, DataFlowCallable callable, int i, CallContext outercc, DataFlowCall call,
+ AccessPath ap
+) { // `mid` is the `i`th argument of `call`, which resolves to `callable` in context `outercc`
+ exists(AccessPathApprox apa |
+ pathIntoArg(mid, i, outercc, call, ap, apa) and
+ callable = resolveCall(call, outercc) and
+ parameterCand(callable, any(int j | j <= i and j >= i), apa, mid.getConfiguration()) // the `any(...)` equals `i`; NOTE(review): presumably a join-order device - confirm
+ )
+}
+
+/**
+ * Holds if flow from `mid` may enter the parameter `p` via `call`, where
+ * `outercc` is the call context at `mid` and `innercc` is the context used
+ * inside the callee; `sc` is the summary context chosen for `p`.
+ */
+private predicate pathIntoCallable(
+  PathNodeMid mid, ParamNodeEx p, CallContext outercc, CallContextCall innercc, SummaryCtx sc,
+  DataFlowCall call
+) {
+  exists(int pos, DataFlowCallable target, AccessPath ap |
+    p.isParameterOf(target, pos) and
+    pathIntoCallable0(mid, target, pos, outercc, call, ap) and
+    (
+      not exists(TSummaryCtxSome(p, ap)) and
+      sc = TSummaryCtxNone()
+      or
+      sc = TSummaryCtxSome(p, ap)
+    )
+  |
+    if recordDataFlowCallSite(call, target)
+    then innercc = TSpecificCall(call)
+    else innercc = TSomeCall()
+  )
+}
+
+/** Holds if a return node of kind `kind` is reached with summary context `sc` and path `ap`. */
+pragma[nomagic]
+private predicate paramFlowsThrough(
+  ReturnKindExt kind, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, AccessPathApprox apa,
+  Configuration config
+) {
+  exists(PathNodeMid mid, RetNodeEx retNode, int paramPos |
+    retNode = mid.getNodeEx() and
+    kind = retNode.getKind() and
+    config = mid.getConfiguration() and
+    cc = mid.getCallContext() and
+    sc = mid.getSummaryCtx() and
+    paramPos = sc.getParameterPos() and
+    ap = mid.getAp() and
+    apa = ap.getApprox() and
+    not kind.(ParamUpdateReturnKind).getPosition() = paramPos // not an update of that same parameter
+  )
+}
+
+pragma[nomagic]
+private predicate pathThroughCallable0(
+  DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, CallContext cc, AccessPath ap,
+  AccessPathApprox apa
+) {
+  exists(CallContext calleeCc, SummaryCtx summary | // shared by the entry side and the return side
+    paramFlowsThrough(kind, calleeCc, summary, ap, apa, unbindConf(mid.getConfiguration())) and
+    pathIntoCallable(mid, _, cc, calleeCc, summary, call)
+  )
+}
+
+/**
+ * Holds if flow from `mid` may pass through a callable and come back out at `out`
+ * with access path `ap`; `cc` is the call context that `mid` had before the call.
+ */
+pragma[noinline]
+private predicate pathThroughCallable(PathNodeMid mid, NodeEx out, CallContext cc, AccessPath ap) {
+  exists(DataFlowCall site, ReturnKindExt retKind, AccessPathApprox approx |
+    out = getAnOutNodeFlow(retKind, site, approx, unbindConf(mid.getConfiguration())) and
+    pathThroughCallable0(site, mid, retKind, cc, ap, approx)
+  )
+}
+
+private module Subpaths {
+  /**
+   * Holds if flow from `arg` enters `par` with contexts `innercc`/`sc`, goes through the callable
+   * to `out` with path `apout`, and a return of kind `kind` is reached with those contexts.
+   */
+  pragma[nomagic]
+  private predicate subpaths01(
+    PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
+    NodeEx out, AccessPath apout
+  ) {
+    pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and
+    pathIntoCallable(arg, par, _, innercc, sc, _) and
+    paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _,
+      unbindConf(arg.getConfiguration()))
+  }
+
+  /**
+   * Holds if `subpaths01` holds for the same arguments and, in addition, `out`
+   * is an out node of kind `kind` for some call.
+   */
+  pragma[nomagic]
+  private predicate subpaths02(
+    PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
+    NodeEx out, AccessPath apout
+  ) {
+    subpaths01(arg, par, sc, innercc, kind, out, apout) and
+    out.asNode() = kind.getAnOutNode(_)
+  }
+
+  pragma[nomagic]
+  private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() } // nomagic wrapper
+
+  /**
+   * Holds if `(arg, par, ret, out)` forms a subpath-tuple; `ret` must not be hidden.
+   */
+  pragma[nomagic]
+  private predicate subpaths03(
+    PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, AccessPath apout
+  ) {
+    exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode |
+      subpaths02(arg, par, sc, innercc, kind, out, apout) and
+      ret.getNodeEx() = retnode and
+      kind = retnode.getKind() and
+      innercc = ret.getCallContext() and
+      sc = ret.getSummaryCtx() and
+      ret.getConfiguration() = unbindConf(getPathNodeConf(arg)) and
+      apout = ret.getAp() and
+      not ret.isHidden()
+    )
+  }
+
+  /**
+   * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
+   * a subpath between `par` and `ret` with the connecting edges `arg -> par` and
+   * `ret -> out` is summarized as the edge `arg -> out`.
+   */
+  predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeMid ret, PathNodeMid out) {
+    exists(ParamNodeEx p, NodeEx o, AccessPath apout |
+      pragma[only_bind_into](arg).getASuccessor() = par and // both `par` and `out` are successors of `arg`
+      pragma[only_bind_into](arg).getASuccessor() = out and
+      subpaths03(arg, p, ret, o, apout) and
+      par.getNodeEx() = p and
+      out.getNodeEx() = o and
+      out.getAp() = apout
+    )
+  }
+
+  /**
+   * Holds if `n` is the `ret` of a subpath-tuple, or has a successor satisfying `retReach` that is not a `par`.
+   */
+  predicate retReach(PathNode n) {
+    subpaths(_, _, n, _)
+    or
+    exists(PathNode mid |
+      retReach(mid) and
+      n.getASuccessor() = mid and
+      not subpaths(_, mid, _, _)
+    )
+  }
+}
+
+/**
+ * Holds if the path node `flowsource` is a source of `configuration` whose
+ * underlying node is `source`, the path node `flowsink` has the underlying
+ * node `sink`, and `flowsink` is either `flowsource` itself or reachable
+ * from it through `pathSuccPlus`.
+ */
+private predicate flowsTo(
+  PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration
+) {
+  configuration = flowsource.getConfiguration() and
+  flowsource.isSource() and
+  source = flowsource.(PathNodeImpl).getNodeEx().asNode() and
+  sink = flowsink.getNodeEx().asNode() and
+  (pathSuccPlus(flowsource, flowsink) or flowsink = flowsource)
+}
+
+/**
+ * Holds if data can flow (inter-procedurally) from `source` to `sink`.
+ *
+ * This is the 5-argument `flowsTo` with both path nodes left unconstrained;
+ * it will only have results if `configuration` has non-empty sources and sinks.
+ */
+predicate flowsTo(Node source, Node sink, Configuration configuration) {
+  flowsTo(_, _, source, sink, configuration)
+}
+
+private predicate finalStats(boolean fwd, int nodes, int fields, int conscand, int tuples) {
+  fwd = true and // counts over all path nodes
+  tuples = count(PathNode p) and
+  conscand = count(AccessPath ap | exists(PathNodeMid p | p.getAp() = ap)) and
+  fields = count(TypedContent tc | exists(PathNodeMid p | p.getAp().getHead() = tc)) and
+  nodes = count(NodeEx nd | exists(PathNodeImpl p | p.getNodeEx() = nd))
+  or
+  fwd = false and // the same counts, restricted to path nodes satisfying `reach`
+  tuples = count(PathNode p | reach(p)) and
+  conscand = count(AccessPath ap | exists(PathNodeMid p | p.getAp() = ap and reach(p))) and
+  fields = count(TypedContent tc | exists(PathNodeMid p | p.getAp().getHead() = tc and reach(p))) and
+  nodes = count(NodeEx nd | exists(PathNodeImpl p | p.getNodeEx() = nd and reach(p)))
+}
+
+/**
+ * INTERNAL: Only for debugging.
+ *
+ * Reports node, field, cons-candidate and tuple counts per stage; `n` is a numeric key (10..55).
+ */
+predicate stageStats(
+  int n, string stage, int nodes, int fields, int conscand, int tuples, Configuration config
+) {
+  n = 10 and stage = "1 Fwd" and Stage1::stats(true, nodes, fields, conscand, tuples, config)
+  or
+  n = 15 and stage = "1 Rev" and Stage1::stats(false, nodes, fields, conscand, tuples, config)
+  or
+  n = 20 and stage = "2 Fwd" and Stage2::stats(true, nodes, fields, conscand, tuples, config)
+  or
+  n = 25 and stage = "2 Rev" and Stage2::stats(false, nodes, fields, conscand, tuples, config)
+  or
+  n = 30 and stage = "3 Fwd" and Stage3::stats(true, nodes, fields, conscand, tuples, config)
+  or
+  n = 35 and stage = "3 Rev" and Stage3::stats(false, nodes, fields, conscand, tuples, config)
+  or
+  n = 40 and stage = "4 Fwd" and Stage4::stats(true, nodes, fields, conscand, tuples, config)
+  or
+  n = 45 and stage = "4 Rev" and Stage4::stats(false, nodes, fields, conscand, tuples, config)
+  or // the two stage-5 rows below do not constrain `config`
+  n = 50 and stage = "5 Fwd" and finalStats(true, nodes, fields, conscand, tuples)
+  or
+  n = 55 and stage = "5 Rev" and finalStats(false, nodes, fields, conscand, tuples)
+}
+
+private module FlowExploration {
+  private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) {
+    exists(NodeEx pred, NodeEx succ |
+      // flow out of a callable
+      viableReturnPosOutEx(_, pred.(RetNodeEx).getReturnPosition(), succ)
+      or
+      // flow into callable
+      viableParamArgEx(_, succ, pred)
+      or
+      additionalJumpStep(pred, succ, config)
+      or
+      jumpStep(pred, succ, config)
+    |
+      c1 != c2 and
+      c1 = pred.getEnclosingCallable() and
+      c2 = succ.getEnclosingCallable()
+    )
+  }
+
+  private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) {
+    exists(DataFlowCallable prev | // recursive case: one `callableStep` after an interesting callable
+      callableStep(prev, c, config) and interestingCallableSrc(prev, config)
+    )
+    or
+    exists(Node src | c = getNodeEnclosingCallable(src) and config.isSource(src))
+  }
+
+  private predicate interestingCallableSink(DataFlowCallable c, Configuration config) {
+    exists(DataFlowCallable next | // recursive case: one `callableStep` before an interesting callable
+      callableStep(c, next, config) and interestingCallableSink(next, config)
+    )
+    or
+    exists(Node snk | c = getNodeEnclosingCallable(snk) and config.isSink(snk))
+  }
+
+  private newtype TCallableExt =
+    TCallable(DataFlowCallable c, Configuration config) { // a callable paired with a configuration
+      interestingCallableSrc(c, config) or
+      interestingCallableSink(c, config)
+    } or
+    TCallableSrc() or // extra root value used as `ce1` in `callableExtStepFwd`
+    TCallableSink() // extra root value used as `ce2` in `callableExtStepFwd`
+
+  private predicate callableExtSrc(TCallableSrc src) { any() } // holds for every `TCallableSrc` value
+
+  private predicate callableExtSink(TCallableSink sink) { any() } // holds for every `TCallableSink` value
+
+  private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) {
+    exists(Node src, Configuration config |
+      config.isSource(src) and
+      ce1 = TCallableSrc() and
+      ce2 = TCallable(getNodeEnclosingCallable(src), config)
+    )
+    or
+    exists(Node snk, Configuration config |
+      config.isSink(snk) and
+      ce2 = TCallableSink() and
+      ce1 = TCallable(getNodeEnclosingCallable(snk), config)
+    )
+    or
+    exists(DataFlowCallable pred, DataFlowCallable succ, Configuration config |
+      ce1 = TCallable(pred, pragma[only_bind_into](config)) and
+      ce2 = TCallable(succ, pragma[only_bind_into](config)) and
+      callableStep(pred, succ, config)
+    )
+  }
+
+  private predicate callableExtStepRev(TCallableExt ce1, TCallableExt ce2) {
+    callableExtStepFwd(ce2, ce1) // the forward step relation with its arguments swapped
+  }
+
+  private int distSrcExt(TCallableExt c) =
+    shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result) // starts at `TCallableSrc`
+
+  private int distSinkExt(TCallableExt c) =
+    shortestDistances(callableExtSink/1, callableExtStepRev/2)(_, c, result) // starts at `TCallableSink`
+
+  private int distSrc(DataFlowCallable c, Configuration config) {
+    exists(int extDist | extDist = distSrcExt(TCallable(c, config)) | result = extDist - 1)
+  }
+
+  private int distSink(DataFlowCallable c, Configuration config) {
+    exists(int extDist | extDist = distSinkExt(TCallable(c, config)) | result = extDist - 1)
+  }
+
+  private newtype TPartialAccessPath =
+    TPartialNil(DataFlowType t) or // empty path; carries only a type
+    TPartialCons(TypedContent tc, int len) { len in [1 .. accessPathLimit()] } // head plus bounded length
+
+  /**
+   * A partial access path used by forward exploration. Conceptually this is a
+   * list of `TypedContent`s followed by a type, but only the head of the list
+   * and its length are stored. The list describes the dereference operations
+   * needed to get from the value in a node to the tracked object, and the
+   * final type is the type of that tracked object.
+   */
+  private class PartialAccessPath extends TPartialAccessPath {
+    abstract string toString();
+
+    TypedContent getHead() { this = TPartialCons(result, _) }
+
+    int len() {
+      this = TPartialCons(_, result)
+      or
+      result = 0 and this = TPartialNil(_)
+    }
+
+    DataFlowType getType() {
+      exists(TypedContent tc | this = TPartialCons(tc, _) | result = tc.getContainerType())
+      or
+      this = TPartialNil(result)
+    }
+  }
+
+  private class PartialAccessPathNil extends PartialAccessPath, TPartialNil {
+    override string toString() {
+      exists(DataFlowType tp | result = concat(": " + ppReprType(tp)) and this = TPartialNil(tp))
+    }
+  }
+
+  private class PartialAccessPathCons extends PartialAccessPath, TPartialCons {
+    override string toString() {
+      exists(TypedContent head, int n | this = TPartialCons(head, n) |
+        n = 1 and result = "[" + head.toString() + "]"
+        or
+        n != 1 and
+        result = "[" + head.toString() + ", ... (" + n.toString() + ")]"
+      )
+    }
+  }
+
+  private newtype TRevPartialAccessPath =
+    TRevPartialNil() or // empty path; unlike `TPartialNil` it carries no type
+    TRevPartialCons(Content c, int len) { len in [1 .. accessPathLimit()] } // head plus bounded length
+
+  /**
+   * A partial access path used by reverse exploration. Conceptually a list of
+   * `Content`s, of which only the head and the length are stored.
+   */
+  private class RevPartialAccessPath extends TRevPartialAccessPath {
+    abstract string toString();
+
+    Content getHead() { this = TRevPartialCons(result, _) }
+
+    int len() {
+      this = TRevPartialCons(_, result)
+      or
+      result = 0 and this = TRevPartialNil()
+    }
+  }
+
+  private class RevPartialAccessPathNil extends RevPartialAccessPath, TRevPartialNil {
+    override string toString() { result = "" } // `ppAp` maps the empty string to no suffix at all
+  }
+
+  private class RevPartialAccessPathCons extends RevPartialAccessPath, TRevPartialCons {
+    override string toString() {
+      exists(Content head, int n | this = TRevPartialCons(head, n) |
+        n = 1 and result = "[" + head.toString() + "]"
+        or
+        n != 1 and
+        result = "[" + head.toString() + ", ... (" + n.toString() + ")]"
+      )
+    }
+  }
+
+  private newtype TSummaryCtx1 =
+    TSummaryCtx1None() or // used at sources and after jump steps and returns in `partialPathStep`
+    TSummaryCtx1Param(ParamNodeEx p) // the parameter entered; set in `partialPathIntoCallable`
+
+  private newtype TSummaryCtx2 =
+    TSummaryCtx2None() or // always paired with `TSummaryCtx1None` in `partialPathStep`
+    TSummaryCtx2Some(PartialAccessPath ap) // access path on entry; set in `partialPathIntoCallable`
+
+  private newtype TRevSummaryCtx1 =
+    TRevSummaryCtx1None() or // used at sinks and after jump steps in `revPartialPathStep`
+    TRevSummaryCtx1Some(ReturnPosition pos) // return position entered; set in `revPartialPathIntoReturn`
+
+  private newtype TRevSummaryCtx2 =
+    TRevSummaryCtx2None() or // always paired with `TRevSummaryCtx1None` in `revPartialPathStep`
+    TRevSummaryCtx2Some(RevPartialAccessPath ap) // access path on entry; set in `revPartialPathIntoReturn`
+
+  private newtype TPartialPathNode =
+    TPartialPathNodeFwd(
+      NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap,
+      Configuration config
+    ) {
+      sourceNode(node, config) and // base case: a source with empty contexts and a nil access path
+      cc instanceof CallContextAny and
+      sc1 = TSummaryCtx1None() and
+      sc2 = TSummaryCtx2None() and
+      ap = TPartialNil(node.getDataFlowType()) and
+      not fullBarrier(node, config) and
+      exists(config.explorationLimit()) // only if the configuration has an exploration limit
+      or
+      partialPathNodeMk0(node, cc, sc1, sc2, ap, config) and // recursive case: one forward step
+      distSrc(node.getEnclosingCallable(), config) <= config.explorationLimit() // bounded by the limit
+    } or
+    TPartialPathNodeRev(
+      NodeEx node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, RevPartialAccessPath ap,
+      Configuration config
+    ) {
+      sinkNode(node, config) and // base case: a sink with empty contexts and a nil access path
+      sc1 = TRevSummaryCtx1None() and
+      sc2 = TRevSummaryCtx2None() and
+      ap = TRevPartialNil() and
+      not fullBarrier(node, config) and
+      exists(config.explorationLimit()) // only if the configuration has an exploration limit
+      or
+      exists(PartialPathNodeRev mid | // recursive case: one reverse step from an existing node
+        revPartialPathStep(mid, node, sc1, sc2, ap, config) and
+        not clearsContentCached(node.asNode(), ap.getHead()) and
+        not fullBarrier(node, config) and
+        distSink(node.getEnclosingCallable(), config) <= config.explorationLimit() // bounded by the limit
+      )
+    }
+
+  pragma[nomagic]
+  private predicate partialPathNodeMk0(
+    NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap,
+    Configuration config
+  ) {
+    exists(PartialPathNodeFwd pred |
+      not clearsContentCached(node.asNode(), ap.getHead().getContent()) and
+      not fullBarrier(node, config) and
+      partialPathStep(pred, node, cc, sc1, sc2, ap, config) and
+      if node.asNode() instanceof CastingNode
+      then compatibleTypes(node.getDataFlowType(), ap.getType())
+      else any()
+    )
+  }
+
+  /**
+   * A `Node` augmented with a call context, an access path, and a configuration.
+   */
+  class PartialPathNode extends TPartialPathNode {
+    /** Gets a textual representation of this element. */
+    string toString() { result = this.getNodeEx().toString() + this.ppAp() }
+
+    /**
+     * Gets a textual representation of this element, including a textual
+     * representation of the call context.
+     */
+    string toStringWithContext() {
+      result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx()
+    }
+
+    /**
+     * Holds if this element is at the specified location.
+     * The location spans column `startcolumn` of line `startline` to
+     * column `endcolumn` of line `endline` in file `filepath`.
+     * For more information, see
+     * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+     */
+    predicate hasLocationInfo(
+      string filepath, int startline, int startcolumn, int endline, int endcolumn
+    ) {
+      this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+    }
+
+    /** Gets the underlying `Node`. */
+    final Node getNode() { this.getNodeEx().projectToNode() = result }
+
+    private NodeEx getNodeEx() { // dispatches to the forward or the reverse subclass
+      result = this.(PartialPathNodeFwd).getNodeEx() or
+      result = this.(PartialPathNodeRev).getNodeEx()
+    }
+
+    /** Gets the associated configuration. */
+    Configuration getConfiguration() { none() }
+
+    /** Gets a successor of this node, if any. */
+    PartialPathNode getASuccessor() { none() }
+
+    /**
+     * Gets the approximate distance to the nearest source measured in number
+     * of interprocedural steps.
+     */
+    int getSourceDistance() {
+      result = distSrc(this.getNodeEx().getEnclosingCallable(), this.getConfiguration())
+    }
+
+    /**
+     * Gets the approximate distance to the nearest sink measured in number
+     * of interprocedural steps.
+     */
+    int getSinkDistance() {
+      result = distSink(this.getNodeEx().getEnclosingCallable(), this.getConfiguration())
+    }
+
+    private string ppAp() { // access path suffix for `toString`: empty, or a space plus the path
+      exists(string s |
+        s = this.(PartialPathNodeFwd).getAp().toString() or
+        s = this.(PartialPathNodeRev).getAp().toString()
+      |
+        if s = "" then result = "" else result = " " + s
+      )
+    }
+
+    private string ppCtx() { // only has a result for forward nodes
+      result = " <" + this.(PartialPathNodeFwd).getCallContext().toString() + ">"
+    }
+
+    /** Holds if this is a source in a forward-flow path. */
+    predicate isFwdSource() { this.(PartialPathNodeFwd).isSource() }
+
+    /** Holds if this is a sink in a reverse-flow path. */
+    predicate isRevSink() { this.(PartialPathNodeRev).isSink() }
+  }
+
+  /**
+   * Provides the `edges` query predicate that a path-problem query needs for its graph.
+   */
+  module PartialPathGraph {
+    /** Holds if `b` is a successor of `a`, i.e. `(a,b)` is an edge in the path graph. */
+    query predicate edges(PartialPathNode a, PartialPathNode b) { b = a.getASuccessor() }
+  }
+
+  private class PartialPathNodeFwd extends PartialPathNode, TPartialPathNodeFwd {
+    NodeEx node; // the fields mirror the columns of `TPartialPathNodeFwd`
+    CallContext cc;
+    TSummaryCtx1 sc1;
+    TSummaryCtx2 sc2;
+    PartialAccessPath ap;
+    Configuration config;
+
+    PartialPathNodeFwd() { this = TPartialPathNodeFwd(node, cc, sc1, sc2, ap, config) }
+
+    NodeEx getNodeEx() { result = node }
+
+    CallContext getCallContext() { result = cc }
+
+    TSummaryCtx1 getSummaryCtx1() { result = sc1 }
+
+    TSummaryCtx2 getSummaryCtx2() { result = sc2 }
+
+    PartialAccessPath getAp() { result = ap }
+
+    override Configuration getConfiguration() { result = config }
+
+    override PartialPathNodeFwd getASuccessor() { // `this` is the `mid` of a forward step to `result`
+      partialPathStep(this, result.getNodeEx(), result.getCallContext(), result.getSummaryCtx1(),
+        result.getSummaryCtx2(), result.getAp(), result.getConfiguration())
+    }
+
+    predicate isSource() { // same shape as the base case of `TPartialPathNodeFwd`
+      sourceNode(node, config) and
+      cc instanceof CallContextAny and
+      sc1 = TSummaryCtx1None() and
+      sc2 = TSummaryCtx2None() and
+      ap instanceof TPartialNil
+    }
+  }
+
+  private class PartialPathNodeRev extends PartialPathNode, TPartialPathNodeRev {
+    NodeEx node; // the fields mirror the columns of `TPartialPathNodeRev`
+    TRevSummaryCtx1 sc1;
+    TRevSummaryCtx2 sc2;
+    RevPartialAccessPath ap;
+    Configuration config;
+
+    PartialPathNodeRev() { this = TPartialPathNodeRev(node, sc1, sc2, ap, config) }
+
+    NodeEx getNodeEx() { result = node }
+
+    TRevSummaryCtx1 getSummaryCtx1() { result = sc1 }
+
+    TRevSummaryCtx2 getSummaryCtx2() { result = sc2 }
+
+    RevPartialAccessPath getAp() { result = ap }
+
+    override Configuration getConfiguration() { result = config }
+
+    override PartialPathNodeRev getASuccessor() { // `result` is the `mid` of a reverse step to `this`
+      revPartialPathStep(result, this.getNodeEx(), this.getSummaryCtx1(), this.getSummaryCtx2(),
+        this.getAp(), this.getConfiguration())
+    }
+
+    predicate isSink() { // same shape as the base case of `TPartialPathNodeRev`
+      sinkNode(node, config) and
+      sc1 = TRevSummaryCtx1None() and
+      sc2 = TRevSummaryCtx2None() and
+      ap = TRevPartialNil()
+    }
+  }
+
+  private predicate partialPathStep(
+    PartialPathNodeFwd mid, NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2,
+    PartialAccessPath ap, Configuration config
+  ) {
+    not isUnreachableInCallCached(node.asNode(), cc.(CallContextSpecificCall).getCall()) and
+    (
+      localFlowStep(mid.getNodeEx(), node, config) and // local step: all state is carried over
+      cc = mid.getCallContext() and
+      sc1 = mid.getSummaryCtx1() and
+      sc2 = mid.getSummaryCtx2() and
+      ap = mid.getAp() and
+      config = mid.getConfiguration()
+      or
+      additionalLocalFlowStep(mid.getNodeEx(), node, config) and // only from a nil access path
+      cc = mid.getCallContext() and
+      sc1 = mid.getSummaryCtx1() and
+      sc2 = mid.getSummaryCtx2() and
+      mid.getAp() instanceof PartialAccessPathNil and
+      ap = TPartialNil(node.getDataFlowType()) and // nil path re-typed to the type of `node`
+      config = mid.getConfiguration()
+    )
+    or
+    jumpStep(mid.getNodeEx(), node, config) and // jump step: call and summary contexts are reset
+    cc instanceof CallContextAny and
+    sc1 = TSummaryCtx1None() and
+    sc2 = TSummaryCtx2None() and
+    ap = mid.getAp() and
+    config = mid.getConfiguration()
+    or
+    additionalJumpStep(mid.getNodeEx(), node, config) and // contexts reset; only from a nil path
+    cc instanceof CallContextAny and
+    sc1 = TSummaryCtx1None() and
+    sc2 = TSummaryCtx2None() and
+    mid.getAp() instanceof PartialAccessPathNil and
+    ap = TPartialNil(node.getDataFlowType()) and
+    config = mid.getConfiguration()
+    or
+    partialPathStoreStep(mid, _, _, node, ap) and // store step: `ap` is one longer than the path at `mid`
+    cc = mid.getCallContext() and
+    sc1 = mid.getSummaryCtx1() and
+    sc2 = mid.getSummaryCtx2() and
+    config = mid.getConfiguration()
+    or
+    exists(PartialAccessPath ap0, TypedContent tc | // read step: `ap0` is the path at `mid`
+      partialPathReadStep(mid, ap0, tc, node, cc, config) and
+      sc1 = mid.getSummaryCtx1() and
+      sc2 = mid.getSummaryCtx2() and
+      apConsFwd(ap, tc, ap0, config) // `ap` is a path that some store step extended by `tc` to `ap0`
+    )
+    or
+    partialPathIntoCallable(mid, node, _, cc, sc1, sc2, _, ap, config) // step into a callable
+    or
+    partialPathOutOfCallable(mid, node, cc, ap, config) and // step out: summary context is reset
+    sc1 = TSummaryCtx1None() and
+    sc2 = TSummaryCtx2None()
+    or
+    partialPathThroughCallable(mid, node, cc, ap, config) and // through a call: summary context kept
+    sc1 = mid.getSummaryCtx1() and
+    sc2 = mid.getSummaryCtx2()
+  }
+
+  bindingset[result, i] // declares both `result` and `i` as bound by the caller
+  private int unbindInt(int i) { i <= result and i >= result } // satisfied exactly when `result = i`
+
+  pragma[inline]
+  private predicate partialPathStoreStep(
+    PartialPathNodeFwd mid, PartialAccessPath ap1, TypedContent tc, NodeEx node,
+    PartialAccessPath ap2
+  ) {
+    exists(NodeEx source, DataFlowType storedType |
+      source = mid.getNodeEx() and
+      store(source, tc, node, storedType, mid.getConfiguration()) and
+      ap1 = mid.getAp() and
+      compatibleTypes(ap1.getType(), storedType) and
+      ap2.getHead() = tc and
+      ap2.len() = unbindInt(ap1.len() + 1)
+    )
+  }
+
+  pragma[nomagic]
+  private predicate apConsFwd(
+    PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config
+  ) {
+    exists(PartialPathNodeFwd storeSrc | // some forward node has a store step from `ap1` to `ap2`
+      config = storeSrc.getConfiguration() and
+      partialPathStoreStep(storeSrc, ap1, tc, _, ap2)
+    )
+  }
+
+  pragma[nomagic]
+  private predicate partialPathReadStep(
+    PartialPathNodeFwd mid, PartialAccessPath ap, TypedContent tc, NodeEx node, CallContext cc,
+    Configuration config
+  ) {
+    exists(NodeEx source |
+      source = mid.getNodeEx() and
+      cc = mid.getCallContext() and
+      ap = mid.getAp() and
+      ap.getHead() = tc and
+      read(source, tc.getContent(), node, pragma[only_bind_into](config)) and
+      pragma[only_bind_into](config) = mid.getConfiguration()
+    )
+  }
+
+  private predicate partialPathOutOfCallable0(
+    PartialPathNodeFwd mid, ReturnPosition pos, CallContext innercc, PartialAccessPath ap,
+    Configuration config
+  ) {
+    config = mid.getConfiguration() and
+    ap = mid.getAp() and
+    innercc instanceof CallContextNoCall and
+    innercc = mid.getCallContext() and
+    pos = mid.getNodeEx().(RetNodeEx).getReturnPosition()
+  }
+
+  pragma[nomagic]
+  private predicate partialPathOutOfCallable1(
+    PartialPathNodeFwd mid, DataFlowCall call, ReturnKindExt kind, CallContext cc,
+    PartialAccessPath ap, Configuration config
+  ) {
+    exists(ReturnPosition retPos, DataFlowCallable c, CallContext calleeCc |
+      c = retPos.getCallable() and
+      kind = retPos.getKind() and
+      resolveReturn(calleeCc, c, call) and
+      partialPathOutOfCallable0(mid, retPos, calleeCc, ap, config)
+    | // `cc` is a `TReturn` only when `reducedViableImplInReturn(c, call)` holds
+      if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext()
+    )
+  }
+
+  private predicate partialPathOutOfCallable(
+    PartialPathNodeFwd mid, NodeEx out, CallContext cc, PartialAccessPath ap, Configuration config
+  ) {
+    exists(ReturnKindExt retKind, DataFlowCall site |
+      out.asNode() = retKind.getAnOutNode(site) // `out` is an out node of the call returned to
+    |
+      partialPathOutOfCallable1(mid, site, retKind, cc, ap, config)
+    )
+  }
+
+  pragma[noinline]
+  private predicate partialPathIntoArg(
+    PartialPathNodeFwd mid, int i, CallContext cc, DataFlowCall call, PartialAccessPath ap,
+    Configuration config
+  ) {
+    exists(ArgNode argument |
+      argument = mid.getNodeEx().asNode() and
+      argument.argumentOf(call, i) and
+      config = mid.getConfiguration() and
+      cc = mid.getCallContext() and
+      ap = mid.getAp()
+    )
+  }
+
+  pragma[nomagic]
+  private predicate partialPathIntoCallable0(
+    PartialPathNodeFwd mid, DataFlowCallable callable, int i, CallContext outercc,
+    DataFlowCall call, PartialAccessPath ap, Configuration config
+  ) {
+    callable = resolveCall(call, outercc) and // call target resolved under the outer context
+    partialPathIntoArg(mid, i, outercc, call, ap, config)
+  }
+
+  private predicate partialPathIntoCallable(
+    PartialPathNodeFwd mid, ParamNodeEx p, CallContext outercc, CallContextCall innercc,
+    TSummaryCtx1 sc1, TSummaryCtx2 sc2, DataFlowCall call, PartialAccessPath ap,
+    Configuration config
+  ) {
+    exists(int pos, DataFlowCallable target |
+      p.isParameterOf(target, pos) and
+      partialPathIntoCallable0(mid, target, pos, outercc, call, ap, config) and
+      sc2 = TSummaryCtx2Some(ap) and
+      sc1 = TSummaryCtx1Param(p)
+    |
+      if recordDataFlowCallSite(call, target)
+      then innercc = TSpecificCall(call)
+      else innercc = TSomeCall()
+    )
+  }
+
+  pragma[nomagic]
+  private predicate paramFlowsThroughInPartialPath(
+    ReturnKindExt kind, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2,
+    PartialAccessPath ap, Configuration config
+  ) {
+    exists(PartialPathNodeFwd atReturn, RetNodeEx retNode |
+      retNode = atReturn.getNodeEx() and
+      kind = retNode.getKind() and
+      config = atReturn.getConfiguration() and
+      cc = atReturn.getCallContext() and
+      ap = atReturn.getAp() and
+      sc1 = atReturn.getSummaryCtx1() and
+      sc2 = atReturn.getSummaryCtx2()
+    )
+  }
+
+  pragma[noinline]
+  private predicate partialPathThroughCallable0(
+    DataFlowCall call, PartialPathNodeFwd mid, ReturnKindExt kind, CallContext cc,
+    PartialAccessPath ap, Configuration config
+  ) {
+    exists(CallContext calleeCc, TSummaryCtx1 entry1, TSummaryCtx2 entry2 |
+      paramFlowsThroughInPartialPath(kind, calleeCc, entry1, entry2, ap, config) and
+      partialPathIntoCallable(mid, _, cc, calleeCc, entry1, entry2, call, _, config)
+    )
+  }
+
+  private predicate partialPathThroughCallable(
+    PartialPathNodeFwd mid, NodeEx out, CallContext cc, PartialAccessPath ap, Configuration config
+  ) {
+    exists(DataFlowCall site, ReturnKindExt retKind |
+      out.asNode() = retKind.getAnOutNode(site) and
+      partialPathThroughCallable0(site, mid, retKind, cc, ap, config)
+    )
+  }
+
+  private predicate revPartialPathStep(
+    PartialPathNodeRev mid, NodeEx node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2,
+    RevPartialAccessPath ap, Configuration config
+  ) {
+    localFlowStep(node, mid.getNodeEx(), config) and // local step into `mid`: all state carried over
+    sc1 = mid.getSummaryCtx1() and
+    sc2 = mid.getSummaryCtx2() and
+    ap = mid.getAp() and
+    config = mid.getConfiguration()
+    or
+    additionalLocalFlowStep(node, mid.getNodeEx(), config) and // only with a nil access path
+    sc1 = mid.getSummaryCtx1() and
+    sc2 = mid.getSummaryCtx2() and
+    mid.getAp() instanceof RevPartialAccessPathNil and
+    ap = TRevPartialNil() and
+    config = mid.getConfiguration()
+    or
+    jumpStep(node, mid.getNodeEx(), config) and // jump step: summary context is reset
+    sc1 = TRevSummaryCtx1None() and
+    sc2 = TRevSummaryCtx2None() and
+    ap = mid.getAp() and
+    config = mid.getConfiguration()
+    or
+    additionalJumpStep(node, mid.getNodeEx(), config) and // context reset; only with a nil path
+    sc1 = TRevSummaryCtx1None() and
+    sc2 = TRevSummaryCtx2None() and
+    mid.getAp() instanceof RevPartialAccessPathNil and
+    ap = TRevPartialNil() and
+    config = mid.getConfiguration()
+    or
+    revPartialPathReadStep(mid, _, _, node, ap) and // read step: `ap` is one longer than the path at `mid`
+    sc1 = mid.getSummaryCtx1() and
+    sc2 = mid.getSummaryCtx2() and
+    config = mid.getConfiguration()
+    or
+    exists(RevPartialAccessPath ap0, Content c | // store step: `ap0` is the path at `mid`
+      revPartialPathStoreStep(mid, ap0, c, node, config) and
+      sc1 = mid.getSummaryCtx1() and
+      sc2 = mid.getSummaryCtx2() and
+      apConsRev(ap, c, ap0, config) // `ap` is a path that some read step extended by `c` to `ap0`
+    )
+    or
+    exists(ParamNodeEx p | // from a parameter back to an argument; only with no summary context
+      mid.getNodeEx() = p and
+      viableParamArgEx(_, p, node) and
+      sc1 = mid.getSummaryCtx1() and
+      sc2 = mid.getSummaryCtx2() and
+      sc1 = TRevSummaryCtx1None() and
+      sc2 = TRevSummaryCtx2None() and
+      ap = mid.getAp() and
+      config = mid.getConfiguration()
+    )
+    or
+    exists(ReturnPosition pos | // from an out node back into the return position `pos`
+      revPartialPathIntoReturn(mid, pos, sc1, sc2, _, ap, config) and
+      pos = getReturnPosition(node.asNode())
+    )
+    or
+    revPartialPathThroughCallable(mid, node, ap, config) and // through a call: summary context kept
+    sc1 = mid.getSummaryCtx1() and
+    sc2 = mid.getSummaryCtx2()
+  }
+
+  pragma[inline]
+  private predicate revPartialPathReadStep(
+    PartialPathNodeRev mid, RevPartialAccessPath ap1, Content c, NodeEx node,
+    RevPartialAccessPath ap2
+  ) {
+    exists(NodeEx target |
+      target = mid.getNodeEx() and
+      read(node, c, target, mid.getConfiguration()) and
+      ap1 = mid.getAp() and
+      ap2.len() = unbindInt(ap1.len() + 1) and
+      ap2.getHead() = c
+    )
+  }
+
+  pragma[nomagic]
+  private predicate apConsRev(
+    RevPartialAccessPath ap1, Content c, RevPartialAccessPath ap2, Configuration config
+  ) {
+    exists(PartialPathNodeRev readTgt | // some reverse node has a read step from `ap1` to `ap2`
+      config = readTgt.getConfiguration() and
+      revPartialPathReadStep(readTgt, ap1, c, _, ap2)
+    )
+  }
+
+  pragma[nomagic]
+  private predicate revPartialPathStoreStep(
+    PartialPathNodeRev mid, RevPartialAccessPath ap, Content c, NodeEx node, Configuration config
+  ) {
+    exists(NodeEx target, TypedContent typed |
+      target = mid.getNodeEx() and
+      config = mid.getConfiguration() and
+      ap = mid.getAp() and
+      c = ap.getHead() and
+      c = typed.getContent() and
+      store(node, typed, target, _, config)
+    )
+  }
+
+  pragma[nomagic]
+  private predicate revPartialPathIntoReturn(
+    PartialPathNodeRev mid, ReturnPosition pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2,
+    DataFlowCall call, RevPartialAccessPath ap, Configuration config
+  ) {
+    exists(NodeEx outNode |
+      outNode = mid.getNodeEx() and
+      config = mid.getConfiguration() and
+      ap = mid.getAp() and
+      viableReturnPosOutEx(call, pos, outNode) and
+      sc2 = TRevSummaryCtx2Some(ap) and
+      sc1 = TRevSummaryCtx1Some(pos)
+    )
+  }
+
+  pragma[nomagic]
+  private predicate revPartialPathFlowsThrough(
+    int pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, RevPartialAccessPath ap,
+    Configuration config
+  ) {
+    exists(PartialPathNodeRev atParam, ParamNodeEx param |
+      param = atParam.getNodeEx() and
+      pos = param.getPosition() and
+      config = atParam.getConfiguration() and
+      ap = atParam.getAp() and
+      sc1 = atParam.getSummaryCtx1() and
+      sc2 = atParam.getSummaryCtx2()
+    )
+  }
+
+  pragma[nomagic]
+  private predicate revPartialPathThroughCallable0(
+    DataFlowCall call, PartialPathNodeRev mid, int pos, RevPartialAccessPath ap,
+    Configuration config
+  ) {
+    exists(TRevSummaryCtx1Some entry1, TRevSummaryCtx2Some entry2 |
+      revPartialPathFlowsThrough(pos, entry1, entry2, ap, config) and
+      revPartialPathIntoReturn(mid, _, entry1, entry2, call, _, config)
+    )
+  }
+
+  pragma[nomagic]
+  private predicate revPartialPathThroughCallable(
+    PartialPathNodeRev mid, ArgNodeEx node, RevPartialAccessPath ap, Configuration config
+  ) {
+    exists(DataFlowCall site, int argPos |
+      node.asNode().(ArgNode).argumentOf(site, argPos) and
+      revPartialPathThroughCallable0(site, mid, argPos, ap, config)
+    )
+  }
+}
+
+import FlowExploration
+
+private predicate partialFlow(
+  PartialPathNode source, PartialPathNode node, Configuration configuration
+) {
+  source.isFwdSource() and
+  configuration = source.getConfiguration() and
+  source.getASuccessor+() = node // one or more successor steps from the forward source
+}
+
+private predicate revPartialFlow(
+  PartialPathNode node, PartialPathNode sink, Configuration configuration
+) {
+  sink.isRevSink() and
+  configuration = sink.getConfiguration() and
+  sink = node.getASuccessor+() // one or more successor steps lead from `node` to the reverse sink
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl2.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl2.qll
new file mode 100644
index 00000000000..4ca06c93362
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl2.qll
@@ -0,0 +1,4559 @@
+/**
+ * Provides an implementation of global (interprocedural) data flow. This file
+ * re-exports the local (intraprocedural) data flow analysis from
+ * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed
+ * through the `Configuration` class. This file exists in several identical
+ * copies, allowing queries to use multiple `Configuration` classes that depend
+ * on each other without introducing mutual recursion among those configurations.
+ */
+
+private import DataFlowImplCommon
+private import DataFlowImplSpecific::Private
+import DataFlowImplSpecific::Public
+
+/**
+ * A configuration of interprocedural data flow analysis. This defines
+ * sources, sinks, and any other configurable aspect of the analysis. Each
+ * use of the global data flow library must define its own unique extension
+ * of this abstract class. To create a configuration, extend this class with
+ * a subclass whose characteristic predicate is a unique singleton string.
+ * For example, write
+ *
+ * ```ql
+ * class MyAnalysisConfiguration extends DataFlow::Configuration {
+ * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
+ * // Override `isSource` and `isSink`.
+ * // Optionally override `isBarrier`.
+ * // Optionally override `isAdditionalFlowStep`.
+ * }
+ * ```
+ * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and
+ * the edges are those data-flow steps that preserve the value of the node
+ * along with any additional edges defined by `isAdditionalFlowStep`.
+ * Specifying nodes in `isBarrier` will remove those nodes from the graph, and
+ * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going
+ * and/or out-going edges from those nodes, respectively.
+ *
+ * Then, to query whether there is flow between some `source` and `sink`,
+ * write
+ *
+ * ```ql
+ * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
+ * ```
+ *
+ * Multiple configurations can coexist, but two classes extending
+ * `DataFlow::Configuration` should never depend on each other. One of them
+ * should instead depend on a `DataFlow2::Configuration`, a
+ * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`.
+ */
+abstract class Configuration extends string {
+ bindingset[this]
+ Configuration() { any() } // imposes no constraint of its own; `bindingset[this]` requires `this` to be bound by the user
+
+ /**
+ * Holds if `source` is a relevant data flow source.
+ */
+ abstract predicate isSource(Node source);
+
+ /**
+ * Holds if `sink` is a relevant data flow sink.
+ */
+ abstract predicate isSink(Node sink);
+
+ /**
+ * Holds if data flow through `node` is prohibited. This completely removes
+ * `node` from the data flow graph.
+ */
+ predicate isBarrier(Node node) { none() } // default: no barriers
+
+ /** Holds if data flow into `node` is prohibited. */
+ predicate isBarrierIn(Node node) { none() } // default: no in-barriers
+
+ /** Holds if data flow out of `node` is prohibited. */
+ predicate isBarrierOut(Node node) { none() } // default: no out-barriers
+
+ /** Holds if data flow through nodes guarded by `guard` is prohibited. */
+ predicate isBarrierGuard(BarrierGuard guard) { none() } // default: no barrier guards
+
+ /**
+ * Holds if the additional flow step from `node1` to `node2` must be taken
+ * into account in the analysis.
+ */
+ predicate isAdditionalFlowStep(Node node1, Node node2) { none() } // default: no additional steps
+
+ /**
+ * Holds if an arbitrary number of implicit read steps of content `c` may be
+ * taken at `node`.
+ */
+ predicate allowImplicitRead(Node node, Content c) { none() } // default: no implicit reads
+
+ /**
+ * Gets the virtual dispatch branching limit when calculating field flow.
+ * This can be overridden to a smaller value to improve performance (a
+ * value of 0 disables field flow), or a larger value to get more results.
+ */
+ int fieldFlowBranchLimit() { result = 2 } // default limit is 2
+
+ /**
+ * Holds if data may flow from `source` to `sink` for this configuration.
+ */
+ predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) }
+
+ /**
+ * Holds if data may flow from `source` to `sink` for this configuration.
+ *
+ * The corresponding paths are generated from the end-points and the graph
+ * included in the module `PathGraph`.
+ */
+ predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) }
+
+ /**
+ * Holds if data may flow from some source to `sink` for this configuration.
+ */
+ predicate hasFlowTo(Node sink) { hasFlow(_, sink) }
+
+ /**
+ * Holds if data may flow from some source to `exprNode(sink)`, the node of expression `sink`, for this configuration.
+ */
+ predicate hasFlowToExpr(DataFlowExpr sink) { hasFlowTo(exprNode(sink)) }
+
+ /**
+ * Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev`
+ * measured in approximate number of interprocedural steps.
+ */
+ int explorationLimit() { none() } // has no result unless overridden
+
+ /**
+ * Holds if there is a partial data flow path from `source` to `node`. The
+ * approximate distance between `node` and the closest source is `dist` and
+ * is restricted to be less than or equal to `explorationLimit()`. This
+ * predicate completely disregards sink definitions.
+ *
+ * This predicate is intended for data-flow exploration and debugging and may
+ * perform poorly if the number of sources is too big and/or the exploration
+ * limit is set too high without using barriers.
+ *
+ * This predicate is disabled (has no results) by default. Override
+ * `explorationLimit()` with a suitable number to enable this predicate.
+ *
+ * To use this in a `path-problem` query, import the module `PartialPathGraph`.
+ */
+ final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) {
+ partialFlow(source, node, this) and
+ dist = node.getSourceDistance() // distance is read off the reached node
+ }
+
+ /**
+ * Holds if there is a partial data flow path from `node` to `sink`. The
+ * approximate distance between `node` and the closest sink is `dist` and
+ * is restricted to be less than or equal to `explorationLimit()`. This
+ * predicate completely disregards source definitions.
+ *
+ * This predicate is intended for data-flow exploration and debugging and may
+ * perform poorly if the number of sinks is too big and/or the exploration
+ * limit is set too high without using barriers.
+ *
+ * This predicate is disabled (has no results) by default. Override
+ * `explorationLimit()` with a suitable number to enable this predicate.
+ *
+ * To use this in a `path-problem` query, import the module `PartialPathGraph`.
+ *
+ * Note that reverse flow has slightly lower precision than the corresponding
+ * forward flow, as reverse flow disregards type pruning among other features.
+ */
+ final predicate hasPartialFlowRev(PartialPathNode node, PartialPathNode sink, int dist) {
+ revPartialFlow(node, sink, this) and
+ dist = node.getSinkDistance() // distance is read off the starting node
+ }
+}
+
+/**
+ * This class exists to prevent mutual recursion between the user-overridden
+ * member predicates of `Configuration` and the rest of the data-flow library.
+ * Good performance cannot be guaranteed in the presence of such recursion, so
+ * it should be replaced by using more than one copy of the data flow library.
+ */
+abstract private class ConfigurationRecursionPrevention extends Configuration {
+ bindingset[this]
+ ConfigurationRecursionPrevention() { any() } // no constraint of its own, like `Configuration`
+
+ override predicate hasFlow(Node source, Node sink) { // NOTE(review): presumably the aggregates below make recursion through `hasFlow` non-monotonic and thus rejected - confirm
+ strictcount(Node n | this.isSource(n)) < 0 // a count is never negative, so this disjunct cannot hold
+ or
+ strictcount(Node n | this.isSink(n)) < 0 // likewise unsatisfiable
+ or
+ strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 // likewise unsatisfiable
+ or
+ super.hasFlow(source, sink) // the only disjunct that can produce results
+ }
+}
+
+private newtype TNodeEx = // extended nodes: ordinary nodes plus implicit-read variants of some nodes
+ TNodeNormal(Node n) or // one value per `Node`
+ TNodeImplicitRead(Node n, boolean hasRead) { // two values (`hasRead` false and true) per qualifying node
+ any(Configuration c).allowImplicitRead(n, _) and hasRead = [false, true] // only for nodes where some configuration allows an implicit read
+ }
+
+private class NodeEx extends TNodeEx { // wrapper giving `TNodeEx` values a node-like interface
+ string toString() {
+ result = this.asNode().toString() // ordinary nodes print like the wrapped node
+ or
+ exists(Node n | this.isImplicitReadNode(n, _) | result = n.toString() + " [Ext]") // implicit-read nodes get an " [Ext]" suffix
+ }
+
+ Node asNode() { this = TNodeNormal(result) } // has a result only for ordinary nodes
+
+ predicate isImplicitReadNode(Node n, boolean hasRead) { this = TNodeImplicitRead(n, hasRead) }
+
+ Node projectToNode() { this = TNodeNormal(result) or this = TNodeImplicitRead(result, _) } // underlying node for either kind
+
+ pragma[nomagic]
+ private DataFlowCallable getEnclosingCallable0() {
+ nodeEnclosingCallable(this.projectToNode(), result) // defined for both kinds via `projectToNode`
+ }
+
+ pragma[inline]
+ DataFlowCallable getEnclosingCallable() { // inlined wrapper around `getEnclosingCallable0` with binding hints
+ pragma[only_bind_out](this).getEnclosingCallable0() = pragma[only_bind_into](result)
+ }
+
+ pragma[nomagic]
+ private DataFlowType getDataFlowType0() { nodeDataFlowType(this.asNode(), result) } // uses `asNode`, so no result for implicit-read nodes
+
+ pragma[inline]
+ DataFlowType getDataFlowType() { // inlined wrapper around `getDataFlowType0` with binding hints
+ pragma[only_bind_out](this).getDataFlowType0() = pragma[only_bind_into](result)
+ }
+
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.projectToNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) // location of the underlying node
+ }
+}
+
+private class ArgNodeEx extends NodeEx { // an ordinary extended node wrapping an `ArgNode`
+ ArgNodeEx() { this.asNode() instanceof ArgNode }
+}
+
+private class ParamNodeEx extends NodeEx { // an ordinary extended node wrapping a `ParamNode`
+ ParamNodeEx() { this.asNode() instanceof ParamNode }
+
+ predicate isParameterOf(DataFlowCallable c, int i) { // forwards to `ParamNode.isParameterOf` on the wrapped node
+ this.asNode().(ParamNode).isParameterOf(c, i)
+ }
+
+ int getPosition() { this.isParameterOf(_, result) } // the index `i` from `isParameterOf`, for any callable
+}
+
+private class RetNodeEx extends NodeEx { // an ordinary extended node wrapping a `ReturnNodeExt`
+ RetNodeEx() { this.asNode() instanceof ReturnNodeExt }
+
+ ReturnPosition getReturnPosition() { result = getReturnPosition(this.asNode()) } // forwards to the global `getReturnPosition`
+
+ ReturnKindExt getKind() { result = this.asNode().(ReturnNodeExt).getKind() } // kind of the wrapped return node
+}
+
+private predicate inBarrier(NodeEx node, Configuration config) { // holds if `node` wraps an in-barrier that is also a source
+ exists(Node n |
+ node.asNode() = n and // ordinary nodes only (`asNode` has no result otherwise)
+ config.isBarrierIn(n) and
+ config.isSource(n) // in-barriers that are not sources are handled by `fullBarrier`
+ )
+}
+
+private predicate outBarrier(NodeEx node, Configuration config) { // holds if `node` wraps an out-barrier that is also a sink
+ exists(Node n |
+ node.asNode() = n and // ordinary nodes only (`asNode` has no result otherwise)
+ config.isBarrierOut(n) and
+ config.isSink(n) // out-barriers that are not sinks are handled by `fullBarrier`
+ )
+}
+
+private predicate fullBarrier(NodeEx node, Configuration config) { // holds if `node` is treated as a barrier in both directions
+ exists(Node n | node.asNode() = n | // ordinary nodes only
+ config.isBarrier(n) // explicit barrier
+ or
+ config.isBarrierIn(n) and // in-barrier that is not a source
+ not config.isSource(n)
+ or
+ config.isBarrierOut(n) and // out-barrier that is not a sink
+ not config.isSink(n)
+ or
+ exists(BarrierGuard g | // node guarded by one of the configuration's barrier guards
+ config.isBarrierGuard(g) and
+ n = g.getAGuardedNode()
+ )
+ )
+}
+
+pragma[nomagic] // kept as its own relation: holds if `node` wraps a node that `config` declares a source
+private predicate sourceNode(NodeEx node, Configuration config) { config.isSource(node.asNode()) }
+
+pragma[nomagic] // kept as its own relation: holds if `node` wraps a node that `config` declares a sink
+private predicate sinkNode(NodeEx node, Configuration config) { config.isSink(node.asNode()) }
+
+/**
+ * Holds if data can flow in one local step from `node1` to `node2`.
+ */
+private predicate localFlowStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node n1, Node n2 | // case 1: a step between two ordinary nodes
+ node1.asNode() = n1 and
+ node2.asNode() = n2 and
+ simpleLocalFlowStepExt(n1, n2) and
+ not outBarrier(node1, config) and // the step may not leave an out-barrier ...
+ not inBarrier(node2, config) and // ... nor enter an in-barrier
+ not fullBarrier(node1, config) and // and neither end point may be a full barrier
+ not fullBarrier(node2, config)
+ )
+ or
+ exists(Node n | // case 2: from an ordinary node to its own implicit-read node
+ config.allowImplicitRead(n, _) and
+ node1.asNode() = n and
+ node2.isImplicitReadNode(n, false) // the target is the variant with `hasRead = false`
+ )
+}
+
+/**
+ * Holds if the additional step from `node1` to `node2` does not jump between callables.
+ */
+private predicate additionalLocalFlowStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node n1, Node n2 | // case 1: a user-defined additional step between two ordinary nodes
+ node1.asNode() = n1 and
+ node2.asNode() = n2 and
+ config.isAdditionalFlowStep(n1, n2) and
+ getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and // both ends in the same callable
+ not outBarrier(node1, config) and // same barrier filtering as `localFlowStep`
+ not inBarrier(node2, config) and
+ not fullBarrier(node1, config) and
+ not fullBarrier(node2, config)
+ )
+ or
+ exists(Node n | // case 2: from an implicit-read node back to its ordinary node
+ config.allowImplicitRead(n, _) and
+ node1.isImplicitReadNode(n, true) and // only the variant with `hasRead = true`
+ node2.asNode() = n
+ )
+}
+
+/**
+ * Holds if data can flow from `node1` to `node2` in a way that discards call contexts.
+ */
+private predicate jumpStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node n1, Node n2 | // only ordinary nodes take part
+ node1.asNode() = n1 and
+ node2.asNode() = n2 and
+ jumpStepCached(n1, n2) and // the underlying jump-step relation
+ not outBarrier(node1, config) and // same barrier filtering as `localFlowStep`
+ not inBarrier(node2, config) and
+ not fullBarrier(node1, config) and
+ not fullBarrier(node2, config)
+ )
+}
+
+/**
+ * Holds if the additional step from `node1` to `node2` jumps between callables.
+ */
+private predicate additionalJumpStep(NodeEx node1, NodeEx node2, Configuration config) {
+ exists(Node n1, Node n2 | // only ordinary nodes take part
+ node1.asNode() = n1 and
+ node2.asNode() = n2 and
+ config.isAdditionalFlowStep(n1, n2) and // a user-defined additional step ...
+ getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and // ... whose ends are in different callables
+ not outBarrier(node1, config) and // same barrier filtering as `localFlowStep`
+ not inBarrier(node2, config) and
+ not fullBarrier(node1, config) and
+ not fullBarrier(node2, config)
+ )
+}
+
+private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) { // read of content `c` from `node1` to `node2`
+ read(node1.asNode(), c, node2.asNode()) // case 1: an ordinary read step (the 3-argument `read`) between ordinary nodes
+ or
+ exists(Node n | // case 2: an implicit read that stays on node `n`
+ node2.isImplicitReadNode(n, true) and // target: the `hasRead = true` variant
+ node1.isImplicitReadNode(n, _) and // origin: either variant, so implicit reads can repeat
+ config.allowImplicitRead(n, c)
+ )
+}
+
+private predicate store( // store of typed content `tc` from `node1` into `node2`, restricted to contents that are read
+ NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config
+) {
+ store(node1.asNode(), tc, node2.asNode(), contentType) and // the underlying 4-argument `store` between ordinary nodes
+ read(_, tc.getContent(), _, config) // keep only stores whose content some read step (see `read`) uses
+}
+
+pragma[nomagic] // `NodeEx` version of `viableReturnPosOut`; `out` must be an ordinary node
+private predicate viableReturnPosOutEx(DataFlowCall call, ReturnPosition pos, NodeEx out) {
+ viableReturnPosOut(call, pos, out.asNode())
+}
+
+pragma[nomagic] // `NodeEx` version of `viableParamArg`, relating parameter `p` and argument `arg` of `call`
+private predicate viableParamArgEx(DataFlowCall call, ParamNodeEx p, ArgNodeEx arg) {
+ viableParamArg(call, p.asNode(), arg.asNode())
+}
+
+/**
+ * Holds if field flow should be used for the given configuration, that is, if its `fieldFlowBranchLimit()` is at least 1.
+ */
+private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 }
+
+private module Stage1 { // first pruning stage: node reachability with a Boolean call context and no access paths
+ class ApApprox = Unit; // every access-path type is `Unit` in this stage
+
+ class Ap = Unit;
+
+ class ApOption = Unit;
+
+ class Cc = boolean; // the call context is a single flag, see `fwdFlow`
+
+ /* Begin: Stage 1 logic. */
+ /**
+ * Holds if `node` is reachable from a source in the configuration `config`.
+ *
+ * The Boolean `cc` records whether the node is reached through an
+ * argument in a call.
+ */
+ predicate fwdFlow(NodeEx node, Cc cc, Configuration config) {
+ not fullBarrier(node, config) and // applies to every disjunct below
+ (
+ sourceNode(node, config) and // sources start with `cc = false`
+ cc = false
+ or
+ exists(NodeEx mid | // local step: `cc` is carried over unchanged
+ fwdFlow(mid, cc, config) and
+ localFlowStep(mid, node, config)
+ )
+ or
+ exists(NodeEx mid | // additional local step: `cc` is carried over unchanged
+ fwdFlow(mid, cc, config) and
+ additionalLocalFlowStep(mid, node, config)
+ )
+ or
+ exists(NodeEx mid | // jump step: any previous `cc` is dropped and reset to `false`
+ fwdFlow(mid, _, config) and
+ jumpStep(mid, node, config) and
+ cc = false
+ )
+ or
+ exists(NodeEx mid | // additional jump step: `cc` is reset to `false` as well
+ fwdFlow(mid, _, config) and
+ additionalJumpStep(mid, node, config) and
+ cc = false
+ )
+ or
+ // store
+ exists(NodeEx mid |
+ useFieldFlow(config) and // stores are only followed when field flow is enabled
+ fwdFlow(mid, cc, config) and
+ store(mid, _, node, _, config) and
+ not outBarrier(mid, config)
+ )
+ or
+ // read
+ exists(Content c |
+ fwdFlowRead(c, node, cc, config) and
+ fwdFlowConsCand(c, config) and // only contents that some reached store writes
+ not inBarrier(node, config)
+ )
+ or
+ // flow into a callable
+ exists(NodeEx arg |
+ fwdFlow(arg, _, config) and
+ viableParamArgEx(_, node, arg) and
+ cc = true // `node` is a parameter reached through an argument
+ )
+ or
+ // flow out of a callable
+ exists(DataFlowCall call |
+ fwdFlowOut(call, node, false, config) and // return position reached with `cc = false`
+ cc = false
+ or
+ fwdFlowOutFromArg(call, node, config) and // return position reached with `cc = true` ...
+ fwdFlowIsEntered(call, cc, config) // ... so `cc` is that of a reached argument of the same `call`
+ )
+ )
+ }
+
+ private predicate fwdFlow(NodeEx node, Configuration config) { fwdFlow(node, _, config) } // projection that ignores `cc`
+
+ pragma[nomagic]
+ private predicate fwdFlowRead(Content c, NodeEx node, Cc cc, Configuration config) { // `node` is the target of a read of `c` from a reached node
+ exists(NodeEx mid |
+ fwdFlow(mid, cc, config) and // `cc` is inherited from the node being read
+ read(mid, c, node, config)
+ )
+ }
+
+ /**
+ * Holds if `c` is the target of a store in the flow covered by `fwdFlow`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Content c, Configuration config) {
+ exists(NodeEx mid, NodeEx node, TypedContent tc |
+ not fullBarrier(node, config) and // the store target must not be a full barrier
+ useFieldFlow(config) and
+ fwdFlow(mid, _, config) and
+ store(mid, tc, node, _, config) and
+ c = tc.getContent() // only the content is kept, not the typed content
+ )
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) { // some reached return node with context `cc` has position `pos`
+ exists(RetNodeEx ret |
+ fwdFlow(ret, cc, config) and
+ ret.getReturnPosition() = pos
+ )
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowOut(DataFlowCall call, NodeEx out, Cc cc, Configuration config) { // `out` is a viable out node of `call` for a reached return position
+ exists(ReturnPosition pos |
+ fwdFlowReturnPosition(pos, cc, config) and // `cc` is the context inside the callee
+ viableReturnPosOutEx(call, pos, out)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate fwdFlowOutFromArg(DataFlowCall call, NodeEx out, Configuration config) { // `fwdFlowOut` restricted to `cc = true`
+ fwdFlowOut(call, out, true, config)
+ }
+
+ /**
+ * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(DataFlowCall call, Cc cc, Configuration config) {
+ exists(ArgNodeEx arg |
+ fwdFlow(arg, cc, config) and // `cc` is the context in which the argument was reached
+ viableParamArgEx(call, _, arg)
+ )
+ }
+
+ /**
+ * Holds if `node` is part of a path from a source to a sink in the
+ * configuration `config`.
+ *
+ * The Boolean `toReturn` records whether the node must be returned from
+ * the enclosing callable in order to reach a sink.
+ */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, Configuration config) {
+ revFlow0(node, toReturn, config) and
+ fwdFlow(node, config) // every reverse-flow node must also be forward-reachable
+ }
+
+ pragma[nomagic]
+ private predicate revFlow0(NodeEx node, boolean toReturn, Configuration config) { // the reverse steps; `revFlow` adds the `fwdFlow` restriction
+ fwdFlow(node, config) and // base case: a forward-reached sink, with `toReturn = false`
+ sinkNode(node, config) and
+ toReturn = false
+ or
+ exists(NodeEx mid | // local step: `toReturn` is carried over unchanged
+ localFlowStep(node, mid, config) and
+ revFlow(mid, toReturn, config)
+ )
+ or
+ exists(NodeEx mid | // additional local step: `toReturn` is carried over unchanged
+ additionalLocalFlowStep(node, mid, config) and
+ revFlow(mid, toReturn, config)
+ )
+ or
+ exists(NodeEx mid | // jump step: `toReturn` is reset to `false`
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, config) and
+ toReturn = false
+ )
+ or
+ exists(NodeEx mid | // additional jump step: `toReturn` is reset to `false`
+ additionalJumpStep(node, mid, config) and
+ revFlow(mid, _, config) and
+ toReturn = false
+ )
+ or
+ // store
+ exists(Content c |
+ revFlowStore(c, node, toReturn, config) and
+ revFlowConsCand(c, config) // only contents that are also read in the reverse flow
+ )
+ or
+ // read
+ exists(NodeEx mid, Content c |
+ read(node, c, mid, config) and
+ fwdFlowConsCand(c, pragma[only_bind_into](config)) and // only contents stored in the forward flow
+ revFlow(mid, toReturn, pragma[only_bind_into](config))
+ )
+ or
+ // flow into a callable
+ exists(DataFlowCall call |
+ revFlowIn(call, node, false, config) and // parameter reached with `toReturn = false`
+ toReturn = false
+ or
+ revFlowInToReturn(call, node, config) and // parameter reached with `toReturn = true` ...
+ revFlowIsReturned(call, toReturn, config) // ... so `toReturn` is that of a reached out node of the same `call`
+ )
+ or
+ // flow out of a callable
+ exists(ReturnPosition pos |
+ revFlowOut(pos, config) and
+ node.(RetNodeEx).getReturnPosition() = pos and
+ toReturn = true // a return node reaches the sink by returning
+ )
+ }
+
+ /**
+ * Holds if `c` is the target of a read in the flow covered by `revFlow`.
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Content c, Configuration config) {
+ exists(NodeEx mid, NodeEx node |
+ fwdFlow(node, pragma[only_bind_into](config)) and // the node being read is forward-reached
+ read(node, c, mid, config) and
+ fwdFlowConsCand(c, pragma[only_bind_into](config)) and // `c` is stored in the forward flow
+ revFlow(pragma[only_bind_into](mid), _, pragma[only_bind_into](config)) // the read target is in the reverse flow
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revFlowStore(Content c, NodeEx node, boolean toReturn, Configuration config) { // `node` stores `c` into a reverse-reached node
+ exists(NodeEx mid, TypedContent tc |
+ revFlow(mid, toReturn, pragma[only_bind_into](config)) and // `toReturn` is inherited from the store target
+ fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+ store(node, tc, mid, _, config) and
+ c = tc.getContent()
+ )
+ }
+
+ /**
+ * Holds if `c` is the target of both a read and a store in the flow covered
+ * by `revFlow`.
+ */
+ private predicate revFlowIsReadAndStored(Content c, Configuration conf) {
+ revFlowConsCand(c, conf) and // read side
+ revFlowStore(c, _, _, conf) // store side
+ }
+
+ pragma[nomagic]
+ predicate viableReturnPosOutNodeCandFwd1( // `viableReturnPosOutEx` restricted to forward-reached return positions
+ DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config
+ ) {
+ fwdFlowReturnPosition(pos, _, config) and
+ viableReturnPosOutEx(call, pos, out)
+ }
+
+ pragma[nomagic]
+ private predicate revFlowOut(ReturnPosition pos, Configuration config) { // some viable out node for `pos` is in the reverse flow
+ exists(DataFlowCall call, NodeEx out |
+ revFlow(out, _, config) and
+ viableReturnPosOutNodeCandFwd1(call, pos, out, config)
+ )
+ }
+
+ pragma[nomagic]
+ predicate viableParamArgNodeCandFwd1( // `viableParamArgEx` restricted to forward-reached arguments
+ DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config
+ ) {
+ viableParamArgEx(call, p, arg) and
+ fwdFlow(arg, config)
+ }
+
+ pragma[nomagic]
+ private predicate revFlowIn( // `arg` may flow into a reverse-reached parameter of `call`
+ DataFlowCall call, ArgNodeEx arg, boolean toReturn, Configuration config
+ ) {
+ exists(ParamNodeEx p |
+ revFlow(p, toReturn, config) and // `toReturn` is the flag of the parameter
+ viableParamArgNodeCandFwd1(call, p, arg, config)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate revFlowInToReturn(DataFlowCall call, ArgNodeEx arg, Configuration config) { // `revFlowIn` restricted to `toReturn = true`
+ revFlowIn(call, arg, true, config)
+ }
+
+ /**
+ * Holds if an output from `call` is reached in the flow covered by `revFlow`
+ * and data might flow through the target callable resulting in reverse flow
+ * reaching an argument of `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(DataFlowCall call, boolean toReturn, Configuration config) {
+ exists(NodeEx out |
+ revFlow(out, toReturn, config) and // `toReturn` is the flag of the out node
+ fwdFlowOutFromArg(call, out, config) // forward flow reached `out` from a callee entered through an argument
+ )
+ }
+
+ pragma[nomagic]
+ predicate storeStepCand( // store steps whose content is both read and stored in the reverse flow
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+ Configuration config
+ ) {
+ exists(Content c |
+ revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
+ revFlow(node2, pragma[only_bind_into](config)) and // the store target is in the reverse flow
+ store(node1, tc, node2, contentType, config) and
+ c = tc.getContent() and
+ exists(ap1) // `ap1` is a `Unit` here; this only binds it
+ )
+ }
+
+ pragma[nomagic]
+ predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) { // read steps whose content is both read and stored in the reverse flow
+ revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
+ revFlow(n2, pragma[only_bind_into](config)) and // the read target is in the reverse flow
+ read(n1, c, n2, pragma[only_bind_into](config))
+ }
+
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, config) } // projection that ignores `toReturn`
+
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+ revFlow(node, toReturn, config) and exists(returnAp) and exists(ap) // 5-argument form; `returnAp` and `ap` are `Unit`s and only get bound
+ }
+
+ private predicate throughFlowNodeCand(NodeEx node, Configuration config) { // candidate node for flow through a callable
+ revFlow(node, true, config) and // must return to reach a sink
+ fwdFlow(node, true, config) and // was reached through an argument
+ not inBarrier(node, config) and
+ not outBarrier(node, config)
+ }
+
+ /** Holds if flow may return from `callable`. */
+ pragma[nomagic]
+ private predicate returnFlowCallableNodeCand(
+ DataFlowCallable callable, ReturnKindExt kind, Configuration config
+ ) {
+ exists(RetNodeEx ret |
+ throughFlowNodeCand(ret, config) and
+ callable = ret.getEnclosingCallable() and
+ kind = ret.getKind() // `kind` is the return kind of that candidate return node
+ )
+ }
+
+ /**
+ * Holds if flow may enter through `p` and reach a return node making `p` a
+ * candidate for the origin of a summary.
+ */
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+ exists(ReturnKindExt kind |
+ throughFlowNodeCand(p, config) and
+ returnFlowCallableNodeCand(c, kind, config) and // `c` has a candidate return node of kind `kind`
+ p.getEnclosingCallable() = c and
+ exists(ap) and // `ap` is a `Unit` here; this only binds it
+ // we don't expect a parameter to return stored in itself
+ not kind.(ParamUpdateReturnKind).getPosition() = p.getPosition()
+ )
+ }
+
+ pragma[nomagic]
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) { // some argument of `call` is in the reverse flow via flow through the callee
+ exists(ArgNodeEx arg, boolean toReturn |
+ revFlow(arg, toReturn, config) and
+ revFlowInToReturn(call, arg, config) and // `arg` enters a parameter reached with `toReturn = true`
+ revFlowIsReturned(call, toReturn, config) // and an out node of `call` is reached with the same `toReturn` as `arg`
+ )
+ }
+
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { // size counters for the forward and reverse relations
+ fwd = true and // forward statistics
+ nodes = count(NodeEx node | fwdFlow(node, config)) and
+ fields = count(Content f0 | fwdFlowConsCand(f0, config)) and
+ conscand = -1 and // fixed at -1 in this stage
+ tuples = count(NodeEx n, boolean b | fwdFlow(n, b, config))
+ or
+ fwd = false and // reverse statistics
+ nodes = count(NodeEx node | revFlow(node, _, config)) and
+ fields = count(Content f0 | revFlowConsCand(f0, config)) and
+ conscand = -1 and // fixed at -1 in this stage
+ tuples = count(NodeEx n, boolean b | revFlow(n, b, config))
+ }
+ /* End: Stage 1 logic. */
+}
+
+pragma[noinline] // `localFlowStep` restricted to targets in the Stage 1 reverse flow
+private predicate localFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) {
+ Stage1::revFlow(node2, config) and
+ localFlowStep(node1, node2, config)
+}
+
+pragma[noinline] // `additionalLocalFlowStep` restricted to targets in the Stage 1 reverse flow
+private predicate additionalLocalFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) {
+ Stage1::revFlow(node2, config) and
+ additionalLocalFlowStep(node1, node2, config)
+}
+
+pragma[nomagic]
+private predicate viableReturnPosOutNodeCand1( // `Stage1::viableReturnPosOutNodeCandFwd1` restricted to out nodes in the Stage 1 reverse flow
+ DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config
+) {
+ Stage1::revFlow(out, config) and
+ Stage1::viableReturnPosOutNodeCandFwd1(call, pos, out, config)
+}
+
+/**
+ * Holds if data can flow out of `call` from `ret` to `out`, either
+ * through a `ReturnNode` or through an argument that has been mutated, and
+ * this step is part of a path from a source to a sink.
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand1(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, Configuration config
+) {
+ viableReturnPosOutNodeCand1(call, ret.getReturnPosition(), out, config) and // `out` is a viable, reverse-reached out node for `ret`'s position
+ Stage1::revFlow(ret, config) and // `ret` itself is in the Stage 1 reverse flow
+ not outBarrier(ret, config) and // the step may not leave an out-barrier ...
+ not inBarrier(out, config) // ... nor enter an in-barrier
+}
+
+pragma[nomagic]
+private predicate viableParamArgNodeCand1( // `Stage1::viableParamArgNodeCandFwd1` restricted to arguments in the Stage 1 reverse flow
+ DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config
+) {
+ Stage1::viableParamArgNodeCandFwd1(call, p, arg, config) and
+ Stage1::revFlow(arg, config)
+}
+
+/**
+ * Holds if data can flow into `call` from `arg` to `p`, and this step is
+ * part of a path from a source to a sink.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand1(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, Configuration config
+) {
+ viableParamArgNodeCand1(call, p, arg, config) and // `arg` is a viable, reverse-reached argument for `p`
+ Stage1::revFlow(p, config) and // `p` itself is in the Stage 1 reverse flow
+ not outBarrier(arg, config) and // the step may not leave an out-barrier ...
+ not inBarrier(p, config) // ... nor enter an in-barrier
+}
+
+/**
+ * Gets the amount of forward branching on the origin of a cross-call path
+ * edge in the graph of paths between sources and sinks that ignores call
+ * contexts.
+ */
+private int branch(NodeEx n1, Configuration conf) {
+ result =
+ strictcount(NodeEx n | // number of distinct targets; `strictcount` gives no result when there are none
+ flowOutOfCallNodeCand1(_, n1, n, conf) or flowIntoCallNodeCand1(_, n1, n, conf) // the 4-argument forms, for any call
+ )
+}
+
+/**
+ * Gets the amount of backward branching on the target of a cross-call path
+ * edge in the graph of paths between sources and sinks that ignores call
+ * contexts.
+ */
+private int join(NodeEx n2, Configuration conf) {
+ result =
+ strictcount(NodeEx n | // number of distinct origins; `strictcount` gives no result when there are none
+ flowOutOfCallNodeCand1(_, n, n2, conf) or flowIntoCallNodeCand1(_, n, n2, conf) // the 4-argument forms, for any call
+ )
+}
+
+/**
+ * Holds if data can flow out of `call` from `ret` to `out`, either
+ * through a `ReturnNode` or through an argument that has been mutated, and
+ * this step is part of a path from a source to a sink. The
+ * `allowsFieldFlow` flag indicates whether the branching is within the limit
+ * specified by the configuration.
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand1(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+) {
+ flowOutOfCallNodeCand1(call, ret, out, config) and // the 4-argument form of this predicate
+ exists(int b, int j |
+ b = branch(ret, config) and // forward branching at the origin
+ j = join(out, config) and // backward branching at the target
+ if b.minimum(j) <= config.fieldFlowBranchLimit() // the smaller of the two is compared with the limit
+ then allowsFieldFlow = true
+ else allowsFieldFlow = false
+ )
+}
+
+/**
+ * Holds if data can flow into `call` from `arg` to `p`, and this step is part
+ * of a path from a source to a sink. The `allowsFieldFlow` flag indicates whether
+ * the branching is within the limit specified by the configuration.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand1(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+) {
+ flowIntoCallNodeCand1(call, arg, p, config) and // the 4-argument form of this predicate
+ exists(int b, int j |
+ b = branch(arg, config) and // forward branching at the origin
+ j = join(p, config) and // backward branching at the target
+ if b.minimum(j) <= config.fieldFlowBranchLimit() // the smaller of the two is compared with the limit
+ then allowsFieldFlow = true
+ else allowsFieldFlow = false
+ )
+}
+
+private module Stage2 {
+ module PrevStage = Stage1;
+
+ class ApApprox = PrevStage::Ap;
+
+ class Ap = boolean;
+
+ class ApNil extends Ap {
+ ApNil() { this = false }
+ }
+
+ bindingset[result, ap]
+ private ApApprox getApprox(Ap ap) { any() }
+
+ private ApNil getApNil(NodeEx node) { PrevStage::revFlow(node, _) and exists(result) }
+
+ bindingset[tc, tail]
+ private Ap apCons(TypedContent tc, Ap tail) { result = true and exists(tc) and exists(tail) }
+
+ pragma[inline]
+ private Content getHeadContent(Ap ap) { exists(result) and ap = true }
+
+ class ApOption = BooleanOption;
+
+ ApOption apNone() { result = TBooleanNone() }
+
+ ApOption apSome(Ap ap) { result = TBooleanSome(ap) }
+
+ class Cc = CallContext;
+
+ class CcCall = CallContextCall;
+
+ class CcNoCall = CallContextNoCall;
+
+ Cc ccNone() { result instanceof CallContextAny }
+
+ private class LocalCc = Unit;
+
+ bindingset[call, c, outercc]
+ private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
+ checkCallContextCall(outercc, call, c) and
+ if recordDataFlowCallSiteDispatch(call, c)
+ then result = TSpecificCall(call)
+ else result = TSomeCall()
+ }
+
+ bindingset[call, c, innercc]
+ private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
+ checkCallContextReturn(innercc, c, call) and
+ if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
+ }
+
+ bindingset[node, cc, config]
+ private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
+
+ private predicate localStep(
+ NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+ ) {
+ (
+ preservesValue = true and
+ localFlowStepNodeCand1(node1, node2, config)
+ or
+ preservesValue = false and
+ additionalLocalFlowStepNodeCand1(node1, node2, config)
+ ) and
+ exists(ap) and
+ exists(lcc)
+ }
+
+ private predicate flowOutOfCall = flowOutOfCallNodeCand1/5;
+
+ private predicate flowIntoCall = flowIntoCallNodeCand1/5;
+
+ bindingset[ap, contentType]
+ private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() }
+
+ /* Begin: Stage 2 logic. */
+ private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) {
+ PrevStage::revFlow(node, _, _, apa, config)
+ }
+
+ pragma[nomagic]
+ private predicate flowThroughOutOfCall(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and
+ PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _,
+ pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` is reachable with access path `ap` from a source in the
+ * configuration `config`.
+ *
+ * The call context `cc` records whether the node is reached through an
+ * argument in a call, and if so, `argAp` records the access path of that
+ * argument.
+ */
+ pragma[nomagic]
+ predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ flowCand(node, _, config) and // base case: `node` is a source that satisfies `flowCand`
+ sourceNode(node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node) // a source starts with the nil access path
+ or
+ exists(NodeEx mid, Ap ap0, LocalCc localCc | // local step from an already reached `mid`
+ fwdFlow(mid, cc, argAp, ap0, config) and
+ localCc = getLocalCc(mid, cc, config)
+ |
+ localStep(mid, node, true, _, config, localCc) and // value-preserving step: access path is unchanged
+ ap = ap0
+ or
+ localStep(mid, node, false, ap, config, localCc) and // non-value-preserving step: `ap` comes from the step
+ ap0 instanceof ApNil // and is only allowed from a nil access path
+ )
+ or
+ exists(NodeEx mid | // jump step: `ap` is kept, call context and argument access path are reset
+ fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ jumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone()
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // additional jump step: taken only from a nil access path, yields a nil access path
+ fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ additionalJumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ )
+ or
+ // store
+ exists(TypedContent tc, Ap ap0 |
+ fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+ ap = apCons(tc, ap0) // the stored content becomes the new head of the access path
+ )
+ or
+ // read
+ exists(Ap ap0, Content c |
+ fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+ fwdFlowConsCand(ap0, c, ap, config) // `ap` is a tail that some forward store of `c` extended to `ap0`
+ )
+ or
+ // flow into a callable
+ exists(ApApprox apa |
+ fwdFlowIn(_, node, _, cc, _, ap, config) and
+ apa = getApprox(ap) and
+ if PrevStage::parameterMayFlowThrough(node, _, apa, config) // record the entry access path only when flow-through is possible
+ then argAp = apSome(ap)
+ else argAp = apNone()
+ )
+ or
+ // flow out of a callable
+ fwdFlowOutNotFromArg(node, cc, argAp, ap, config)
+ or
+ exists(DataFlowCall call, Ap argAp0 | // flow through `call`: resume with the `cc`/`argAp` that held when `call` was entered
+ fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+ fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+ )
+ }
+
+ pragma[nomagic] // Forward flow reaches `node1` with `ap1`, and a store of `tc` leads from `node1` to `node2`.
+ private predicate fwdFlowStore(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ exists(DataFlowType contentType |
+ fwdFlow(node1, cc, argAp, ap1, config) and
+ PrevStage::storeStepCand(node1, getApprox(ap1), tc, node2, contentType, config) and // only stores kept by the previous stage
+ typecheckStore(ap1, contentType) // stage-specific type check of the stored access path
+ )
+ }
+
+ /**
+ * Holds if forward flow with access path `tail` reaches a store of `c`
+ * resulting in access path `cons`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(TypedContent tc |
+ fwdFlowStore(_, tail, tc, _, _, _, config) and // nodes and call context of the store are irrelevant here
+ tc.getContent() = c and
+ cons = apCons(tc, tail)
+ )
+ }
+
+ pragma[nomagic] // Forward flow reaches `node1` with `ap`, and a read of `c` leads from `node1` to `node2`.
+ private predicate fwdFlowRead(
+ Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ fwdFlow(node1, cc, argAp, ap, config) and
+ PrevStage::readStepCand(node1, c, node2, config) and // only reads kept by the previous stage
+ getHeadContent(ap) = c // the content read must be the head of the access path
+ }
+
+ pragma[nomagic] // Forward flow enters parameter `p` at `call` from an argument reached in context `outercc`.
+ private predicate fwdFlowIn(
+ DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+ Configuration config
+ ) {
+ exists(ArgNodeEx arg, boolean allowsFieldFlow |
+ fwdFlow(arg, outercc, argAp, ap, config) and // `argAp` here belongs to the caller's context
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path needs an edge that allows field flow
+ )
+ }
+
+ pragma[nomagic] // Forward flow leaves a callable at `out` from a return node reached in a `CcNoCall` context.
+ private predicate fwdFlowOutNotFromArg(
+ NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(
+ DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc,
+ DataFlowCallable inner
+ |
+ fwdFlow(ret, innercc, argAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ inner = ret.getEnclosingCallable() and
+ ccOut = getCallContextReturn(inner, call, innercc) // call context to continue with after the return
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path needs an edge that allows field flow
+ )
+ }
+
+ pragma[nomagic] // Forward flow that entered via an argument with access path `argAp` returns to `out` at `call` with `ap`.
+ private predicate fwdFlowOutFromArg(
+ DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc |
+ fwdFlow(ret, ccc, apSome(argAp), ap, config) and // return node reached in a call context with a recorded argument
+ flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ ccc.matchesCall(call) // the call context must be compatible with returning to `call`
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path needs an edge that allows field flow
+ )
+ }
+
+ /**
+ * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`
+ * and data might flow through the target callable and back out at `call`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(
+ DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p |
+ fwdFlowIn(call, p, cc, _, argAp, ap, config) and // `cc` and `argAp` are those of the caller side
+ PrevStage::parameterMayFlowThrough(p, _, getApprox(ap), config) // previous stage allows flow-through for `p`
+ )
+ }
+
+ pragma[nomagic] // Forward store of `tc` from `node1` (`ap1`) to `node2` (`ap2`) that is matched by some forward read.
+ private predicate storeStepFwd(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config
+ ) {
+ fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+ ap2 = apCons(tc, ap1) and
+ fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) // keep the store only if `ap2` is read with the same content somewhere
+ }
+
+ private predicate readStepFwd( // Forward read of `c` from `n1` (`ap1`) to `n2`, where `ap2` is a matching tail.
+ NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config
+ ) {
+ fwdFlowRead(ap1, c, n1, n2, _, _, config) and
+ fwdFlowConsCand(ap1, c, ap2, config) // some forward store of `c` extended `ap2` to `ap1`
+ }
+
+ pragma[nomagic] // Holds if forward flow enters `call` via an argument and comes back out of the same call.
+ private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) {
+ exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap |
+ fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap, // `out` is reached with the caller-side context
+ pragma[only_bind_into](config)) and
+ fwdFlowOutFromArg(call, out, argAp0, ap, config) and // flow returns to `out`, having entered with `argAp0`
+ fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc), // `call` was entered with exactly that context
+ pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
+ pragma[only_bind_into](config))
+ )
+ }
+
+ pragma[nomagic] // Call edge `arg` -> `p` at `call`, restricted to edges eligible for flow-through.
+ private predicate flowThroughIntoCall(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and // the argument is reached by forward flow
+ PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and // previous stage allows flow-through for `p`
+ callMayFlowThroughFwd(call, pragma[only_bind_into](config)) // forward flow of this stage passes through `call`
+ }
+
+ /**
+ * Holds if `node` with access path `ap` is part of a path from a source to a
+ * sink in the configuration `config`.
+ *
+ * The Boolean `toReturn` records whether the node must be returned from the
+ * enclosing callable in order to reach a sink, and if so, `returnAp` records
+ * the access path of the returned value.
+ */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+ revFlow0(node, toReturn, returnAp, ap, config) and // the actual reverse-flow cases
+ fwdFlow(node, _, _, ap, config) // always restricted to what forward flow reached with the same `ap`
+ }
+
+ pragma[nomagic] // Case analysis for `revFlow`; recursive calls go through `revFlow`.
+ private predicate revFlow0(
+ NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ fwdFlow(node, _, _, ap, config) and // base case: a sink reached by forward flow
+ sinkNode(node, config) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil // sinks are only matched with a nil access path
+ or
+ exists(NodeEx mid | // value-preserving local step taken backwards: access path is unchanged
+ localStep(node, mid, true, _, config, _) and
+ revFlow(mid, toReturn, returnAp, ap, config)
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // non-value-preserving local step: nil access path on both sides
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ localStep(node, mid, false, _, config, _) and
+ revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+ ap instanceof ApNil
+ )
+ or
+ exists(NodeEx mid | // jump step: `ap` is kept, return information is reset
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, _, ap, config) and
+ toReturn = false and
+ returnAp = apNone()
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // additional jump step: nil access path on both sides, return information is reset
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ additionalJumpStep(node, mid, config) and
+ revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ )
+ or
+ // store
+ exists(Ap ap0, Content c |
+ revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and // `node` is the source of the store, `ap` the tail
+ revFlowConsCand(ap0, c, ap, config) // and a matching read exists in reverse flow
+ )
+ or
+ // read
+ exists(NodeEx mid, Ap ap0 |
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ readStepFwd(node, ap, _, mid, ap0, config) // `node` is the source of the read, `ap0` the tail after reading
+ )
+ or
+ // flow into a callable
+ revFlowInNotToReturn(node, returnAp, ap, config) and
+ toReturn = false
+ or
+ exists(DataFlowCall call, Ap returnAp0 | // flow through `call`: resume with the return state that held at the call's output
+ revFlowInToReturn(call, node, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ or
+ // flow out of a callable
+ revFlowOut(_, node, _, _, ap, config) and
+ toReturn = true and
+ if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) // record `ap` only if forward flow entered via a tracked argument
+ then returnAp = apSome(ap)
+ else returnAp = apNone()
+ }
+
+ pragma[nomagic] // Reverse flow reaches `mid` with `ap0`, and a forward store of `tc` leads from `node` (`ap`) to `mid`.
+ private predicate revFlowStore(
+ Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn,
+ ApOption returnAp, Configuration config
+ ) {
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ storeStepFwd(node, ap, tc, mid, ap0, config) and
+ tc.getContent() = c // `c` is the untyped content of `tc`
+ }
+
+ /**
+ * Holds if reverse flow with access path `tail` reaches a read of `c`
+ * resulting in access path `cons`.
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(NodeEx mid, Ap tail0 |
+ revFlow(mid, _, _, tail, config) and
+ tail = pragma[only_bind_into](tail0) and // `tail0` equals `tail`; the pragma only affects binding direction
+ readStepFwd(_, cons, c, mid, tail0, config)
+ )
+ }
+
+ pragma[nomagic] // Reverse flow reaches return node `ret` from an output of `call` that reverse flow has reached.
+ private predicate revFlowOut(
+ DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap,
+ Configuration config
+ ) {
+ exists(NodeEx out, boolean allowsFieldFlow |
+ revFlow(out, toReturn, returnAp, ap, config) and // `toReturn`/`returnAp` describe the caller-side node `out`
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path needs an edge that allows field flow
+ )
+ }
+
+ pragma[nomagic] // Reverse flow reaches `arg` from a parameter that reaches a sink without having to return.
+ private predicate revFlowInNotToReturn(
+ ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, false, returnAp, ap, config) and // `toReturn = false` at the parameter
+ flowIntoCall(_, arg, p, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path needs an edge that allows field flow
+ )
+ }
+
+ pragma[nomagic] // Reverse flow reaches `arg` at `call` from a parameter whose value must be returned with `returnAp`.
+ private predicate revFlowInToReturn(
+ DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, true, apSome(returnAp), ap, config) and // `toReturn = true` with a recorded return access path
+ flowThroughIntoCall(call, arg, p, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path needs an edge that allows field flow
+ )
+ }
+
+ /**
+ * Holds if an output from `call` is reached in the flow covered by `revFlow`
+ * and data might flow through the target callable resulting in reverse flow
+ * reaching an argument of `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(
+ DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, CcCall ccc |
+ revFlowOut(call, ret, toReturn, returnAp, ap, config) and
+ fwdFlow(ret, ccc, apSome(_), ap, config) and // forward flow reached `ret` via a tracked argument
+ ccc.matchesCall(call) // and in a call context compatible with `call`
+ )
+ }
+
+ pragma[nomagic] // Store step from `node1` (access path `ap1`) to `node2` that survives this stage's reverse flow.
+ predicate storeStepCand(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+ Configuration config
+ ) {
+ exists(Ap ap2, Content c |
+ store(node1, tc, node2, contentType, config) and // supplies `contentType` for the step
+ revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and
+ revFlowConsCand(ap2, c, ap1, config) // a matching read exists in reverse flow
+ )
+ }
+
+ predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) { // Read step of `c` that survives this stage.
+ exists(Ap ap1, Ap ap2 |
+ revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and // target is in reverse flow
+ readStepFwd(node1, ap1, c, node2, ap2, config) and
+ revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, // a matching store exists in reverse flow
+ pragma[only_bind_into](config))
+ )
+ }
+
+ predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) } // node-only projection of the 5-argument `revFlow`
+
+ private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { // `tc` is stored onto access path `ap` in forward flow
+ storeStepFwd(_, ap, tc, _, _, config)
+ }
+
+ predicate consCand(TypedContent tc, Ap ap, Configuration config) { // `tc` is stored onto access path `ap` by a step in `storeStepCand`
+ storeStepCand(_, ap, tc, _, _, config)
+ }
+
+ pragma[noinline] // Parameter `p` of callable `c` has reverse flow with `ap` that must return with access path `ap0`.
+ private predicate parameterFlow(
+ ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+ ) {
+ revFlow(p, true, apSome(ap0), ap, config) and
+ c = p.getEnclosingCallable()
+ }
+
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { // `p` (entered with `ap`) may flow to a return node of `c`
+ exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
+ parameterFlow(p, ap, ap0, c, config) and
+ c = ret.getEnclosingCallable() and // `ret` and `p` belong to the same callable
+ revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0), // `ret` carries `ap0` in reverse flow
+ pragma[only_bind_into](config)) and
+ fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and // forward flow entered with `ap` and reaches `ret` with `ap0`
+ kind = ret.getKind() and
+ p.getPosition() = pos and
+ // we don't expect a parameter to return stored in itself
+ not kind.(ParamUpdateReturnKind).getPosition() = pos
+ )
+ }
+
+ pragma[nomagic] // Holds if reverse flow passes through `call`: from one of its outputs back to one of its arguments.
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+ exists(Ap returnAp0, ArgNodeEx arg, boolean toReturn, ApOption returnAp, Ap ap |
+ revFlow(arg, toReturn, returnAp, ap, config) and // the argument is in reverse flow with the caller-side return state
+ revFlowInToReturn(call, arg, returnAp0, ap, config) and // reached from a parameter that must return with `returnAp0`
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) // and `call` returns with that same state
+ )
+ }
+
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { // Size counts for this stage, per direction.
+ fwd = true and // forward direction
+ nodes = count(NodeEx node | fwdFlow(node, _, _, _, config)) and // distinct nodes in `fwdFlow`
+ fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and // distinct typed contents stored
+ conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and // distinct (content, access path) pairs
+ tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config)) // all `fwdFlow` tuples
+ or
+ fwd = false and // reverse direction
+ nodes = count(NodeEx node | revFlow(node, _, _, _, config)) and // distinct nodes in `revFlow`
+ fields = count(TypedContent f0 | consCand(f0, _, config)) and // distinct typed contents stored
+ conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and // distinct (content, access path) pairs
+ tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config)) // all `revFlow` tuples
+ }
+ /* End: Stage 2 logic. */
+}
+
+pragma[nomagic] // Return edge from `flowOutOfCallNodeCand1` whose two end nodes are both in Stage 2 reverse flow.
+private predicate flowOutOfCallNodeCand2(
+ DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config
+) {
+ flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and
+ Stage2::revFlow(node2, pragma[only_bind_into](config)) and
+ Stage2::revFlow(node1, pragma[only_bind_into](config))
+}
+
+pragma[nomagic] // Call edge from `flowIntoCallNodeCand1` whose two end nodes are both in Stage 2 reverse flow.
+private predicate flowIntoCallNodeCand2(
+ DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow,
+ Configuration config
+) {
+ flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and
+ Stage2::revFlow(node2, pragma[only_bind_into](config)) and
+ Stage2::revFlow(node1, pragma[only_bind_into](config))
+}
+
+private module LocalFlowBigStep { // Collapses runs of local flow steps between Stage 2 nodes into single "big" steps.
+ /**
+ * A node where some checking is required, and hence the big-step relation
+ * is not allowed to step over.
+ */
+ private class FlowCheckNode extends NodeEx {
+ FlowCheckNode() {
+ castNode(this.asNode()) or // nodes matched by `castNode`
+ clearsContentCached(this.asNode(), _) // nodes that clear some content
+ }
+ }
+
+ /**
+ * Holds if `node` can be the first node in a maximal subsequence of local
+ * flow steps in a dataflow path.
+ */
+ predicate localFlowEntry(NodeEx node, Configuration config) {
+ Stage2::revFlow(node, config) and // only nodes that survived Stage 2
+ (
+ sourceNode(node, config) or
+ jumpStep(_, node, config) or // target of a jump step
+ additionalJumpStep(_, node, config) or
+ node instanceof ParamNodeEx or
+ node.asNode() instanceof OutNodeExt or
+ store(_, _, node, _, config) or // target of a store step
+ read(_, _, node, config) or // target of a read step
+ node instanceof FlowCheckNode
+ )
+ }
+
+ /**
+ * Holds if `node` can be the last node in a maximal subsequence of local
+ * flow steps in a dataflow path.
+ */
+ private predicate localFlowExit(NodeEx node, Configuration config) {
+ exists(NodeEx next | Stage2::revFlow(next, config) | // a non-local step leaves `node` towards a Stage 2 node
+ jumpStep(node, next, config) or
+ additionalJumpStep(node, next, config) or
+ flowIntoCallNodeCand1(_, node, next, config) or
+ flowOutOfCallNodeCand1(_, node, next, config) or
+ store(node, _, next, _, config) or
+ read(node, _, next, config)
+ )
+ or
+ node instanceof FlowCheckNode // big steps must stop at check nodes
+ or
+ sinkNode(node, config)
+ }
+
+ pragma[noinline] // Additional local step whose end nodes are both in Stage 2 reverse flow with access path `false`.
+ private predicate additionalLocalFlowStepNodeCand2(
+ NodeEx node1, NodeEx node2, Configuration config
+ ) {
+ additionalLocalFlowStepNodeCand1(node1, node2, config) and
+ Stage2::revFlow(node1, _, _, false, pragma[only_bind_into](config)) and
+ Stage2::revFlow(node2, _, _, false, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if the local path from `node1` to `node2` is a prefix of a maximal
+ * subsequence of local flow steps in a dataflow path.
+ *
+ * This is the transitive closure of `[additional]localFlowStep` beginning
+ * at `localFlowEntry`.
+ */
+ pragma[nomagic]
+ private predicate localFlowStepPlus(
+ NodeEx node1, NodeEx node2, boolean preservesValue, DataFlowType t, Configuration config,
+ LocalCallContext cc
+ ) {
+ not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and // applies to every case below
+ (
+ localFlowEntry(node1, pragma[only_bind_into](config)) and // base case: one step starting at an entry node
+ (
+ localFlowStepNodeCand1(node1, node2, config) and
+ preservesValue = true and
+ t = node1.getDataFlowType() // irrelevant dummy value
+ or
+ additionalLocalFlowStepNodeCand2(node1, node2, config) and
+ preservesValue = false and
+ t = node2.getDataFlowType() // for additional steps `t` is the type of the step's target
+ ) and
+ node1 != node2 and
+ cc.relevantFor(node1.getEnclosingCallable()) and
+ not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
+ Stage2::revFlow(node2, pragma[only_bind_into](config))
+ or
+ exists(NodeEx mid | // extend by a value-preserving step: `preservesValue` and `t` carry over
+ localFlowStepPlus(node1, mid, preservesValue, t, pragma[only_bind_into](config), cc) and
+ localFlowStepNodeCand1(mid, node2, config) and
+ not mid instanceof FlowCheckNode and // never continue past a check node
+ Stage2::revFlow(node2, pragma[only_bind_into](config))
+ )
+ or
+ exists(NodeEx mid | // extend by an additional step: the whole path stops preserving the value
+ localFlowStepPlus(node1, mid, _, _, pragma[only_bind_into](config), cc) and
+ additionalLocalFlowStepNodeCand2(mid, node2, config) and
+ not mid instanceof FlowCheckNode and // never continue past a check node
+ preservesValue = false and
+ t = node2.getDataFlowType() and
+ Stage2::revFlow(node2, pragma[only_bind_into](config))
+ )
+ )
+ }
+
+ /**
+ * Holds if `node1` can step to `node2` in one or more local steps and this
+ * path can occur as a maximal subsequence of local steps in a dataflow path.
+ */
+ pragma[nomagic]
+ predicate localFlowBigStep(
+ NodeEx node1, NodeEx node2, boolean preservesValue, AccessPathFrontNil apf,
+ Configuration config, LocalCallContext callContext
+ ) {
+ localFlowStepPlus(node1, node2, preservesValue, apf.getType(), config, callContext) and // `apf` is the nil front of the path's type `t`
+ localFlowExit(node2, config) // the path must end at an exit node
+ }
+}
+
+private import LocalFlowBigStep
+
+private module Stage3 {
+ module PrevStage = Stage2; // this stage is restricted by the results of Stage 2
+
+ class ApApprox = PrevStage::Ap; // approximate access paths are the previous stage's access paths
+
+ class Ap = AccessPathFront; // access paths of this stage
+
+ class ApNil = AccessPathFrontNil; // the nil access paths of this stage
+
+ private ApApprox getApprox(Ap ap) { result = ap.toBoolNonEmpty() } // maps `ap` to the previous stage's representation
+
+ private ApNil getApNil(NodeEx node) { // nil access path typed by `node`, defined only for nodes in the previous stage's `revFlow`
+ PrevStage::revFlow(node, _) and result = TFrontNil(node.getDataFlowType())
+ }
+
+ bindingset[tc, tail] // both arguments must be bound by the caller
+ private Ap apCons(TypedContent tc, Ap tail) { result.getHead() = tc and exists(tail) } // only the head is constrained; `tail` is merely required to exist
+
+ pragma[noinline] // kept as a separate predicate rather than inlined at use sites
+ private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } // untyped content of the head of `ap`
+
+ class ApOption = AccessPathFrontOption; // optional access path of this stage
+
+ ApOption apNone() { result = TAccessPathFrontNone() } // the "no access path" option
+
+ ApOption apSome(Ap ap) { result = TAccessPathFrontSome(ap) } // the option wrapping `ap`
+
+ class Cc = boolean; // call contexts in this stage are plain booleans: `true` is `CcCall`, `false` is `CcNoCall`
+
+ class CcCall extends Cc { // the call context `true`
+ CcCall() { this = true }
+
+ /** Holds if this call context may be `call`. */
+ predicate matchesCall(DataFlowCall call) { any() } // holds for every call: the boolean context does not identify a call
+ }
+
+ class CcNoCall extends Cc { // the call context `false`
+ CcNoCall() { this = false }
+ }
+
+ Cc ccNone() { result = false } // the empty call context; it is the `CcNoCall` value
+
+ private class LocalCc = Unit; // this stage does not distinguish local call contexts
+
+ bindingset[call, c, outercc] // arguments must be bound by the caller; the body does not inspect them
+ private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() } // result is any `CcCall`, i.e. `true`
+
+ bindingset[call, c, innercc] // arguments must be bound by the caller; the body does not inspect them
+ private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() } // result is any `CcNoCall`, i.e. `false`
+
+ bindingset[node, cc, config] // arguments must be bound by the caller; the body does not inspect them
+ private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() } // result is any `LocalCc` (`Unit`) value
+
+ private predicate localStep( // Local steps of this stage are the big steps from `LocalFlowBigStep`.
+ NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+ ) {
+ localFlowBigStep(node1, node2, preservesValue, ap, config, _) and exists(lcc) // any local call context is accepted; `lcc` only has to exist
+ }
+
+ private predicate flowOutOfCall = flowOutOfCallNodeCand2/5; // return edges restricted to Stage 2 nodes
+
+ private predicate flowIntoCall = flowIntoCallNodeCand2/5; // call edges restricted to Stage 2 nodes
+
+ pragma[nomagic] // Holds if access path `ap` is cleared at `node`.
+ private predicate clear(NodeEx node, Ap ap) { ap.isClearedAt(node.asNode()) }
+
+ pragma[nomagic] // Holds if the underlying node of `node` is a `CastingNode`.
+ private predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode }
+
+ bindingset[node, ap] // Stage-specific check applied to every forward-flow tuple; both arguments must be bound.
+ private predicate filter(NodeEx node, Ap ap) {
+ not clear(node, ap) and // drop access paths that are cleared at `node`
+ if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any() // type check only at casting nodes
+ }
+
+ bindingset[ap, contentType] // Holds if the type of `ap` is compatible with the stored content's type.
+ private predicate typecheckStore(Ap ap, DataFlowType contentType) {
+ // We need to typecheck stores here, since reverse flow through a getter
+ // might have a different type here compared to inside the getter.
+ compatibleTypes(ap.getType(), contentType)
+ }
+
+ /* Begin: Stage 3 logic. */
+ private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) { // `node` with approximate access path `apa` survived the previous stage
+ PrevStage::revFlow(node, _, _, apa, config)
+ }
+
+ bindingset[result, apa] // Identity on `ApApprox`: `result` equals `apa`; both sides must be bound from outside.
+ private ApApprox unbindApa(ApApprox apa) {
+ exists(ApApprox apa0 | // the intermediate variable plus `only_bind_into` only influences binding direction, not the result
+ apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0)
+ )
+ }
+
+ pragma[nomagic] // Return edge `ret` -> `out` at `call`, restricted to edges eligible for flow-through.
+ private predicate flowThroughOutOfCall(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and
+ PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and // previous stage found flow through `call`
+ PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, // some parameter of `ret`'s callable may flow through
+ pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` is reachable with access path `ap` from a source in the
+ * configuration `config`.
+ *
+ * The call context `cc` records whether the node is reached through an
+ * argument in a call, and if so, `argAp` records the access path of that
+ * argument.
+ */
+ pragma[nomagic]
+ predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ fwdFlow0(node, cc, argAp, ap, config) and // the actual forward-flow cases
+ flowCand(node, unbindApa(getApprox(ap)), config) and // previous stage must have kept `node` with the approximation of `ap`
+ filter(node, ap) // stage-specific clearing and casting checks
+ }
+
+ pragma[nomagic] // Case analysis for `fwdFlow`; recursive calls go through `fwdFlow`.
+ private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ flowCand(node, _, config) and // base case: `node` is a source kept by the previous stage
+ sourceNode(node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node) // a source starts with the nil access path
+ or
+ exists(NodeEx mid, Ap ap0, LocalCc localCc | // local step from an already reached `mid`
+ fwdFlow(mid, cc, argAp, ap0, config) and
+ localCc = getLocalCc(mid, cc, config)
+ |
+ localStep(mid, node, true, _, config, localCc) and // value-preserving step: access path is unchanged
+ ap = ap0
+ or
+ localStep(mid, node, false, ap, config, localCc) and // non-value-preserving step: `ap` comes from the step
+ ap0 instanceof ApNil // and is only allowed from a nil access path
+ )
+ or
+ exists(NodeEx mid | // jump step: `ap` is kept, call context and argument access path are reset
+ fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ jumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone()
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // additional jump step: taken only from a nil access path, yields a nil access path
+ fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ additionalJumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ )
+ or
+ // store
+ exists(TypedContent tc, Ap ap0 |
+ fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+ ap = apCons(tc, ap0) // the stored content becomes the new head of the access path
+ )
+ or
+ // read
+ exists(Ap ap0, Content c |
+ fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+ fwdFlowConsCand(ap0, c, ap, config) // `ap` is a tail that some forward store of `c` extended to `ap0`
+ )
+ or
+ // flow into a callable
+ exists(ApApprox apa |
+ fwdFlowIn(_, node, _, cc, _, ap, config) and
+ apa = getApprox(ap) and
+ if PrevStage::parameterMayFlowThrough(node, _, apa, config) // record the entry access path only when flow-through is possible
+ then argAp = apSome(ap)
+ else argAp = apNone()
+ )
+ or
+ // flow out of a callable
+ fwdFlowOutNotFromArg(node, cc, argAp, ap, config)
+ or
+ exists(DataFlowCall call, Ap argAp0 | // flow through `call`: resume with the `cc`/`argAp` that held when `call` was entered
+ fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+ fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+ )
+ }
+
+ pragma[nomagic] // Forward flow reaches `node1` with `ap1`, and a store of `tc` leads from `node1` to `node2`.
+ private predicate fwdFlowStore(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ exists(DataFlowType contentType |
+ fwdFlow(node1, cc, argAp, ap1, config) and
+ PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and // only stores kept by the previous stage
+ typecheckStore(ap1, contentType) // type of `ap1` must be compatible with the stored content type
+ )
+ }
+
+ /**
+ * Holds if forward flow with access path `tail` reaches a store of `c`
+ * resulting in access path `cons`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(TypedContent tc |
+ fwdFlowStore(_, tail, tc, _, _, _, config) and // nodes and call context of the store are irrelevant here
+ tc.getContent() = c and
+ cons = apCons(tc, tail)
+ )
+ }
+
+ pragma[nomagic] // Forward flow reaches `node1` with `ap`, and a read of `c` leads from `node1` to `node2`.
+ private predicate fwdFlowRead(
+ Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ fwdFlow(node1, cc, argAp, ap, config) and
+ PrevStage::readStepCand(node1, c, node2, config) and // only reads kept by the previous stage
+ getHeadContent(ap) = c // the content read must be the head of the access path
+ }
+
+ pragma[nomagic] // Forward flow enters parameter `p` at `call` from an argument reached in context `outercc`.
+ private predicate fwdFlowIn(
+ DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+ Configuration config
+ ) {
+ exists(ArgNodeEx arg, boolean allowsFieldFlow |
+ fwdFlow(arg, outercc, argAp, ap, config) and // `argAp` here belongs to the caller's context
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path needs an edge that allows field flow
+ )
+ }
+
+ pragma[nomagic] // Forward flow leaves a callable at `out` from a return node reached in a `CcNoCall` context.
+ private predicate fwdFlowOutNotFromArg(
+ NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(
+ DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc,
+ DataFlowCallable inner
+ |
+ fwdFlow(ret, innercc, argAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ inner = ret.getEnclosingCallable() and
+ ccOut = getCallContextReturn(inner, call, innercc) // call context to continue with after the return
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path needs an edge that allows field flow
+ )
+ }
+
+ pragma[nomagic] // Forward flow that entered via an argument with access path `argAp` returns to `out` at `call` with `ap`.
+ private predicate fwdFlowOutFromArg(
+ DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc |
+ fwdFlow(ret, ccc, apSome(argAp), ap, config) and // return node reached in a call context with a recorded argument
+ flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ ccc.matchesCall(call) // always true in this stage, where `matchesCall` is `any()`
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path needs an edge that allows field flow
+ )
+ }
+
+ /**
+ * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`
+ * and data might flow through the target callable and back out at `call`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(
+ DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p |
+ fwdFlowIn(call, p, cc, _, argAp, ap, config) and // `cc` and `argAp` are those of the caller side
+ PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config) // previous stage allows flow-through for `p`
+ )
+ }
+
+ pragma[nomagic] // Forward store of `tc` from `node1` (`ap1`) to `node2` (`ap2`) that is matched by some forward read.
+ private predicate storeStepFwd(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config
+ ) {
+ fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+ ap2 = apCons(tc, ap1) and
+ fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) // keep the store only if `ap2` is read with the same content somewhere
+ }
+
+ private predicate readStepFwd( // Forward read of `c` from `n1` (`ap1`) to `n2`, where `ap2` is a matching tail.
+ NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config
+ ) {
+ fwdFlowRead(ap1, c, n1, n2, _, _, config) and
+ fwdFlowConsCand(ap1, c, ap2, config) // some forward store of `c` extended `ap2` to `ap1`
+ }
+
+ pragma[nomagic] // Holds if forward flow enters `call` via an argument and comes back out of the same call.
+ private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) {
+ exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap |
+ fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap, // `out` is reached with the caller-side context
+ pragma[only_bind_into](config)) and
+ fwdFlowOutFromArg(call, out, argAp0, ap, config) and // flow returns to `out`, having entered with `argAp0`
+ fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc), // `call` was entered with exactly that context
+ pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
+ pragma[only_bind_into](config))
+ )
+ }
+
+ pragma[nomagic] // Call edge `arg` -> `p` at `call`, restricted to edges eligible for flow-through.
+ private predicate flowThroughIntoCall(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and // the argument is reached by forward flow
+ PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and // previous stage allows flow-through for `p`
+ callMayFlowThroughFwd(call, pragma[only_bind_into](config)) // forward flow of this stage passes through `call`
+ }
+
+ /**
+ * Holds if `node` with access path `ap` is part of a path from a source to a
+ * sink in the configuration `config`.
+ *
+ * The Boolean `toReturn` records whether the node must be returned from the
+ * enclosing callable in order to reach a sink, and if so, `returnAp` records
+ * the access path of the returned value.
+ */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+ revFlow0(node, toReturn, returnAp, ap, config) and // the actual reverse-flow cases
+ fwdFlow(node, _, _, ap, config) // always restricted to what forward flow reached with the same `ap`
+ }
+
+ pragma[nomagic] // Case analysis for `revFlow`; recursive calls go through `revFlow`.
+ private predicate revFlow0(
+ NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ fwdFlow(node, _, _, ap, config) and // base case: a sink reached by forward flow
+ sinkNode(node, config) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil // sinks are only matched with a nil access path
+ or
+ exists(NodeEx mid | // value-preserving local step taken backwards: access path is unchanged
+ localStep(node, mid, true, _, config, _) and
+ revFlow(mid, toReturn, returnAp, ap, config)
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // non-value-preserving local step: nil access path on both sides
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ localStep(node, mid, false, _, config, _) and
+ revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+ ap instanceof ApNil
+ )
+ or
+ exists(NodeEx mid | // jump step: `ap` is kept, return information is reset
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, _, ap, config) and
+ toReturn = false and
+ returnAp = apNone()
+ )
+ or
+ exists(NodeEx mid, ApNil nil | // additional jump step: nil access path on both sides, return information is reset
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ additionalJumpStep(node, mid, config) and
+ revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ )
+ or
+ // store
+ exists(Ap ap0, Content c |
+ revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and // `node` is the source of the store, `ap` the tail
+ revFlowConsCand(ap0, c, ap, config) // and a matching read exists in reverse flow
+ )
+ or
+ // read
+ exists(NodeEx mid, Ap ap0 |
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ readStepFwd(node, ap, _, mid, ap0, config) // `node` is the source of the read, `ap0` the tail after reading
+ )
+ or
+ // flow into a callable
+ revFlowInNotToReturn(node, returnAp, ap, config) and
+ toReturn = false
+ or
+ exists(DataFlowCall call, Ap returnAp0 | // flow through `call`: resume with the return state that held at the call's output
+ revFlowInToReturn(call, node, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ or
+ // flow out of a callable
+ revFlowOut(_, node, _, _, ap, config) and
+ toReturn = true and
+ if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) // record `ap` only if forward flow entered via a tracked argument
+ then returnAp = apSome(ap)
+ else returnAp = apNone()
+ }
+
+ pragma[nomagic] // Reverse flow reaches `mid` with `ap0`, and a forward store of `tc` leads from `node` (`ap`) to `mid`.
+ private predicate revFlowStore(
+ Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn,
+ ApOption returnAp, Configuration config
+ ) {
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ storeStepFwd(node, ap, tc, mid, ap0, config) and
+ tc.getContent() = c // `c` is the untyped content of `tc`
+ }
+
+ /**
+ * Holds if reverse flow with access path `tail` reaches a read of `c`
+ * resulting in access path `cons`.
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(NodeEx mid, Ap tail0 |
+ revFlow(mid, _, _, tail, config) and
+ tail = pragma[only_bind_into](tail0) and // `tail0` equals `tail`; the pragma only affects binding direction
+ readStepFwd(_, cons, c, mid, tail0, config)
+ )
+ }
+
+ pragma[nomagic] // Reverse flow reaches return node `ret` from an output of `call` that reverse flow has reached.
+ private predicate revFlowOut(
+ DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap,
+ Configuration config
+ ) {
+ exists(NodeEx out, boolean allowsFieldFlow |
+ revFlow(out, toReturn, returnAp, ap, config) and // `toReturn`/`returnAp` describe the caller-side node `out`
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path needs an edge that allows field flow
+ )
+ }
+
+ pragma[nomagic] // Reverse flow reaches `arg` from a parameter that reaches a sink without having to return.
+ private predicate revFlowInNotToReturn(
+ ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, false, returnAp, ap, config) and // `toReturn = false` at the parameter
+ flowIntoCall(_, arg, p, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path needs an edge that allows field flow
+ )
+ }
+
+ pragma[nomagic] // Reverse flow reaches `arg` at `call` from a parameter whose value must be returned with `returnAp`.
+ private predicate revFlowInToReturn(
+ DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, true, apSome(returnAp), ap, config) and // `toReturn = true` with a recorded return access path
+ flowThroughIntoCall(call, arg, p, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path needs an edge that allows field flow
+ )
+ }
+
+ /**
+ * Holds if an output from `call` is reached in the flow covered by `revFlow`
+ * and data might flow through the target callable resulting in reverse flow
+ * reaching an argument of `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(
+ DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, CcCall ccc |
+ revFlowOut(call, ret, toReturn, returnAp, ap, config) and
+ fwdFlow(ret, ccc, apSome(_), ap, config) and // forward flow reached `ret` via a tracked argument
+ ccc.matchesCall(call) // always true in this stage, where `matchesCall` is `any()`
+ )
+ }
+
+ pragma[nomagic] // Store step from `node1` (access path `ap1`) to `node2` that survives this stage's reverse flow.
+ predicate storeStepCand(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+ Configuration config
+ ) {
+ exists(Ap ap2, Content c |
+ store(node1, tc, node2, contentType, config) and // supplies `contentType` for the step
+ revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and
+ revFlowConsCand(ap2, c, ap1, config) // a matching read exists in reverse flow
+ )
+ }
+
+ /**
+ * Holds if the read of `c` from `node1` to `node2` is kept by this stage:
+ * `readStepFwd` relates the two nodes through access paths `ap1` and `ap2`,
+ * reverse flow reaches `node2` with `ap2`, and `revFlowStore` holds for some
+ * store of `c` with the same pair of access paths.
+ */
+ predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) {
+ exists(Ap ap1, Ap ap2 |
+ revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and
+ readStepFwd(node1, ap1, c, node2, ap2, config) and
+ revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _,
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /**
+ * Holds if `node` is reached by reverse flow in `config`, regardless of the
+ * return state and the access path.
+ */
+ predicate revFlow(NodeEx node, Configuration config) {
+ revFlow(node, _, _, _, config)
+ }
+
+ /**
+ * Holds if `storeStepFwd` holds for some store of `tc` whose second argument
+ * is the access path `ap`. Only used by `stats` within the visible code.
+ */
+ private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepFwd(_, ap, tc, _, _, config)
+ }
+
+ /**
+ * Holds if some store step kept by `storeStepCand` stores `tc` with `ap` as
+ * the access path before the store.
+ */
+ predicate consCand(TypedContent tc, Ap ap, Configuration config) {
+ exists(NodeEx storeFrom, NodeEx storeTo |
+ storeStepCand(storeFrom, ap, tc, storeTo, _, config)
+ )
+ }
+
+ /**
+ * Holds if reverse flow reaches parameter `p` of callable `c` with access
+ * path `ap`, in the to-return state with return access path `ap0`.
+ */
+ pragma[noinline]
+ private predicate parameterFlow(
+ ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+ ) {
+ p.getEnclosingCallable() = c and
+ revFlow(p, true, apSome(ap0), ap, config)
+ }
+
+ /**
+ * Holds if parameter `p` of callable `c`, entered with access path `ap`, may
+ * flow to a return node of `c`: reverse flow reaches `p` in the to-return
+ * state with some return access path `ap0`, and a return node in `c` is
+ * reached both by reverse flow with `ap0` and by forward flow in a call
+ * context with argument access path `ap`. Return nodes whose kind is a
+ * parameter update at the position of `p` itself are excluded.
+ */
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+ exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
+ parameterFlow(p, ap, ap0, c, config) and
+ c = ret.getEnclosingCallable() and
+ revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
+ pragma[only_bind_into](config)) and
+ fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
+ kind = ret.getKind() and
+ p.getPosition() = pos and
+ // we don't expect a parameter to return stored in itself
+ not kind.(ParamUpdateReturnKind).getPosition() = pos
+ )
+ }
+
+ /**
+ * Holds if reverse flow may pass through `call`: an output of `call` is
+ * reached such that `revFlowIsReturned` holds, and reverse flow continues
+ * from the callee to an argument of `call` (via `revFlowInToReturn`) that is
+ * itself reached by reverse flow with the same outer return state.
+ */
+ pragma[nomagic]
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+ exists(
+ ArgNodeEx argNode, Ap argPath, Ap calleeReturnAp, boolean outerToReturn,
+ ApOption outerReturnAp
+ |
+ revFlowIsReturned(call, outerToReturn, outerReturnAp, calleeReturnAp, config) and
+ revFlowInToReturn(call, argNode, calleeReturnAp, argPath, config) and
+ revFlow(argNode, outerToReturn, outerReturnAp, argPath, config)
+ )
+ }
+
+ /**
+ * Reports size statistics for this stage under `config`. With `fwd = true`
+ * the counts are taken over `fwdFlow` and `fwdConsCand`; with `fwd = false`
+ * they are taken over `revFlow` and `consCand`. `nodes` is the number of
+ * distinct nodes, `fields` the number of distinct typed contents, `conscand`
+ * the number of (typed content, access path) pairs, and `tuples` the number
+ * of tuples in the respective flow relation.
+ */
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+ fwd = true and
+ nodes = count(NodeEx node | fwdFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and
+ tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config))
+ or
+ fwd = false and
+ nodes = count(NodeEx node | revFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | consCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and
+ tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config))
+ }
+ /* End: Stage 3 logic. */
+}
+
+/**
+ * Holds if `argApf` is recorded as the summary context for flow reaching `node`
+ * and is still relevant after Stage 3: `node` is reached by Stage 3 forward
+ * flow in a call context with argument access path front `argApf`, and by
+ * Stage 3 reverse flow in the to-return state, both with the same front.
+ */
+private predicate flowCandSummaryCtx(NodeEx node, AccessPathFront argApf, Configuration config) {
+ exists(AccessPathFront front, Stage3::CcCall callCc |
+ Stage3::fwdFlow(node, callCc, TAccessPathFrontSome(argApf), front, config) and
+ Stage3::revFlow(node, true, _, front, config)
+ )
+}
+
+/**
+ * Holds if a length 2 access path approximation with the head `tc` is expected
+ * to be expensive.
+ */
+private predicate expensiveLen2unfolding(TypedContent tc, Configuration config) {
+ exists(int tails, int nodes, int apLimit, int tupleLimit |
+ // `tails`: number of distinct access path fronts that `Stage3::consCand` pairs with `tc`
+ tails = strictcount(AccessPathFront apf | Stage3::consCand(tc, apf, config)) and
+ // `nodes`: number of nodes whose access path front, or whose summary-context
+ // front, has `tc` as its head
+ nodes =
+ strictcount(NodeEx n |
+ Stage3::revFlow(n, _, _, any(AccessPathFrontHead apf | apf.getHead() = tc), config)
+ or
+ flowCandSummaryCtx(n, any(AccessPathFrontHead apf | apf.getHead() = tc), config)
+ ) and
+ accessPathApproxCostLimits(apLimit, tupleLimit) and
+ // expensive only if both limits are exceeded and `tc` does not force high precision
+ apLimit < tails and
+ tupleLimit < (tails - 1) * nodes and
+ not tc.forceHighPrecision()
+ )
+}
+
+/**
+ * The underlying representation of `AccessPathApprox`. `TConsNil` and
+ * `TConsCons` require a matching `Stage3::consCand` tuple and a head for which
+ * `expensiveLen2unfolding` does not hold; `TCons1` is used for heads for which
+ * it does hold. Lengths are bounded by `accessPathLimit()`.
+ */
+private newtype TAccessPathApprox =
+ TNil(DataFlowType t) or
+ TConsNil(TypedContent tc, DataFlowType t) {
+ Stage3::consCand(tc, TFrontNil(t), _) and
+ not expensiveLen2unfolding(tc, _)
+ } or
+ TConsCons(TypedContent tc1, TypedContent tc2, int len) {
+ Stage3::consCand(tc1, TFrontHead(tc2), _) and
+ len in [2 .. accessPathLimit()] and
+ not expensiveLen2unfolding(tc1, _)
+ } or
+ TCons1(TypedContent tc, int len) {
+ len in [1 .. accessPathLimit()] and
+ expensiveLen2unfolding(tc, _)
+ }
+
+/**
+ * Conceptually a list of `TypedContent`s followed by a `DataFlowType`, but only
+ * the first two elements of the list and its length are tracked. If data flows
+ * from a source to a given node with a given `AccessPathApprox`, this indicates
+ * the sequence of dereference operations needed to get from the value in the node
+ * to the tracked object. The final type indicates the type of the tracked object.
+ */
+abstract private class AccessPathApprox extends TAccessPathApprox {
+ /** Gets a textual representation of this access path approximation. */
+ abstract string toString();
+
+ /** Gets the first `TypedContent` of this access path, if any. */
+ abstract TypedContent getHead();
+
+ /** Gets the length of this access path. */
+ abstract int len();
+
+ /**
+ * Gets the type associated with this access path: the tracked type for the
+ * empty path, and the container type of the head otherwise.
+ */
+ abstract DataFlowType getType();
+
+ /** Gets the `AccessPathFront` that corresponds to this access path approximation. */
+ abstract AccessPathFront getFront();
+
+ /** Gets the access path obtained by popping `head` from this path, if any. */
+ abstract AccessPathApprox pop(TypedContent head);
+}
+
+/** The empty access path approximation, which carries only a `DataFlowType`. */
+private class AccessPathApproxNil extends AccessPathApprox, TNil {
+ private DataFlowType trackedType;
+
+ AccessPathApproxNil() { this = TNil(trackedType) }
+
+ override TypedContent getHead() { none() }
+
+ override AccessPathApprox pop(TypedContent head) { none() }
+
+ override int len() { result = 0 }
+
+ override DataFlowType getType() { result = trackedType }
+
+ override AccessPathFront getFront() { result = TFrontNil(trackedType) }
+
+ override string toString() { result = concat(": " + ppReprType(trackedType)) }
+}
+
+/**
+ * Common supertype of the access path approximations that have a head; in
+ * this file it is extended by the classes for `TConsNil`, `TConsCons` and
+ * `TCons1`.
+ */
+abstract private class AccessPathApproxCons extends AccessPathApprox { }
+
+/** An access path approximation of length 1: one `TypedContent` followed by a type. */
+private class AccessPathApproxConsNil extends AccessPathApproxCons, TConsNil {
+ private TypedContent content;
+ private DataFlowType tailType;
+
+ AccessPathApproxConsNil() { this = TConsNil(content, tailType) }
+
+ override TypedContent getHead() { result = content }
+
+ override AccessPathApprox pop(TypedContent head) {
+ result = TNil(tailType) and
+ head = content
+ }
+
+ override int len() { result = 1 }
+
+ override DataFlowType getType() { result = content.getContainerType() }
+
+ override AccessPathFront getFront() { result = TFrontHead(content) }
+
+ override string toString() {
+ // If `ppReprType` has no result, the `concat` contributes the empty string.
+ result = "[" + content.toString() + "]" + concat(" : " + ppReprType(tailType))
+ }
+}
+
+/**
+ * An access path approximation that tracks its first two `TypedContent`s,
+ * `tc1` and `tc2`, together with its total length `len`.
+ */
+private class AccessPathApproxConsCons extends AccessPathApproxCons, TConsCons {
+ private TypedContent tc1;
+ private TypedContent tc2;
+ private int len;
+
+ AccessPathApproxConsCons() { this = TConsCons(tc1, tc2, len) }
+
+ override string toString() {
+ // paths longer than 2 are abbreviated, showing the total length in parentheses
+ if len = 2
+ then result = "[" + tc1.toString() + ", " + tc2.toString() + "]"
+ else result = "[" + tc1.toString() + ", " + tc2.toString() + ", ... (" + len.toString() + ")]"
+ }
+
+ override TypedContent getHead() { result = tc1 }
+
+ override int len() { result = len }
+
+ override DataFlowType getType() { result = tc1.getContainerType() }
+
+ override AccessPathFront getFront() { result = TFrontHead(tc1) }
+
+ override AccessPathApprox pop(TypedContent head) {
+ head = tc1 and
+ (
+ // the tail starts with `tc2`; which of the three shapes exist depends on
+ // the branches of `TAccessPathApprox`
+ result = TConsCons(tc2, _, len - 1)
+ or
+ len = 2 and
+ result = TConsNil(tc2, _)
+ or
+ result = TCons1(tc2, len - 1)
+ )
+ }
+}
+
+/**
+ * An access path approximation that tracks only its first `TypedContent`,
+ * `tc`, together with its total length `len`.
+ */
+private class AccessPathApproxCons1 extends AccessPathApproxCons, TCons1 {
+ private TypedContent tc;
+ private int len;
+
+ AccessPathApproxCons1() { this = TCons1(tc, len) }
+
+ override string toString() {
+ // paths longer than 1 are abbreviated, showing the total length in parentheses
+ if len = 1
+ then result = "[" + tc.toString() + "]"
+ else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]"
+ }
+
+ override TypedContent getHead() { result = tc }
+
+ override int len() { result = len }
+
+ override DataFlowType getType() { result = tc.getContainerType() }
+
+ override AccessPathFront getFront() { result = TFrontHead(tc) }
+
+ override AccessPathApprox pop(TypedContent head) {
+ head = tc and
+ (
+ // the second element is not tracked, so every `tc2` that
+ // `Stage3::consCand` allows after `tc` yields a possible tail
+ exists(TypedContent tc2 | Stage3::consCand(tc, TFrontHead(tc2), _) |
+ result = TConsCons(tc2, _, len - 1)
+ or
+ len = 2 and
+ result = TConsNil(tc2, _)
+ or
+ result = TCons1(tc2, len - 1)
+ )
+ or
+ // a path of length 1 pops to an empty path of a type allowed by `Stage3::consCand`
+ exists(DataFlowType t |
+ len = 1 and
+ Stage3::consCand(tc, TFrontNil(t), _) and
+ result = TNil(t)
+ )
+ )
+ }
+}
+
+/** Gets the access path obtained by popping `tc` from `apa`, if any. */
+private AccessPathApprox pop(TypedContent tc, AccessPathApprox apa) { result = apa.pop(tc) }
+
+/**
+ * Gets an access path obtained by pushing `tc` onto `apa`, that is, an access
+ * path from which popping `tc` yields `apa`.
+ */
+private AccessPathApprox push(TypedContent tc, AccessPathApprox apa) { apa = pop(tc, result) }
+
+/** The underlying representation of an optional `AccessPathApprox`. */
+private newtype TAccessPathApproxOption =
+ TAccessPathApproxNone() or
+ TAccessPathApproxSome(AccessPathApprox apa)
+
+/** An optional `AccessPathApprox`: either absent or wrapping one access path approximation. */
+private class AccessPathApproxOption extends TAccessPathApproxOption {
+ /**
+ * Gets the textual representation of the wrapped access path approximation,
+ * or the empty string when there is none.
+ */
+ string toString() {
+ exists(AccessPathApprox inner | this = TAccessPathApproxSome(inner) |
+ result = inner.toString()
+ )
+ or
+ this = TAccessPathApproxNone() and
+ result = ""
+ }
+}
+
+/**
+ * The fourth pruning stage. Forward and reverse flow are computed over
+ * `AccessPathApprox` access paths and are restricted to the results of the
+ * previous stage, `Stage3`.
+ */
+private module Stage4 {
+ module PrevStage = Stage3;
+
+ class ApApprox = PrevStage::Ap;
+
+ class Ap = AccessPathApprox;
+
+ class ApNil = AccessPathApproxNil;
+
+ /** Gets the previous stage's approximation of `ap`, which is its front. */
+ private ApApprox getApprox(Ap ap) { result = ap.getFront() }
+
+ /**
+ * Gets the empty access path with the type of `node`, provided that the
+ * previous stage's reverse flow reaches `node`.
+ */
+ private ApNil getApNil(NodeEx node) {
+ PrevStage::revFlow(node, _) and result = TNil(node.getDataFlowType())
+ }
+
+ /** Gets an access path obtained by pushing `tc` onto `tail`. */
+ bindingset[tc, tail]
+ private Ap apCons(TypedContent tc, Ap tail) { result = push(tc, tail) }
+
+ /** Gets the content of the head of `ap`. */
+ pragma[noinline]
+ private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() }
+
+ class ApOption = AccessPathApproxOption;
+
+ /** Gets the absent access path option. */
+ ApOption apNone() { result = TAccessPathApproxNone() }
+
+ /** Gets the access path option that wraps `ap`. */
+ ApOption apSome(Ap ap) { result = TAccessPathApproxSome(ap) }
+
+ class Cc = CallContext;
+
+ class CcCall = CallContextCall;
+
+ class CcNoCall = CallContextNoCall;
+
+ /** Gets the call context used when no call information is available (`CallContextAny`). */
+ Cc ccNone() { result instanceof CallContextAny }
+
+ private class LocalCc = LocalCallContext;
+
+ /**
+ * Gets the call context to use inside callable `c` when it is entered through
+ * `call` from the context `outercc`: `TSpecificCall(call)` when
+ * `recordDataFlowCallSite(call, c)` holds, and `TSomeCall()` otherwise.
+ * Requires `checkCallContextCall(outercc, call, c)`.
+ */
+ bindingset[call, c, outercc]
+ private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
+ checkCallContextCall(outercc, call, c) and
+ if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall()
+ }
+
+ /**
+ * Gets the call context to use after returning from callable `c` through
+ * `call` with the inner context `innercc`: `TReturn(c, call)` when
+ * `reducedViableImplInReturn(c, call)` holds, and `ccNone()` otherwise.
+ * Requires `checkCallContextReturn(innercc, c, call)`.
+ */
+ bindingset[call, c, innercc]
+ private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
+ checkCallContextReturn(innercc, c, call) and
+ if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
+ }
+
+ /**
+ * Gets the local call context derived from `cc` and the enclosing callable of
+ * `node`, provided that `node` is a local flow entry in `config`.
+ */
+ bindingset[node, cc, config]
+ private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
+ localFlowEntry(node, config) and
+ result =
+ getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
+ node.getEnclosingCallable())
+ }
+
+ /**
+ * Holds if `localFlowBigStep` relates `node1` and `node2` in the local call
+ * context `lcc`, using the front of the empty access path `ap`.
+ */
+ private predicate localStep(
+ NodeEx node1, NodeEx node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+ ) {
+ localFlowBigStep(node1, node2, preservesValue, ap.getFront(), config, lcc)
+ }
+
+ /**
+ * Holds if `flowOutOfCallNodeCand2` relates `node1` and `node2` at `call` and
+ * both nodes are reached by the previous stage's reverse flow.
+ */
+ pragma[nomagic]
+ private predicate flowOutOfCall(
+ DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and
+ PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `flowIntoCallNodeCand2` relates `node1` and `node2` at `call` and
+ * both nodes are reached by the previous stage's reverse flow.
+ */
+ pragma[nomagic]
+ private predicate flowIntoCall(
+ DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow,
+ Configuration config
+ ) {
+ flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and
+ PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config))
+ }
+
+ /** Holds for every `node` and `ap`: this stage applies no additional filter. */
+ bindingset[node, ap]
+ private predicate filter(NodeEx node, Ap ap) { any() }
+
+ // Type checking is not necessary here as it has already been done in stage 3.
+ bindingset[ap, contentType]
+ private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() }
+
+ /* Begin: Stage 4 logic. */
+ /** Holds if the previous stage's reverse flow reaches `node` with access path `apa`. */
+ private predicate flowCand(NodeEx node, ApApprox apa, Configuration config) {
+ PrevStage::revFlow(node, _, _, apa, config)
+ }
+
+ /**
+ * Gets a value equal to `apa`. The equality goes through an intermediate
+ * variable that is wrapped in `pragma[only_bind_into]` on both sides.
+ */
+ bindingset[result, apa]
+ private ApApprox unbindApa(ApApprox apa) {
+ exists(ApApprox apa0 |
+ apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0)
+ )
+ }
+
+ /**
+ * Holds if `flowOutOfCall` holds for `call`, `ret` and `out`, and the
+ * previous stage found that flow may pass through `call` and through some
+ * parameter of the callable enclosing `ret`.
+ */
+ pragma[nomagic]
+ private predicate flowThroughOutOfCall(
+ DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and
+ PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _,
+ pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` is reachable with access path `ap` from a source in the
+ * configuration `config`.
+ *
+ * The call context `cc` records whether the node is reached through an
+ * argument in a call, and if so, `argAp` records the access path of that
+ * argument.
+ */
+ pragma[nomagic]
+ predicate fwdFlow(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ fwdFlow0(node, cc, argAp, ap, config) and
+ flowCand(node, unbindApa(getApprox(ap)), config) and
+ filter(node, ap)
+ }
+
+ /**
+ * The step relation behind `fwdFlow`, before the result is restricted to the
+ * previous stage's candidates and passed through `filter`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlow0(NodeEx node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+ // source
+ flowCand(node, _, config) and
+ sourceNode(node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ or
+ // local step: value-preserving steps keep the access path, other steps
+ // require an empty access path before the step
+ exists(NodeEx mid, Ap ap0, LocalCc localCc |
+ fwdFlow(mid, cc, argAp, ap0, config) and
+ localCc = getLocalCc(mid, cc, config)
+ |
+ localStep(mid, node, true, _, config, localCc) and
+ ap = ap0
+ or
+ localStep(mid, node, false, ap, config, localCc) and
+ ap0 instanceof ApNil
+ )
+ or
+ // jump step: the call context and argument access path are reset
+ exists(NodeEx mid |
+ fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ jumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone()
+ )
+ or
+ // additional jump step: only for empty access paths
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+ flowCand(node, _, pragma[only_bind_into](config)) and
+ additionalJumpStep(mid, node, config) and
+ cc = ccNone() and
+ argAp = apNone() and
+ ap = getApNil(node)
+ )
+ or
+ // store
+ exists(TypedContent tc, Ap ap0 |
+ fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+ ap = apCons(tc, ap0)
+ )
+ or
+ // read
+ exists(Ap ap0, Content c |
+ fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+ fwdFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // flow into a callable
+ exists(ApApprox apa |
+ fwdFlowIn(_, node, _, cc, _, ap, config) and
+ apa = getApprox(ap) and
+ if PrevStage::parameterMayFlowThrough(node, _, apa, config)
+ then argAp = apSome(ap)
+ else argAp = apNone()
+ )
+ or
+ // flow out of a callable
+ fwdFlowOutNotFromArg(node, cc, argAp, ap, config)
+ or
+ exists(DataFlowCall call, Ap argAp0 |
+ fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+ fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches `node1` with access path `ap1` and the
+ * previous stage kept a store step of `tc` from `node1` to `node2` for the
+ * approximation of `ap1`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowStore(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ exists(DataFlowType contentType |
+ fwdFlow(node1, cc, argAp, ap1, config) and
+ PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and
+ typecheckStore(ap1, contentType)
+ )
+ }
+
+ /**
+ * Holds if forward flow with access path `tail` reaches a store of `c`
+ * resulting in access path `cons`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(TypedContent tc |
+ fwdFlowStore(_, tail, tc, _, _, _, config) and
+ tc.getContent() = c and
+ cons = apCons(tc, tail)
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches `node1` with access path `ap`, whose head
+ * has content `c`, and the previous stage kept a read step of `c` from
+ * `node1` to `node2`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowRead(
+ Ap ap, Content c, NodeEx node1, NodeEx node2, Cc cc, ApOption argAp, Configuration config
+ ) {
+ fwdFlow(node1, cc, argAp, ap, config) and
+ PrevStage::readStepCand(node1, c, node2, config) and
+ getHeadContent(ap) = c
+ }
+
+ /**
+ * Holds if forward flow reaches an argument of `call` with access path `ap`
+ * in the context `outercc`, and that argument flows into parameter `p`,
+ * giving the inner call context `innercc`. Unless `ap` is empty, the
+ * into-call edge must allow field flow.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIn(
+ DataFlowCall call, ParamNodeEx p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+ Configuration config
+ ) {
+ exists(ArgNodeEx arg, boolean allowsFieldFlow |
+ fwdFlow(arg, outercc, argAp, ap, config) and
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches a return node with access path `ap` in a
+ * call context that is not a call (`CcNoCall`), and flows out of the callable
+ * to `out`, giving the call context `ccOut`. Unless `ap` is empty, the
+ * out-of-call edge must allow field flow.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOutNotFromArg(
+ NodeEx out, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(
+ DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc,
+ DataFlowCallable inner
+ |
+ fwdFlow(ret, innercc, argAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ inner = ret.getEnclosingCallable() and
+ ccOut = getCallContextReturn(inner, call, innercc)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if forward flow reaches a return node with access path `ap` in a
+ * call context that matches `call` and has argument access path `argAp`, and
+ * flows out of `call` to `out` via `flowThroughOutOfCall`. Unless `ap` is
+ * empty, the out-of-call edge must allow field flow.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowOutFromArg(
+ DataFlowCall call, NodeEx out, Ap argAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc |
+ fwdFlow(ret, ccc, apSome(argAp), ap, config) and
+ flowThroughOutOfCall(call, ret, out, allowsFieldFlow, config) and
+ ccc.matchesCall(call)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`
+ * and data might flow through the target callable and back out at `call`.
+ */
+ pragma[nomagic]
+ private predicate fwdFlowIsEntered(
+ DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p |
+ fwdFlowIn(call, p, cc, _, argAp, ap, config) and
+ PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config)
+ )
+ }
+
+ /**
+ * Holds if forward flow stores `tc` from `node1` (with `ap1`) to `node2`,
+ * giving access path `ap2`, and forward flow also reaches a read of the same
+ * content with `ap2`.
+ */
+ pragma[nomagic]
+ private predicate storeStepFwd(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config
+ ) {
+ fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+ ap2 = apCons(tc, ap1) and
+ fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config)
+ }
+
+ /**
+ * Holds if forward flow reads `c` from `n1` (with `ap1`) to `n2`, and `ap2`
+ * is a tail for which forward flow reaches a store of `c` resulting in `ap1`.
+ */
+ private predicate readStepFwd(
+ NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config
+ ) {
+ fwdFlowRead(ap1, c, n1, n2, _, _, config) and
+ fwdFlowConsCand(ap1, c, ap2, config)
+ }
+
+ /**
+ * Holds if forward flow may pass through `call`: it enters `call` through an
+ * argument (see `fwdFlowIsEntered`) and leaves it again at an output node
+ * that is reached by forward flow (see `fwdFlowOutFromArg`).
+ */
+ pragma[nomagic]
+ private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) {
+ exists(Ap argAp0, NodeEx out, Cc cc, ApOption argAp, Ap ap |
+ fwdFlow(out, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap,
+ pragma[only_bind_into](config)) and
+ fwdFlowOutFromArg(call, out, argAp0, ap, config) and
+ fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc),
+ pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0),
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /**
+ * Holds if `flowIntoCall` holds for `call`, `arg` and `p`, forward flow
+ * reaches `arg`, the previous stage found that `p` may flow through, and
+ * forward flow may pass through `call`.
+ */
+ pragma[nomagic]
+ private predicate flowThroughIntoCall(
+ DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config
+ ) {
+ flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+ fwdFlow(arg, _, _, _, pragma[only_bind_into](config)) and
+ PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and
+ callMayFlowThroughFwd(call, pragma[only_bind_into](config))
+ }
+
+ /**
+ * Holds if `node` with access path `ap` is part of a path from a source to a
+ * sink in the configuration `config`.
+ *
+ * The Boolean `toReturn` records whether the node must be returned from the
+ * enclosing callable in order to reach a sink, and if so, `returnAp` records
+ * the access path of the returned value.
+ */
+ pragma[nomagic]
+ predicate revFlow(NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+ revFlow0(node, toReturn, returnAp, ap, config) and
+ fwdFlow(node, _, _, ap, config)
+ }
+
+ /**
+ * The step relation behind `revFlow`, before the result is restricted to
+ * nodes and access paths reached by `fwdFlow`.
+ */
+ pragma[nomagic]
+ private predicate revFlow0(
+ NodeEx node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ // sink
+ fwdFlow(node, _, _, ap, config) and
+ sinkNode(node, config) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ or
+ // value-preserving local step
+ exists(NodeEx mid |
+ localStep(node, mid, true, _, config, _) and
+ revFlow(mid, toReturn, returnAp, ap, config)
+ )
+ or
+ // non-value-preserving local step: only for empty access paths
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ localStep(node, mid, false, _, config, _) and
+ revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+ ap instanceof ApNil
+ )
+ or
+ // jump step: the return state is reset
+ exists(NodeEx mid |
+ jumpStep(node, mid, config) and
+ revFlow(mid, _, _, ap, config) and
+ toReturn = false and
+ returnAp = apNone()
+ )
+ or
+ // additional jump step: only for empty access paths
+ exists(NodeEx mid, ApNil nil |
+ fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+ additionalJumpStep(node, mid, config) and
+ revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+ toReturn = false and
+ returnAp = apNone() and
+ ap instanceof ApNil
+ )
+ or
+ // store
+ exists(Ap ap0, Content c |
+ revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+ revFlowConsCand(ap0, c, ap, config)
+ )
+ or
+ // read
+ exists(NodeEx mid, Ap ap0 |
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ readStepFwd(node, ap, _, mid, ap0, config)
+ )
+ or
+ // flow into a callable
+ revFlowInNotToReturn(node, returnAp, ap, config) and
+ toReturn = false
+ or
+ exists(DataFlowCall call, Ap returnAp0 |
+ revFlowInToReturn(call, node, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ or
+ // flow out of a callable
+ revFlowOut(_, node, _, _, ap, config) and
+ toReturn = true and
+ if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config)
+ then returnAp = apSome(ap)
+ else returnAp = apNone()
+ }
+
+ /**
+ * Holds if reverse flow reaches `mid` with access path `ap0`, and
+ * `storeStepFwd` relates `node` with `ap` to `mid` with `ap0` through a store
+ * of `tc`, whose content is `c`.
+ */
+ pragma[nomagic]
+ private predicate revFlowStore(
+ Ap ap0, Content c, Ap ap, NodeEx node, TypedContent tc, NodeEx mid, boolean toReturn,
+ ApOption returnAp, Configuration config
+ ) {
+ revFlow(mid, toReturn, returnAp, ap0, config) and
+ storeStepFwd(node, ap, tc, mid, ap0, config) and
+ tc.getContent() = c
+ }
+
+ /**
+ * Holds if reverse flow with access path `tail` reaches a read of `c`
+ * resulting in access path `cons`.
+ */
+ pragma[nomagic]
+ private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+ exists(NodeEx mid, Ap tail0 |
+ revFlow(mid, _, _, tail, config) and
+ tail = pragma[only_bind_into](tail0) and
+ readStepFwd(_, cons, c, mid, tail0, config)
+ )
+ }
+
+ /**
+ * Holds if reverse flow reaches a node with access path `ap` and
+ * `flowOutOfCall` connects the return node `ret` of `call` to that node.
+ * Unless `ap` is empty, the out-of-call edge must allow field flow.
+ */
+ pragma[nomagic]
+ private predicate revFlowOut(
+ DataFlowCall call, RetNodeEx ret, boolean toReturn, ApOption returnAp, Ap ap,
+ Configuration config
+ ) {
+ exists(NodeEx out, boolean allowsFieldFlow |
+ revFlow(out, toReturn, returnAp, ap, config) and
+ flowOutOfCall(call, ret, out, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if reverse flow reaches a parameter with `toReturn = false` and
+ * access path `ap`, and `flowIntoCall` connects `arg` to that parameter.
+ * Unless `ap` is empty, the into-call edge must allow field flow.
+ */
+ pragma[nomagic]
+ private predicate revFlowInNotToReturn(
+ ArgNodeEx arg, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, false, returnAp, ap, config) and
+ flowIntoCall(_, arg, p, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if reverse flow reaches a parameter with `toReturn = true`, return
+ * access path `returnAp` and access path `ap`, and `flowThroughIntoCall`
+ * connects the argument `arg` of `call` to that parameter. Unless `ap` is
+ * empty, the into-call edge must allow field flow.
+ */
+ pragma[nomagic]
+ private predicate revFlowInToReturn(
+ DataFlowCall call, ArgNodeEx arg, Ap returnAp, Ap ap, Configuration config
+ ) {
+ exists(ParamNodeEx p, boolean allowsFieldFlow |
+ revFlow(p, true, apSome(returnAp), ap, config) and
+ flowThroughIntoCall(call, arg, p, allowsFieldFlow, config)
+ |
+ ap instanceof ApNil or allowsFieldFlow = true
+ )
+ }
+
+ /**
+ * Holds if an output from `call` is reached in the flow covered by `revFlow`
+ * and data might flow through the target callable resulting in reverse flow
+ * reaching an argument of `call`.
+ */
+ pragma[nomagic]
+ private predicate revFlowIsReturned(
+ DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+ ) {
+ exists(RetNodeEx ret, CcCall ccc |
+ revFlowOut(call, ret, toReturn, returnAp, ap, config) and
+ fwdFlow(ret, ccc, apSome(_), ap, config) and
+ ccc.matchesCall(call)
+ )
+ }
+
+ /**
+ * Holds if `store` relates `node1`, `tc`, `node2` and `contentType`, and this
+ * stage keeps that step: `revFlowStore` holds for it with tail access path
+ * `ap1`, and `revFlowConsCand` holds for the same cons/content/tail triple.
+ */
+ pragma[nomagic]
+ predicate storeStepCand(
+ NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType,
+ Configuration config
+ ) {
+ exists(Ap ap2, Content c |
+ store(node1, tc, node2, contentType, config) and
+ revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and
+ revFlowConsCand(ap2, c, ap1, config)
+ )
+ }
+
+ /**
+ * Holds if the read of `c` from `node1` to `node2` is kept by this stage:
+ * `readStepFwd` relates the two nodes through access paths `ap1` and `ap2`,
+ * reverse flow reaches `node2` with `ap2`, and `revFlowStore` holds for some
+ * store of `c` with the same pair of access paths.
+ */
+ predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) {
+ exists(Ap ap1, Ap ap2 |
+ revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and
+ readStepFwd(node1, ap1, c, node2, ap2, config) and
+ revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _,
+ pragma[only_bind_into](config))
+ )
+ }
+
+ /** Holds if `node` is reached by reverse flow in `config`, for any return state and access path. */
+ predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, config) }
+
+ /** Holds if `storeStepFwd` holds for some store of `tc` with `ap` as the access path before the store. */
+ private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepFwd(_, ap, tc, _, _, config)
+ }
+
+ /** Holds if `storeStepCand` holds for some store of `tc` with `ap` as the access path before the store. */
+ predicate consCand(TypedContent tc, Ap ap, Configuration config) {
+ storeStepCand(_, ap, tc, _, _, config)
+ }
+
+ /**
+ * Holds if reverse flow reaches parameter `p` of callable `c` with access
+ * path `ap`, in the to-return state with return access path `ap0`.
+ */
+ pragma[noinline]
+ private predicate parameterFlow(
+ ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+ ) {
+ revFlow(p, true, apSome(ap0), ap, config) and
+ c = p.getEnclosingCallable()
+ }
+
+ /**
+ * Holds if parameter `p` of callable `c`, entered with access path `ap`, may
+ * flow to a return node of `c`: a return node in `c` is reached by reverse
+ * flow with the return access path recorded for `p`, and by forward flow in
+ * a call context with argument access path `ap`.
+ */
+ predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) {
+ exists(RetNodeEx ret, Ap ap0, ReturnKindExt kind, int pos |
+ parameterFlow(p, ap, ap0, c, config) and
+ c = ret.getEnclosingCallable() and
+ revFlow(pragma[only_bind_into](ret), true, apSome(_), pragma[only_bind_into](ap0),
+ pragma[only_bind_into](config)) and
+ fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
+ kind = ret.getKind() and
+ p.getPosition() = pos and
+ // we don't expect a parameter to return stored in itself
+ not kind.(ParamUpdateReturnKind).getPosition() = pos
+ )
+ }
+
+ /**
+ * Holds if reverse flow may pass through `call`: an output of `call` is
+ * reached such that `revFlowIsReturned` holds, and reverse flow continues
+ * from the callee to an argument of `call` that is itself reached by reverse
+ * flow.
+ */
+ pragma[nomagic]
+ predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) {
+ exists(Ap returnAp0, ArgNodeEx arg, boolean toReturn, ApOption returnAp, Ap ap |
+ revFlow(arg, toReturn, returnAp, ap, config) and
+ revFlowInToReturn(call, arg, returnAp0, ap, config) and
+ revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+ )
+ }
+
+ /**
+ * Reports size statistics for this stage under `config`: forward-flow counts
+ * when `fwd = true`, reverse-flow counts when `fwd = false`.
+ */
+ predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+ fwd = true and
+ nodes = count(NodeEx node | fwdFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and
+ tuples = count(NodeEx n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config))
+ or
+ fwd = false and
+ nodes = count(NodeEx node | revFlow(node, _, _, _, config)) and
+ fields = count(TypedContent f0 | consCand(f0, _, config)) and
+ conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and
+ tuples = count(NodeEx n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config))
+ }
+ /* End: Stage 4 logic. */
+}
+
+/**
+ * Gets a configuration equal to `conf`. The equality goes through an
+ * intermediate variable that is wrapped in `pragma[only_bind_into]` on both
+ * sides.
+ */
+// NOTE(review): presumably exists to steer binding order at call sites; confirm against callers.
+bindingset[conf, result]
+private Configuration unbindConf(Configuration conf) {
+ exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c))
+}
+
+/**
+ * Holds if `n` lies in a callable with a parameter that may flow through with
+ * access path approximation `apa` (per `Stage4::parameterMayFlowThrough`, for
+ * any configuration), and `n` is reached by Stage 4 reverse flow in the
+ * to-return state and by Stage 4 forward flow in a call context whose
+ * argument access path is `apa`.
+ */
+private predicate nodeMayUseSummary(NodeEx n, AccessPathApprox apa, Configuration config) {
+ exists(DataFlowCallable callable, AccessPathApprox current, CallContextCall callCc |
+ callable = n.getEnclosingCallable() and
+ Stage4::fwdFlow(n, callCc, TAccessPathApproxSome(apa), current, config) and
+ Stage4::revFlow(n, true, _, current, config) and
+ Stage4::parameterMayFlowThrough(_, callable, apa, _)
+ )
+}
+
+/**
+ * The underlying representation of `SummaryCtx`: either no context, or a
+ * parameter `p` paired with an access path `ap` such that
+ * `Stage4::parameterMayFlowThrough` holds for `p` and the approximation of `ap`.
+ */
+private newtype TSummaryCtx =
+ TSummaryCtxNone() or
+ TSummaryCtxSome(ParamNodeEx p, AccessPath ap) {
+ Stage4::parameterMayFlowThrough(p, _, ap.getApprox(), _)
+ }
+
+/**
+ * A context for generating flow summaries. This represents flow entry through
+ * a specific parameter with an access path of a specific shape.
+ *
+ * Summaries are only created for parameters that may flow through.
+ */
+abstract private class SummaryCtx extends TSummaryCtx {
+ /** Gets a textual representation of this summary context. */
+ abstract string toString();
+}
+
+/** A summary context from which no flow summary can be generated. */
+private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone {
+ // The absent context is rendered as the empty string.
+ override string toString() { result = "" }
+}
+
+/**
+ * A summary context from which a flow summary can be generated. It consists of
+ * the parameter through which flow enters and the access path at entry.
+ */
+private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome {
+ private ParamNodeEx param;
+ private AccessPath entryAp;
+
+ SummaryCtxSome() { this = TSummaryCtxSome(param, entryAp) }
+
+ override string toString() { result = param + ": " + entryAp }
+
+ /** Gets the position that `isParameterOf` reports for the underlying parameter. */
+ int getParameterPos() { param.isParameterOf(_, result) }
+
+ /** Holds if the underlying parameter has the given location information. */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ param.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ }
+}
+
+/**
+ * Gets the number of length 2 access path approximations that correspond to `apa`.
+ */
+private int count1to2unfold(AccessPathApproxCons1 apa, Configuration config) {
+ exists(TypedContent tc, int len |
+ tc = apa.getHead() and
+ len = apa.len() and
+ // count the distinct fronts of tails (of length `len - 1`) that
+ // `Stage4::consCand` allows after the head `tc`
+ result =
+ strictcount(AccessPathFront apf |
+ Stage4::consCand(tc, any(AccessPathApprox ap | ap.getFront() = apf and ap.len() = len - 1),
+ config)
+ )
+ )
+}
+
+/**
+ * Gets the number of nodes that are reached by Stage 4 reverse flow with
+ * access path approximation `apa`, or for which `nodeMayUseSummary` holds with
+ * `apa`. There is no result when no such node exists.
+ */
+private int countNodesUsingAccessPath(AccessPathApprox apa, Configuration config) {
+ result =
+ strictcount(NodeEx node |
+ nodeMayUseSummary(node, apa, config)
+ or
+ Stage4::revFlow(node, _, _, apa, config)
+ )
+}
+
+/**
+ * Holds if expanding `apa` into length 2 access path approximations is
+ * expected to be expensive, that is, if both limits given by
+ * `accessPathCostLimits` are exceeded.
+ */
+private predicate expensiveLen1to2unfolding(AccessPathApproxCons1 apa, Configuration config) {
+ exists(int unfolded, int users, int apLimit, int tupleLimit |
+ accessPathCostLimits(apLimit, tupleLimit) and
+ unfolded = count1to2unfold(apa, config) and
+ users = countNodesUsingAccessPath(apa, config) and
+ unfolded > apLimit and
+ (unfolded - 1) * users > tupleLimit
+ )
+}
+
+/**
+ * Gets an access path approximation obtained by popping the head of `apa`,
+ * provided that `Stage4::consCand` holds for that head and the result.
+ */
+private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) {
+ exists(TypedContent popped |
+ result = apa.pop(popped) and
+ Stage4::consCand(popped, result, config)
+ )
+}
+
+/**
+ * Holds with `unfold = false` if a precise head-tail representation of `apa` is
+ * expected to be expensive. Holds with `unfold = true` otherwise.
+ */
+private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) {
+ // a head that forces high precision is always unfolded
+ if apa.getHead().forceHighPrecision()
+ then unfold = true
+ else
+ exists(int aps, int nodes, int apLimit, int tupleLimit |
+ aps = countPotentialAps(apa, config) and
+ nodes = countNodesUsingAccessPath(apa, config) and
+ accessPathCostLimits(apLimit, tupleLimit) and
+ // unfolding is rejected only when both limits are exceeded
+ if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
+ )
+}
+
+/**
+ * Gets the number of `AccessPath`s that correspond to `apa`.
+ */
+private int countAps(AccessPathApprox apa, Configuration config) {
+ // not unfolded, and either not a `Cons1` approximation or one that is too
+ // expensive to expand to length 2: a single access path
+ evalUnfold(apa, false, config) and
+ result = 1 and
+ (not apa instanceof AccessPathApproxCons1 or expensiveLen1to2unfolding(apa, config))
+ or
+ // not unfolded, but expanded from length 1 to length 2
+ evalUnfold(apa, false, config) and
+ result = count1to2unfold(apa, config) and
+ not expensiveLen1to2unfolding(apa, config)
+ or
+ // unfolded into a precise head-tail representation
+ evalUnfold(apa, true, config) and
+ result = countPotentialAps(apa, config)
+}
+
+/**
+ * Gets the number of `AccessPath`s that would correspond to `apa` assuming
+ * that it is expanded to a precise head-tail representation.
+ */
+language[monotonicAggregates]
+private int countPotentialAps(AccessPathApprox apa, Configuration config) {
+ // the empty access path corresponds to exactly one `AccessPath`
+ apa instanceof AccessPathApproxNil and result = 1
+ or
+ // otherwise, sum the counts of all tails; this recurses through `countAps`
+ result = strictsum(AccessPathApprox tail | tail = getATail(apa, config) | countAps(tail, config))
+}
+
+/**
+ * The underlying representation of `AccessPath`. Which branch represents a
+ * non-empty path depends on `evalUnfold` and `expensiveLen1to2unfolding` for
+ * the corresponding `AccessPathApproxCons`:
+ * - `TAccessPathCons`: a head with a precise tail, when `evalUnfold(apa, false, _)`
+ * does not hold;
+ * - `TAccessPathCons2`: the first two heads plus the length, when unfolding is
+ * rejected but `expensiveLen1to2unfolding` does not hold;
+ * - `TAccessPathCons1`: the first head plus the length, when unfolding is
+ * rejected and `expensiveLen1to2unfolding` holds.
+ */
+private newtype TAccessPath =
+ TAccessPathNil(DataFlowType t) or
+ TAccessPathCons(TypedContent head, AccessPath tail) {
+ exists(AccessPathApproxCons apa |
+ not evalUnfold(apa, false, _) and
+ head = apa.getHead() and
+ tail.getApprox() = getATail(apa, _)
+ )
+ } or
+ TAccessPathCons2(TypedContent head1, TypedContent head2, int len) {
+ exists(AccessPathApproxCons apa |
+ evalUnfold(apa, false, _) and
+ not expensiveLen1to2unfolding(apa, _) and
+ apa.len() = len and
+ head1 = apa.getHead() and
+ head2 = getATail(apa, _).getHead()
+ )
+ } or
+ TAccessPathCons1(TypedContent head, int len) {
+ exists(AccessPathApproxCons apa |
+ evalUnfold(apa, false, _) and
+ expensiveLen1to2unfolding(apa, _) and
+ apa.len() = len and
+ head = apa.getHead()
+ )
+ }
+
+private newtype TPathNode = // the nodes of the path graph: intermediate nodes and sink nodes
+ TPathNodeMid(NodeEx node, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config) {
+ // A PathNode is introduced by a source ...
+ Stage4::revFlow(node, config) and
+ sourceNode(node, config) and
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ ap = TAccessPathNil(node.getDataFlowType()) // a source starts with the empty access path of its own type
+ or
+ // ... or a step from an existing PathNode to another node.
+ exists(PathNodeMid mid |
+ pathStep(mid, node, cc, sc, ap) and
+ pragma[only_bind_into](config) = mid.getConfiguration() and
+ Stage4::revFlow(node, _, _, ap.getApprox(), pragma[only_bind_into](config)) // restrict to what `Stage4::revFlow` accepts for this approximation
+ )
+ } or
+ TPathNodeSink(NodeEx node, Configuration config) { // sink nodes record no call context, summary context or access path
+ sinkNode(node, pragma[only_bind_into](config)) and
+ Stage4::revFlow(node, pragma[only_bind_into](config)) and
+ (
+ // A sink that is also a source ...
+ sourceNode(node, config)
+ or
+ // ... or a sink that can be reached from a source
+ exists(PathNodeMid mid |
+ pathStep(mid, node, _, _, TAccessPathNil(_)) and // reached with an empty access path
+ pragma[only_bind_into](config) = mid.getConfiguration()
+ )
+ )
+ }
+
+/**
+ * A precise access path: a sequence of `TypedContent`s terminated by a
+ * `DataFlowType`. When data flows from a source to a node with a given
+ * `AccessPath`, the contents are the dereferences needed to get from the node's
+ * value to the tracked object, and the final type is the tracked object's type.
+ */
+abstract private class AccessPath extends TAccessPath {
+  /** Gets the first `TypedContent` of this access path, if any. */
+  abstract TypedContent getHead();
+
+  /** Gets the access path that remains after removing the head, if any. */
+  abstract AccessPath getTail();
+
+  /** Gets the front of this access path. */
+  abstract AccessPathFront getFront();
+
+  /** Gets the approximation of this access path. */
+  abstract AccessPathApprox getApprox();
+
+  /** Gets the length of this access path. */
+  abstract int length();
+
+  /** Gets a textual representation of this access path. */
+  abstract string toString();
+
+  /** Gets the tail of this access path, provided that its head is `tc`. */
+  final AccessPath pop(TypedContent tc) {
+    tc = this.getHead() and
+    result = this.getTail()
+  }
+
+  /** Gets an access path whose head is `tc` and whose tail is this access path. */
+  final AccessPath push(TypedContent tc) { result.pop(tc) = this }
+}
+
+/** The empty access path, which carries only the type of the tracked object. */
+private class AccessPathNil extends AccessPath, TAccessPathNil {
+  private DataFlowType nilType;
+  AccessPathNil() { TAccessPathNil(nilType) = this }
+
+  DataFlowType getType() { nilType = result }
+
+  override TypedContent getHead() { none() }
+
+  override AccessPath getTail() { none() }
+
+  override int length() { result = 0 }
+
+  override AccessPathFrontNil getFront() { TFrontNil(nilType) = result }
+
+  override AccessPathApproxNil getApprox() { TNil(nilType) = result }
+
+  override string toString() { result = concat(": " + ppReprType(nilType)) }
+}
+
+/** A non-empty access path represented precisely as a head content and a tail access path. */
+private class AccessPathCons extends AccessPath, TAccessPathCons {
+  private TypedContent head;
+  private AccessPath tail;
+
+  AccessPathCons() { this = TAccessPathCons(head, tail) }
+
+  override TypedContent getHead() { result = head }
+
+  override AccessPath getTail() { result = tail }
+
+  override AccessPathFrontHead getFront() { result = TFrontHead(head) }
+
+  override AccessPathApproxCons getApprox() {
+    result = TConsNil(head, tail.(AccessPathNil).getType()) // tail is empty
+    or
+    result = TConsCons(head, tail.getHead(), this.length()) // tail has a head of its own
+    or
+    result = TCons1(head, this.length())
+  }
+
+  override int length() { result = 1 + tail.length() }
+
+  /** Gets the text after the opening "["; `needsSuffix` holds if it ends in an unclosed ", ... (". */
+  private string toStringImpl(boolean needsSuffix) {
+    exists(DataFlowType t | tail = TAccessPathNil(t) and needsSuffix = false |
+      result = head.toString() + "]" + concat(" : " + ppReprType(t))
+    )
+    or
+    result = head + ", " + tail.(AccessPathCons).toStringImpl(needsSuffix) // recurse into a precise tail
+    or
+    exists(TypedContent tc2, TypedContent tc3, int len | tail = TAccessPathCons2(tc2, tc3, len) |
+      result = head + ", " + tc2 + ", " + tc3 + ", ... (" and len > 2 and needsSuffix = true
+      or
+      result = head + ", " + tc2 + ", " + tc3 + "]" and len = 2 and needsSuffix = false
+    )
+    or
+    exists(TypedContent tc2, int len | tail = TAccessPathCons1(tc2, len) |
+      result = head + ", " + tc2 + ", ... (" and len > 1 and needsSuffix = true
+      or
+      result = head + ", " + tc2 + "]" and len = 1 and needsSuffix = false
+    )
+  }
+
+  override string toString() {
+    result = "[" + this.toStringImpl(true) + this.length().toString() + ")]" // append the length and close the suffix
+    or
+    result = "[" + this.toStringImpl(false)
+  }
+}
+
+/** An access path of length `size` of which only the first two contents are recorded. */
+private class AccessPathCons2 extends AccessPath, TAccessPathCons2 {
+  private TypedContent first;
+  private TypedContent second;
+  private int size;
+
+  AccessPathCons2() { TAccessPathCons2(first, second, size) = this }
+
+  override TypedContent getHead() { first = result }
+
+  override AccessPath getTail() {
+    result.length() = size - 1 and
+    result.getHead() = second and
+    Stage4::consCand(first, result.getApprox(), _)
+  }
+
+  override AccessPathFrontHead getFront() { TFrontHead(first) = result }
+
+  override AccessPathApproxCons getApprox() {
+    result = TCons1(first, size) or
+    result = TConsCons(first, second, size)
+  }
+
+  override int length() { size = result }
+
+  override string toString() {
+    exists(string prefix | prefix = "[" + first.toString() + ", " + second.toString() |
+      if size = 2 then result = prefix + "]" else result = prefix + ", ... (" + size.toString() + ")]"
+    )
+  }
+}
+
+/** An access path of length `size` of which only the first content is recorded. */
+private class AccessPathCons1 extends AccessPath, TAccessPathCons1 {
+  private TypedContent content;
+  private int size;
+  AccessPathCons1() { TAccessPathCons1(content, size) = this }
+
+  override TypedContent getHead() { content = result }
+
+  override AccessPath getTail() {
+    result.length() = size - 1 and Stage4::consCand(content, result.getApprox(), _)
+  }
+
+  override AccessPathFrontHead getFront() { TFrontHead(content) = result }
+
+  override AccessPathApproxCons getApprox() { TCons1(content, size) = result }
+
+  override int length() { size = result }
+
+  override string toString() {
+    exists(string open | open = "[" + content.toString() |
+      if size = 1 then result = open + "]" else result = open + ", ... (" + size.toString() + ")]"
+    )
+  }
+}
+
+/**
+ * A `Node` together with a call context (absent for sinks), an access path, and a
+ * configuration. `PathNode`s exist only for nodes that are reachable from a source.
+ */
+class PathNode extends TPathNode {
+  /** Gets a textual representation of this element. */
+  string toString() { none() }
+
+  /**
+   * Gets a textual representation of this element that also includes a
+   * textual representation of the call context.
+   */
+  string toStringWithContext() { none() }
+
+  /**
+   * Holds if this element is at the specified location.
+   * The location spans column `startcolumn` of line `startline` to
+   * column `endcolumn` of line `endline` in file `filepath`.
+   * For more information, see
+   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+   */
+  predicate hasLocationInfo(
+    string filepath, int startline, int startcolumn, int endline, int endcolumn
+  ) {
+    none()
+  }
+
+  /** Gets the underlying `Node`. */
+  final Node getNode() { result = this.(PathNodeImpl).getNodeEx().projectToNode() }
+
+  /** Gets the associated configuration. */
+  Configuration getConfiguration() { none() }
+
+  /** Gets an immediate successor of this node, provided that this node is hidden. */
+  private PathNode getASuccessorIfHidden() {
+    exists(PathNodeImpl self | self = this and self.isHidden() | result = self.getASuccessorImpl())
+  }
+
+  /** Gets a successor of this node, if any. */
+  final PathNode getASuccessor() {
+    not this.(PathNodeImpl).isHidden() and
+    not result.(PathNodeImpl).isHidden() and
+    result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*()
+  }
+
+  /** Holds if this node is a source. */
+  predicate isSource() { none() }
+}
+
+/** The internal view of a `PathNode`, exposing its `NodeEx` and its unfiltered successors. */
+abstract private class PathNodeImpl extends PathNode {
+  abstract PathNode getASuccessorImpl();
+
+  abstract NodeEx getNodeEx();
+
+  /** Holds if this node is skipped when successors are reported by `PathNode.getASuccessor`. */
+  predicate isHidden() {
+    hiddenNode(this.getNodeEx().asNode()) and not this.isSource() and not this instanceof PathNodeSink
+    or
+    this.getNodeEx() instanceof TNodeImplicitRead
+  }
+
+  private string ppAp() { // access-path part of the label; empty for sinks
+    this instanceof PathNodeSink and result = ""
+    or
+    exists(string s | s = this.(PathNodeMid).getAp().toString() |
+      if s = "" then result = "" else result = " " + s
+    )
+  }
+
+  private string ppCtx() { // call-context part of the label; empty for sinks
+    this instanceof PathNodeSink and result = ""
+    or
+    result = " <" + this.(PathNodeMid).getCallContext().toString() + ">"
+  }
+
+  override string toString() { result = this.getNodeEx().toString() + this.ppAp() }
+
+  override string toStringWithContext() { result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() }
+
+  override predicate hasLocationInfo(
+    string filepath, int startline, int startcolumn, int endline, int endcolumn
+  ) {
+    this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+  }
+}
+
+/** Holds if `n` is a sink or can reach one through successor edges. */
+private predicate directReach(PathNode n) {
+  directReach(n.getASuccessor()) or n instanceof PathNodeSink
+}
+
+/** Holds if `n` can reach a sink, or reaches a return node of a summarized subpath. */
+private predicate reach(PathNode n) { Subpaths::retReach(n) or directReach(n) }
+
+/** Holds if `n2` is a successor of `n1` and `n2` can reach a sink. */
+private predicate pathSucc(PathNode n1, PathNode n2) { directReach(n2) and n2 = n1.getASuccessor() }
+
+private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) // transitive closure of `pathSucc`
+
+/** Provides the query predicates needed to include a graph in a path-problem query. */
+module PathGraph {
+  /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
+  query predicate edges(PathNode a, PathNode b) { reach(b) and b = a.getASuccessor() }
+
+  /** Holds if `n` is a node in the graph of data flow path explanations. */
+  query predicate nodes(PathNode n, string key, string val) {
+    reach(n) and
+    key = "semmle.label" and val = n.toString()
+  }
+
+  /** Holds if `(arg, par, ret, out)` forms a subpath-tuple (see `Subpaths::subpaths`). */
+  query predicate subpaths = Subpaths::subpaths/4;
+}
+
+/**
+ * An intermediate flow graph node. This is a tuple consisting of a `NodeEx`, a
+ * `CallContext`, a `SummaryCtx`, an `AccessPath`, and a `Configuration`.
+ */
+private class PathNodeMid extends PathNodeImpl, TPathNodeMid {
+  NodeEx node;
+  CallContext cc;
+  SummaryCtx sc;
+  AccessPath ap;
+  Configuration config;
+
+  PathNodeMid() { this = TPathNodeMid(node, cc, sc, ap, config) }
+
+  override NodeEx getNodeEx() { result = node }
+
+  CallContext getCallContext() { result = cc }
+
+  SummaryCtx getSummaryCtx() { result = sc }
+
+  AccessPath getAp() { result = ap }
+
+  override Configuration getConfiguration() { result = config }
+
+  private PathNodeMid getSuccMid() { // the intermediate node reached by one `pathStep` under the same configuration
+    pathStep(this, result.getNodeEx(), result.getCallContext(), result.getSummaryCtx(),
+      result.getAp()) and
+    result.getConfiguration() = unbindConf(this.getConfiguration())
+  }
+
+  override PathNodeImpl getASuccessorImpl() {
+    // an intermediate step to another intermediate node
+    result = this.getSuccMid()
+    or
+    // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges
+    exists(PathNodeMid mid, PathNodeSink sink |
+      mid = this.getSuccMid() and
+      mid.getNodeEx() = sink.getNodeEx() and
+      mid.getAp() instanceof AccessPathNil and
+      sink.getConfiguration() = unbindConf(mid.getConfiguration()) and
+      result = sink
+    )
+  }
+
+  override predicate isSource() {
+    sourceNode(node, config) and
+    cc instanceof CallContextAny and
+    sc instanceof SummaryCtxNone and
+    ap instanceof AccessPathNil
+  }
+}
+
+/**
+ * A flow graph node for a sink. Sink nodes carry no `CallContext`, which keeps
+ * them disjoint from the intermediate nodes and makes each one correspond
+ * uniquely to a given sink.
+ */
+private class PathNodeSink extends PathNodeImpl, TPathNodeSink {
+  NodeEx node;
+  Configuration config;
+
+  PathNodeSink() { TPathNodeSink(node, config) = this }
+
+  override predicate isSource() { sourceNode(node, config) }
+
+  override PathNode getASuccessorImpl() { none() }
+
+  override Configuration getConfiguration() { config = result }
+
+  override NodeEx getNodeEx() { node = result }
+}
+
+/**
+ * Holds if data may flow from `mid` to `node`. The last step in or out of
+ * a callable is recorded by `cc`.
+ */
+private predicate pathStep(
+ PathNodeMid mid, NodeEx node, CallContext cc, SummaryCtx sc, AccessPath ap
+) {
+ exists(AccessPath ap0, NodeEx midnode, Configuration conf, LocalCallContext localCC | // local flow: `cc` and `sc` are carried over from `mid`
+ midnode = mid.getNodeEx() and
+ conf = mid.getConfiguration() and
+ cc = mid.getCallContext() and
+ sc = mid.getSummaryCtx() and
+ localCC =
+ getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
+ midnode.getEnclosingCallable()) and
+ ap0 = mid.getAp()
+ |
+ localFlowBigStep(midnode, node, true, _, conf, localCC) and // this kind of local step keeps the access path
+ ap = ap0
+ or
+ localFlowBigStep(midnode, node, false, ap.getFront(), conf, localCC) and // this kind requires an empty access path before the step
+ ap0 instanceof AccessPathNil
+ )
+ or
+ jumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and // jump step: contexts are reset, access path is kept
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ ap = mid.getAp()
+ or
+ additionalJumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and // additional jump step: only from an empty access path
+ cc instanceof CallContextAny and
+ sc instanceof SummaryCtxNone and
+ mid.getAp() instanceof AccessPathNil and
+ ap = TAccessPathNil(node.getDataFlowType())
+ or
+ exists(TypedContent tc | pathStoreStep(mid, node, ap.pop(tc), tc, cc)) and // store: the new path is the old one with `tc` pushed
+ sc = mid.getSummaryCtx()
+ or
+ exists(TypedContent tc | pathReadStep(mid, node, ap.push(tc), tc, cc)) and // read: the new path is the old one with `tc` popped
+ sc = mid.getSummaryCtx()
+ or
+ pathIntoCallable(mid, node, _, cc, sc, _) and ap = mid.getAp() // flow into a callable
+ or
+ pathOutOfCallable(mid, node, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone // flow out of a callable
+ or
+ pathThroughCallable(mid, node, cc, ap) and sc = mid.getSummaryCtx() // flow through a callable
+}
+
+/** Holds if `Stage4::readStepCand` relates `mid` to `node` for `tc`, the head of `mid`'s path `ap0`. */
+pragma[nomagic]
+private predicate pathReadStep(
+  PathNodeMid mid, NodeEx node, AccessPath ap0, TypedContent tc, CallContext cc
+) {
+  cc = mid.getCallContext() and
+  mid.getAp() = ap0 and ap0.getHead() = tc and
+  Stage4::readStepCand(mid.getNodeEx(), tc.getContent(), node, mid.getConfiguration())
+}
+
+/** Holds if `Stage4::storeStepCand` relates `mid` to `node` for `tc`; `ap0` and `cc` come from `mid`. */
+pragma[nomagic]
+private predicate pathStoreStep(
+  PathNodeMid mid, NodeEx node, AccessPath ap0, TypedContent tc, CallContext cc
+) {
+  mid.getAp() = ap0 and mid.getCallContext() = cc and
+  Stage4::storeStepCand(mid.getNodeEx(), _, tc, node, _, mid.getConfiguration())
+}
+
+/** Holds if `mid` is at return position `pos` and its call context `innercc` is a `CallContextNoCall`. */
+private predicate pathOutOfCallable0(
+  PathNodeMid mid, ReturnPosition pos, CallContext innercc, AccessPathApprox apa,
+  Configuration config
+) {
+  innercc = mid.getCallContext().(CallContextNoCall) and
+  config = mid.getConfiguration() and
+  apa = mid.getAp().getApprox() and
+  pos = mid.getNodeEx().(RetNodeEx).getReturnPosition()
+}
+
+/** Holds if `mid` can return to `call` with return kind `kind`; `cc` is the context after returning. */
+pragma[nomagic]
+private predicate pathOutOfCallable1(
+  PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, AccessPathApprox apa,
+  Configuration config
+) {
+  exists(ReturnPosition retPos, DataFlowCallable callee, CallContext calleeCtx |
+    pathOutOfCallable0(mid, retPos, calleeCtx, apa, config) and
+    retPos.getCallable() = callee and retPos.getKind() = kind and
+    resolveReturn(calleeCtx, callee, call)
+  |
+    if reducedViableImplInReturn(callee, call) then cc = TReturn(callee, call) else cc = TAnyCallContext()
+  )
+}
+
+/** Gets an out node of `call` for `kind` for which `Stage4::revFlow` holds with `apa` under `config`. */
+pragma[noinline]
+private NodeEx getAnOutNodeFlow(
+  ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config
+) {
+  Stage4::revFlow(result, _, _, apa, config) and kind.getAnOutNode(call) = result.asNode()
+}
+
+/**
+ * Holds if data may flow from `mid` to `out`, where the final step returns
+ * from a callable. That return is recorded by `cc`, if needed.
+ */
+pragma[noinline]
+private predicate pathOutOfCallable(PathNodeMid mid, NodeEx out, CallContext cc) {
+  exists(ReturnKindExt retKind, DataFlowCall site, AccessPathApprox approx, Configuration conf |
+    out = getAnOutNodeFlow(retKind, site, approx, conf) and
+    pathOutOfCallable1(mid, site, retKind, cc, approx, conf)
+  )
+}
+
+/**
+ * Holds if `mid` is at the `i`th argument of `call`, with call context `cc`,
+ * access path `ap`, and `apa` the approximation of `ap`.
+ */
+pragma[noinline]
+private predicate pathIntoArg(
+  PathNodeMid mid, int i, CallContext cc, DataFlowCall call, AccessPath ap, AccessPathApprox apa
+) {
+  exists(ArgNode argument | argument = mid.getNodeEx().asNode() |
+    argument.argumentOf(call, i) and
+    mid.getCallContext() = cc and
+    mid.getAp() = ap and
+    ap.getApprox() = apa
+  )
+}
+
+/** Holds if parameter `i` of `callable` satisfies `Stage4::revFlow` with `apa` under `config`. */
+pragma[noinline]
+private predicate parameterCand(
+  DataFlowCallable callable, int i, AccessPathApprox apa, Configuration config
+) {
+  exists(ParamNodeEx param | param.isParameterOf(callable, i) |
+    Stage4::revFlow(param, _, _, apa, config)
+  )
+}
+
+pragma[nomagic]
+private predicate pathIntoCallable0(
+ PathNodeMid mid, DataFlowCallable callable, int i, CallContext outercc, DataFlowCall call,
+ AccessPath ap
+) {
+ exists(AccessPathApprox apa |
+ pathIntoArg(mid, i, outercc, call, ap, apa) and // `mid` is the `i`th argument of `call`
+ callable = resolveCall(call, outercc) and // a callee of `call` as resolved in `outercc`
+ parameterCand(callable, any(int j | j <= i and j >= i), apa, mid.getConfiguration()) // the `any(...)` denotes `i` itself; NOTE(review): presumably spelled this way to steer join ordering — confirm
+ )
+}
+
+/**
+ * Holds if data may flow from `mid` to `p` through `call`. The contexts
+ * before and after entering the callable are `outercc` and `innercc`,
+ * respectively.
+ */
+private predicate pathIntoCallable(
+ PathNodeMid mid, ParamNodeEx p, CallContext outercc, CallContextCall innercc, SummaryCtx sc,
+ DataFlowCall call
+) {
+ exists(int i, DataFlowCallable callable, AccessPath ap |
+ pathIntoCallable0(mid, callable, i, outercc, call, ap) and
+ p.isParameterOf(callable, i) and
+ (
+ sc = TSummaryCtxSome(p, ap) // use the summary context for `(p, ap)` when that value exists
+ or
+ not exists(TSummaryCtxSome(p, ap)) and // otherwise fall back to the empty summary context
+ sc = TSummaryCtxNone()
+ )
+ |
+ if recordDataFlowCallSite(call, callable) // choose between a call-specific and a generic inner context
+ then innercc = TSpecificCall(call)
+ else innercc = TSomeCall()
+ )
+}
+
+/** Holds if data may flow from the parameter described by `sc` to a return node of kind `kind`. */
+pragma[nomagic]
+private predicate paramFlowsThrough(
+  ReturnKindExt kind, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, AccessPathApprox apa,
+  Configuration config
+) {
+  exists(PathNodeMid retMid, RetNodeEx retNode, int paramPos |
+    retNode = retMid.getNodeEx() and
+    retNode.getKind() = kind and
+    retMid.getConfiguration() = config and
+    retMid.getCallContext() = cc and
+    retMid.getSummaryCtx() = sc and
+    retMid.getAp() = ap and
+    ap.getApprox() = apa and
+    sc.getParameterPos() = paramPos and
+    not kind.(ParamUpdateReturnKind).getPosition() = paramPos // a parameter update of that same position is excluded
+  )
+}
+
+pragma[nomagic]
+private predicate pathThroughCallable0(
+  DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, CallContext cc, AccessPath ap,
+  AccessPathApprox apa
+) {
+  exists(CallContext calleeCtx, SummaryCtx summary | // the contexts used inside the callee
+    paramFlowsThrough(kind, calleeCtx, summary, ap, apa, unbindConf(mid.getConfiguration())) and
+    pathIntoCallable(mid, _, cc, calleeCtx, summary, call)
+  )
+}
+
+/**
+ * Holds if data may flow from `mid` through a callable and back out to the node
+ * `out`. The call context `cc` is the one that applied before entering the callable.
+ */
+pragma[noinline]
+private predicate pathThroughCallable(PathNodeMid mid, NodeEx out, CallContext cc, AccessPath ap) {
+  exists(DataFlowCall site, ReturnKindExt retKind, AccessPathApprox approx |
+    out = getAnOutNodeFlow(retKind, site, approx, unbindConf(mid.getConfiguration())) and
+    pathThroughCallable0(site, mid, retKind, cc, ap, approx)
+  )
+}
+
+private module Subpaths { // computes the subpath tuples exposed through `PathGraph::subpaths`
+ /**
+ * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by
+ * `kind`, `sc`, `apout`, and `innercc`.
+ */
+ pragma[nomagic]
+ private predicate subpaths01(
+ PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
+ NodeEx out, AccessPath apout
+ ) {
+ pathThroughCallable(arg, out, _, pragma[only_bind_into](apout)) and // `arg` flows through a callable to `out`
+ pathIntoCallable(arg, par, _, innercc, sc, _) and // `arg` enters that callable at `par`
+ paramFlowsThrough(kind, innercc, sc, pragma[only_bind_into](apout), _,
+ unbindConf(arg.getConfiguration()))
+ }
+
+ /**
+ * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by
+ * `kind`, `sc`, `apout`, and `innercc`.
+ */
+ pragma[nomagic]
+ private predicate subpaths02(
+ PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind,
+ NodeEx out, AccessPath apout
+ ) {
+ subpaths01(arg, par, sc, innercc, kind, out, apout) and
+ out.asNode() = kind.getAnOutNode(_) // additionally, `out` must be an out node of `kind` for some call
+ }
+
+ pragma[nomagic]
+ private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() } // non-inlined accessor for the configuration of `n`
+
+ /**
+ * Holds if `(arg, par, ret, out)` forms a subpath-tuple.
+ */
+ pragma[nomagic]
+ private predicate subpaths03(
+ PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, AccessPath apout
+ ) {
+ exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode |
+ subpaths02(arg, par, sc, innercc, kind, out, apout) and
+ ret.getNodeEx() = retnode and // `ret` is the path node at the matching return node
+ kind = retnode.getKind() and
+ innercc = ret.getCallContext() and
+ sc = ret.getSummaryCtx() and
+ ret.getConfiguration() = unbindConf(getPathNodeConf(arg)) and
+ apout = ret.getAp() and
+ not ret.isHidden() // hidden return nodes are not reported
+ )
+ }
+
+ /**
+ * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
+ * a subpath between `par` and `ret` with the connecting edges `arg -> par` and
+ * `ret -> out` is summarized as the edge `arg -> out`.
+ */
+ predicate subpaths(PathNode arg, PathNodeImpl par, PathNodeMid ret, PathNodeMid out) {
+ exists(ParamNodeEx p, NodeEx o, AccessPath apout |
+ pragma[only_bind_into](arg).getASuccessor() = par and // both `par` and `out` are successors of `arg`
+ pragma[only_bind_into](arg).getASuccessor() = out and
+ subpaths03(arg, p, ret, o, apout) and
+ par.getNodeEx() = p and
+ out.getNodeEx() = o and
+ out.getAp() = apout
+ )
+ }
+
+ /**
+ * Holds if `n` can reach a return node in a summarized subpath.
+ */
+ predicate retReach(PathNode n) {
+ subpaths(_, _, n, _) // `n` is itself the return node of a subpath tuple
+ or
+ exists(PathNode mid |
+ retReach(mid) and
+ n.getASuccessor() = mid and
+ not subpaths(_, mid, _, _) // do not continue backwards past a node that is the `par` of a subpath tuple
+ )
+ }
+}
+
+/**
+ * Holds if data can flow (inter-procedurally) from `source` to `sink`, where
+ * `flowsource` and `flowsink` are the path nodes for `source` and `sink`.
+ *
+ * Will only have results if `configuration` has non-empty sources and sinks.
+ */
+private predicate flowsTo(
+  PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration
+) {
+  flowsource.isSource() and
+  configuration = flowsource.getConfiguration() and
+  source = flowsource.(PathNodeImpl).getNodeEx().asNode() and
+  sink = flowsink.getNodeEx().asNode() and
+  (pathSuccPlus(flowsource, flowsink) or flowsink = flowsource)
+}
+
+/**
+ * Holds if data can flow (inter-procedurally) from `source` to `sink`.
+ *
+ * Will only have results if `configuration` has non-empty sources and
+ * sinks.
+ */
+predicate flowsTo(Node source, Node sink, Configuration configuration) {
+  exists(PathNode src, PathNodeSink snk | flowsTo(src, snk, source, sink, configuration))
+}
+
+private predicate finalStats(boolean fwd, int nodes, int fields, int conscand, int tuples) { // counts over the path-node graph
+ fwd = true and // all path nodes
+ nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0)) and
+ fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and // distinct access-path heads
+ conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and // distinct access paths
+ tuples = count(PathNode pn)
+ or
+ fwd = false and // only path nodes for which `reach` holds
+ nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0 and reach(pn))) and
+ fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and
+ conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and
+ tuples = count(PathNode pn | reach(pn))
+}
+
+/**
+ * INTERNAL: Only for debugging.
+ *
+ * Calculates per-stage metrics for data flow; `n` numbers the rows labelled by `stage`.
+ */
+predicate stageStats(
+  int n, string stage, int nodes, int fields, int conscand, int tuples, Configuration config
+) {
+  n = 10 and stage = "1 Fwd" and Stage1::stats(true, nodes, fields, conscand, tuples, config)
+  or
+  n = 15 and stage = "1 Rev" and Stage1::stats(false, nodes, fields, conscand, tuples, config)
+  or
+  n = 20 and stage = "2 Fwd" and Stage2::stats(true, nodes, fields, conscand, tuples, config)
+  or
+  n = 25 and stage = "2 Rev" and Stage2::stats(false, nodes, fields, conscand, tuples, config)
+  or
+  n = 30 and stage = "3 Fwd" and Stage3::stats(true, nodes, fields, conscand, tuples, config)
+  or
+  n = 35 and stage = "3 Rev" and Stage3::stats(false, nodes, fields, conscand, tuples, config)
+  or
+  n = 40 and stage = "4 Fwd" and Stage4::stats(true, nodes, fields, conscand, tuples, config)
+  or
+  n = 45 and stage = "4 Rev" and Stage4::stats(false, nodes, fields, conscand, tuples, config)
+  or
+  n = 50 and stage = "5 Fwd" and finalStats(true, nodes, fields, conscand, tuples)
+  or
+  n = 55 and stage = "5 Rev" and finalStats(false, nodes, fields, conscand, tuples)
+}
+
+private module FlowExploration {
+ /** Holds if some step under `config` leads from a node in `c1` to a node in a different callable `c2`. */
+ private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) {
+   exists(NodeEx pred, NodeEx succ |
+     jumpStep(pred, succ, config)
+     or
+     additionalJumpStep(pred, succ, config)
+     or
+     // flow into callable
+     viableParamArgEx(_, succ, pred)
+     or
+     // flow out of a callable
+     viableReturnPosOutEx(_, pred.(RetNodeEx).getReturnPosition(), succ)
+   |
+     c1 = pred.getEnclosingCallable() and c2 = succ.getEnclosingCallable() and
+     c1 != c2
+   )
+ }
+
+ /** Holds if `c` encloses a source of `config` or follows such a callable via `callableStep`s. */
+ private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) {
+   c = getNodeEnclosingCallable(any(Node src | config.isSource(src)))
+   or
+   // recursive case: extend by one `callableStep`
+   exists(DataFlowCallable prev | callableStep(prev, c, config) | interestingCallableSrc(prev, config))
+ }
+
+ /** Holds if `c` encloses a sink of `config` or leads to such a callable via `callableStep`s. */
+ private predicate interestingCallableSink(DataFlowCallable c, Configuration config) {
+   c = getNodeEnclosingCallable(any(Node snk | config.isSink(snk)))
+   or
+   // recursive case: prepend one `callableStep`
+   exists(DataFlowCallable next | callableStep(c, next, config) | interestingCallableSink(next, config))
+ }
+
+ private newtype TCallableExt = // callables paired with a configuration, plus two synthetic end points
+ TCallable(DataFlowCallable c, Configuration config) {
+ interestingCallableSrc(c, config) or
+ interestingCallableSink(c, config)
+ } or
+ TCallableSrc() or // synthetic node that steps to every callable containing a source
+ TCallableSink() // synthetic node that every callable containing a sink steps to
+
+ private predicate callableExtSrc(TCallableSrc src) { any() } // start set for `distSrcExt`: the synthetic source node
+
+ private predicate callableExtSink(TCallableSink sink) { any() } // start set for `distSinkExt`: the synthetic sink node
+
+ private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) { // `callableStep` lifted to `TCallableExt`, plus edges for the synthetic nodes
+ exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config |
+ callableStep(c1, c2, config) and
+ ce1 = TCallable(c1, pragma[only_bind_into](config)) and
+ ce2 = TCallable(c2, pragma[only_bind_into](config))
+ )
+ or
+ exists(Node n, Configuration config | // from the synthetic source to each callable containing a source
+ ce1 = TCallableSrc() and
+ config.isSource(n) and
+ ce2 = TCallable(getNodeEnclosingCallable(n), config)
+ )
+ or
+ exists(Node n, Configuration config | // from each callable containing a sink to the synthetic sink
+ ce2 = TCallableSink() and
+ config.isSink(n) and
+ ce1 = TCallable(getNodeEnclosingCallable(n), config)
+ )
+ }
+
+ private predicate callableExtStepRev(TCallableExt ce1, TCallableExt ce2) {
+ callableExtStepFwd(ce2, ce1) // the forward step relation with its arguments swapped
+ }
+
+ private int distSrcExt(TCallableExt c) = // shortest distance to `c` from the synthetic source, following forward steps
+ shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result)
+
+ private int distSinkExt(TCallableExt c) = // shortest distance to `c` from the synthetic sink, following reversed steps
+ shortestDistances(callableExtSink/1, callableExtStepRev/2)(_, c, result)
+
+ private int distSrc(DataFlowCallable c, Configuration config) {
+ result = distSrcExt(TCallable(c, config)) - 1 // minus one for the edge out of the synthetic `TCallableSrc` node
+ }
+
+ private int distSink(DataFlowCallable c, Configuration config) {
+ result = distSinkExt(TCallable(c, config)) - 1 // minus one for the edge into the synthetic `TCallableSink` node
+ }
+
+ private newtype TPartialAccessPath = // access paths used for partial (exploration) flow
+ TPartialNil(DataFlowType t) or // empty path with its type
+ TPartialCons(TypedContent tc, int len) { len in [1 .. accessPathLimit()] } // head content and length, bounded by `accessPathLimit()`
+
+ /**
+  * Conceptually a list of `TypedContent`s followed by a `Type`, of which only the
+  * first element and the length are tracked. If data flows from a source to a
+  * given node with a given `AccessPath`, this indicates the sequence of
+  * dereference operations needed to get from the value in the node to the
+  * tracked object. The final type indicates the type of the tracked object.
+  */
+ private class PartialAccessPath extends TPartialAccessPath {
+   abstract string toString();
+
+   TypedContent getHead() { TPartialCons(result, _) = this }
+
+   int len() {
+     TPartialCons(_, result) = this
+     or
+     TPartialNil(_) = this and result = 0
+   }
+
+   DataFlowType getType() {
+     exists(TypedContent tc | TPartialCons(tc, _) = this | tc.getContainerType() = result)
+     or
+     TPartialNil(result) = this
+   }
+ }
+
+ private class PartialAccessPathNil extends PartialAccessPath, TPartialNil { // the empty partial access path
+   override string toString() {
+     exists(DataFlowType nilType | TPartialNil(nilType) = this | result = concat(": " + ppReprType(nilType)))
+   }
+ }
+
+ /** A non-empty partial access path, printed as its head plus its length when longer than one. */
+ private class PartialAccessPathCons extends PartialAccessPath, TPartialCons {
+   override string toString() {
+     exists(TypedContent head, int n, string open | TPartialCons(head, n) = this |
+       open = "[" + head.toString() and
+       if n = 1 then result = open + "]" else result = open + ", ... (" + n.toString() + ")]"
+     )
+   }
+ }
+
+ private newtype TRevPartialAccessPath = // access paths used for reverse partial flow; no type is recorded
+ TRevPartialNil() or // empty path
+ TRevPartialCons(Content c, int len) { len in [1 .. accessPathLimit()] } // head content and length, bounded by `accessPathLimit()`
+
+ /**
+  * Conceptually a list of `Content`s, of which only the first element and the
+  * length are tracked.
+  */
+ private class RevPartialAccessPath extends TRevPartialAccessPath {
+   abstract string toString();
+
+   Content getHead() { TRevPartialCons(result, _) = this }
+
+   int len() {
+     TRevPartialCons(_, result) = this
+     or
+     TRevPartialNil() = this and result = 0
+   }
+ }
+
+ private class RevPartialAccessPathNil extends RevPartialAccessPath, TRevPartialNil { // the empty reverse partial access path
+ override string toString() { result = "" } // printed as the empty string
+ }
+
+ /** A non-empty reverse partial access path, printed as its head plus its length when longer than one. */
+ private class RevPartialAccessPathCons extends RevPartialAccessPath, TRevPartialCons {
+   override string toString() {
+     exists(Content head, int n, string open | TRevPartialCons(head, n) = this |
+       open = "[" + head.toString() and
+       if n = 1 then result = open + "]" else result = open + ", ... (" + n.toString() + ")]"
+     )
+   }
+ }
+
+ private newtype TSummaryCtx1 = // first component of a forward summary context
+ TSummaryCtx1None() or // no summary context
+ TSummaryCtx1Param(ParamNodeEx p) // a parameter node
+
+ private newtype TSummaryCtx2 = // second component of a forward summary context
+ TSummaryCtx2None() or // no summary context
+ TSummaryCtx2Some(PartialAccessPath ap) // a partial access path
+
+ private newtype TRevSummaryCtx1 = // first component of a reverse summary context
+ TRevSummaryCtx1None() or // no summary context
+ TRevSummaryCtx1Some(ReturnPosition pos) // a return position
+
+ private newtype TRevSummaryCtx2 = // second component of a reverse summary context
+ TRevSummaryCtx2None() or // no summary context
+ TRevSummaryCtx2Some(RevPartialAccessPath ap) // a reverse partial access path
+
+ private newtype TPartialPathNode = // path nodes for partial flow, forwards from sources and backwards from sinks
+ TPartialPathNodeFwd(
+ NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap,
+ Configuration config
+ ) {
+ sourceNode(node, config) and // base case: a source with no contexts and an empty access path
+ cc instanceof CallContextAny and
+ sc1 = TSummaryCtx1None() and
+ sc2 = TSummaryCtx2None() and
+ ap = TPartialNil(node.getDataFlowType()) and
+ not fullBarrier(node, config) and
+ exists(config.explorationLimit()) // only if the configuration provides an exploration limit
+ or
+ partialPathNodeMk0(node, cc, sc1, sc2, ap, config) and // step case: reached from an existing forward node
+ distSrc(node.getEnclosingCallable(), config) <= config.explorationLimit() // callable distance from a source stays within the limit
+ } or
+ TPartialPathNodeRev(
+ NodeEx node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, RevPartialAccessPath ap,
+ Configuration config
+ ) {
+ sinkNode(node, config) and // base case: a sink with no contexts and an empty access path
+ sc1 = TRevSummaryCtx1None() and
+ sc2 = TRevSummaryCtx2None() and
+ ap = TRevPartialNil() and
+ not fullBarrier(node, config) and
+ exists(config.explorationLimit()) // only if the configuration provides an exploration limit
+ or
+ exists(PartialPathNodeRev mid | // step case: reached backwards from an existing reverse node
+ revPartialPathStep(mid, node, sc1, sc2, ap, config) and
+ not clearsContentCached(node.asNode(), ap.getHead()) and
+ not fullBarrier(node, config) and
+ distSink(node.getEnclosingCallable(), config) <= config.explorationLimit() // callable distance to a sink stays within the limit
+ )
+ }
+
+ pragma[nomagic]
+ private predicate partialPathNodeMk0( // step case of `TPartialPathNodeFwd`, without the distance check
+ NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap,
+ Configuration config
+ ) {
+ exists(PartialPathNodeFwd mid |
+ partialPathStep(mid, node, cc, sc1, sc2, ap, config) and
+ not fullBarrier(node, config) and
+ not clearsContentCached(node.asNode(), ap.getHead().getContent()) and // excluded when `clearsContentCached` holds for the head content at `node`
+ if node.asNode() instanceof CastingNode // at casting nodes the node type must be compatible with the path type
+ then compatibleTypes(node.getDataFlowType(), ap.getType())
+ else any()
+ )
+ }
+
+ /**
+  * A data-flow node paired with partial-flow exploration state, either forward or reverse.
+  */
+ class PartialPathNode extends TPartialPathNode {
+   /** Gets the underlying node's text, followed by the access path when that is non-empty. */
+   string toString() { result = this.getNodeEx().toString() + this.ppAp() }
+
+   /**
+    * Gets the same text as `toString()`, followed by the call context in angle brackets.
+    * Only forward nodes have a call context, so reverse nodes have no result here.
+    */
+   string toStringWithContext() {
+     result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx()
+   }
+
+   /**
+    * Holds if the underlying node is at the given location, that is, it spans
+    * from column `startcolumn` of line `startline` to column `endcolumn` of
+    * line `endline` in file `filepath`.
+    * For more information, see
+    * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+    */
+   predicate hasLocationInfo(
+     string filepath, int startline, int startcolumn, int endline, int endcolumn
+   ) {
+     this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+   }
+
+   /** Gets the `Node` that the wrapped `NodeEx` projects to. */
+   final Node getNode() { result = this.getNodeEx().projectToNode() }
+
+   private NodeEx getNodeEx() {
+     result = this.(PartialPathNodeRev).getNodeEx() or
+     result = this.(PartialPathNodeFwd).getNodeEx()
+   }
+
+   /** Gets the associated configuration; the forward and reverse subclasses override this. */
+   Configuration getConfiguration() { none() }
+
+   /** Gets a successor of this node, if any; the forward and reverse subclasses override this. */
+   PartialPathNode getASuccessor() { none() }
+
+   /**
+    * Gets the approximate distance from the nearest source, measured in number
+    * of interprocedural steps (the `distSrc` value of the enclosing callable).
+    */
+   int getSourceDistance() {
+     result = distSrc(this.getNodeEx().getEnclosingCallable(), this.getConfiguration())
+   }
+
+   /**
+    * Gets the approximate distance to the nearest sink, measured in number
+    * of interprocedural steps (the `distSink` value of the enclosing callable).
+    */
+   int getSinkDistance() {
+     result = distSink(this.getNodeEx().getEnclosingCallable(), this.getConfiguration())
+   }
+
+   private string ppAp() {
+     exists(string apText |
+       apText = this.(PartialPathNodeRev).getAp().toString() or
+       apText = this.(PartialPathNodeFwd).getAp().toString()
+     |
+       apText = "" and result = "" or apText != "" and result = " " + apText
+     )
+   }
+
+   private string ppCtx() {
+     result = " <" + this.(PartialPathNodeFwd).getCallContext().toString() + ">"
+   }
+
+   /** Holds if this is a forward node for which `isSource()` holds. */
+   predicate isFwdSource() { this.(PartialPathNodeFwd).isSource() }
+
+   /** Holds if this is a reverse node for which `isSink()` holds. */
+   predicate isRevSink() { this.(PartialPathNodeRev).isSink() }
+ }
+
+ /**
+  * Exposes the `edges` query predicate that a path-problem query needs to render a graph.
+  */
+ module PartialPathGraph {
+   /** Holds if `b` is a successor of `a`, that is, `(a,b)` is an edge of the path graph. */
+   query predicate edges(PartialPathNode a, PartialPathNode b) { b = a.getASuccessor() }
+ }
+
+ /** A forward partial-path node; exposes the components of its `TPartialPathNodeFwd` tuple. */
+ private class PartialPathNodeFwd extends PartialPathNode, TPartialPathNodeFwd {
+   NodeEx node;
+   CallContext cc;
+   TSummaryCtx1 sc1;
+   TSummaryCtx2 sc2;
+   PartialAccessPath ap;
+   Configuration config;
+
+   PartialPathNodeFwd() { this = TPartialPathNodeFwd(node, cc, sc1, sc2, ap, config) }
+
+   NodeEx getNodeEx() { node = result }
+
+   CallContext getCallContext() { cc = result }
+
+   TSummaryCtx1 getSummaryCtx1() { sc1 = result }
+
+   TSummaryCtx2 getSummaryCtx2() { sc2 = result }
+
+   PartialAccessPath getAp() { ap = result }
+
+   override Configuration getConfiguration() { config = result }
+
+   override PartialPathNodeFwd getASuccessor() {
+     partialPathStep(this, result.getNodeEx(), result.getCallContext(), result.getSummaryCtx1(),
+       result.getSummaryCtx2(), result.getAp(), result.getConfiguration())
+   }
+
+   predicate isSource() {
+     ap instanceof TPartialNil and
+     sc1 = TSummaryCtx1None() and sc2 = TSummaryCtx2None() and
+     cc instanceof CallContextAny and
+     sourceNode(node, config)
+   }
+ }
+
+ /** A reverse partial-path node; exposes the components of its `TPartialPathNodeRev` tuple. */
+ private class PartialPathNodeRev extends PartialPathNode, TPartialPathNodeRev {
+   NodeEx node;
+   TRevSummaryCtx1 sc1;
+   TRevSummaryCtx2 sc2;
+   RevPartialAccessPath ap;
+   Configuration config;
+
+   PartialPathNodeRev() { this = TPartialPathNodeRev(node, sc1, sc2, ap, config) }
+
+   NodeEx getNodeEx() { node = result }
+
+   TRevSummaryCtx1 getSummaryCtx1() { sc1 = result }
+
+   TRevSummaryCtx2 getSummaryCtx2() { sc2 = result }
+
+   RevPartialAccessPath getAp() { ap = result }
+
+   override Configuration getConfiguration() { config = result }
+
+   override PartialPathNodeRev getASuccessor() {
+     revPartialPathStep(result, this.getNodeEx(), this.getSummaryCtx1(), this.getSummaryCtx2(),
+       this.getAp(), this.getConfiguration())
+   }
+
+   predicate isSink() {
+     ap = TRevPartialNil() and
+     sc1 = TRevSummaryCtx1None() and sc2 = TRevSummaryCtx2None() and
+     sinkNode(node, config)
+   }
+ }
+
+ /** Holds if `mid` steps forward to `node`, yielding contexts `cc`, `sc1`, `sc2` and path `ap`. */
+ private predicate partialPathStep(
+   PartialPathNodeFwd mid, NodeEx node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2,
+   PartialAccessPath ap, Configuration config
+ ) {
+   not isUnreachableInCallCached(node.asNode(), cc.(CallContextSpecificCall).getCall()) and
+   (
+     // local step: all state is carried over unchanged
+     localFlowStep(mid.getNodeEx(), node, config) and
+     config = mid.getConfiguration() and
+     cc = mid.getCallContext() and ap = mid.getAp() and
+     sc1 = mid.getSummaryCtx1() and sc2 = mid.getSummaryCtx2()
+     or
+     // additional local step: needs a nil access path, which is rebuilt with the type of `node`
+     additionalLocalFlowStep(mid.getNodeEx(), node, config) and
+     config = mid.getConfiguration() and cc = mid.getCallContext() and
+     sc1 = mid.getSummaryCtx1() and sc2 = mid.getSummaryCtx2() and
+     mid.getAp() instanceof PartialAccessPathNil and
+     ap = TPartialNil(node.getDataFlowType())
+   )
+   or
+   // jump step: call and summary contexts are reset
+   jumpStep(mid.getNodeEx(), node, config) and
+   config = mid.getConfiguration() and
+   cc instanceof CallContextAny and
+   sc1 = TSummaryCtx1None() and sc2 = TSummaryCtx2None() and
+   ap = mid.getAp()
+   or
+   // additional jump step: contexts are reset and the nil access path is rebuilt
+   additionalJumpStep(mid.getNodeEx(), node, config) and
+   config = mid.getConfiguration() and
+   cc instanceof CallContextAny and
+   sc1 = TSummaryCtx1None() and sc2 = TSummaryCtx2None() and
+   mid.getAp() instanceof PartialAccessPathNil and
+   ap = TPartialNil(node.getDataFlowType())
+   or
+   // store step: `ap` is one element longer than the access path of `mid`
+   partialPathStoreStep(mid, _, _, node, ap) and
+   config = mid.getConfiguration() and
+   cc = mid.getCallContext() and
+   sc1 = mid.getSummaryCtx1() and sc2 = mid.getSummaryCtx2()
+   or
+   // read step: `ap` is a path that some store step extended into the access path of `mid`
+   exists(PartialAccessPath midAp, TypedContent tc |
+     partialPathReadStep(mid, midAp, tc, node, cc, config) and
+     apConsFwd(ap, tc, midAp, config) and
+     sc1 = mid.getSummaryCtx1() and sc2 = mid.getSummaryCtx2()
+   )
+   or
+   // flow from an argument into a parameter
+   partialPathIntoCallable(mid, node, _, cc, sc1, sc2, _, ap, config)
+   or
+   // flow from a return node out to a call site: summary contexts are reset
+   partialPathOutOfCallable(mid, node, cc, ap, config) and
+   sc1 = TSummaryCtx1None() and sc2 = TSummaryCtx2None()
+   or
+   // flow through a call: the summary contexts of `mid` are kept
+   partialPathThroughCallable(mid, node, cc, ap, config) and
+   sc1 = mid.getSummaryCtx1() and sc2 = mid.getSummaryCtx2()
+ }
+
+ bindingset[result, i] // holds if `result` equals `i`, written as two bounds rather than `=`
+ private int unbindInt(int i) { result >= i and result <= i }
+
+ /** Holds if `mid` (with path `ap1`) steps to `node` by storing into `tc`, giving a longer `ap2`. */
+ pragma[inline]
+ private predicate partialPathStoreStep(
+   PartialPathNodeFwd mid, PartialAccessPath ap1, TypedContent tc, NodeEx node,
+   PartialAccessPath ap2
+ ) {
+   exists(DataFlowType storedType |
+     ap1 = mid.getAp() and
+     store(mid.getNodeEx(), tc, node, storedType, mid.getConfiguration()) and
+     compatibleTypes(ap1.getType(), storedType) and
+     ap2.getHead() = tc and
+     ap2.len() = unbindInt(ap1.len() + 1)
+   )
+ }
+
+ /** Holds if some forward store step under `config` extends `ap1` with head `tc` into `ap2`. */
+ pragma[nomagic]
+ private predicate apConsFwd(
+   PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config
+ ) {
+   exists(PartialPathNodeFwd storeSrc | config = storeSrc.getConfiguration() |
+     partialPathStoreStep(storeSrc, ap1, tc, _, ap2)
+   )
+ }
+
+ /**
+  * Holds if `mid`, whose access path `ap` has head `tc`, steps to `node` by reading `tc`.
+  */
+ pragma[nomagic]
+ private predicate partialPathReadStep(
+   PartialPathNodeFwd mid, PartialAccessPath ap, TypedContent tc, NodeEx node, CallContext cc,
+   Configuration config
+ ) {
+   ap = mid.getAp() and
+   ap.getHead() = tc and
+   cc = mid.getCallContext() and
+   pragma[only_bind_into](config) = mid.getConfiguration() and
+   read(mid.getNodeEx(), tc.getContent(), node, pragma[only_bind_into](config))
+ }
+
+ /** Holds if `mid` is a return node at `pos` with a `CallContextNoCall` context `innercc`. */
+ private predicate partialPathOutOfCallable0(
+   PartialPathNodeFwd mid, ReturnPosition pos, CallContext innercc, PartialAccessPath ap,
+   Configuration config
+ ) {
+   pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and
+   innercc = mid.getCallContext() and innercc instanceof CallContextNoCall and
+   config = mid.getConfiguration() and
+   ap = mid.getAp()
+ }
+
+ /** Holds if the return node `mid` can return to `call` with kind `kind`, giving context `cc`. */
+ pragma[nomagic]
+ private predicate partialPathOutOfCallable1(
+   PartialPathNodeFwd mid, DataFlowCall call, ReturnKindExt kind, CallContext cc,
+   PartialAccessPath ap, Configuration config
+ ) {
+   exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc |
+     partialPathOutOfCallable0(mid, pos, innercc, ap, config) and
+     kind = pos.getKind() and c = pos.getCallable() and
+     resolveReturn(innercc, c, call)
+   |
+     if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext()
+   )
+ }
+
+ /** Holds if flow returns from the return node `mid` to `out`, an out-node of some call. */
+ private predicate partialPathOutOfCallable(
+   PartialPathNodeFwd mid, NodeEx out, CallContext cc, PartialAccessPath ap, Configuration config
+ ) {
+   exists(DataFlowCall call, ReturnKindExt kind |
+     out.asNode() = kind.getAnOutNode(call) and
+     partialPathOutOfCallable1(mid, call, kind, cc, ap, config)
+   )
+ }
+
+ /**
+  * Holds if `mid` is the `i`th argument of `call`, with call context `cc` and access path `ap`.
+  */
+ pragma[noinline]
+ private predicate partialPathIntoArg(
+   PartialPathNodeFwd mid, int i, CallContext cc, DataFlowCall call, PartialAccessPath ap,
+   Configuration config
+ ) {
+   mid.getNodeEx().asNode().(ArgNode).argumentOf(call, i) and
+   cc = mid.getCallContext() and
+   config = mid.getConfiguration() and
+   ap = mid.getAp()
+ }
+
+ /** Holds if `mid` is the `i`th argument of `call`, and `call` resolves to `callable`. */
+ pragma[nomagic]
+ private predicate partialPathIntoCallable0(
+   PartialPathNodeFwd mid, DataFlowCallable callable, int i, CallContext outercc,
+   DataFlowCall call, PartialAccessPath ap, Configuration config
+ ) {
+   callable = resolveCall(call, outercc) and partialPathIntoArg(mid, i, outercc, call, ap, config)
+ }
+
+ /** Holds if `mid` is an argument of `call` flowing to parameter `p`; sets the new contexts. */
+ private predicate partialPathIntoCallable(
+   PartialPathNodeFwd mid, ParamNodeEx p, CallContext outercc, CallContextCall innercc,
+   TSummaryCtx1 sc1, TSummaryCtx2 sc2, DataFlowCall call, PartialAccessPath ap,
+   Configuration config
+ ) {
+   exists(int pos, DataFlowCallable target |
+     partialPathIntoCallable0(mid, target, pos, outercc, call, ap, config) and
+     p.isParameterOf(target, pos) and
+     sc1 = TSummaryCtx1Param(p) and sc2 = TSummaryCtx2Some(ap)
+   |
+     if recordDataFlowCallSite(call, target)
+     then innercc = TSpecificCall(call)
+     else innercc = TSomeCall()
+   )
+ }
+
+ /** Holds if some forward node is a return node of kind `kind` carrying the given state. */
+ pragma[nomagic]
+ private predicate paramFlowsThroughInPartialPath(
+   ReturnKindExt kind, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2,
+   PartialAccessPath ap, Configuration config
+ ) {
+   exists(PartialPathNodeFwd retMid, RetNodeEx ret |
+     retMid.getNodeEx() = ret and
+     kind = ret.getKind() and
+     config = retMid.getConfiguration() and
+     cc = retMid.getCallContext() and
+     sc1 = retMid.getSummaryCtx1() and sc2 = retMid.getSummaryCtx2() and
+     ap = retMid.getAp()
+   )
+ }
+
+ pragma[noinline] // enter a callee of `call` from `mid`, then reach a return node in that context
+ private predicate partialPathThroughCallable0(
+   DataFlowCall call, PartialPathNodeFwd mid, ReturnKindExt kind, CallContext cc,
+   PartialAccessPath ap, Configuration config
+ ) {
+   exists(CallContext calleeCc, TSummaryCtx1 entry1, TSummaryCtx2 entry2 |
+     paramFlowsThroughInPartialPath(kind, calleeCc, entry1, entry2, ap, config) and
+     partialPathIntoCallable(mid, _, cc, calleeCc, entry1, entry2, call, _, config)
+   )
+ }
+
+ /** Holds if flow from the argument `mid` passes through a call and comes out at `out`. */
+ private predicate partialPathThroughCallable(
+   PartialPathNodeFwd mid, NodeEx out, CallContext cc, PartialAccessPath ap, Configuration config
+ ) {
+   exists(DataFlowCall call, ReturnKindExt kind | out.asNode() = kind.getAnOutNode(call) |
+     partialPathThroughCallable0(call, mid, kind, cc, ap, config)
+   )
+ }
+
+ /** Holds if reverse node `mid` steps backwards to `node`, yielding `sc1`, `sc2` and path `ap`. */
+ private predicate revPartialPathStep(
+   PartialPathNodeRev mid, NodeEx node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2,
+   RevPartialAccessPath ap, Configuration config
+ ) {
+   // local step into `mid`: all state is carried over unchanged
+   localFlowStep(node, mid.getNodeEx(), config) and
+   config = mid.getConfiguration() and
+   sc1 = mid.getSummaryCtx1() and sc2 = mid.getSummaryCtx2() and
+   ap = mid.getAp()
+   or
+   // additional local step: only for nil access paths
+   additionalLocalFlowStep(node, mid.getNodeEx(), config) and
+   config = mid.getConfiguration() and
+   sc1 = mid.getSummaryCtx1() and sc2 = mid.getSummaryCtx2() and
+   mid.getAp() instanceof RevPartialAccessPathNil and
+   ap = TRevPartialNil()
+   or
+   // jump step: summary contexts are reset
+   jumpStep(node, mid.getNodeEx(), config) and
+   config = mid.getConfiguration() and
+   sc1 = TRevSummaryCtx1None() and sc2 = TRevSummaryCtx2None() and
+   ap = mid.getAp()
+   or
+   // additional jump step: summary contexts are reset, only for nil access paths
+   additionalJumpStep(node, mid.getNodeEx(), config) and
+   config = mid.getConfiguration() and
+   sc1 = TRevSummaryCtx1None() and sc2 = TRevSummaryCtx2None() and
+   mid.getAp() instanceof RevPartialAccessPathNil and
+   ap = TRevPartialNil()
+   or
+   // read step, taken backwards: `ap` is one element longer than the access path of `mid`
+   revPartialPathReadStep(mid, _, _, node, ap) and
+   config = mid.getConfiguration() and
+   sc1 = mid.getSummaryCtx1() and sc2 = mid.getSummaryCtx2()
+   or
+   // store step, taken backwards: `ap` is a path that some read step extended into that of `mid`
+   exists(RevPartialAccessPath midAp, Content c |
+     revPartialPathStoreStep(mid, midAp, c, node, config) and
+     apConsRev(ap, c, midAp, config) and
+     sc1 = mid.getSummaryCtx1() and sc2 = mid.getSummaryCtx2()
+   )
+   or
+   // from a parameter back to an argument: only when `mid` has no summary context
+   exists(ParamNodeEx p |
+     mid.getNodeEx() = p and config = mid.getConfiguration() and
+     viableParamArgEx(_, p, node) and
+     sc1 = mid.getSummaryCtx1() and sc2 = mid.getSummaryCtx2() and
+     sc1 = TRevSummaryCtx1None() and sc2 = TRevSummaryCtx2None() and
+     ap = mid.getAp()
+   )
+   or
+   // from an out-node back to a return node
+   exists(ReturnPosition pos |
+     revPartialPathIntoReturn(mid, pos, sc1, sc2, _, ap, config) and
+     pos = getReturnPosition(node.asNode())
+   )
+   or
+   // backwards through a call: the summary contexts of `mid` are kept
+   revPartialPathThroughCallable(mid, node, ap, config) and
+   sc1 = mid.getSummaryCtx1() and sc2 = mid.getSummaryCtx2()
+ }
+
+ /**
+  * Holds if `node` is read (content `c`) into reverse node `mid`, growing `ap1` into `ap2`.
+  */
+ pragma[inline]
+ private predicate revPartialPathReadStep(
+   PartialPathNodeRev mid, RevPartialAccessPath ap1, Content c, NodeEx node,
+   RevPartialAccessPath ap2
+ ) {
+   ap1 = mid.getAp() and
+   read(node, c, mid.getNodeEx(), mid.getConfiguration()) and
+   ap2.getHead() = c and
+   ap2.len() = unbindInt(ap1.len() + 1)
+ }
+
+ /** Holds if some reverse read step under `config` extends `ap1` with head `c` into `ap2`. */
+ pragma[nomagic]
+ private predicate apConsRev(
+   RevPartialAccessPath ap1, Content c, RevPartialAccessPath ap2, Configuration config
+ ) {
+   exists(PartialPathNodeRev readTarget | config = readTarget.getConfiguration() |
+     revPartialPathReadStep(readTarget, ap1, c, _, ap2)
+   )
+ }
+
+ /** Holds if `node` stores content `c` into reverse node `mid`, whose path `ap` has head `c`. */
+ pragma[nomagic]
+ private predicate revPartialPathStoreStep(
+   PartialPathNodeRev mid, RevPartialAccessPath ap, Content c, NodeEx node, Configuration config
+ ) {
+   exists(TypedContent tc |
+     config = mid.getConfiguration() and
+     ap = mid.getAp() and
+     ap.getHead() = c and
+     tc.getContent() = c and
+     store(node, tc, mid.getNodeEx(), _, config)
+   )
+ }
+
+ /**
+  * Holds if reverse node `mid` is an out-node of `call` for `pos`; sets the new summary contexts.
+  */
+ pragma[nomagic]
+ private predicate revPartialPathIntoReturn(
+   PartialPathNodeRev mid, ReturnPosition pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2,
+   DataFlowCall call, RevPartialAccessPath ap, Configuration config
+ ) {
+   viableReturnPosOutEx(call, pos, mid.getNodeEx()) and
+   config = mid.getConfiguration() and
+   ap = mid.getAp() and
+   sc1 = TRevSummaryCtx1Some(pos) and
+   sc2 = TRevSummaryCtx2Some(ap)
+ }
+
+ /** Holds if some reverse node is a parameter at position `pos` carrying the given state. */
+ pragma[nomagic]
+ private predicate revPartialPathFlowsThrough(
+   int pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, RevPartialAccessPath ap,
+   Configuration config
+ ) {
+   exists(PartialPathNodeRev paramMid, ParamNodeEx p |
+     paramMid.getNodeEx() = p and
+     pos = p.getPosition() and
+     config = paramMid.getConfiguration() and
+     sc1 = paramMid.getSummaryCtx1() and sc2 = paramMid.getSummaryCtx2() and
+     ap = paramMid.getAp()
+   )
+ }
+
+ pragma[nomagic] // enter a callee of `call` backwards from `mid`, then reach parameter `pos`
+ private predicate revPartialPathThroughCallable0(
+   DataFlowCall call, PartialPathNodeRev mid, int pos, RevPartialAccessPath ap,
+   Configuration config
+ ) {
+   exists(TRevSummaryCtx1Some entry1, TRevSummaryCtx2Some entry2 |
+     revPartialPathFlowsThrough(pos, entry1, entry2, ap, config) and
+     revPartialPathIntoReturn(mid, _, entry1, entry2, call, _, config)
+   )
+ }
+
+ /** Holds if reverse flow from `mid` passes back through a call to its argument `node`. */
+ pragma[nomagic]
+ private predicate revPartialPathThroughCallable(
+   PartialPathNodeRev mid, ArgNodeEx node, RevPartialAccessPath ap, Configuration config
+ ) {
+   exists(DataFlowCall call, int pos | node.asNode().(ArgNode).argumentOf(call, pos) |
+     revPartialPathThroughCallable0(call, mid, pos, ap, config)
+   )
+ }
+}
+
+import FlowExploration
+
+/** Holds if `node` is reachable in one or more forward steps from the forward source `source`. */
+private predicate partialFlow(
+  PartialPathNode source, PartialPathNode node, Configuration configuration
+) {
+  source.isFwdSource() and configuration = source.getConfiguration() and
+  node = source.getASuccessor+()
+}
+
+/** Holds if the reverse sink `sink` is reachable in one or more successor steps from `node`. */
+private predicate revPartialFlow(
+  PartialPathNode node, PartialPathNode sink, Configuration configuration
+) {
+  sink.isRevSink() and configuration = sink.getConfiguration() and
+  sink = node.getASuccessor+()
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplCommon.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplCommon.qll
new file mode 100644
index 00000000000..f43a550af57
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplCommon.qll
@@ -0,0 +1,1294 @@
+private import DataFlowImplSpecific::Private
+private import DataFlowImplSpecific::Public
+import Cached
+
+/**
+ * Holds for the cost limits of the `AccessPathFront` to `AccessPathApprox` expansion.
+ *
+ * `apLimit` (10) bounds the acceptable fan-out and `tupleLimit` (10000) bounds
+ * the estimated tuple cost per `AccessPathFront`. An access path that exceeds
+ * both limits is represented with lower precision during pruning.
+ */
+predicate accessPathApproxCostLimits(int apLimit, int tupleLimit) {
+  tupleLimit = 10000 and
+  apLimit = 10
+}
+
+/**
+ * Holds for the cost limits of the `AccessPathApprox` to `AccessPath` expansion.
+ *
+ * `apLimit` (5) bounds the acceptable fan-out and `tupleLimit` (1000) bounds
+ * the estimated tuple cost per `AccessPathApprox`. An access path that exceeds
+ * both limits is represented with lower precision.
+ */
+predicate accessPathCostLimits(int apLimit, int tupleLimit) {
+  tupleLimit = 1000 and
+  apLimit = 5
+}
+
+/**
+ * Provides a simple reverse data-flow analysis that is used to resolve lambda calls.
+ * Read steps, store steps, and flow-through are currently not modelled.
+ *
+ * The recursion is non-linear: computing flow into or out of a call may itself
+ * need lambda-call targets, which are obtained from this same analysis. That is
+ * why the code from `DataFlowImpl.qll` cannot be reused directly here.
+ */
+private module LambdaFlow {
+  private predicate viableParamNonLambda(DataFlowCall call, int i, ParamNode p) {
+    p.isParameterOf(viableCallable(call), i)
+  }
+
+  private predicate viableParamLambda(DataFlowCall call, int i, ParamNode p) {
+    p.isParameterOf(viableCallableLambda(call, _), i)
+  }
+
+  /** Holds if `arg` is passed to `p` in `call`, with targets taken from `viableCallable`. */
+  private predicate viableParamArgNonLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
+    exists(int pos | arg.argumentOf(call, pos) |
+      viableParamNonLambda(call, pos, p)
+    )
+  }
+
+  /** Holds if `arg` is passed to `p` in `call`, with targets taken from `viableCallableLambda`. */
+  private predicate viableParamArgLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
+    exists(int pos | arg.argumentOf(call, pos) |
+      viableParamLambda(call, pos, p)
+    )
+  }
+
+  /** A (callable, return kind) pair such that the callable has a return node of that kind. */
+  private newtype TReturnPositionSimple =
+    TReturnPositionSimple0(DataFlowCallable c, ReturnKind kind) {
+      exists(ReturnNode ret | kind = ret.getKind() |
+        c = getNodeEnclosingCallable(ret)
+      )
+    }
+
+  pragma[noinline]
+  private TReturnPositionSimple getReturnPositionSimple(ReturnNode ret, ReturnKind kind) {
+    result = TReturnPositionSimple0(getNodeEnclosingCallable(ret), kind)
+  }
+
+  pragma[nomagic]
+  private TReturnPositionSimple viableReturnPosNonLambda(DataFlowCall call, ReturnKind kind) {
+    result = TReturnPositionSimple0(viableCallable(call), kind)
+  }
+
+  pragma[nomagic]
+  private TReturnPositionSimple viableReturnPosLambda(
+    DataFlowCall call, DataFlowCallOption lastCall, ReturnKind kind
+  ) {
+    result = TReturnPositionSimple0(viableCallableLambda(call, lastCall), kind)
+  }
+
+  /** Holds if `pos` can return to `out` via `call`, with targets taken from `viableCallable`. */
+  private predicate viableReturnPosOutNonLambda(
+    DataFlowCall call, TReturnPositionSimple pos, OutNode out
+  ) {
+    exists(ReturnKind kind | out = getAnOutNode(call, kind) |
+      pos = viableReturnPosNonLambda(call, kind)
+    )
+  }
+
+  /** Holds if `pos` can return to `out` via `call`, with targets from `viableCallableLambda`. */
+  private predicate viableReturnPosOutLambda(
+    DataFlowCall call, DataFlowCallOption lastCall, TReturnPositionSimple pos, OutNode out
+  ) {
+    exists(ReturnKind kind | out = getAnOutNode(call, kind) |
+      pos = viableReturnPosLambda(call, lastCall, kind)
+    )
+  }
+
+  /**
+   * Holds if data of type `t` at `node` can flow, possibly across callables, to
+   * the lambda call `lambdaCall` of kind `kind`.
+   *
+   * `toReturn` tells whether the path from `node` to `lambdaCall` passes through
+   * a return, and `toJump` whether it passes through a jump step.
+   *
+   * `lastCall` is the last call on the path from `node` to `lambdaCall`, if any;
+   * that is, a call that is able to target the enclosing callable of `lambdaCall`.
+   * Type compatibility is only checked at cast, argument, and return nodes.
+   */
+  pragma[nomagic]
+  predicate revLambdaFlow(
+    DataFlowCall lambdaCall, LambdaCallKind kind, Node node, DataFlowType t, boolean toReturn,
+    boolean toJump, DataFlowCallOption lastCall
+  ) {
+    revLambdaFlow0(lambdaCall, kind, node, t, toReturn, toJump, lastCall) and
+    (
+      not (castNode(node) or node instanceof ArgNode or node instanceof ReturnNode) or
+      compatibleTypes(t, getNodeDataFlowType(node))
+    )
+  }
+
+  /** The recursive core of `revLambdaFlow`, without the type-compatibility check. */
+  pragma[nomagic]
+  predicate revLambdaFlow0(
+    DataFlowCall lambdaCall, LambdaCallKind kind, Node node, DataFlowType t, boolean toReturn,
+    boolean toJump, DataFlowCallOption lastCall
+  ) {
+    lambdaCall(lambdaCall, kind, node) and
+    t = getNodeDataFlowType(node) and
+    toReturn = false and toJump = false and
+    lastCall = TDataFlowCallNone()
+    or
+    // local flow
+    exists(Node succ, DataFlowType succType |
+      revLambdaFlow(lambdaCall, kind, succ, succType, toReturn, toJump, lastCall)
+    |
+      simpleLocalFlowStep(node, succ) and
+      t = succType
+      or
+      exists(boolean preservesValue |
+        additionalLambdaFlowStep(node, succ, preservesValue) and
+        getNodeEnclosingCallable(node) = getNodeEnclosingCallable(succ)
+      |
+        preservesValue = true and
+        t = succType
+        or
+        preservesValue = false and
+        t = getNodeDataFlowType(node)
+      )
+    )
+    or
+    // jump step
+    exists(Node succ, DataFlowType succType |
+      revLambdaFlow(lambdaCall, kind, succ, succType, _, _, _) and
+      toReturn = false and
+      toJump = true and
+      lastCall = TDataFlowCallNone()
+    |
+      jumpStepCached(node, succ) and
+      t = succType
+      or
+      exists(boolean preservesValue |
+        additionalLambdaFlowStep(node, succ, preservesValue) and
+        getNodeEnclosingCallable(node) != getNodeEnclosingCallable(succ)
+      |
+        preservesValue = true and
+        t = succType
+        or
+        preservesValue = false and
+        t = getNodeDataFlowType(node)
+      )
+    )
+    or
+    // flow into a callable
+    exists(ParamNode p, DataFlowCallOption innerLastCall, DataFlowCall call |
+      revLambdaFlowIn(lambdaCall, kind, p, t, toJump, innerLastCall) and
+      (
+        if innerLastCall = TDataFlowCallNone() and toJump = false
+        then lastCall = TDataFlowCallSome(call)
+        else lastCall = innerLastCall
+      ) and
+      toReturn = false
+    |
+      viableParamArgLambda(call, p, node) // non-linear recursion
+      or
+      viableParamArgNonLambda(call, p, node)
+    )
+    or
+    // flow out of a callable
+    exists(TReturnPositionSimple pos |
+      revLambdaFlowOut(lambdaCall, kind, pos, t, toJump, lastCall) and
+      pos = getReturnPositionSimple(node, node.(ReturnNode).getKind()) and
+      toReturn = true
+    )
+  }
+
+  /** Holds if `revLambdaFlow` reaches `out`, an out-node of `call`, which is itself a lambda call. */
+  pragma[nomagic]
+  predicate revLambdaFlowOutLambdaCall(
+    DataFlowCall lambdaCall, LambdaCallKind kind, OutNode out, DataFlowType t, boolean toJump,
+    DataFlowCall call, DataFlowCallOption lastCall
+  ) {
+    revLambdaFlow(lambdaCall, kind, out, t, _, toJump, lastCall) and
+    // restrict to out-nodes of calls that `lambdaCall/3` recognizes
+    lambdaCall(call, _, _) and
+    exists(ReturnKindExt rk | out = rk.getAnOutNode(call))
+  }
+
+  pragma[nomagic]
+  predicate revLambdaFlowOut(
+    DataFlowCall lambdaCall, LambdaCallKind kind, TReturnPositionSimple pos, DataFlowType t,
+    boolean toJump, DataFlowCallOption lastCall
+  ) {
+    exists(DataFlowCall call, OutNode out |
+      viableReturnPosOutNonLambda(call, pos, out) and
+      revLambdaFlow(lambdaCall, kind, out, t, _, toJump, lastCall)
+      or
+      // non-linear recursion: the targets of `call` come from this analysis itself
+      viableReturnPosOutLambda(call, _, pos, out) and
+      revLambdaFlowOutLambdaCall(lambdaCall, kind, out, t, toJump, call, lastCall)
+    )
+  }
+
+  pragma[nomagic]
+  predicate revLambdaFlowIn(
+    DataFlowCall lambdaCall, LambdaCallKind kind, ParamNode p, DataFlowType t, boolean toJump,
+    DataFlowCallOption lastCall
+  ) {
+    revLambdaFlow(lambdaCall, kind, p, t, false, toJump, lastCall)
+  }
+}
+
+/** Gets a target of `call`, taken from either `viableCallable` or `viableCallableLambda`. */
+private DataFlowCallable viableCallableExt(DataFlowCall call) {
+  result = viableCallableLambda(call, _) or
+  result = viableCallable(call)
+}
+
+cached
+private module Cached {
+ /**
+  * If needed, call this predicate from `DataFlowImplSpecific.qll` in order to
+  * force a stage-dependency on the `DataFlowImplCommon.qll` stage, thereby
+  * collapsing the two stages. The predicate itself always holds.
+  */
+ cached
+ predicate forceCachingInSameStage() { any() }
+
+ cached // holds if `c` is the enclosing callable of node `n`
+ predicate nodeEnclosingCallable(Node n, DataFlowCallable c) { n.getEnclosingCallable() = c }
+
+ cached // holds if `c` is the enclosing callable of `call`
+ predicate callEnclosingCallable(DataFlowCall call, DataFlowCallable c) {
+   call.getEnclosingCallable() = c
+ }
+
+ cached // holds if `t` is the `getNodeType` result for node `n`
+ predicate nodeDataFlowType(Node n, DataFlowType t) { getNodeType(n) = t }
+
+ cached // cached wrapper around `jumpStep`
+ predicate jumpStepCached(Node node1, Node node2) { jumpStep(node1, node2) }
+
+ cached // cached wrapper around `clearsContent`
+ predicate clearsContentCached(Node n, Content c) { clearsContent(n, c) }
+
+ cached // cached wrapper around `isUnreachableInCall`
+ predicate isUnreachableInCallCached(Node n, DataFlowCall call) { isUnreachableInCall(n, call) }
+
+ /** Holds if `n` is an `OutNode`, or a post-update node whose pre-update node is an argument. */
+ cached
+ predicate outNodeExt(Node n) {
+   n.(PostUpdateNode).getPreUpdateNode() instanceof ArgNode or
+   n instanceof OutNode
+ }
+
+ cached // cached wrapper around `nodeIsHidden`
+ predicate hiddenNode(Node n) { nodeIsHidden(n) }
+
+ /** Gets an out-node of `call` for kind `k`: a returned value or an updated argument. */
+ cached
+ OutNodeExt getAnOutNodeExt(DataFlowCall call, ReturnKindExt k) {
+   result = getAnOutNode(call, k.(ValueReturnKind).getKind())
+   or
+   exists(ArgNode updated | updated.argumentOf(call, k.(ParamUpdateReturnKind).getPosition()) |
+     result.(PostUpdateNode).getPreUpdateNode() = updated
+   )
+ }
+
+ /** Holds if `n` returns with kind `k`: a `ReturnNode`, or the pre-update of a parameter. */
+ cached
+ predicate returnNodeExt(Node n, ReturnKindExt k) {
+   k = TValueReturn(n.(ReturnNode).getKind())
+   or
+   exists(ParamNode p, int pos |
+     parameterValueFlowsToPreUpdate(p, n) and
+     p.isParameterOf(_, pos) and k = TParamUpdate(pos)
+   )
+ }
+
+ cached // holds if `n` is a `CastNode`
+ predicate castNode(Node n) { n instanceof CastNode }
+
+ /** Holds if `n` is a cast node, a parameter node, an out-node, or the target of a read step. */
+ cached
+ predicate castingNode(Node n) {
+   // The target of a read `x.f` is included so that the tracked type after the read (which
+   // is obtained by popping the head of the access path) is checked against the type of `x.f`.
+   read(_, _, n) or
+   n instanceof OutNodeExt or
+   n instanceof ParamNode or
+   castNode(n)
+ }
+
+ cached // holds if `n` is a `ParameterNode` that is the `i`th parameter of `c`
+ predicate parameterNode(Node n, DataFlowCallable c, int i) {
+ n.(ParameterNode).isParameterOf(c, i)
+ }
+
+ cached // holds if `n` is an `ArgumentNode` at position `pos` of `call`
+ predicate argumentNode(Node n, DataFlowCall call, int pos) {
+ n.(ArgumentNode).argumentOf(call, pos)
+ }
+
+ /**
+  * Gets a callable that the lambda call `call` may target.
+  *
+  * `lastCall` is the call, if any, through which `call` must be reached for the
+  * result to be a viable target.
+  */
+ cached
+ DataFlowCallable viableCallableLambda(DataFlowCall call, DataFlowCallOption lastCall) {
+   exists(Node lambdaNode, LambdaCallKind kind |
+     lambdaCreation(lambdaNode, kind, result) and
+     LambdaFlow::revLambdaFlow(call, kind, lambdaNode, _, _, _, lastCall)
+   )
+ }
+
+ /**
+  * Holds if `p` is the `i`th parameter of a callable that `call` may dispatch to.
+  * The instance parameter is considered to have index `-1`.
+  */
+ pragma[nomagic]
+ private predicate viableParam(DataFlowCall call, int i, ParamNode p) {
+   exists(DataFlowCallable c | c = viableCallableExt(call) | p.isParameterOf(c, i))
+ }
+
+ /**
+  * Holds if `arg` may be passed to `p` in `call`, taking virtual dispatch into
+  * account and requiring the types of `arg` and `p` to be compatible.
+  */
+ cached
+ predicate viableParamArg(DataFlowCall call, ParamNode p, ArgNode arg) {
+   exists(int pos |
+     arg.argumentOf(call, pos) and
+     viableParam(call, pos, p) and
+     compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(p))
+   )
+ }
+
+ /** Gets a return position of kind `kind` in a callable that `call` may target. */
+ pragma[nomagic]
+ private ReturnPosition viableReturnPos(DataFlowCall call, ReturnKindExt kind) {
+   result.getKind() = kind and result.getCallable() = viableCallableExt(call)
+ }
+
+ /**
+  * Holds if a value at return position `pos` may be returned to `out` via `call`,
+  * taking virtual dispatch into account.
+  */
+ cached
+ predicate viableReturnPosOut(DataFlowCall call, ReturnPosition pos, Node out) {
+   exists(ReturnKindExt retKind |
+     out = retKind.getAnOutNode(call) and
+     pos = viableReturnPos(call, retKind)
+   )
+ }
+
+ /** Provides predicates for calculating flow-through summaries. */
+ private module FlowThrough {
+ /**
+  * The first, coarse approximation of flow-through:
+  *
+  * - Input access paths are abstracted to a Boolean parameter that
+  *   indicates whether they are empty or not.
+  */
+ private module Cand {
+   /**
+    * Holds if `p` can reach `node` within the same callable by taking only
+    * value-preserving steps.
+    *
+    * `read` is `true` if it is contents of `p`, not `p` itself, that reaches `node`.
+    */
+   pragma[nomagic]
+   private predicate parameterValueFlowCand(ParamNode p, Node node, boolean read) {
+     read = false and
+     node = p
+     or
+     // local flow
+     exists(Node pred |
+       simpleLocalFlowStep(pred, node) and
+       parameterValueFlowCand(p, pred, read)
+     )
+     or
+     // read
+     exists(Node pred |
+       read = true and
+       read(pred, _, node) and
+       parameterValueFlowCand(p, pred, false)
+     )
+     or
+     // flow through: no prior read
+     exists(ArgNode through |
+       argumentValueFlowsThroughCand(through, node, read) and
+       parameterValueFlowArgCand(p, through, false)
+     )
+     or
+     // flow through: no read inside method
+     exists(ArgNode through |
+       argumentValueFlowsThroughCand(through, node, false) and
+       parameterValueFlowArgCand(p, through, read)
+     )
+   }
+
+   pragma[nomagic]
+   private predicate parameterValueFlowArgCand(ParamNode p, ArgNode arg, boolean read) {
+     parameterValueFlowCand(p, arg, read)
+   }
+
+   pragma[nomagic]
+   predicate parameterValueFlowsToPreUpdateCand(ParamNode p, PostUpdateNode n) {
+     parameterValueFlowCand(p, n.getPreUpdateNode(), false)
+   }
+
+   /**
+    * Holds if `p` can reach a return node of kind `kind` within the same
+    * callable by taking only value-preserving steps. Call contexts are not
+    * taken into account.
+    *
+    * `read` is `true` if it is contents of `p`, not `p` itself, that reaches
+    * the return node.
+    */
+   predicate parameterValueFlowReturnCand(ParamNode p, ReturnKind kind, boolean read) {
+     exists(ReturnNode ret |
+       kind = ret.getKind() and
+       parameterValueFlowCand(p, ret, read)
+     )
+   }
+
+   pragma[nomagic]
+   private predicate argumentValueFlowsThroughCand0(
+     DataFlowCall call, ArgNode arg, ReturnKind kind, boolean read
+   ) {
+     exists(ParamNode target | parameterValueFlowReturnCand(target, kind, read) |
+       viableParamArg(call, target, arg)
+     )
+   }
+
+   /**
+    * Holds if `arg` can reach `out` through a call by taking only value-preserving
+    * steps. Call contexts are not taken into account.
+    *
+    * `read` is `true` if it is contents of `arg`, not `arg` itself, that reaches `out`.
+    */
+   predicate argumentValueFlowsThroughCand(ArgNode arg, Node out, boolean read) {
+     exists(DataFlowCall call, ReturnKind kind |
+       out = getAnOutNode(call, kind) and
+       argumentValueFlowsThroughCand0(call, arg, kind, read)
+     )
+   }
+
+   /** Holds if `p` reaches `n`, and `p` also reaches some return node or pre-update node. */
+   predicate cand(ParamNode p, Node n) {
+     parameterValueFlowCand(p, n, _) and
+     (
+       parameterValueFlowsToPreUpdateCand(p, _) or
+       parameterValueFlowReturnCand(p, _, _)
+     )
+   }
+ }
+
+ /**
+ * The final flow-through calculation:
+ *
+ * - Calculated flow is either value-preserving (`read = TReadStepTypesNone()`)
+ * or summarized as a single read step with before and after types recorded
+ * in the `ReadStepTypesOption` parameter.
+ * - Types are checked using the `compatibleTypes()` relation.
+ */
+ private module Final {
+ /**
+  * Holds if `p` can reach `node` within the same callable by taking only
+  * value-preserving steps and at most one read step. Call contexts are not
+  * taken into account.
+  *
+  * When a read step is taken, `read` records the `Content`, the container
+  * type, and the content type; otherwise it is `TReadStepTypesNone()`.
+  */
+ predicate parameterValueFlow(ParamNode p, Node node, ReadStepTypesOption read) {
+   parameterValueFlow0(p, node, read) and
+   (
+     not node instanceof CastingNode or
+     // normal flow through
+     read = TReadStepTypesNone() and
+     compatibleTypes(getNodeDataFlowType(p), getNodeDataFlowType(node))
+     or
+     // getter
+     compatibleTypes(read.getContentType(), getNodeDataFlowType(node))
+   )
+ }
+
+ pragma[nomagic]
+ private predicate parameterValueFlow0(ParamNode p, Node node, ReadStepTypesOption read) {
+ p = node and
+ Cand::cand(p, _) and
+ read = TReadStepTypesNone()
+ or
+ // local flow: extend flow reaching `mid` by one local step; `read` is carried along unchanged
+ exists(Node mid |
+ parameterValueFlow(p, mid, read) and
+ simpleLocalFlowStep(mid, node)
+ )
+ or
+ // read: take the single read step, which requires that no read was taken before `mid`
+ exists(Node mid |
+ parameterValueFlow(p, mid, TReadStepTypesNone()) and
+ readStepWithTypes(mid, read.getContainerType(), read.getContent(), node,
+ read.getContentType()) and
+ Cand::parameterValueFlowReturnCand(p, _, true) and
+ compatibleTypes(getNodeDataFlowType(p), read.getContainerType())
+ )
+ or
+ parameterValueFlow0_0(TReadStepTypesNone(), p, node, read)
+ }
+
+ pragma[nomagic]
+ private predicate parameterValueFlow0_0(
+ ReadStepTypesOption mustBeNone, ParamNode p, Node node, ReadStepTypesOption read
+ ) {
+ // flow through: no prior read on the way to `arg`; `read` describes the flow through the call
+ exists(ArgNode arg |
+ parameterValueFlowArg(p, arg, mustBeNone) and
+ argumentValueFlowsThrough(arg, read, node)
+ )
+ or
+ // flow through: no read inside method; `read` describes the flow from `p` to `arg`
+ exists(ArgNode arg |
+ parameterValueFlowArg(p, arg, read) and
+ argumentValueFlowsThrough(arg, mustBeNone, node)
+ )
+ }
+
+ pragma[nomagic]
+ private predicate parameterValueFlowArg(ParamNode p, ArgNode arg, ReadStepTypesOption read) {
+ parameterValueFlow(p, arg, read) and
+ Cand::argumentValueFlowsThroughCand(arg, _, _)
+ }
+
+ pragma[nomagic]
+ private predicate argumentValueFlowsThrough0(
+ DataFlowCall call, ArgNode arg, ReturnKind kind, ReadStepTypesOption read
+ ) {
+ exists(ParamNode param | viableParamArg(call, param, arg) |
+ parameterValueFlowReturn(param, kind, read)
+ )
+ }
+
+ /**
+ * Holds if `arg` flows to `out` through a call using only
+ * value-preserving steps and possibly a single read step, not taking
+ * call contexts into account.
+ *
+ * If a read step was taken, then `read` captures the `Content`, the
+ * container type, and the content type; otherwise it is `TReadStepTypesNone()`.
+ */
+ pragma[nomagic]
+ predicate argumentValueFlowsThrough(ArgNode arg, ReadStepTypesOption read, Node out) {
+ exists(DataFlowCall call, ReturnKind kind |
+ argumentValueFlowsThrough0(call, arg, kind, read) and
+ out = getAnOutNode(call, kind)
+ |
+ // normal flow through: no read step, so the types of `arg` and `out` must be compatible
+ read = TReadStepTypesNone() and
+ compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(out))
+ or
+ // getter: `arg` must fit the container type and the content type must fit `out`
+ compatibleTypes(getNodeDataFlowType(arg), read.getContainerType()) and
+ compatibleTypes(read.getContentType(), getNodeDataFlowType(out))
+ )
+ }
+
+ /**
+ * Holds if content `c` of `arg` flows to `out` through a call, using only
+ * value-preserving steps and a single read step of `c`, not taking call
+ * contexts into account. This represents a getter-step.
+ */
+ predicate getterStep(ArgNode arg, Content c, Node out) {
+ exists(ReadStepTypesOption read | c = read.getContent() | argumentValueFlowsThrough(arg, read, out))
+ }
+
+ /**
+ * Holds if a return node of kind `kind` is reachable from `p` within the
+ * same callable using only value-preserving steps and possibly a single
+ * read step.
+ *
+ * `read` records the `Content`, the container type, and the content type
+ * of the read step, if one was taken.
+ */
+ private predicate parameterValueFlowReturn(
+ ParamNode p, ReturnKind kind, ReadStepTypesOption read
+ ) {
+ exists(ReturnNode returnNode |
+ returnNode.getKind() = kind and
+ parameterValueFlow(p, returnNode, read)
+ )
+ }
+ }
+
+ import Final
+ }
+
+ import FlowThrough
+
+ cached
+ private module DispatchWithCallContext {
+ /**
+ * Holds if the set of viable implementations that can be called by `call`
+ * might be improved by knowing the call context.
+ */
+ pragma[nomagic]
+ private predicate mayBenefitFromCallContextExt(DataFlowCall call, DataFlowCallable callable) {
+ mayBenefitFromCallContext(call, callable)
+ or
+ callEnclosingCallable(call, callable) and
+ exists(viableCallableLambda(call, TDataFlowCallSome(_)))
+ }
+
+ /**
+ * Gets a viable dispatch target of `call` in the context `ctx`. This is
+ * restricted to those `call`s for which a context might make a difference.
+ */
+ pragma[nomagic]
+ private DataFlowCallable viableImplInCallContextExt(DataFlowCall call, DataFlowCall ctx) {
+ result = viableImplInCallContext(call, ctx)
+ or
+ result = viableCallableLambda(call, TDataFlowCallSome(ctx))
+ or
+ exists(DataFlowCallable enclosing |
+ mayBenefitFromCallContextExt(call, enclosing) and
+ enclosing = viableCallableExt(ctx) and
+ result = viableCallableLambda(call, TDataFlowCallNone())
+ )
+ }
+
+ /**
+ * Holds if the call context `ctx` reduces the set of viable run-time
+ * dispatch targets of call `call` in `c`.
+ */
+ cached
+ predicate reducedViableImplInCallContext(DataFlowCall call, DataFlowCallable c, DataFlowCall ctx) {
+ exists(int tgts, int ctxtgts |
+ mayBenefitFromCallContextExt(call, c) and
+ c = viableCallableExt(ctx) and
+ ctxtgts = count(viableImplInCallContextExt(call, ctx)) and
+ tgts = strictcount(viableCallableExt(call)) and
+ ctxtgts < tgts
+ )
+ }
+
+ /**
+ * Gets a viable run-time dispatch target for the call `call` in the
+ * context `ctx`. This is restricted to pairs of `call` and `ctx` for which
+ * `reducedViableImplInCallContext` holds, that is, where `ctx` makes a difference.
+ */
+ cached
+ DataFlowCallable prunedViableImplInCallContext(DataFlowCall call, DataFlowCall ctx) {
+ reducedViableImplInCallContext(call, _, ctx) and
+ result = viableImplInCallContextExt(call, ctx)
+ }
+
+ /**
+ * Holds if flow returning from callable `c` to call `call` might return
+ * further and if this path restricts the set of call sites that can be
+ * returned to.
+ */
+ cached
+ predicate reducedViableImplInReturn(DataFlowCallable c, DataFlowCall call) {
+ exists(int tgts, int ctxtgts |
+ mayBenefitFromCallContextExt(call, _) and
+ c = viableCallableExt(call) and
+ ctxtgts = count(DataFlowCall ctx | c = viableImplInCallContextExt(call, ctx)) and
+ tgts = strictcount(DataFlowCall ctx | callEnclosingCallable(call, viableCallableExt(ctx))) and
+ ctxtgts < tgts
+ )
+ }
+
+ /**
+ * Gets a viable run-time dispatch target for the call `call` in the
+ * context `ctx`. Only targets for which `reducedViableImplInReturn` holds
+ * are included, that is, targets whose return flow to `call` restricts
+ * the possible contexts `ctx`.
+ */
+ cached
+ DataFlowCallable prunedViableImplInCallContextReverse(DataFlowCall call, DataFlowCall ctx) {
+ exists(DataFlowCallable target | target = viableImplInCallContextExt(call, ctx) |
+ reducedViableImplInReturn(target, call) and result = target)
+ }
+ }
+
+ import DispatchWithCallContext
+
+ /**
+ * Holds if `p` can flow to the pre-update node associated with post-update
+ * node `n`, in the same callable, using only value-preserving steps.
+ */
+ private predicate parameterValueFlowsToPreUpdate(ParamNode p, PostUpdateNode n) {
+ parameterValueFlow(p, n.getPreUpdateNode(), TReadStepTypesNone())
+ }
+
+ private predicate store(
+ Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType
+ ) {
+ storeStep(node1, c, node2) and
+ contentType = getNodeDataFlowType(node1) and
+ containerType = getNodeDataFlowType(node2)
+ or
+ exists(Node n1, Node n2 |
+ n1 = node1.(PostUpdateNode).getPreUpdateNode() and
+ n2 = node2.(PostUpdateNode).getPreUpdateNode()
+ |
+ argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, c, contentType), n1)
+ or
+ read(n2, c, n1) and
+ contentType = getNodeDataFlowType(n1) and
+ containerType = getNodeDataFlowType(n2)
+ )
+ }
+
+ cached
+ predicate read(Node node1, Content c, Node node2) { readStep(node1, c, node2) }
+
+ /**
+ * Holds if data can flow from `node1` to `node2` via a direct assignment to
+ * the content of `tc`, where `contentType` is the data-flow type of the stored content.
+ *
+ * This includes reverse steps through reads when the result of the read has
+ * been stored into, in order to handle cases like `x.f1.f2 = y`.
+ */
+ cached
+ predicate store(Node node1, TypedContent tc, Node node2, DataFlowType contentType) {
+ store(node1, tc.getContent(), node2, contentType, tc.getContainerType())
+ }
+
+ /**
+ * Holds if data can flow from `fromNode` to `toNode` because they are the post-update
+ * nodes of some function output and input respectively, where the output and input
+ * are aliases. A typical example is a function returning `this`, implementing a fluent
+ * interface.
+ */
+ private predicate reverseStepThroughInputOutputAlias(
+ PostUpdateNode fromNode, PostUpdateNode toNode
+ ) {
+ exists(Node fromPre, Node toPre |
+ fromPre = fromNode.getPreUpdateNode() and
+ toPre = toNode.getPreUpdateNode()
+ |
+ exists(DataFlowCall c |
+ // Case where the language-specific `simpleLocalFlowStep` already models flow
+ // from an argument of call `c` (`toPre`) to an out node of `c` (`fromPre`).
+ fromPre = getAnOutNode(c, _) and
+ toPre.(ArgNode).argumentOf(c, _) and
+ simpleLocalFlowStep(toPre.(ArgNode), fromPre)
+ )
+ or
+ argumentValueFlowsThrough(toPre, TReadStepTypesNone(), fromPre)
+ )
+ }
+
+ cached
+ predicate simpleLocalFlowStepExt(Node node1, Node node2) {
+ simpleLocalFlowStep(node1, node2) or
+ reverseStepThroughInputOutputAlias(node1, node2)
+ }
+
+ /**
+ * Holds if the call context `call` improves virtual dispatch in `callable`.
+ */
+ cached
+ predicate recordDataFlowCallSiteDispatch(DataFlowCall call, DataFlowCallable callable) {
+ reducedViableImplInCallContext(_, callable, call)
+ }
+
+ /**
+ * Holds if the call context `call` allows us to prune unreachable nodes in `callable`.
+ */
+ cached
+ predicate recordDataFlowCallSiteUnreachable(DataFlowCall call, DataFlowCallable callable) {
+ exists(Node n | getNodeEnclosingCallable(n) = callable | isUnreachableInCallCached(n, call))
+ }
+
+ cached
+ newtype TCallContext =
+ TAnyCallContext() or
+ TSpecificCall(DataFlowCall call) { recordDataFlowCallSite(call, _) } or
+ TSomeCall() or
+ TReturn(DataFlowCallable c, DataFlowCall call) { reducedViableImplInReturn(c, call) }
+
+ cached
+ newtype TReturnPosition =
+ TReturnPosition0(DataFlowCallable c, ReturnKindExt kind) {
+ exists(ReturnNodeExt ret |
+ c = returnNodeGetEnclosingCallable(ret) and
+ kind = ret.getKind()
+ )
+ }
+
+ cached
+ newtype TLocalFlowCallContext =
+ TAnyLocalCall() or
+ TSpecificLocalCall(DataFlowCall call) { isUnreachableInCallCached(_, call) }
+
+ cached
+ newtype TReturnKindExt =
+ TValueReturn(ReturnKind kind) or
+ TParamUpdate(int pos) { exists(ParamNode p | p.isParameterOf(_, pos)) }
+
+ cached
+ newtype TBooleanOption =
+ TBooleanNone() or
+ TBooleanSome(boolean b) { b = true or b = false }
+
+ cached
+ newtype TDataFlowCallOption =
+ TDataFlowCallNone() or
+ TDataFlowCallSome(DataFlowCall call)
+
+ cached
+ newtype TTypedContent = MkTypedContent(Content c, DataFlowType t) { store(_, c, _, _, t) }
+
+ cached
+ newtype TAccessPathFront =
+ TFrontNil(DataFlowType t) or
+ TFrontHead(TypedContent tc)
+
+ cached
+ newtype TAccessPathFrontOption =
+ TAccessPathFrontNone() or
+ TAccessPathFrontSome(AccessPathFront apf)
+}
+
+/**
+ * Holds if the call context `call` either improves virtual dispatch in
+ * `callable` or if it allows us to prune unreachable nodes in `callable`.
+ */
+predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
+ recordDataFlowCallSiteDispatch(call, callable) or
+ recordDataFlowCallSiteUnreachable(call, callable)
+}
+
+/**
+ * A `Node` at which a cast can occur such that the type should be checked.
+ */
+class CastingNode extends Node {
+ CastingNode() { castingNode(this) }
+}
+
+private predicate readStepWithTypes(
+ Node n1, DataFlowType container, Content c, Node n2, DataFlowType content
+) {
+ read(n1, c, n2) and
+ container = getNodeDataFlowType(n1) and
+ content = getNodeDataFlowType(n2)
+}
+
+private newtype TReadStepTypesOption =
+ TReadStepTypesNone() or
+ TReadStepTypesSome(DataFlowType container, Content c, DataFlowType content) {
+ readStepWithTypes(_, container, c, _, content)
+ }
+
+private class ReadStepTypesOption extends TReadStepTypesOption {
+ predicate isSome() { this instanceof TReadStepTypesSome }
+
+ DataFlowType getContainerType() { this = TReadStepTypesSome(result, _, _) }
+
+ Content getContent() { this = TReadStepTypesSome(_, result, _) }
+
+ DataFlowType getContentType() { this = TReadStepTypesSome(_, _, result) }
+
+ string toString() { if this.isSome() then result = "Some(..)" else result = "None()" }
+}
+
+/**
+ * A call context to restrict the targets of virtual dispatch, prune local flow,
+ * and match the call sites of flow into a method with flow out of a method.
+ *
+ * There are four cases:
+ * - `TAnyCallContext()` : No restrictions on method flow.
+ * - `TSpecificCall(DataFlowCall call)` : Flow entered through the
+ * given `call`. This call improves the set of viable
+ * dispatch targets for at least one method call in the current callable
+ * or helps prune unreachable nodes in the current callable.
+ * - `TSomeCall()` : Flow entered through a parameter. The
+ * originating call neither improves the set of dispatch targets for any method
+ * call in the current callable nor helps prune it, and was therefore not recorded.
+ * - `TReturn(DataFlowCallable c, DataFlowCall call)` : Flow reached `call` from `c` and
+ * this dispatch target of `call` implies a reduced set of dispatch origins
+ * to which data may flow if it should reach a `return` statement.
+ */
+abstract class CallContext extends TCallContext {
+ abstract string toString();
+
+ /** Holds if this call context is relevant for `callable`. */
+ abstract predicate relevantFor(DataFlowCallable callable);
+}
+
+abstract class CallContextNoCall extends CallContext { }
+
+class CallContextAny extends CallContextNoCall, TAnyCallContext {
+ override string toString() { result = "CcAny" }
+
+ override predicate relevantFor(DataFlowCallable callable) { any() }
+}
+
+abstract class CallContextCall extends CallContext {
+ /** Holds if this call context may be `call`. */
+ bindingset[call]
+ abstract predicate matchesCall(DataFlowCall call);
+}
+
+/** A call context recording the specific call through which flow entered the callable. */
+class CallContextSpecificCall extends CallContextCall, TSpecificCall {
+ override string toString() { result = "CcCall(" + this.getCall() + ")" }
+
+ override predicate relevantFor(DataFlowCallable callable) {
+ recordDataFlowCallSite(this.getCall(), callable)
+ }
+
+ override predicate matchesCall(DataFlowCall call) { call = this.getCall() }
+
+ /** Gets the call that this context represents. */
+ DataFlowCall getCall() { this = TSpecificCall(result) }
+}
+
+class CallContextSomeCall extends CallContextCall, TSomeCall {
+ override string toString() { result = "CcSomeCall" }
+
+ override predicate relevantFor(DataFlowCallable callable) {
+ exists(ParamNode p | getNodeEnclosingCallable(p) = callable)
+ }
+
+ override predicate matchesCall(DataFlowCall call) { any() }
+}
+
+class CallContextReturn extends CallContextNoCall, TReturn {
+ override string toString() {
+ exists(DataFlowCall call | this = TReturn(_, call) | result = "CcReturn(" + call + ")")
+ }
+
+ override predicate relevantFor(DataFlowCallable callable) {
+ exists(DataFlowCall call | this = TReturn(_, call) and callEnclosingCallable(call, callable))
+ }
+}
+
+/**
+ * A call context that is relevant for pruning local flow.
+ */
+abstract class LocalCallContext extends TLocalFlowCallContext {
+ abstract string toString();
+
+ /** Holds if this call context is relevant for `callable`. */
+ abstract predicate relevantFor(DataFlowCallable callable);
+}
+
+class LocalCallContextAny extends LocalCallContext, TAnyLocalCall {
+ override string toString() { result = "LocalCcAny" }
+
+ override predicate relevantFor(DataFlowCallable callable) { any() }
+}
+
+class LocalCallContextSpecificCall extends LocalCallContext, TSpecificLocalCall {
+ LocalCallContextSpecificCall() { this = TSpecificLocalCall(call) }
+
+ DataFlowCall call;
+
+ DataFlowCall getCall() { result = call }
+
+ override string toString() { result = "LocalCcCall(" + call + ")" }
+
+ override predicate relevantFor(DataFlowCallable callable) { relevantLocalCCtx(call, callable) }
+}
+
+private predicate relevantLocalCCtx(DataFlowCall call, DataFlowCallable callable) {
+ exists(Node n | getNodeEnclosingCallable(n) = callable and isUnreachableInCallCached(n, call))
+}
+
+/**
+ * Gets the local call context for `callable` given call context `ctx`: the specific call
+ * of `ctx` if it makes some node in `callable` unreachable, and the "any" context otherwise.
+ */
+LocalCallContext getLocalCallContext(CallContext ctx, DataFlowCallable callable) {
+ ctx.relevantFor(callable) and
+ if relevantLocalCCtx(ctx.(CallContextSpecificCall).getCall(), callable)
+ then result.(LocalCallContextSpecificCall).getCall() = ctx.(CallContextSpecificCall).getCall()
+ else result instanceof LocalCallContextAny
+}
+
+/**
+ * The value of a parameter at function entry, viewed as a node in a data
+ * flow graph.
+ */
+class ParamNode extends Node {
+ ParamNode() { parameterNode(this, _, _) }
+
+ /**
+ * Holds if this node is the parameter of callable `c` at the specified
+ * (zero-based) position.
+ */
+ predicate isParameterOf(DataFlowCallable c, int i) { parameterNode(this, c, i) }
+}
+
+/** A data-flow node that represents a call argument. */
+class ArgNode extends Node {
+ ArgNode() { argumentNode(this, _, _) }
+
+ /** Holds if this argument occurs at the given position in the given call. */
+ final predicate argumentOf(DataFlowCall call, int pos) { argumentNode(this, call, pos) }
+}
+
+/**
+ * A node from which flow can return to the caller. This is either a regular
+ * `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter.
+ */
+class ReturnNodeExt extends Node {
+ ReturnNodeExt() { returnNodeExt(this, _) }
+
+ /** Gets the kind of this returned value. */
+ ReturnKindExt getKind() { returnNodeExt(this, result) }
+}
+
+/**
+ * A node to which data can flow from a call. Either an ordinary out node
+ * or a post-update node associated with a call argument.
+ */
+class OutNodeExt extends Node {
+ OutNodeExt() { outNodeExt(this) }
+}
+
+/**
+ * An extended return kind. A return kind describes how data can be returned
+ * from a callable. This can either be through a returned value or an updated
+ * parameter.
+ */
+abstract class ReturnKindExt extends TReturnKindExt {
+ /** Gets a textual representation of this return kind. */
+ abstract string toString();
+
+ /** Gets a node corresponding to data flow out of `call`. */
+ final OutNodeExt getAnOutNode(DataFlowCall call) { result = getAnOutNodeExt(call, this) }
+}
+
+class ValueReturnKind extends ReturnKindExt, TValueReturn {
+ private ReturnKind kind;
+
+ ValueReturnKind() { this = TValueReturn(kind) }
+
+ ReturnKind getKind() { result = kind }
+
+ override string toString() { result = kind.toString() }
+}
+
+class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate {
+ private int pos;
+
+ ParamUpdateReturnKind() { this = TParamUpdate(pos) }
+
+ int getPosition() { result = pos }
+
+ override string toString() { result = "param update " + pos }
+}
+
+/** A callable tagged with a relevant return kind. */
+class ReturnPosition extends TReturnPosition0 {
+ private DataFlowCallable c;
+ private ReturnKindExt kind;
+
+ ReturnPosition() { this = TReturnPosition0(c, kind) }
+
+ /** Gets the callable. */
+ DataFlowCallable getCallable() { result = c }
+
+ /** Gets the return kind. */
+ ReturnKindExt getKind() { result = kind }
+
+ /** Gets a textual representation of this return position. */
+ string toString() { result = "[" + kind + "] " + c }
+}
+
+/**
+ * Gets the enclosing callable of `n`. Unlike `n.getEnclosingCallable()`, this
+ * predicate ensures that joins go from `n` to the result instead of the other
+ * way around.
+ */
+pragma[inline]
+DataFlowCallable getNodeEnclosingCallable(Node n) {
+ nodeEnclosingCallable(pragma[only_bind_out](n), pragma[only_bind_into](result))
+}
+
+/** Gets the type of `n` used for type pruning. */
+pragma[inline]
+DataFlowType getNodeDataFlowType(Node n) {
+ nodeDataFlowType(pragma[only_bind_out](n), pragma[only_bind_into](result))
+}
+
+pragma[noinline]
+private DataFlowCallable returnNodeGetEnclosingCallable(ReturnNodeExt ret) {
+ result = getNodeEnclosingCallable(ret)
+}
+
+pragma[noinline]
+private ReturnPosition getReturnPosition0(ReturnNodeExt ret, ReturnKindExt kind) {
+ result.getCallable() = returnNodeGetEnclosingCallable(ret) and
+ kind = result.getKind()
+}
+
+pragma[noinline]
+ReturnPosition getReturnPosition(ReturnNodeExt ret) {
+ result = getReturnPosition0(ret, ret.getKind())
+}
+
+/**
+ * Checks whether `inner` can return to `call` in the call context `innercc`.
+ * Assumes a context of `inner = viableCallableExt(call)`.
+ */
+bindingset[innercc, inner, call]
+predicate checkCallContextReturn(CallContext innercc, DataFlowCallable inner, DataFlowCall call) {
+ innercc instanceof CallContextAny
+ or
+ exists(DataFlowCallable c0, DataFlowCall call0 |
+ callEnclosingCallable(call0, inner) and
+ innercc = TReturn(c0, call0) and
+ c0 = prunedViableImplInCallContextReverse(call0, call)
+ )
+}
+
+/**
+ * Checks whether `call` can resolve to `calltarget` in the call context `cc`.
+ * Assumes a context of `calltarget = viableCallableExt(call)`.
+ */
+bindingset[cc, call, calltarget]
+predicate checkCallContextCall(CallContext cc, DataFlowCall call, DataFlowCallable calltarget) {
+ exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
+ if reducedViableImplInCallContext(call, _, ctx)
+ then calltarget = prunedViableImplInCallContext(call, ctx)
+ else any()
+ )
+ or
+ cc instanceof CallContextSomeCall
+ or
+ cc instanceof CallContextAny
+ or
+ cc instanceof CallContextReturn
+}
+
+/**
+ * Resolves a return from `callable` in `cc` to `call`. This is equivalent to
+ * `callable = viableCallableExt(call) and checkCallContextReturn(cc, callable, call)`.
+ */
+bindingset[cc, callable]
+predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall call) {
+ cc instanceof CallContextAny and callable = viableCallableExt(call)
+ or
+ exists(DataFlowCallable c0, DataFlowCall call0 |
+ callEnclosingCallable(call0, callable) and
+ cc = TReturn(c0, call0) and
+ c0 = prunedViableImplInCallContextReverse(call0, call)
+ )
+}
+
+/**
+ * Resolves a call from `call` in `cc` to `result`. This is equivalent to
+ * `result = viableCallableExt(call) and checkCallContextCall(cc, call, result)`.
+ */
+bindingset[call, cc]
+DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
+ exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
+ if reducedViableImplInCallContext(call, _, ctx)
+ then result = prunedViableImplInCallContext(call, ctx)
+ else result = viableCallableExt(call)
+ )
+ or
+ result = viableCallableExt(call) and cc instanceof CallContextSomeCall
+ or
+ result = viableCallableExt(call) and cc instanceof CallContextAny
+ or
+ result = viableCallableExt(call) and cc instanceof CallContextReturn
+}
+
+/** A Boolean value that may be absent. */
+class BooleanOption extends TBooleanOption {
+ string toString() {
+ exists(boolean value | this = TBooleanSome(value) | result = value.toString())
+ or
+ this = TBooleanNone() and result = ""
+ }
+}
+
+/** An optional `DataFlowCall`. */
+class DataFlowCallOption extends TDataFlowCallOption {
+ string toString() {
+ this = TDataFlowCallNone() and
+ result = "(none)"
+ or
+ exists(DataFlowCall call |
+ this = TDataFlowCallSome(call) and
+ result = call.toString()
+ )
+ }
+}
+
+/** Content tagged with the type of a containing object. */
+class TypedContent extends MkTypedContent {
+ private Content c;
+ private DataFlowType t;
+
+ TypedContent() { this = MkTypedContent(c, t) }
+
+ /** Gets the content. */
+ Content getContent() { result = c }
+
+ /** Gets the container type. */
+ DataFlowType getContainerType() { result = t }
+
+ /** Gets a textual representation of this content. */
+ string toString() { result = c.toString() }
+
+ /**
+ * Holds if access paths with this `TypedContent` at their head always should
+ * be tracked at high precision. This disables adaptive access path precision
+ * for such access paths.
+ */
+ predicate forceHighPrecision() { forceHighPrecision(c) }
+}
+
+/** The front of an access path. This is either a head or a nil. */
+abstract class AccessPathFront extends TAccessPathFront {
+ abstract string toString();
+
+ abstract DataFlowType getType();
+
+ abstract boolean toBoolNonEmpty();
+
+ /** Gets the head of this front, if it is a `TFrontHead`. */
+ TypedContent getHead() { this = TFrontHead(result) }
+
+ /** Holds if `clearsContentCached` holds for `n` and the content at the head of this front. */
+ predicate isClearedAt(Node n) { clearsContentCached(n, this.getHead().getContent()) }
+}
+
+class AccessPathFrontNil extends AccessPathFront, TFrontNil {
+ private DataFlowType t;
+
+ AccessPathFrontNil() { this = TFrontNil(t) }
+
+ override string toString() { result = ppReprType(t) }
+
+ override DataFlowType getType() { result = t }
+
+ override boolean toBoolNonEmpty() { result = false }
+}
+
+class AccessPathFrontHead extends AccessPathFront, TFrontHead {
+ private TypedContent tc;
+
+ AccessPathFrontHead() { this = TFrontHead(tc) }
+
+ override string toString() { result = tc.toString() }
+
+ override DataFlowType getType() { result = tc.getContainerType() }
+
+ override boolean toBoolNonEmpty() { result = true }
+}
+
+/** An access path front that may be absent. */
+class AccessPathFrontOption extends TAccessPathFrontOption {
+ string toString() {
+ exists(AccessPathFront front | this = TAccessPathFrontSome(front) | result = front.toString())
+ or
+ this = TAccessPathFrontNone() and result = ""
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplConsistency.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplConsistency.qll
new file mode 100644
index 00000000000..a55e65a81f6
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplConsistency.qll
@@ -0,0 +1,181 @@
+/**
+ * Provides consistency queries for checking invariants in the language-specific
+ * data-flow classes and predicates.
+ */
+
+private import DataFlowImplSpecific::Private
+private import DataFlowImplSpecific::Public
+private import tainttracking1.TaintTrackingParameter::Private
+private import tainttracking1.TaintTrackingParameter::Public
+
+module Consistency {
+ private class RelevantNode extends Node {
+ RelevantNode() {
+ this instanceof ArgumentNode or
+ this instanceof ParameterNode or
+ this instanceof ReturnNode or
+ this = getAnOutNode(_, _) or
+ simpleLocalFlowStep(this, _) or
+ simpleLocalFlowStep(_, this) or
+ jumpStep(this, _) or
+ jumpStep(_, this) or
+ storeStep(this, _, _) or
+ storeStep(_, _, this) or
+ readStep(this, _, _) or
+ readStep(_, _, this) or
+ defaultAdditionalTaintStep(this, _) or
+ defaultAdditionalTaintStep(_, this)
+ }
+ }
+
+ /** Reports relevant nodes whose number of enclosing callables is not exactly one. */
+ query predicate uniqueEnclosingCallable(Node n, string msg) {
+ exists(int callables | callables = count(n.getEnclosingCallable()) |
+ n instanceof RelevantNode and
+ callables != 1 and
+ msg = "Node should have one enclosing callable but has " + callables + "."
+ )
+ }
+
+ /** Reports relevant nodes that do not have exactly one type according to `getNodeType`. */
+ query predicate uniqueType(Node n, string msg) {
+ exists(int types | types = count(getNodeType(n)) |
+ n instanceof RelevantNode and
+ types != 1 and
+ msg = "Node should have one type but has " + types + "."
+ )
+ }
+
+ query predicate uniqueNodeLocation(Node n, string msg) {
+ exists(int c |
+ c =
+ count(string filepath, int startline, int startcolumn, int endline, int endcolumn |
+ n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ ) and
+ c != 1 and
+ msg = "Node should have one location but has " + c + "."
+ )
+ }
+
+ query predicate missingLocation(string msg) {
+ exists(int c |
+ c =
+ strictcount(Node n |
+ not exists(string filepath, int startline, int startcolumn, int endline, int endcolumn |
+ n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ )
+ ) and
+ msg = "Nodes without location: " + c
+ )
+ }
+
+ query predicate uniqueNodeToString(Node n, string msg) {
+ exists(int c |
+ c = count(n.toString()) and
+ c != 1 and
+ msg = "Node should have one toString but has " + c + "."
+ )
+ }
+
+ query predicate missingToString(string msg) {
+ exists(int c |
+ c = strictcount(Node n | not exists(n.toString())) and
+ msg = "Nodes without toString: " + c
+ )
+ }
+
+ query predicate parameterCallable(ParameterNode p, string msg) {
+ exists(DataFlowCallable c | p.isParameterOf(c, _) and c != p.getEnclosingCallable()) and
+ msg = "Callable mismatch for parameter."
+ }
+
+ query predicate localFlowIsLocal(Node n1, Node n2, string msg) {
+ simpleLocalFlowStep(n1, n2) and
+ n1.getEnclosingCallable() != n2.getEnclosingCallable() and
+ msg = "Local flow step does not preserve enclosing callable."
+ }
+
+ private DataFlowType typeRepr() { result = getNodeType(_) }
+
+ query predicate compatibleTypesReflexive(DataFlowType t, string msg) {
+ t = typeRepr() and
+ not compatibleTypes(t, t) and
+ msg = "Type compatibility predicate is not reflexive."
+ }
+
+ /** Reports nodes unreachable in a call whose viable callables exclude the node's callable. */
+ query predicate unreachableNodeCCtx(Node n, DataFlowCall call, string msg) {
+ isUnreachableInCall(n, call) and
+ exists(DataFlowCallable enclosing | enclosing = n.getEnclosingCallable() |
+ not enclosing = viableCallable(call)
+ ) and
+ msg = "Call context for isUnreachableInCall is inconsistent with call graph."
+ }
+
+ query predicate localCallNodes(DataFlowCall call, Node n, string msg) {
+ (
+ n = getAnOutNode(call, _) and
+ msg = "OutNode and call does not share enclosing callable."
+ or
+ n.(ArgumentNode).argumentOf(call, _) and
+ msg = "ArgumentNode and call does not share enclosing callable."
+ ) and
+ n.getEnclosingCallable() != call.getEnclosingCallable()
+ }
+
+ // This predicate helps the compiler forget that in some languages
+ // it is impossible for a result of `getPreUpdateNode` to be an
+ // instance of `PostUpdateNode`.
+ private Node getPre(PostUpdateNode n) {
+ result = n.getPreUpdateNode()
+ or
+ none()
+ }
+
+ query predicate postIsNotPre(PostUpdateNode n, string msg) {
+ getPre(n) = n and
+ msg = "PostUpdateNode should not equal its pre-update node."
+ }
+
+ query predicate postHasUniquePre(PostUpdateNode n, string msg) {
+ exists(int c |
+ c = count(n.getPreUpdateNode()) and
+ c != 1 and
+ msg = "PostUpdateNode should have one pre-update node but has " + c + "."
+ )
+ }
+
+ query predicate uniquePostUpdate(Node n, string msg) {
+ 1 < strictcount(PostUpdateNode post | post.getPreUpdateNode() = n) and
+ msg = "Node has multiple PostUpdateNodes."
+ }
+
+ query predicate postIsInSameCallable(PostUpdateNode n, string msg) {
+ n.getEnclosingCallable() != n.getPreUpdateNode().getEnclosingCallable() and
+ msg = "PostUpdateNode does not share callable with its pre-update node."
+ }
+
+ private predicate hasPost(Node n) { exists(PostUpdateNode post | post.getPreUpdateNode() = n) }
+
+ query predicate reverseRead(Node n, string msg) {
+ not hasPost(n) and exists(Node target | hasPost(target) | readStep(n, _, target)) and
+ msg = "Origin of readStep is missing a PostUpdateNode."
+ }
+
+ query predicate argHasPostUpdate(ArgumentNode n, string msg) {
+ not hasPost(n) and
+ not isImmutableOrUnobservable(n) and
+ msg = "ArgumentNode is missing PostUpdateNode."
+ }
+
+ // This predicate helps the compiler forget that in some languages
+ // it is impossible for a `PostUpdateNode` to be the target of
+ // `simpleLocalFlowStep`.
+ private predicate isPostUpdateNode(Node n) { n instanceof PostUpdateNode or none() }
+
+ query predicate postWithInFlow(Node n, string msg) {
+ isPostUpdateNode(n) and
+ simpleLocalFlowStep(_, n) and
+ msg = "PostUpdateNode should not be the target of local flow."
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplSpecific.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplSpecific.qll
new file mode 100644
index 00000000000..e78a0814a14
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplSpecific.qll
@@ -0,0 +1,11 @@
+/**
+ * Provides Ruby-specific definitions for use in the data flow library.
+ */
+module Private {
+ import DataFlowPrivate
+ import DataFlowDispatch
+}
+
+module Public {
+ import DataFlowPublic
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll
new file mode 100644
index 00000000000..5850939fb50
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll
@@ -0,0 +1,799 @@
+private import ruby
+private import codeql.ruby.CFG
+private import codeql.ruby.dataflow.SSA
+private import DataFlowPublic
+private import DataFlowDispatch
+private import SsaImpl as SsaImpl
+private import FlowSummaryImpl as FlowSummaryImpl
+
+abstract class NodeImpl extends Node {
+ /** Gets the CFG scope of this node, if any. Do not call: use `getEnclosingCallable()` instead. */
+ abstract CfgScope getCfgScope();
+
+ /** Gets the location of this node, if any. Do not call: use `getLocation()` instead. */
+ abstract Location getLocationImpl();
+
+ /** Gets a textual representation of this node. Do not call: use `toString()` instead. */
+ abstract string toStringImpl();
+}
+
+private class ExprNodeImpl extends ExprNode, NodeImpl {
+ override CfgScope getCfgScope() { result = getExprNode().getExpr().getCfgScope() }
+
+ override Location getLocationImpl() { result = getExprNode().getLocation() }
+
+ override string toStringImpl() { result = getExprNode().toString() }
+}
+
+/** Provides predicates related to local data flow. */
+module LocalFlow {
+ private import codeql.ruby.dataflow.internal.SsaImpl
+
+ /**
+ * Holds if `nodeFrom` is a last reference to SSA definition `def` (the
+ * definition itself or a variable read) from which `next` can be reached.
+ */
+ private predicate localFlowSsaInput(Node nodeFrom, Ssa::Definition def, Ssa::Definition next) {
+ exists(BasicBlock block, int idx | lastRefBeforeRedef(def, block, idx, next) |
+ exists(CfgNodes::ExprCfgNode read |
+ read.getExpr() instanceof VariableReadAccess and
+ read = block.getNode(idx) and
+ read = nodeFrom.asExpr()
+ )
+ or
+ def.definesAt(_, block, idx) and
+ nodeFrom.(SsaDefinitionNode).getDefinition() = def
+ )
+ }
+
+ /** Gets the SSA definition node located at the defining access of parameter `p`. */
+ SsaDefinitionNode getParameterDefNode(NamedParameter p) {
+ exists(BasicBlock block, int idx |
+ result.getDefinition().definesAt(_, block, idx) and
+ p.getDefiningAccess() = block.getNode(idx).getNode()
+ )
+ }
+
+ /**
+ * Holds if there is a local flow step from `nodeFrom` to `nodeTo` involving
+ * SSA definition `def`.
+ */
+ predicate localSsaFlowStep(Ssa::Definition def, Node nodeFrom, Node nodeTo) {
+ // Flow from the assigned expression into the SSA write definition
+ def.(Ssa::WriteDefinition).assigns(nodeFrom.asExpr()) and
+ nodeTo.(SsaDefinitionNode).getDefinition() = def
+ or
+ // Flow from an SSA definition to each of its first reads
+ def = nodeFrom.(SsaDefinitionNode).getDefinition() and
+ nodeTo.asExpr() = def.getAFirstRead()
+ or
+ // Flow from a read (or the post-update node of that read) to the next adjacent read
+ exists(
+ CfgNodes::ExprNodes::VariableReadAccessCfgNode read1,
+ CfgNodes::ExprNodes::VariableReadAccessCfgNode read2
+ |
+ def.hasAdjacentReads(read1, read2) and
+ nodeTo.asExpr() = read2
+ |
+ nodeFrom.asExpr() = read1
+ or
+ read1 = nodeFrom.(PostUpdateNode).getPreUpdateNode().asExpr()
+ )
+ or
+ // Flow from a last reference of an input definition into a phi node
+ exists(Ssa::PhiNode phi |
+ localFlowSsaInput(nodeFrom, def, phi) and
+ phi = nodeTo.(SsaDefinitionNode).getDefinition() and
+ def = phi.getAnInput()
+ )
+ // TODO: flow into uncertain SSA definitions is not handled yet; sketch:
+ // or
+ // // Flow into uncertain SSA definition
+ // exists(LocalFlow::UncertainExplicitSsaDefinition uncertain |
+ // localFlowSsaInput(nodeFrom, def, uncertain) and
+ // uncertain = nodeTo.(SsaDefinitionNode).getDefinition() and
+ // def = uncertain.getPriorDefinition()
+ // )
+ }
+}
+
+/** An expression that is an argument of some call, as enumerated by `Call.getArgument`. */
+private class Argument extends Expr {
+ private Call call;
+ private int arg;
+
+ Argument() { this = call.getArgument(arg) }
+
+ /** Holds if this expression is the `i`th argument of `c`. */
+ predicate isArgumentOf(Expr c, int i) { c = call and i = arg }
+}
+
+/** A collection of cached types and predicates to be evaluated in the same stage. */
+cached
+private module Cached {
+ cached
+ newtype TNode =
+ TExprNode(CfgNodes::ExprCfgNode n) or
+ TReturningNode(CfgNodes::ReturningCfgNode n) or
+ TSynthReturnNode(CfgScope scope, ReturnKind kind) { // one per scope and return kind that has a syntactic return
+ exists(ReturningNode ret |
+ ret.(NodeImpl).getCfgScope() = scope and
+ ret.getKind() = kind
+ )
+ } or
+ TSsaDefinitionNode(Ssa::Definition def) or
+ TNormalParameterNode(Parameter p) { not p instanceof BlockParameter } or
+ TSelfParameterNode(MethodBase m) or
+ TBlockParameterNode(MethodBase m) or // one per method, whether or not it declares a block parameter
+ TExprPostUpdateNode(CfgNodes::ExprCfgNode n) { // only for non-block arguments and call receivers
+ exists(AstNode node | node = n.getNode() |
+ node instanceof Argument and
+ not node instanceof BlockArgument
+ or
+ n = any(CfgNodes::ExprNodes::CallCfgNode call).getReceiver()
+ )
+ } or
+ TSummaryNode(
+ FlowSummaryImpl::Public::SummarizedCallable c,
+ FlowSummaryImpl::Private::SummaryNodeState state
+ ) {
+ FlowSummaryImpl::Private::summaryNodeRange(c, state)
+ } or
+ TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, int i) {
+ FlowSummaryImpl::Private::summaryParameterNodeRange(c, i)
+ }
+
+ class TParameterNode =
+ TSummaryParameterNode or TSelfParameterNode or TBlockParameterNode or TNormalParameterNode;
+
+ private predicate defaultValueFlow(NamedParameter p, ExprNode e) {
+ e.getExprNode().getExpr() = p.(KeywordParameter).getDefaultValue()
+ or
+ e.getExprNode().getExpr() = p.(OptionalParameter).getDefaultValue()
+ }
+
+ private predicate localFlowStepCommon(Node nodeFrom, Node nodeTo) {
+ LocalFlow::localSsaFlowStep(_, nodeFrom, nodeTo) // SSA-based steps between definitions and reads
+ or
+ nodeFrom.(SelfParameterNode).getMethod() = nodeTo.asExpr().getExpr().getEnclosingCallable() and
+ nodeTo.asExpr().getExpr() instanceof Self // `self` parameter to a `self` expression directly in that method
+ or
+ nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::AssignExprCfgNode).getRhs() // right-hand side into the assignment expression
+ or
+ nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::BlockArgumentCfgNode).getValue() // wrapped value into the block argument
+ or
+ nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::StmtSequenceCfgNode).getLastStmt() // last statement into its statement sequence
+ or
+ nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::ConditionalExprCfgNode).getBranch(_) // any branch into the conditional expression
+ or
+ nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::CaseExprCfgNode).getBranch(_) // any branch into the `case` expression
+ or
+ exists(CfgNodes::ExprCfgNode exprTo, ReturningStatementNode n | // `break` statement into the loop it exits
+ nodeFrom = n and
+ exprTo = nodeTo.asExpr() and
+ n.getReturningNode().getNode() instanceof BreakStmt and
+ exprTo.getNode() instanceof Loop and
+ nodeTo.asExpr().getAPredecessor(any(SuccessorTypes::BreakSuccessor s)) = n.getReturningNode()
+ )
+ or
+ nodeFrom.asExpr() = nodeTo.(ReturningStatementNode).getReturningNode().getReturnedValueNode() // returned value into its returning statement
+ or
+ nodeTo.asExpr() = // `for` expression that has some non-`break` incoming edge takes `for.getValue()`
+ any(CfgNodes::ExprNodes::ForExprCfgNode for |
+ exists(SuccessorType s |
+ not s instanceof SuccessorTypes::BreakSuccessor and
+ exists(for.getAPredecessor(s))
+ ) and
+ nodeFrom.asExpr() = for.getValue()
+ )
+ }
+
+ /**
+ * The local flow relation that global data flow is built on top of:
+ * the common steps plus parameter, synthetic-return and summary-local steps.
+ */
+ cached
+ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
+ FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, true)
+ or
+ nodeFrom = nodeTo.(SynthReturnNode).getAnInput()
+ or
+ LocalFlow::getParameterDefNode(nodeFrom.(ParameterNode).getParameter()) = nodeTo
+ or
+ defaultValueFlow(nodeTo.(ParameterNode).getParameter(), nodeFrom)
+ or
+ localFlowStepCommon(nodeFrom, nodeTo)
+ }
+
+ /** This is the local flow predicate that is exposed (as `localFlowStep` in `DataFlowPublic.qll`). */
+ cached
+ predicate localFlowStepImpl(Node nodeFrom, Node nodeTo) {
+ localFlowStepCommon(nodeFrom, nodeTo)
+ or
+ defaultValueFlow(nodeTo.(ParameterNode).getParameter(), nodeFrom)
+ or
+ nodeTo = LocalFlow::getParameterDefNode(nodeFrom.(ParameterNode).getParameter())
+ or
+ // Simple flow through library code is included in the exposed local
+ // step relation, even though flow is technically inter-procedural
+ FlowSummaryImpl::Private::Steps::summaryThroughStep(nodeFrom, nodeTo, true)
+ }
+
+ /** The local flow relation consumed by type tracking. */
+ cached
+ predicate localFlowStepTypeTracker(Node nodeFrom, Node nodeTo) {
+ exists(NamedParameter param |
+ nodeTo = LocalFlow::getParameterDefNode(param) and
+ defaultValueFlow(param, nodeFrom)
+ )
+ or
+ localFlowStepCommon(nodeFrom, nodeTo)
+ }
+
+ cached
+ predicate isLocalSourceNode(Node n) {
+ n instanceof ParameterNode
+ or
+ // This case should not be needed once we have proper use-use flow
+ // for `self`. At that point, the `self`s returned by `trackInstance`
+ // in `DataFlowDispatch.qll` should refer to the post-update node,
+ // and we can remove this case.
+ n instanceof SelfArgumentNode
+ or
+ not localFlowStepTypeTracker+(any(Node e | // no expression or parameter node reaches `n` via type-tracker local steps
+ e instanceof ExprNode
+ or
+ e instanceof ParameterNode
+ ), n)
+ }
+
+ cached
+ newtype TContent = TTodoContent() // stub: a single placeholder branch
+}
+
+import Cached
+
+/** Holds if path explanations should not display `n`. */
+predicate nodeIsHidden(Node n) {
+ n instanceof SynthReturnNode
+ or
+ n instanceof SummaryParameterNode
+ or
+ n instanceof SummaryNode
+ or
+ // Finally, any SSA definition node whose underlying definition is a
+ // phi node.
+ exists(Ssa::PhiNode phi | phi = n.(SsaDefinitionNode).getDefinition())
+}
+
+/** An SSA definition, viewed as a node in a data flow graph; location and text are those of the definition. */
+class SsaDefinitionNode extends NodeImpl, TSsaDefinitionNode {
+ Ssa::Definition def;
+
+ SsaDefinitionNode() { this = TSsaDefinitionNode(def) }
+
+ /** Gets the underlying SSA definition. */
+ Ssa::Definition getDefinition() { result = def }
+
+ override CfgScope getCfgScope() { result = def.getBasicBlock().getScope() } // scope of the definition's basic block
+
+ override Location getLocationImpl() { result = def.getLocation() }
+
+ override string toStringImpl() { result = def.toString() }
+}
+
+/**
+ * A value-returning statement, viewed as a node in a data flow graph.
+ *
+ * Note that because of control-flow splitting, one `ReturningStmt` may correspond
+ * to multiple `ReturningStatementNode`s, just like it may correspond to multiple
+ * `ControlFlow::Node`s.
+ */
+class ReturningStatementNode extends NodeImpl, TReturningNode {
+ CfgNodes::ReturningCfgNode n;
+
+ ReturningStatementNode() { this = TReturningNode(n) }
+
+ /** Gets the control-flow node of the returning statement corresponding to this node. */
+ CfgNodes::ReturningCfgNode getReturningNode() { result = n }
+
+ override CfgScope getCfgScope() { result = n.getScope() }
+
+ override Location getLocationImpl() { result = n.getLocation() }
+
+ override string toStringImpl() { result = n.toString() }
+}
+
+private module ParameterNodes {
+ abstract class ParameterNodeImpl extends ParameterNode, NodeImpl {
+ abstract predicate isSourceParameterOf(Callable c, int i);
+
+ override predicate isParameterOf(DataFlowCallable c, int i) {
+ exists(Callable source | source = c.asCallable() | this.isSourceParameterOf(source, i))
+ }
+ }
+
+ /**
+ * A data-flow node for the value that a normal (non-block) parameter holds
+ * at function entry.
+ */
+ class NormalParameterNode extends ParameterNodeImpl, TNormalParameterNode {
+ private Parameter param;
+
+ NormalParameterNode() { this = TNormalParameterNode(param) }
+
+ override Parameter getParameter() { result = param }
+
+ override predicate isSourceParameterOf(Callable c, int i) { param = c.getParameter(i) }
+
+ override CfgScope getCfgScope() { param.getCallable() = result }
+
+ override Location getLocationImpl() { param.getLocation() = result }
+
+ override string toStringImpl() { param.toString() = result }
+ }
+
+ /**
+ * The value of the `self` parameter at function entry, viewed as a node in a data
+ * flow graph.
+ */
+ class SelfParameterNode extends ParameterNodeImpl, TSelfParameterNode {
+ private MethodBase method;
+
+ SelfParameterNode() { this = TSelfParameterNode(method) }
+
+ final MethodBase getMethod() { result = method } // the method whose `self` this node represents
+
+ override predicate isSourceParameterOf(Callable c, int i) { method = c and i = -1 } // `self` uses position -1, as in `SelfArgumentNode`
+
+ override CfgScope getCfgScope() { result = method }
+
+ override Location getLocationImpl() { result = method.getLocation() }
+
+ override string toStringImpl() { result = "self in " + method.toString() }
+ }
+
+ /**
+ * The value of a method's block parameter at function entry, viewed as a node in a
+ * data flow graph. One exists per method, even without a declared block parameter.
+ */
+ class BlockParameterNode extends ParameterNodeImpl, TBlockParameterNode {
+ private MethodBase method;
+
+ BlockParameterNode() { this = TBlockParameterNode(method) }
+
+ final MethodBase getMethod() { result = method }
+
+ override Parameter getParameter() {
+ result = method.getAParameter() and result instanceof BlockParameter
+ }
+
+ override predicate isSourceParameterOf(Callable c, int i) { c = method and i = -2 } // blocks use position -2, as in `BlockArgumentNode`
+
+ override CfgScope getCfgScope() { result = method }
+
+ override Location getLocationImpl() {
+ result = getParameter().getLocation()
+ or
+ not exists(getParameter()) and result = method.getLocation() // no declared block parameter: fall back to the method
+ }
+
+ override string toStringImpl() {
+ result = getParameter().toString()
+ or
+ not exists(getParameter()) and result = "&block" // no declared block parameter: use a fixed label
+ }
+ }
+
+ /** A parameter for a library callable with a flow summary. */
+ class SummaryParameterNode extends ParameterNodeImpl, TSummaryParameterNode {
+ private FlowSummaryImpl::Public::SummarizedCallable sc;
+ private int pos;
+
+ SummaryParameterNode() { this = TSummaryParameterNode(sc, pos) }
+
+ override predicate isSourceParameterOf(Callable c, int i) { none() } // never a parameter of a source callable
+
+ override predicate isParameterOf(DataFlowCallable c, int i) { sc = c and i = pos }
+
+ override CfgScope getCfgScope() { none() } // no CFG scope; `getEnclosingCallable` is overridden instead
+
+ override DataFlowCallable getEnclosingCallable() { result = sc }
+
+ override Location getLocationImpl() { none() } // no location is provided for these nodes
+
+ override string toStringImpl() { result = "parameter " + pos + " of " + sc }
+ }
+}
+
+import ParameterNodes
+
+/** A data-flow node used to model flow summaries. */
+private class SummaryNode extends NodeImpl, TSummaryNode {
+ private FlowSummaryImpl::Public::SummarizedCallable c;
+ private FlowSummaryImpl::Private::SummaryNodeState state;
+
+ SummaryNode() { this = TSummaryNode(c, state) }
+
+ override CfgScope getCfgScope() { none() } // no CFG scope; `getEnclosingCallable` is overridden instead
+
+ override DataFlowCallable getEnclosingCallable() { result = c }
+
+ override Location getLocationImpl() { none() } // no location is provided for these nodes
+
+ override string toStringImpl() { result = "[summary] " + state + " in " + c }
+}
+
+/** A data-flow node that represents a call argument. */
+abstract class ArgumentNode extends Node {
+ /** Holds if this argument occurs at the given position in the given call. */
+ predicate argumentOf(DataFlowCall call, int pos) { this.sourceArgumentOf(call.asCall(), pos) }
+
+ abstract predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos); // as `argumentOf`, for a call given as a CFG node
+
+ /** Gets the call in which this node is an argument. */
+ final DataFlowCall getCall() { this.argumentOf(result, _) }
+}
+
+private module ArgumentNodes {
+ /** A data-flow node for an explicit call argument that is not a block argument. */
+ class ExplicitArgumentNode extends ArgumentNode {
+ ExplicitArgumentNode() {
+ not this.asExpr().getExpr() instanceof BlockArgument and
+ this.asExpr().getExpr() instanceof Argument
+ }
+
+ override predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos) {
+ call.getArgument(pos) = this.asExpr()
+ }
+ }
+
+ /** A data-flow node that represents the `self` argument of a call, that is, the call's receiver. */
+ class SelfArgumentNode extends ArgumentNode {
+ SelfArgumentNode() { this.asExpr() = any(CfgNodes::ExprNodes::CallCfgNode call).getReceiver() }
+
+ override predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos) {
+ this.asExpr() = call.getReceiver() and
+ pos = -1 // `self` uses position -1, as in `SelfParameterNode`
+ }
+ }
+
+ /** A data-flow node that represents a block argument: a `BlockArgument` expression or the block of a call. */
+ class BlockArgumentNode extends ArgumentNode {
+ BlockArgumentNode() {
+ this.asExpr().getExpr() instanceof BlockArgument or
+ exists(CfgNodes::ExprNodes::CallCfgNode c | c.getBlock() = this.asExpr())
+ }
+
+ override predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos) {
+ pos = -2 and // blocks use position -2, as in `BlockParameterNode`
+ (
+ this.asExpr() = call.getBlock()
+ or
+ exists(CfgNodes::ExprCfgNode arg, int n |
+ arg = call.getArgument(n) and
+ this.asExpr() = arg and
+ arg.getExpr() instanceof BlockArgument
+ )
+ )
+ }
+ }
+
+ private class SummaryArgumentNode extends SummaryNode, ArgumentNode {
+ SummaryArgumentNode() { FlowSummaryImpl::Private::summaryArgumentNode(_, this, _) }
+
+ override predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos) { none() } // never an argument of a CFG call
+
+ override predicate argumentOf(DataFlowCall call, int pos) {
+ FlowSummaryImpl::Private::summaryArgumentNode(call, this, pos)
+ }
+ }
+}
+
+import ArgumentNodes
+
+/** A data-flow node that represents a value syntactically returned by a callable; these feed `SynthReturnNode`. */
+abstract class ReturningNode extends Node {
+ /** Gets the kind of this return node. */
+ abstract ReturnKind getKind();
+}
+
+/** A data-flow node that represents a value returned by a callable (as opposed to a syntactic `ReturningNode`). */
+abstract class ReturnNode extends Node {
+ /** Gets the kind of this return node. */
+ abstract ReturnKind getKind();
+}
+
+private module ReturnNodes {
+ /** Holds if the returning statement of `node` is of a kind accepted in its enclosing scope. */
+ private predicate isValid(CfgNodes::ReturningCfgNode node) {
+ exists(ReturningStmt s, Callable c |
+ c = node.getScope() and
+ s = node.getNode()
+ |
+ // `next` and `break` statements: accepted in blocks and lambdas
+ (s instanceof NextStmt or s instanceof BreakStmt) and
+ (c instanceof Lambda or c instanceof Block)
+ or
+ // `return` statements: accepted in methods, singleton methods and lambdas
+ s instanceof ReturnStmt and
+ (c instanceof Lambda or c instanceof SingletonMethod or c instanceof Method)
+ )
+ }
+
+ /**
+ * A data-flow node for a value returned from a callable by an explicit `return`,
+ * `next`, or `break` statement that is followed by the normal exit of its scope.
+ */
+ class ExplicitReturnNode extends ReturningNode, ReturningStatementNode {
+ ExplicitReturnNode() {
+ isValid(n) and
+ n.getASuccessor().(CfgNodes::AnnotatedExitNode).isNormal() and
+ n.getScope() instanceof Callable
+ }
+
+ override ReturnKind getKind() {
+ if n.getNode() instanceof BreakStmt
+ then result instanceof BreakReturnKind
+ else result instanceof NormalReturnKind
+ }
+ }
+
+ class ExprReturnNode extends ReturningNode, ExprNode { // an expression directly followed by the normal exit of a callable
+ ExprReturnNode() {
+ this.getExprNode().getASuccessor().(CfgNodes::AnnotatedExitNode).isNormal() and
+ this.(NodeImpl).getCfgScope() instanceof Callable
+ }
+
+ override ReturnKind getKind() { result instanceof NormalReturnKind }
+ }
+
+ /**
+ * A synthetic data-flow node for joining flow from different syntactic
+ * returns into a single node.
+ *
+ * This node only exists to avoid computing the product of a large fan-in
+ * with a large fan-out.
+ */
+ class SynthReturnNode extends NodeImpl, ReturnNode, TSynthReturnNode {
+ private CfgScope scope;
+ private ReturnKind kind;
+
+ SynthReturnNode() { this = TSynthReturnNode(scope, kind) }
+
+ /** Gets a syntactic return node that flows into this synthetic node. */
+ ReturningNode getAnInput() {
+ result.(NodeImpl).getCfgScope() = scope and // same scope ...
+ result.getKind() = kind // ... and same return kind as this node
+ }
+
+ override ReturnKind getKind() { result = kind }
+
+ override CfgScope getCfgScope() { result = scope }
+
+ override Location getLocationImpl() { result = scope.getLocation() } // located at the scope as a whole
+
+ override string toStringImpl() { result = "return " + kind + " in " + scope }
+ }
+
+ private class SummaryReturnNode extends SummaryNode, ReturnNode {
+ private ReturnKind kind;
+
+ SummaryReturnNode() { FlowSummaryImpl::Private::summaryReturnNode(this, kind) }
+
+ override ReturnKind getKind() { kind = result }
+ }
+}
+
+import ReturnNodes
+
+/** A data-flow node that represents the output of a call. */
+abstract class OutNode extends Node {
+ /** Gets the call for which this node is the output of return kind `kind`. */
+ abstract DataFlowCall getCall(ReturnKind kind);
+}
+
+private module OutNodes {
+ /**
+ * A data-flow node for the value that a callable returns directly to its
+ * caller, either via a call or a `yield` of a block.
+ */
+ class ExprOutNode extends OutNode, ExprNode {
+ private DataFlowCall outCall;
+
+ ExprOutNode() { this.getExprNode() = outCall.asCall() }
+
+ override DataFlowCall getCall(ReturnKind kind) {
+ kind instanceof NormalReturnKind and
+ result = outCall
+ }
+ }
+
+ private class SummaryOutNode extends SummaryNode, OutNode { // output of a call made inside a flow summary
+ SummaryOutNode() { FlowSummaryImpl::Private::summaryOutNode(_, this, _) }
+
+ override DataFlowCall getCall(ReturnKind kind) {
+ FlowSummaryImpl::Private::summaryOutNode(result, this, kind)
+ }
+ }
+}
+
+import OutNodes
+
+predicate jumpStep(Node pred, Node succ) {
+ SsaImpl::captureFlowIn(pred.(SsaDefinitionNode).getDefinition(), // between SSA definitions, per `SsaImpl::captureFlowIn`
+ succ.(SsaDefinitionNode).getDefinition())
+ or
+ SsaImpl::captureFlowOut(pred.(SsaDefinitionNode).getDefinition(), // between SSA definitions, per `SsaImpl::captureFlowOut`
+ succ.(SsaDefinitionNode).getDefinition())
+ or
+ exists(Self s, Method m | // `self` parameter of `m` to a `self` expression nested in another callable inside `m`
+ s = succ.asExpr().getExpr() and
+ pred.(SelfParameterNode).getMethod() = m and
+ m = s.getEnclosingMethod() and
+ m != s.getEnclosingCallable() // the non-nested case is a local step in `localFlowStepCommon`
+ )
+ or
+ succ.asExpr().getExpr().(ConstantReadAccess).getValue() = pred.asExpr().getExpr() // value expression to a read of the constant
+}
+
+predicate storeStep(Node node1, Content c, Node node2) {
+ FlowSummaryImpl::Private::Steps::summaryStoreStep(node1, c, node2) // only flow summaries contribute store steps
+}
+
+predicate readStep(Node node1, Content c, Node node2) {
+ FlowSummaryImpl::Private::Steps::summaryReadStep(node1, c, node2) // only flow summaries contribute read steps
+}
+
+/**
+ * Holds if node `n` clears whatever is stored inside content `c`. For example,
+ * in `x.f = newValue` the pre-update node associated with `x` clears any value
+ * stored inside `f`.
+ */
+predicate clearsContent(Node n, Content c) {
+ FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c)
+ or
+ storeStep(_, c, n)
+}
+
+private newtype TDataFlowType = TTodoDataFlowType() // stub: a single placeholder branch
+
+class DataFlowType extends TDataFlowType {
+ string toString() { result = "" } // the placeholder type renders as the empty string
+}
+
+/** Gets the type of `n` used for type pruning. The body is `any()`, so the result is unconstrained. */
+DataFlowType getNodeType(NodeImpl n) { any() }
+
+/** Gets the textual form of the `DataFlowType` `t`. */
+string ppReprType(DataFlowType t) { t.toString() = result }
+
+/**
+ * Holds if `t1` and `t2` are compatible, that is, whether data can flow from
+ * a node of type `t1` to a node of type `t2`. Currently holds for all pairs.
+ */
+pragma[inline]
+predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() }
+
+/**
+ * A node associated with an object after an operation that might have
+ * changed its state.
+ *
+ * This can be either the argument to a callable after the callable returns
+ * (which might have mutated the argument), or the qualifier of a field after
+ * an update to the field.
+ *
+ * Nodes corresponding to AST elements, for example `ExprNode`, usually refer
+ * to the value before the update.
+ */
+abstract class PostUpdateNode extends Node {
+ /** Gets the node that represents the same object before the state update. */
+ abstract Node getPreUpdateNode();
+}
+
+private module PostUpdateNodes {
+ class ExprPostUpdateNode extends PostUpdateNode, NodeImpl, TExprPostUpdateNode {
+ private CfgNodes::ExprCfgNode cfgNode;
+
+ ExprPostUpdateNode() { this = TExprPostUpdateNode(cfgNode) }
+
+ override ExprNode getPreUpdateNode() { result.getExprNode() = cfgNode }
+
+ override CfgScope getCfgScope() { cfgNode.getExpr().getCfgScope() = result }
+
+ override Location getLocationImpl() { cfgNode.getLocation() = result }
+
+ override string toStringImpl() { result = "[post] " + cfgNode.toString() }
+ }
+
+ private class SummaryPostUpdateNode extends SummaryNode, PostUpdateNode {
+ private Node preNode;
+
+ SummaryPostUpdateNode() { FlowSummaryImpl::Private::summaryPostUpdateNode(this, preNode) }
+
+ override Node getPreUpdateNode() { preNode = result }
+ }
+}
+
+private import PostUpdateNodes
+
+/** A node that performs a type cast. In this implementation, exactly the `ReturningNode`s. */
+class CastNode extends Node {
+ CastNode() { this instanceof ReturningNode }
+}
+
+class DataFlowExpr = CfgNodes::ExprCfgNode;
+
+int accessPathLimit() { result = 5 } // NOTE(review): presumably the maximum tracked access path length; confirm against the shared data-flow library
+
+/**
+ * Holds if access paths with `c` at their head always should be tracked at high
+ * precision. This disables adaptive access path precision for such access paths.
+ */
+predicate forceHighPrecision(Content c) { none() } // currently holds for no content
+
+/** The unit type. */
+private newtype TUnit = TMkUnit()
+
+/** The trivial type with a single element. */
+class Unit extends TUnit {
+ /** Gets a textual representation of this element. */
+ string toString() { result = "unit" }
+}
+
+/**
+ * Holds if `n` does not require a `PostUpdateNode` as it either cannot be
+ * modified or its modification cannot be observed, for example if it is a
+ * freshly created object that is not saved in a variable.
+ *
+ * This predicate is only used for consistency checks. It currently covers only block arguments.
+ */
+predicate isImmutableOrUnobservable(Node n) { n instanceof BlockArgumentNode }
+
+/**
+ * Holds if the node `n` is unreachable when the call context is `call`. Currently never holds.
+ */
+predicate isUnreachableInCall(Node n, DataFlowCall call) { none() }
+
+newtype LambdaCallKind =
+ TYieldCallKind() or // blocks, invoked through `yield`
+ TLambdaCallKind() // lambdas, invoked through a method named `call`
+
+/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
+predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) {
+ kind = TYieldCallKind() and // a block expression creates a yield-kind lambda
+ creation.asExpr().getExpr() = c.asCallable().(Block)
+ or
+ kind = TLambdaCallKind() and
+ (
+ creation.asExpr().getExpr() = c.asCallable().(Lambda) // a `Lambda` expression
+ or
+ creation.asExpr() = // a call to a method named `lambda`, whose block is the callable
+ any(CfgNodes::ExprNodes::MethodCallCfgNode mc |
+ c.asCallable() = mc.getBlock().getExpr() and
+ mc.getExpr().getMethodName() = "lambda"
+ )
+ )
+}
+
+/** Holds if `call` is a lambda call of kind `kind` where `receiver` is the lambda expression. */
+predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) {
+ kind = TYieldCallKind() and // a `yield` invokes the block parameter of its enclosing method
+ receiver.(BlockParameterNode).getMethod() =
+ call.asCall().getExpr().(YieldCall).getEnclosingMethod()
+ or
+ kind = TLambdaCallKind() and // any method call named `call` is treated as invoking its receiver
+ call.asCall() =
+ any(CfgNodes::ExprNodes::MethodCallCfgNode mc |
+ receiver.asExpr() = mc.getReceiver() and
+ mc.getExpr().getMethodName() = "call"
+ )
+ or
+ receiver = call.(SummaryCall).getReceiver() and // calls inside flow summaries
+ if receiver.(ParameterNode).isParameterOf(_, -2) // position -2 is the block position
+ then kind = TYieldCallKind()
+ else kind = TLambdaCallKind()
+}
+
+/** Extra data-flow steps needed for lambda flow analysis. Currently there are none. */
+predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll
new file mode 100644
index 00000000000..c8ad1ca1eaf
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll
@@ -0,0 +1,210 @@
+private import ruby
+private import DataFlowDispatch
+private import DataFlowPrivate
+private import codeql.ruby.CFG
+private import codeql.ruby.typetracking.TypeTracker
+private import codeql.ruby.dataflow.SSA
+private import FlowSummaryImpl as FlowSummaryImpl
+
+/**
+ * An element, viewed as a node in a data flow graph. For example an expression
+ * (`ExprNode`) or a parameter (`ParameterNode`); `TNode` lists all node kinds.
+ */
+class Node extends TNode {
+ /** Gets the expression corresponding to this node, if any. */
+ CfgNodes::ExprCfgNode asExpr() { result = this.(ExprNode).getExprNode() }
+
+ /** Gets the parameter corresponding to this node, if any. */
+ Parameter asParameter() { result = this.(ParameterNode).getParameter() }
+
+ /** Gets a textual representation of this node. */
+ // TODO: cache
+ final string toString() { result = this.(NodeImpl).toStringImpl() }
+
+ /** Gets the location of this node. */
+ // TODO: cache
+ final Location getLocation() { result = this.(NodeImpl).getLocationImpl() }
+
+ DataFlowCallable getEnclosingCallable() { result = TCfgScope(this.(NodeImpl).getCfgScope()) } // derived from the node's CFG scope; overridden by summary nodes
+
+ /**
+ * Holds if this element is at the specified location.
+ * The location spans column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in file `filepath`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ }
+
+ /**
+ * Gets a local source node from which data may flow to this node in zero or more local data-flow steps.
+ */
+ LocalSourceNode getALocalSource() { result.flowsTo(this) }
+}
+
+/** A data-flow node corresponding to a call in the control-flow graph. */
+class CallNode extends LocalSourceNode {
+ private CfgNodes::ExprNodes::CallCfgNode node;
+
+ CallNode() { node = this.asExpr() }
+
+ /** Gets the data-flow node corresponding to the receiver of the call corresponding to this data-flow node. */
+ Node getReceiver() { result.asExpr() = node.getReceiver() }
+
+ /** Gets the data-flow node corresponding to the `n`th argument of the call corresponding to this data-flow node. */
+ Node getArgument(int n) { result.asExpr() = node.getArgument(n) }
+
+ /** Gets the data-flow node corresponding to the keyword argument `name` of the call corresponding to this data-flow node. */
+ Node getKeywordArgument(string name) { result.asExpr() = node.getKeywordArgument(name) }
+}
+
+/**
+ * A data flow graph node that stands for an expression.
+ *
+ * Control-flow splitting means a single `Expr` can give rise to several
+ * `ExprNode`s, in the same way that it can give rise to several
+ * `ControlFlow::Node`s.
+ */
+class ExprNode extends Node, TExprNode {
+ private CfgNodes::ExprCfgNode cfgNode;
+
+ ExprNode() { this = TExprNode(cfgNode) }
+
+ /** Gets the control-flow node of the expression that this node stands for. */
+ CfgNodes::ExprCfgNode getExprNode() { cfgNode = result }
+}
+
+/**
+ * The value of a parameter at function entry, viewed as a node in a data
+ * flow graph.
+ */
+class ParameterNode extends Node, TParameterNode {
+ /** Gets the parameter corresponding to this node, if any. This default has no result. */
+ Parameter getParameter() { none() }
+
+ /**
+ * Holds if this node is the parameter of callable `c` at the specified
+ * (zero-based) position. This default never holds; `ParameterNodeImpl` overrides it.
+ */
+ predicate isParameterOf(DataFlowCallable c, int i) { none() }
+}
+
+/**
+ * A data-flow node that is a source of local flow.
+ */
+class LocalSourceNode extends Node {
+ LocalSourceNode() { isLocalSourceNode(this) }
+
+ /** Holds if this `LocalSourceNode` can flow to `nodeTo` in zero or more local flow steps. */
+ pragma[inline]
+ predicate flowsTo(Node nodeTo) { hasLocalSource(nodeTo, this) }
+
+ /**
+ * Gets a node that this node may flow to using one heap and/or interprocedural step.
+ *
+ * See `TypeTracker` for more details about how to use this.
+ */
+ pragma[inline]
+ LocalSourceNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
+}
+
+predicate hasLocalSource(Node sink, Node source) {
+ // Declaring `source` to be a `LocalSourceNode` currently causes a redundant check in the
+ // recursive case, so instead we check it explicitly here (base case: zero steps).
+ source = sink and
+ source instanceof LocalSourceNode
+ or
+ exists(Node mid |
+ hasLocalSource(mid, source) and
+ localFlowStepTypeTracker(mid, sink)
+ )
+}
+
+/** Gets a data-flow node whose expression is `e`. */
+ExprNode exprNode(CfgNodes::ExprCfgNode e) { e = result.getExprNode() }
+
+/**
+ * Gets the data-flow node for the value that parameter `p` has at function entry.
+ */
+ParameterNode parameterNode(Parameter p) { p = result.getParameter() }
+
+/**
+ * Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
+ * (intra-procedural) step.
+ */
+predicate localFlowStep = localFlowStepImpl/2;
+
+/**
+ * Holds if data flows from `source` to `sink` in zero or more local
+ * (intra-procedural) steps.
+ */
+predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
+
+/**
+ * Holds if zero or more local (intra-procedural) steps can carry data
+ * from `e1` to `e2`.
+ */
+predicate localExprFlow(CfgNodes::ExprCfgNode e1, CfgNodes::ExprCfgNode e2) {
+ exists(ExprNode src, ExprNode dst | src = exprNode(e1) and dst = exprNode(e2) | localFlow(src, dst))
+}
+
+/**
+ * A reference contained in an object. This is either a field, a property,
+ * or an element in a collection.
+ */
+class Content extends TContent {
+ /** Gets a textual representation of this content. */
+ string toString() { none() } // placeholder: currently has no result
+
+ /** Gets the location of this content. */
+ Location getLocation() { none() } // placeholder: currently has no result
+}
+
+/**
+ * A guard that validates some expression.
+ *
+ * To use this in a configuration, extend the class and provide a
+ * characteristic predicate precisely specifying the guard, and override
+ * `checks` to specify what is being validated and in which branch.
+ *
+ * It is important that all extending classes in scope are disjoint.
+ */
+abstract class BarrierGuard extends CfgNodes::ExprCfgNode {
+ private ConditionBlock conditionBlock;
+
+ BarrierGuard() { this = conditionBlock.getLastNode() }
+
+ /** Holds if this guard controls block `bb` upon evaluating to `branch`. */
+ private predicate controlsBlock(BasicBlock bb, boolean branch) {
+ exists(SuccessorTypes::BooleanSuccessor s | s.getValue() = branch |
+ conditionBlock.controls(bb, s)
+ )
+ }
+
+ /**
+ * Holds if this guard validates `expr` upon evaluating to `branch`.
+ * For example, the following code validates `foo` when the condition
+ * `foo == "foo"` is true.
+ * ```ruby
+ * if foo == "foo"
+ * do_something
+ * else
+ * do_something_else
+ * end
+ * ```
+ */
+ abstract predicate checks(CfgNode expr, boolean branch);
+
+ final Node getAGuardedNode() { // a read of the same SSA definition as the checked node, in a block this guard controls
+ exists(boolean branch, CfgNodes::ExprCfgNode testedNode, Ssa::Definition def |
+ def.getARead() = testedNode and
+ def.getARead() = result.asExpr() and
+ this.checks(testedNode, branch) and
+ this.controlsBlock(result.asExpr().getBasicBlock(), branch)
+ )
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImpl.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImpl.qll
new file mode 100644
index 00000000000..83076558ec4
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImpl.qll
@@ -0,0 +1,964 @@
+/**
+ * Provides classes and predicates for defining flow summaries.
+ *
+ * The definitions in this file are language-independent, and language-specific
+ * definitions are passed in via the `DataFlowImplSpecific` and
+ * `FlowSummaryImplSpecific` modules.
+ */
+
+private import FlowSummaryImplSpecific
+private import DataFlowImplSpecific::Private
+private import DataFlowImplSpecific::Public
+private import DataFlowImplCommon
+
+/** Provides classes and predicates for defining flow summaries. */
+module Public {
+ private import Private
+
+ /**
+ * A component used in a flow summary.
+ *
+ * A component is one of: an argument at a given position, a parameter at a
+ * given position, a return kind, or a specific content.
+ */
+ class SummaryComponent extends TSummaryComponent {
+ /** Gets a textual representation of this summary component. */
+ string toString() {
+ exists(int pos | this = TArgumentSummaryComponent(pos) | result = "argument " + pos)
+ or
+ exists(int pos | this = TParameterSummaryComponent(pos) | result = "parameter " + pos)
+ or
+ exists(ReturnKind kind | this = TReturnSummaryComponent(kind) |
+ result = "return (" + kind + ")"
+ )
+ or
+ exists(Content content | this = TContentSummaryComponent(content) |
+ result = content.toString()
+ )
+ }
+ }
+
+ /** Provides predicates for constructing summary components. */
+ module SummaryComponent {
+ /** Gets the summary component wrapping content `c`. */
+ SummaryComponent content(Content c) { TContentSummaryComponent(c) = result }
+
+ /** Gets the summary component denoting parameter `i`. */
+ SummaryComponent parameter(int i) { TParameterSummaryComponent(i) = result }
+
+ /** Gets the summary component denoting argument `i`. */
+ SummaryComponent argument(int i) { TArgumentSummaryComponent(i) = result }
+
+ /** Gets the summary component denoting a return of kind `rk`. */
+ SummaryComponent return(ReturnKind rk) { TReturnSummaryComponent(rk) = result }
+ }
+
+ /**
+ * A (non-empty) stack of summary components.
+ *
+ * A stack describes where data is read from (input) or where it is written
+ * to (output). For example, an input stack `[Field f, Argument 0]` means that
+ * data is read from field `f` of the `0`th argument, while an output stack
+ * `[Field g, Return]` means that data is written to the field `g` of the
+ * returned object.
+ */
+ class SummaryComponentStack extends TSummaryComponentStack {
+ /** Gets the head (top-most component) of this stack. */
+ SummaryComponent head() {
+ this = TConsSummaryComponentStack(result, _)
+ or
+ this = TSingletonSummaryComponentStack(result)
+ }
+
+ /** Gets the tail of this stack, if any (singleton stacks have none). */
+ SummaryComponentStack tail() { this = TConsSummaryComponentStack(_, result) }
+
+ /** Gets the number of components in this stack. */
+ int length() {
+ result = 1 + this.tail().length()
+ or
+ result = 1 and this = TSingletonSummaryComponentStack(_)
+ }
+
+ /** Gets the stack that remains after removing the first `i` components, if any. */
+ SummaryComponentStack drop(int i) {
+ result = this and i = 0
+ or
+ result = this.tail().drop(i - 1)
+ }
+
+ /** Holds if summary component `c` occurs somewhere in this stack. */
+ predicate contains(SummaryComponent c) { this.drop(_).head() = c }
+
+ /** Gets a textual representation of this stack, of the form `head of tail`. */
+ string toString() {
+ exists(SummaryComponent only |
+ this = TSingletonSummaryComponentStack(only) and
+ result = only.toString()
+ )
+ or
+ exists(SummaryComponent first, SummaryComponentStack rest |
+ first = this.head() and rest = this.tail()
+ |
+ result = first + " of " + rest
+ )
+ }
+ }
+
+ /** Provides predicates for constructing stacks of summary components. */
+ module SummaryComponentStack {
+ /** Gets the one-element stack whose only component is `c`. */
+ SummaryComponentStack singleton(SummaryComponent c) {
+ TSingletonSummaryComponentStack(c) = result
+ }
+
+ /**
+ * Gets the stack that has `head` on top of `tail`.
+ *
+ * Such a stack only exists if it has been declared as required, so make sure
+ * to override `RequiredSummaryComponentStack::required()` accordingly.
+ */
+ SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail) {
+ TConsSummaryComponentStack(head, tail) = result
+ }
+
+ /** Gets the one-element stack for argument `i`. */
+ SummaryComponentStack argument(int i) { singleton(SummaryComponent::argument(i)) = result }
+
+ /** Gets the one-element stack for a return of kind `rk`. */
+ SummaryComponentStack return(ReturnKind rk) { singleton(SummaryComponent::return(rk)) = result }
+ }
+
+ /**
+ * A class that exists for QL technical reasons only (the IPA type used
+ * to represent component stacks needs to be bounded).
+ *
+ * Extend this class with the stacks that should be usable as a tail, and
+ * list the heads that may be pushed onto them via `required`.
+ */
+ abstract class RequiredSummaryComponentStack extends SummaryComponentStack {
+ /**
+ * Holds if the stack obtained by pushing `c` onto this stack is required,
+ * that is, `c` is the head and this stack is the tail of the required stack.
+ */
+ abstract predicate required(SummaryComponent c);
+ }
+
+ /** A callable with a flow summary. */
+ abstract class SummarizedCallable extends DataFlowCallable {
+ /**
+ * Holds if data may flow from `input` to `output` through this callable.
+ *
+ * `preservesValue` indicates whether this is a value-preserving step
+ * or a taint-step.
+ *
+ * Input specifications are restricted to stacks that end with
+ * `SummaryComponent::argument(_)`, preceded by zero or more
+ * `SummaryComponent::return(_)` or `SummaryComponent::content(_)` components.
+ *
+ * Output specifications are restricted to stacks that end with
+ * `SummaryComponent::return(_)` or `SummaryComponent::argument(_)`.
+ *
+ * Output stacks ending with `SummaryComponent::return(_)` can be preceded by zero
+ * or more `SummaryComponent::content(_)` components.
+ *
+ * Output stacks ending with `SummaryComponent::argument(_)` can be preceded by an
+ * optional `SummaryComponent::parameter(_)` component, which in turn can be preceded
+ * by zero or more `SummaryComponent::content(_)` components.
+ *
+ * The default implementation holds for no values (`none()`); subclasses
+ * override it to declare their summaries.
+ */
+ pragma[nomagic]
+ predicate propagatesFlow(
+ SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
+ ) {
+ none()
+ }
+
+ /**
+ * Holds if values stored inside `content` are cleared on objects passed as
+ * the `i`th argument to this callable.
+ *
+ * The default implementation holds for no values (`none()`).
+ */
+ pragma[nomagic]
+ predicate clearsContent(int i, Content content) { none() }
+ }
+}
+
+/**
+ * Provides predicates for compiling flow summaries down to atomic local steps,
+ * read steps, and store steps.
+ */
+module Private {
+ private import Public
+
+ /**
+ * The algebraic type underlying `SummaryComponent`: a content, a parameter
+ * position, an argument position, or a return kind. Parameter and argument
+ * positions are both restricted to values accepted by `parameterPosition`.
+ */
+ newtype TSummaryComponent =
+ TContentSummaryComponent(Content c) or
+ TParameterSummaryComponent(int i) { parameterPosition(i) } or
+ TArgumentSummaryComponent(int i) { parameterPosition(i) } or
+ TReturnSummaryComponent(ReturnKind rk)
+
+ /**
+ * Gets the parameter component at position `instanceParameterPosition()`.
+ * Has no result if that position is undefined or is not a valid parameter position.
+ */
+ private TSummaryComponent thisParam() {
+ result = TParameterSummaryComponent(instanceParameterPosition())
+ }
+
+ /**
+ * The algebraic type underlying `SummaryComponentStack`. Every single
+ * component forms a singleton stack; a cons stack only exists when it has
+ * been requested through `RequiredSummaryComponentStack`, which keeps the
+ * type bounded.
+ */
+ newtype TSummaryComponentStack =
+ TSingletonSummaryComponentStack(SummaryComponent c) or
+ TConsSummaryComponentStack(SummaryComponent head, SummaryComponentStack tail) {
+ // explicitly required: `head` pushed onto `tail`
+ tail.(RequiredSummaryComponentStack).required(head)
+ or
+ // whenever some parameter component is required on `tail`, the stack with
+ // the instance parameter (`thisParam()`) on `tail` is included as well
+ tail.(RequiredSummaryComponentStack).required(TParameterSummaryComponent(_)) and
+ head = thisParam()
+ }
+
+ /**
+ * Holds if `c` has a summary, declared or derived, from `input` to `output`,
+ * where `preservesValue` tells value flow from taint flow.
+ *
+ * Besides summaries declared through `propagatesFlow`, two derived forms are
+ * included, both value-preserving; see the inline comments.
+ */
+ pragma[nomagic]
+ private predicate summary(
+ SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output,
+ boolean preservesValue
+ ) {
+ // summaries declared directly on the callable
+ c.propagatesFlow(input, output, preservesValue)
+ or
+ // observe side effects of callbacks on input arguments
+ // (arguments are swapped on purpose: the declared flow goes from `output`,
+ // some content of an argument, to `input`, a callback parameter, and this
+ // disjunct adds the opposite direction)
+ c.propagatesFlow(output, input, preservesValue) and
+ preservesValue = true and
+ isCallbackParameter(input) and
+ isContentOfArgument(output)
+ or
+ // flow from the receiver of a callback into the instance-parameter
+ // (`s` ranges over every stack mentioned as input or output of a declared
+ // summary, and `callbackRef` over every suffix of `s`)
+ exists(SummaryComponentStack s, SummaryComponentStack callbackRef |
+ c.propagatesFlow(s, _, _) or c.propagatesFlow(_, s, _)
+ |
+ callbackRef = s.drop(_) and
+ (isCallbackParameter(callbackRef) or callbackRef.head() = TReturnSummaryComponent(_)) and
+ input = callbackRef.tail() and
+ output = TConsSummaryComponentStack(thisParam(), input) and
+ preservesValue = true
+ )
+ }
+
+ /**
+ * Holds if `s` denotes a parameter of a callback: its head is a parameter
+ * component and it has a tail.
+ */
+ private predicate isCallbackParameter(SummaryComponentStack s) {
+ exists(s.tail()) and
+ s.head() = TParameterSummaryComponent(_)
+ }
+
+ /**
+ * Holds if `s` is a singleton argument stack, optionally with any number of
+ * content components on top of it.
+ */
+ private predicate isContentOfArgument(SummaryComponentStack s) {
+ s = TSingletonSummaryComponentStack(TArgumentSummaryComponent(_))
+ or
+ isContentOfArgument(s.tail()) and
+ s.head() = TContentSummaryComponent(_)
+ }
+
+ /**
+ * Holds if `s` is an output state needed for `c`. This is the case when `s` is
+ * the output stack of a summary of `c`, the tail of an output state whose head
+ * is a content component, or an input state that is a callback parameter.
+ */
+ private predicate outputState(SummarizedCallable c, SummaryComponentStack s) {
+ summary(c, _, s, _)
+ or
+ // after writing the content at the head, the tail remains to be written
+ exists(SummaryComponentStack out |
+ outputState(c, out) and
+ out.head() = TContentSummaryComponent(_) and
+ s = out.tail()
+ )
+ or
+ // Add the argument node corresponding to the requested post-update node
+ inputState(c, s) and isCallbackParameter(s)
+ }
+
+ /**
+ * Holds if `s` is an input state needed for `c`. This is the case when `s` is
+ * the input stack of a summary of `c`, the tail of any input state, or the
+ * tail of an output state whose head is a parameter component.
+ */
+ private predicate inputState(SummarizedCallable c, SummaryComponentStack s) {
+ summary(c, s, _, _)
+ or
+ // every suffix of an input stack is itself an input state
+ exists(SummaryComponentStack inp | inputState(c, inp) and s = inp.tail())
+ or
+ // the tail below a parameter component of an output state must be read as input
+ exists(SummaryComponentStack out |
+ outputState(c, out) and
+ out.head() = TParameterSummaryComponent(_) and
+ s = out.tail()
+ )
+ }
+
+ /**
+ * The algebraic type underlying `SummaryNodeState`: an input state or an
+ * output state, for each stack that is an input (respectively output) state
+ * of some summarized callable.
+ */
+ private newtype TSummaryNodeState =
+ TSummaryNodeInputState(SummaryComponentStack s) { inputState(_, s) } or
+ TSummaryNodeOutputState(SummaryComponentStack s) { outputState(_, s) }
+
+ /**
+ * A state used to break up (complex) flow summaries into atomic flow steps.
+ * Given a flow summary
+ *
+ * ```ql
+ * propagatesFlow(
+ * SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
+ * )
+ * ```
+ *
+ * two kinds of states are used:
+ *
+ * - `TSummaryNodeInputState(SummaryComponentStack s)`:
+ * the components in `s` _have been read_ from the input.
+ * - `TSummaryNodeOutputState(SummaryComponentStack s)`:
+ * the components in `s` _remain to be written_ to the output.
+ */
+ class SummaryNodeState extends TSummaryNodeState {
+ /** Holds if this state is the input state for stack `s`, and `s` is an input state of `c`. */
+ pragma[nomagic]
+ predicate isInputState(SummarizedCallable c, SummaryComponentStack s) {
+ inputState(c, s) and
+ this = TSummaryNodeInputState(s)
+ }
+
+ /** Holds if this state is the output state for stack `s`, and `s` is an output state of `c`. */
+ pragma[nomagic]
+ predicate isOutputState(SummarizedCallable c, SummaryComponentStack s) {
+ outputState(c, s) and
+ this = TSummaryNodeOutputState(s)
+ }
+
+ /** Gets a textual representation of this state. */
+ string toString() {
+ exists(SummaryComponentStack pending | this = TSummaryNodeOutputState(pending) |
+ result = "to write: " + pending
+ )
+ or
+ exists(SummaryComponentStack consumed | this = TSummaryNodeInputState(consumed) |
+ result = "read: " + consumed
+ )
+ }
+ }
+
+ /**
+ * Holds if `state` represents having read the `i`th argument for `c`, that is,
+ * `state` is the input state of `c` for the singleton stack `Argument i`.
+ * In this case we are not synthesizing a data-flow node, but instead assume
+ * that a relevant parameter node already exists.
+ */
+ private predicate parameterReadState(SummarizedCallable c, SummaryNodeState state, int i) {
+ state.isInputState(c, SummaryComponentStack::argument(i))
+ }
+
+ /**
+ * Holds if a synthesized summary node is needed for `state` in summarized
+ * callable `c`. This covers every output state, and every input state other
+ * than those that merely read a parameter.
+ */
+ predicate summaryNodeRange(SummarizedCallable c, SummaryNodeState state) {
+ state.isOutputState(c, _)
+ or
+ not parameterReadState(c, state, _) and
+ state.isInputState(c, _)
+ }
+
+ /**
+ * Gets the node representing the input state `s` of `c`: either the
+ * synthesized summary node for that state, or, when the state is a plain
+ * parameter read, the parameter node of `c` at that position.
+ */
+ pragma[noinline]
+ private Node summaryNodeInputState(SummarizedCallable c, SummaryComponentStack s) {
+ exists(SummaryNodeState state | state.isInputState(c, s) |
+ result = summaryNode(c, state)
+ or
+ exists(int i |
+ parameterReadState(c, state, i) and
+ result.(ParamNode).isParameterOf(c, i)
+ )
+ )
+ }
+
+ /** Gets the synthesized summary node representing the output state `s` of `c`. */
+ pragma[noinline]
+ private Node summaryNodeOutputState(SummarizedCallable c, SummaryComponentStack s) {
+ exists(SummaryNodeState state |
+ state.isOutputState(c, s) and
+ result = summaryNode(c, state)
+ )
+ }
+
+ /**
+ * Holds if `post` is the target of a write to the `i`th parameter of `c`,
+ * that is, `post` is the output-state node of `c` for the singleton stack
+ * `Argument i` and thus acts as a post-update node for that parameter.
+ */
+ private predicate isParameterPostUpdate(Node post, SummarizedCallable c, int i) {
+ summaryNodeOutputState(c, SummaryComponentStack::argument(i)) = post
+ }
+
+ /**
+ * Holds if a parameter node is required for the `i`th parameter of `c`,
+ * because the parameter is either written to (post-update) or read.
+ */
+ predicate summaryParameterNodeRange(SummarizedCallable c, int i) {
+ isParameterPostUpdate(_, c, i)
+ or
+ parameterReadState(c, _, i)
+ }
+
+ /**
+ * Holds if input state `s` of `c` has a return component of kind `rk` as its
+ * head, and `receiver` is the input-state node for the remainder of `s`.
+ */
+ private predicate callbackOutput(
+ SummarizedCallable c, SummaryComponentStack s, Node receiver, ReturnKind rk
+ ) {
+ exists(SummaryNodeState st | st.isInputState(c, s)) and
+ TReturnSummaryComponent(rk) = s.head() and
+ summaryNodeInputState(c, s.drop(1)) = receiver
+ }
+
+ /**
+ * Holds if output state `s` of `c` has a parameter component at position `i`
+ * as its head, and `receiver` is the input-state node for the remainder of `s`.
+ */
+ private predicate callbackInput(
+ SummarizedCallable c, SummaryComponentStack s, Node receiver, int i
+ ) {
+ exists(SummaryNodeState st | st.isOutputState(c, s)) and
+ TParameterSummaryComponent(i) = s.head() and
+ summaryNodeInputState(c, s.drop(1)) = receiver
+ }
+
+ /**
+ * Holds if a call targeting `receiver` should be synthesized inside `c`,
+ * because `receiver` is used as a callback that is passed input or whose
+ * output is read.
+ */
+ predicate summaryCallbackRange(SummarizedCallable c, Node receiver) {
+ callbackInput(c, _, receiver, _)
+ or
+ callbackOutput(c, _, receiver, _)
+ }
+
+ /**
+ * Gets the type of synthesized summary node `n`.
+ *
+ * The type is computed based on the language-specific predicates
+ * `getContentType()`, `getReturnType()`, `getCallbackParameterType()`, and
+ * `getCallbackReturnType()`.
+ */
+ DataFlowType summaryNodeType(Node n) {
+ // a post-update node has the same type as its pre-update node
+ exists(Node pre |
+ summaryPostUpdateNode(n, pre) and
+ result = getNodeType(pre)
+ )
+ or
+ // otherwise the type is determined by the head of the stack that `n` represents
+ exists(SummarizedCallable c, SummaryComponentStack s, SummaryComponent head | head = s.head() |
+ n = summaryNodeInputState(c, s) and
+ (
+ // a content that has been read
+ exists(Content cont |
+ head = TContentSummaryComponent(cont) and result = getContentType(cont)
+ )
+ or
+ // output of a callback; depends on the type of the callback receiver
+ exists(ReturnKind rk |
+ head = TReturnSummaryComponent(rk) and
+ result =
+ getCallbackReturnType(getNodeType(summaryNodeInputState(pragma[only_bind_out](c),
+ s.drop(1))), rk)
+ )
+ )
+ or
+ n = summaryNodeOutputState(c, s) and
+ (
+ // a content that remains to be written
+ exists(Content cont |
+ head = TContentSummaryComponent(cont) and result = getContentType(cont)
+ )
+ or
+ // the return node of `c` itself (singleton return stack)
+ s.length() = 1 and
+ exists(ReturnKind rk |
+ head = TReturnSummaryComponent(rk) and
+ result = getReturnType(c, rk)
+ )
+ or
+ // argument passed to a callback; depends on the type of the callback receiver
+ exists(int i | head = TParameterSummaryComponent(i) |
+ result =
+ getCallbackParameterType(getNodeType(summaryNodeInputState(pragma[only_bind_out](c),
+ s.drop(1))), i)
+ )
+ )
+ )
+ }
+
+ /**
+ * Holds if summary node `out` contains output of kind `rk` from call `c`,
+ * where `c` is the call synthesized for a callback receiver.
+ */
+ predicate summaryOutNode(DataFlowCall c, Node out, ReturnKind rk) {
+ exists(SummarizedCallable sc, SummaryComponentStack stack, Node recv |
+ c = summaryDataFlowCall(recv) and
+ callbackOutput(sc, stack, recv, rk)
+ |
+ out = summaryNodeInputState(sc, stack)
+ )
+ }
+
+ /**
+ * Holds if summary node `arg` is the `i`th argument of call `c`,
+ * where `c` is the call synthesized for a callback receiver.
+ */
+ predicate summaryArgumentNode(DataFlowCall c, Node arg, int i) {
+ exists(SummarizedCallable sc, SummaryComponentStack stack, Node recv |
+ c = summaryDataFlowCall(recv) and
+ callbackInput(sc, stack, recv, i)
+ |
+ arg = summaryNodeOutputState(sc, stack)
+ )
+ }
+
+ /**
+ * Holds if summary node `post` is a post-update node with pre-update node `pre`.
+ *
+ * This pairs the argument passed to a callback with the node that observes
+ * the argument after the callback, and the node written for a parameter of
+ * the summarized callable with that parameter's node.
+ */
+ predicate summaryPostUpdateNode(Node post, Node pre) {
+ exists(SummarizedCallable sc, SummaryComponentStack stack |
+ callbackInput(sc, stack, _, _)
+ |
+ post = summaryNodeInputState(sc, stack) and
+ pre = summaryNodeOutputState(sc, stack)
+ )
+ or
+ exists(SummarizedCallable sc, int pos |
+ pre.(ParamNode).isParameterOf(sc, pos) and
+ isParameterPostUpdate(post, sc, pos)
+ )
+ }
+
+ /**
+ * Holds if summary node `ret` is a return node of kind `rk`, that is, the
+ * output-state node of some summarized callable for the singleton return stack.
+ */
+ predicate summaryReturnNode(Node ret, ReturnKind rk) {
+ exists(SummaryComponentStack returnStack |
+ returnStack = TSingletonSummaryComponentStack(TReturnSummaryComponent(rk))
+ |
+ ret = summaryNodeOutputState(_, returnStack)
+ )
+ }
+
+ /** Provides a compilation of flow summaries to atomic data-flow steps. */
+ module Steps {
+ /**
+ * Holds if there is a local step from `pred` to `succ`, which is synthesized
+ * from a flow summary. `preservesValue` is `true` for value-preserving steps
+ * and `false` for taint steps.
+ */
+ predicate summaryLocalStep(Node pred, Node succ, boolean preservesValue) {
+ // the step that connects the fully read input with the output to be written
+ exists(
+ SummarizedCallable c, SummaryComponentStack inputContents,
+ SummaryComponentStack outputContents
+ |
+ summary(c, inputContents, outputContents, preservesValue) and
+ pred = summaryNodeInputState(c, inputContents) and
+ succ = summaryNodeOutputState(c, outputContents)
+ |
+ preservesValue = true
+ or
+ // a taint step is only reported when no value-preserving summary exists
+ // for the same input/output pair
+ preservesValue = false and not summary(c, inputContents, outputContents, true)
+ )
+ or
+ // If flow through a method updates a parameter from some input A, and that
+ // parameter also is returned through B, then we'd like a combined flow from A
+ // to B as well. As an example, this simplifies modeling of fluent methods:
+ // for `StringBuilder.append(x)` with a specified value flow from qualifier to
+ // return value and taint flow from argument 0 to the qualifier, then this
+ // allows us to infer taint flow from argument 0 to the return value.
+ succ instanceof ParamNode and summaryPostUpdateNode(pred, succ) and preservesValue = true
+ or
+ // Similarly we would like to chain together summaries where values get passed
+ // into callbacks along the way.
+ pred instanceof ArgNode and summaryPostUpdateNode(succ, pred) and preservesValue = true
+ }
+
+ /**
+ * Holds if there is a read step of content `c` from `pred` to `succ`, which
+ * is synthesized from a flow summary: `succ` is the input-state node for a
+ * stack headed by `c`, and `pred` the input-state node for the rest of it.
+ */
+ predicate summaryReadStep(Node pred, Content c, Node succ) {
+ exists(SummarizedCallable callable, SummaryComponentStack stack |
+ stack.head() = SummaryComponent::content(c)
+ |
+ succ = summaryNodeInputState(callable, stack) and
+ pred = summaryNodeInputState(callable, stack.drop(1))
+ )
+ }
+
+ /**
+ * Holds if there is a store step of content `c` from `pred` to `succ`, which
+ * is synthesized from a flow summary: `pred` is the output-state node for a
+ * stack headed by `c`, and `succ` the output-state node for the rest of it.
+ */
+ predicate summaryStoreStep(Node pred, Content c, Node succ) {
+ exists(SummarizedCallable callable, SummaryComponentStack stack |
+ stack.head() = SummaryComponent::content(c)
+ |
+ succ = summaryNodeOutputState(callable, stack.drop(1)) and
+ pred = summaryNodeOutputState(callable, stack)
+ )
+ }
+
+ /**
+ * Holds if values stored inside content `c` are cleared on `arg`, because
+ * `arg` is the `i`th argument of a call that may target a summarized callable
+ * whose `clearsContent(i, c)` holds.
+ */
+ predicate summaryClearsContent(ArgNode arg, Content c) {
+ exists(DataFlowCall call, int i |
+ viableCallable(call).(SummarizedCallable).clearsContent(i, c) and
+ arg.argumentOf(call, i)
+ )
+ }
+
+ /**
+ * Gets the parameter node of a summarized callable that corresponds to
+ * argument `arg` of a call that may target that callable, where `out` is an
+ * out node of kind `rk` for the same call.
+ */
+ pragma[nomagic]
+ private ParamNode summaryArgParam(ArgNode arg, ReturnKindExt rk, OutNodeExt out) {
+ exists(DataFlowCall call, int pos, SummarizedCallable callable |
+ arg.argumentOf(call, pos) and
+ viableCallable(call) = callable and
+ result.isParameterOf(callable, pos) and
+ out = rk.getAnOutNode(call)
+ )
+ }
+
+ /**
+ * Holds if `arg` flows to `out` using a simple flow summary, that is, a flow
+ * summary without reads and stores.
+ *
+ * NOTE: This step should not be used in global data-flow/taint-tracking, but may
+ * be useful to include in the exposed local data-flow/taint-tracking relations.
+ */
+ predicate summaryThroughStep(ArgNode arg, Node out, boolean preservesValue) {
+ exists(ReturnKindExt rk, ReturnNodeExt ret, ParamNode param |
+ param = summaryArgParam(arg, rk, out) and
+ rk = ret.getKind()
+ |
+ summaryLocalStep(param, ret, preservesValue)
+ )
+ }
+
+ /**
+ * Holds if there is a read(+taint) of `c` from `arg` to `out` using a
+ * flow summary: a read step from the matching parameter followed by a local
+ * step (value or taint) to a return node of the kind that `out` receives.
+ *
+ * NOTE: This step should not be used in global data-flow/taint-tracking, but may
+ * be useful to include in the exposed local data-flow/taint-tracking relations.
+ */
+ predicate summaryGetterStep(ArgNode arg, Content c, Node out) {
+ exists(ReturnKindExt rk, ReturnNodeExt ret, ParamNode param, Node mid |
+ param = summaryArgParam(arg, rk, out) and
+ rk = ret.getKind()
+ |
+ summaryReadStep(param, c, mid) and
+ summaryLocalStep(mid, ret, _)
+ )
+ }
+
+ /**
+ * Holds if there is a (taint+)store of `arg` into content `c` of `out` using a
+ * flow summary: a local step (value or taint) from the matching parameter
+ * followed by a store step into a return node of the kind that `out` receives.
+ *
+ * NOTE: This step should not be used in global data-flow/taint-tracking, but may
+ * be useful to include in the exposed local data-flow/taint-tracking relations.
+ */
+ predicate summarySetterStep(ArgNode arg, Content c, Node out) {
+ exists(ReturnKindExt rk, ReturnNodeExt ret, ParamNode param, Node mid |
+ param = summaryArgParam(arg, rk, out) and
+ rk = ret.getKind()
+ |
+ summaryLocalStep(param, mid, _) and
+ summaryStoreStep(mid, c, ret)
+ )
+ }
+
+ /**
+ * Holds if data is written into content `c` of argument `arg` using a flow summary.
+ *
+ * More precisely: some summarized callable that a call may target stores `c`
+ * into a return node of a parameter-update kind, and `arg` is the pre-update
+ * node of the corresponding out node of that call.
+ *
+ * Depending on the type of `c`, this predicate may be relevant to include in the
+ * definition of `clearsContent()`.
+ */
+ predicate summaryStoresIntoArg(Content c, Node arg) {
+ exists(ParamUpdateReturnKind rk, ReturnNodeExt ret, PostUpdateNode out |
+ exists(DataFlowCall call, SummarizedCallable callable |
+ getNodeEnclosingCallable(ret) = callable and
+ viableCallable(call) = callable and
+ summaryStoreStep(_, c, ret) and
+ ret.getKind() = pragma[only_bind_into](rk) and
+ out = rk.getAnOutNode(call) and
+ arg = out.getPreUpdateNode()
+ )
+ )
+ }
+ }
+
+ /**
+ * Provides a means of translating externally (e.g., CSV) defined flow
+ * summaries into `SummarizedCallable`s.
+ */
+ module External {
+ /**
+ * Holds if `spec` is a relevant external specification, that is, it occurs as
+ * the specification of a source or sink element, or as the input or output
+ * specification of a summary element.
+ */
+ private predicate relevantSpec(string spec) {
+ sourceElement(_, spec, _)
+ or
+ sinkElement(_, spec, _)
+ or
+ summaryElement(_, spec, _, _)
+ or
+ summaryElement(_, _, spec, _)
+ }
+
+ /**
+ * Holds if the `n`th component of specification `s` is `c`, where components
+ * are separated by `" of "` and `s` is a relevant specification.
+ */
+ predicate specSplit(string s, string c, int n) { c = s.splitAt(" of ", n) and relevantSpec(s) }
+
+ /**
+ * Holds if specification `s` has length `len`, that is, `len` is one more than
+ * the largest component index reported by `specSplit` for `s`.
+ */
+ predicate specLength(string s, int len) { len = 1 + max(int n | specSplit(s, _, n)) }
+
+ /** Gets the last component of specification `s`, i.e. the one at index `length - 1`. */
+ string specLast(string s) {
+ exists(int len | specLength(s, len) | specSplit(s, result, len - 1))
+ }
+
+ /**
+ * Holds if specification component `c` parses as parameter `n`.
+ *
+ * Two forms are recognized: `Parameter[n]`, and the range form
+ * `Parameter[n1..n2]`, which holds for every `n` in `[n1 .. n2]`.
+ * The capture group for `n` and `n1` admits `-` characters (`[-0-9]+`),
+ * whereas the capture group for `n2` is digits only (`[0-9]+`).
+ */
+ predicate parseParam(string c, int n) {
+ // restrict `c` to components of relevant specifications
+ specSplit(_, c, _) and
+ (
+ c.regexpCapture("Parameter\\[([-0-9]+)\\]", 1).toInt() = n
+ or
+ exists(int n1, int n2 |
+ c.regexpCapture("Parameter\\[([-0-9]+)\\.\\.([0-9]+)\\]", 1).toInt() = n1 and
+ c.regexpCapture("Parameter\\[([-0-9]+)\\.\\.([0-9]+)\\]", 2).toInt() = n2 and
+ n = [n1 .. n2]
+ )
+ )
+ }
+
+ /**
+ * Holds if specification component `c` parses as argument `n`.
+ *
+ * Two forms are recognized: `Argument[n]`, and the range form
+ * `Argument[n1..n2]`, which holds for every `n` in `[n1 .. n2]`.
+ * The capture group for `n` and `n1` admits `-` characters (`[-0-9]+`),
+ * whereas the capture group for `n2` is digits only (`[0-9]+`).
+ */
+ predicate parseArg(string c, int n) {
+ // restrict `c` to components of relevant specifications
+ specSplit(_, c, _) and
+ (
+ c.regexpCapture("Argument\\[([-0-9]+)\\]", 1).toInt() = n
+ or
+ exists(int n1, int n2 |
+ c.regexpCapture("Argument\\[([-0-9]+)\\.\\.([0-9]+)\\]", 1).toInt() = n1 and
+ c.regexpCapture("Argument\\[([-0-9]+)\\.\\.([0-9]+)\\]", 2).toInt() = n2 and
+ n = [n1 .. n2]
+ )
+ )
+ }
+
+ /**
+ * Gets a summary component denoted by specification component `c`:
+ * `ReturnValue`, an argument or parameter component as parsed by
+ * `parseArg`/`parseParam`, or a language-specific component.
+ */
+ private SummaryComponent interpretComponent(string c) {
+ specSplit(_, c, _) and
+ (
+ result = interpretComponentSpecific(c)
+ or
+ result = SummaryComponent::return(getReturnValueKind()) and c = "ReturnValue"
+ or
+ exists(int position |
+ parseParam(c, position) and result = SummaryComponent::parameter(position)
+ )
+ or
+ exists(int position |
+ parseArg(c, position) and result = SummaryComponent::argument(position)
+ )
+ )
+ }
+
+ /**
+ * Holds if `spec` specifies summary component stack `stack`, that is,
+ * `stack` is the interpretation of all components of `spec` starting at
+ * index 0.
+ */
+ predicate interpretSpec(string spec, SummaryComponentStack stack) {
+ interpretSpec(spec, 0, stack)
+ }
+
+ /**
+ * Holds if `stack` is the interpretation of the components of `spec` from
+ * index `idx` up to and including the last component.
+ */
+ private predicate interpretSpec(string spec, int idx, SummaryComponentStack stack) {
+ // base case: `idx` is the last component, giving a singleton stack
+ exists(string c |
+ relevantSpec(spec) and
+ specLength(spec, idx + 1) and
+ specSplit(spec, c, idx) and
+ stack = SummaryComponentStack::singleton(interpretComponent(c))
+ )
+ or
+ // recursive case: push component `idx` onto the stack for `idx + 1` onwards
+ exists(SummaryComponent head, SummaryComponentStack tail |
+ interpretSpec(spec, idx, head, tail) and
+ stack = SummaryComponentStack::push(head, tail)
+ )
+ }
+
+ /**
+ * Holds if component `idx` of specification `output` is interpreted as `head`,
+ * and `tail` is the interpretation of the components from `idx + 1` onwards.
+ */
+ private predicate interpretSpec(
+ string output, int idx, SummaryComponent head, SummaryComponentStack tail
+ ) {
+ exists(string c |
+ interpretSpec(output, idx + 1, tail) and
+ specSplit(output, c, idx) and
+ head = interpretComponent(c)
+ )
+ }
+
+ /**
+ * A stack that occurs as the tail when interpreting an external
+ * specification. Declaring the corresponding heads as required ensures that
+ * the stacks built by `interpretSpec` via `SummaryComponentStack::push` exist.
+ */
+ private class MkStack extends RequiredSummaryComponentStack {
+ MkStack() { interpretSpec(_, _, _, this) }
+
+ override predicate required(SummaryComponent c) { interpretSpec(_, _, c, this) }
+ }
+
+ /** A callable that has a flow summary defined by an external `summaryElement`. */
+ private class SummarizedCallableExternal extends SummarizedCallable {
+ SummarizedCallableExternal() { summaryElement(this, _, _, _) }
+
+ /**
+ * Holds for the interpreted input/output stacks of each external summary of
+ * this callable; kind `"value"` maps to `preservesValue = true` and kind
+ * `"taint"` to `preservesValue = false`.
+ */
+ override predicate propagatesFlow(
+ SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
+ ) {
+ exists(string inputSpec, string outputSpec, string kind |
+ summaryElement(this, inputSpec, outputSpec, kind) and
+ interpretSpec(outputSpec, output) and
+ interpretSpec(inputSpec, input) and
+ (
+ preservesValue = false and kind = "taint"
+ or
+ preservesValue = true and kind = "value"
+ )
+ )
+ }
+ }
+
+ /**
+ * Holds if component `c` of specification `spec` cannot be parsed, that is,
+ * `interpretComponent` has no result for it.
+ */
+ predicate invalidSpecComponent(string spec, string c) {
+ not exists(SummaryComponent parsed | parsed = interpretComponent(c)) and
+ specSplit(spec, c, _)
+ }
+
+ /**
+ * Holds if input specification component `c` is `Argument` or parses as an
+ * argument; `sinkElementRef` then resolves the element through a call target.
+ */
+ private predicate inputNeedsReference(string c) {
+ parseArg(c, _)
+ or
+ c = "Argument"
+ }
+
+ /**
+ * Holds if output specification component `c` is `Argument`, `ReturnValue`,
+ * or parses as an argument; `sourceElementRef` then resolves the element
+ * through a call target.
+ */
+ private predicate outputNeedsReference(string c) {
+ c = "ReturnValue"
+ or
+ parseArg(c, _)
+ or
+ c = "Argument"
+ }
+
+ /**
+ * Holds if `ref` refers to an element that is declared as a source with
+ * output specification `output` and kind `kind`. When the last component of
+ * `output` needs a reference, `ref` is matched through its call target;
+ * otherwise `ref` is matched as the element itself.
+ */
+ private predicate sourceElementRef(InterpretNode ref, string output, string kind) {
+ exists(SourceOrSinkElement elem | sourceElement(elem, output, kind) |
+ if outputNeedsReference(specLast(output))
+ then ref.getCallTarget() = elem
+ else ref.asElement() = elem
+ )
+ }
+
+ /**
+ * Holds if `ref` refers to an element that is declared as a sink with
+ * input specification `input` and kind `kind`. When the last component of
+ * `input` needs a reference, `ref` is matched through its call target;
+ * otherwise `ref` is matched as the element itself.
+ */
+ private predicate sinkElementRef(InterpretNode ref, string input, string kind) {
+ exists(SourceOrSinkElement elem | sinkElement(elem, input, kind) |
+ if inputNeedsReference(specLast(input))
+ then ref.getCallTarget() = elem
+ else ref.asElement() = elem
+ )
+ }
+
+ /**
+ * Holds if `node` is reached from `ref` by interpreting the components of
+ * source specification `output` from the last one down to index `idx`.
+ *
+ * The base case is `idx` equal to the length of `output`, where `node` is
+ * `ref` itself; each step applies component `idx` to the node `mid` obtained
+ * for `idx + 1`.
+ */
+ private predicate interpretOutput(string output, int idx, InterpretNode ref, InterpretNode node) {
+ sourceElementRef(ref, output, _) and
+ specLength(output, idx) and
+ node = ref
+ or
+ exists(InterpretNode mid, string c |
+ interpretOutput(output, idx + 1, ref, mid) and
+ specSplit(output, c, idx)
+ |
+ // `Argument`: the post-update node of an argument of the call `mid`
+ exists(int pos |
+ node.asNode().(PostUpdateNode).getPreUpdateNode().(ArgNode).argumentOf(mid.asCall(), pos)
+ |
+ c = "Argument" or parseArg(c, pos)
+ )
+ or
+ // `Parameter`: a parameter node of the callable `mid`
+ exists(int pos | node.asNode().(ParamNode).isParameterOf(mid.asCallable(), pos) |
+ c = "Parameter" or parseParam(c, pos)
+ )
+ or
+ // `ReturnValue`: an out node of the call `mid` for the return-value kind
+ c = "ReturnValue" and
+ node.asNode() = getAnOutNodeExt(mid.asCall(), TValueReturn(getReturnValueKind()))
+ or
+ // language-specific components
+ interpretOutputSpecific(c, mid, node)
+ )
+ }
+
+ /**
+ * Holds if `node` is reached from `ref` by interpreting the components of
+ * sink specification `input` from the last one down to index `idx`.
+ *
+ * The base case is `idx` equal to the length of `input`, where `node` is
+ * `ref` itself; each step applies component `idx` to the node `mid` obtained
+ * for `idx + 1`.
+ */
+ private predicate interpretInput(string input, int idx, InterpretNode ref, InterpretNode node) {
+ sinkElementRef(ref, input, _) and
+ specLength(input, idx) and
+ node = ref
+ or
+ exists(InterpretNode mid, string c |
+ interpretInput(input, idx + 1, ref, mid) and
+ specSplit(input, c, idx)
+ |
+ // `Argument`: an argument node of the call `mid`
+ exists(int pos | node.asNode().(ArgNode).argumentOf(mid.asCall(), pos) |
+ c = "Argument" or parseArg(c, pos)
+ )
+ or
+ // `ReturnValue`: a return node of the return-value kind inside the callable `mid`
+ exists(ReturnNodeExt ret |
+ c = "ReturnValue" and
+ ret = node.asNode() and
+ ret.getKind().(ValueReturnKind).getKind() = getReturnValueKind() and
+ mid.asCallable() = getNodeEnclosingCallable(ret)
+ )
+ or
+ // language-specific components
+ interpretInputSpecific(c, mid, node)
+ )
+ }
+
+ /**
+ * Holds if `node` is specified as a source with the given kind in a CSV flow
+ * model, that is, `node` is what the full output specification of some source
+ * element of that kind denotes.
+ */
+ predicate isSourceNode(InterpretNode node, string kind) {
+ exists(string outputSpec, InterpretNode start |
+ interpretOutput(outputSpec, 0, start, node) and
+ sourceElementRef(start, outputSpec, kind)
+ )
+ }
+
+ /**
+ * Holds if `node` is specified as a sink with the given kind in a CSV flow
+ * model, that is, `node` is what the full input specification of some sink
+ * element of that kind denotes.
+ */
+ predicate isSinkNode(InterpretNode node, string kind) {
+ exists(string inputSpec, InterpretNode start |
+ interpretInput(inputSpec, 0, start, node) and
+ sinkElementRef(start, inputSpec, kind)
+ )
+ }
+ }
+
+ /** Provides a query predicate for outputting a set of relevant flow summaries. */
+ module TestOutput {
+ /** A summarized callable whose summaries are reported by the `summary/3` query predicate. */
+ abstract class RelevantSummarizedCallable extends SummarizedCallable {
+ /**
+ * Gets the string that `summary/3` uses to identify this callable.
+ * Defaults to `toString()`.
+ */
+ string getFullString() { result = this.toString() }
+ }
+
+ /**
+ * A query predicate for outputting flow summaries in QL tests. Each declared
+ * summary is rendered as `input -> output`.
+ */
+ query predicate summary(string callable, string flow, boolean preservesValue) {
+ exists(RelevantSummarizedCallable sc, SummaryComponentStack src, SummaryComponentStack dst |
+ sc.propagatesFlow(src, dst, preservesValue) and
+ flow = src + " -> " + dst and
+ callable = sc.getFullString()
+ )
+ }
+ }
+
+ /**
+ * Provides query predicates for rendering the generated data flow graph for
+ * a summarized callable.
+ *
+ * Import this module into a `.ql` file of `@kind graph` to render the graph.
+ * The graph is restricted to callables from `RelevantSummarizedCallable`.
+ */
+ module RenderSummarizedCallable {
+ /** A summarized callable to include in the graph. */
+ abstract class RelevantSummarizedCallable extends SummarizedCallable { }
+
+ /**
+ * The graph elements: summary nodes and parameter nodes of relevant
+ * callables, and synthesized calls enclosed by relevant callables.
+ */
+ private newtype TNodeOrCall =
+ MkNode(Node n) {
+ exists(RelevantSummarizedCallable c |
+ n = summaryNode(c, _)
+ or
+ n.(ParamNode).isParameterOf(c, _)
+ )
+ } or
+ MkCall(DataFlowCall call) {
+ call = summaryDataFlowCall(_) and
+ call.getEnclosingCallable() instanceof RelevantSummarizedCallable
+ }
+
+ /** A data-flow node or a synthesized call, used as a vertex of the rendered graph. */
+ private class NodeOrCall extends TNodeOrCall {
+ /** Gets the data-flow node wrapped by this element, if any. */
+ Node asNode() { this = MkNode(result) }
+
+ /** Gets the call wrapped by this element, if any. */
+ DataFlowCall asCall() { this = MkCall(result) }
+
+ /** Gets a textual representation of this element, taken from the wrapped node or call. */
+ string toString() {
+ result = this.asNode().toString()
+ or
+ result = this.asCall().toString()
+ }
+
+ /**
+ * Holds if this element is at the specified location.
+ * The location spans column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in file `filepath`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.asNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ or
+ this.asCall().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+ }
+ }
+
+ /** Holds if graph element `n` has label `val` under key `semmle.label`. */
+ query predicate nodes(NodeOrCall n, string key, string val) {
+ key = "semmle.label" and val = n.toString()
+ }
+
+ /**
+ * Holds if there is an edge from `a` to `b` for which `value` is one of the
+ * label components: a local step (`value`/`taint`), a read, store, or clear
+ * of some content, a post-update relation, a callback receiver, or a callback
+ * argument.
+ */
+ private predicate edgesComponent(NodeOrCall a, NodeOrCall b, string value) {
+ exists(boolean preservesValue |
+ Private::Steps::summaryLocalStep(a.asNode(), b.asNode(), preservesValue) and
+ if preservesValue = true then value = "value" else value = "taint"
+ )
+ or
+ exists(Content c |
+ Private::Steps::summaryReadStep(a.asNode(), c, b.asNode()) and
+ value = "read (" + c + ")"
+ or
+ Private::Steps::summaryStoreStep(a.asNode(), c, b.asNode()) and
+ value = "store (" + c + ")"
+ or
+ // clearing is rendered as a self-loop on the argument node
+ Private::Steps::summaryClearsContent(a.asNode(), c) and
+ b = a and
+ value = "clear (" + c + ")"
+ )
+ or
+ // edge goes from the pre-update node `a` to the post-update node `b`
+ summaryPostUpdateNode(b.asNode(), a.asNode()) and
+ value = "post-update"
+ or
+ b.asCall() = summaryDataFlowCall(a.asNode()) and
+ value = "receiver"
+ or
+ exists(int i |
+ summaryArgumentNode(b.asCall(), a.asNode(), i) and
+ value = "argument (" + i + ")"
+ )
+ }
+
+ /**
+ * Holds if there is an edge from `a` to `b` whose label `value` (under key
+ * `semmle.label`) joins all components from `edgesComponent` with `" / "`.
+ * Since `strictconcat` is used, pairs without any component yield no edge.
+ */
+ query predicate edges(NodeOrCall a, NodeOrCall b, string key, string value) {
+ key = "semmle.label" and
+ value = strictconcat(string s | edgesComponent(a, b, s) | s, " / ")
+ }
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImplSpecific.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImplSpecific.qll
new file mode 100644
index 00000000000..c373ffc883a
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImplSpecific.qll
@@ -0,0 +1,117 @@
+/**
+ * Provides Ruby specific classes and predicates for defining flow summaries.
+ */
+
+private import ruby
+private import DataFlowDispatch
+private import DataFlowPrivate
+private import DataFlowPublic
+private import DataFlowImplCommon
+private import FlowSummaryImpl::Private
+private import FlowSummaryImpl::Public
+private import codeql.ruby.dataflow.FlowSummary as FlowSummary
+
+/** Holds if `i` is a valid parameter position. */
+predicate parameterPosition(int i) { i in [-2 .. 10] }
+
+/** Gets the parameter position of the instance parameter. */
+int instanceParameterPosition() { none() } // disables implicit summary flow to `self` for callbacks
+
+/** Gets the synthesized summary data-flow node for the given values. */
+Node summaryNode(SummarizedCallable c, SummaryNodeState state) { result = TSummaryNode(c, state) }
+
+/** Gets the synthesized data-flow call for `receiver`. */
+SummaryCall summaryDataFlowCall(Node receiver) { receiver = result.getReceiver() }
+
+/** Gets the type of content `c`. */
+DataFlowType getContentType(Content c) { any() }
+
+/** Gets the return type of kind `rk` for callable `c`. */
+bindingset[c, rk]
+DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { any() }
+
+/**
+ * Gets the type of the `i`th parameter in a synthesized call that targets a
+ * callback of type `t`.
+ */
+bindingset[t, i]
+DataFlowType getCallbackParameterType(DataFlowType t, int i) { any() }
+
+/**
+ * Gets the return type of kind `rk` in a synthesized call that targets a
+ * callback of type `t`.
+ */
+DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { any() }
+
+/**
+ * Holds if an external flow summary exists for `c` with input specification
+ * `input`, output specification `output`, and kind `kind`.
+ */
+predicate summaryElement(DataFlowCallable c, string input, string output, string kind) {
+ exists(FlowSummary::SummarizedCallable sc, boolean preservesValue |
+ sc.propagatesFlowExt(input, output, preservesValue) and
+ c.asLibraryCallable() = sc and
+ if preservesValue = true then kind = "value" else kind = "taint"
+ )
+}
+
+/**
+ * Gets the summary component for specification component `c`, if any.
+ *
+ * This covers all the Ruby-specific components of a flow summary, and
+ * is currently restricted to `"BlockArgument"`.
+ */
+SummaryComponent interpretComponentSpecific(string c) {
+ c = "BlockArgument" and
+ result = FlowSummary::SummaryComponent::block()
+}
+
+/** Gets the return kind corresponding to specification `"ReturnValue"`. */
+NormalReturnKind getReturnValueKind() { any() }
+
+/**
+ * All definitions in this module are required by the shared implementation
+ * (for source/sink interpretation), but they are unused for Ruby, where
+ * we rely on API graphs instead.
+ */
+private module UnusedSourceSinkInterpretation {
+ /**
+ * Holds if an external source specification exists for `n` with output specification
+ * `output` and kind `kind`.
+ */
+ predicate sourceElement(AstNode n, string output, string kind) { none() }
+
+ /**
+ * Holds if an external sink specification exists for `n` with input specification
+ * `input` and kind `kind`.
+ */
+ predicate sinkElement(AstNode n, string input, string kind) { none() }
+
+ class SourceOrSinkElement = AstNode;
+
+ /** An entity used to interpret a source/sink specification. */
+ class InterpretNode extends AstNode {
+ /** Gets the element that this node corresponds to, if any. */
+ SourceOrSinkElement asElement() { none() }
+
+ /** Gets the data-flow node that this node corresponds to, if any. */
+ Node asNode() { none() }
+
+ /** Gets the call that this node corresponds to, if any. */
+ DataFlowCall asCall() { none() }
+
+ /** Gets the callable that this node corresponds to, if any. */
+ DataFlowCallable asCallable() { none() }
+
+ /** Gets the target of this call, if any. */
+ Callable getCallTarget() { none() }
+ }
+
+ /** Provides additional sink specification logic. */
+ predicate interpretOutputSpecific(string c, InterpretNode mid, InterpretNode node) { none() }
+
+ /** Provides additional source specification logic. */
+ predicate interpretInputSpecific(string c, InterpretNode mid, InterpretNode node) { none() }
+}
+
+import UnusedSourceSinkInterpretation
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImpl.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImpl.qll
new file mode 100644
index 00000000000..54269c5cb59
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImpl.qll
@@ -0,0 +1,289 @@
+private import SsaImplCommon
+private import codeql.ruby.AST
+private import codeql.ruby.CFG
+private import codeql.ruby.ast.Variable
+private import CfgNodes::ExprNodes
+
+/** Holds if `v` is uninitialized at index `i` in entry block `bb`. */
+predicate uninitializedWrite(EntryBasicBlock bb, int i, LocalVariable v) {
+ v.getDeclaringScope() = bb.getScope() and
+ i = -1
+}
+
+/** Holds if `bb` contains a captured read of variable `v`. */
+pragma[noinline]
+private predicate hasCapturedVariableRead(BasicBlock bb, LocalVariable v) {
+ exists(LocalVariableReadAccess read |
+ read = bb.getANode().getNode() and
+ read.isCapturedAccess() and
+ read.getVariable() = v
+ )
+}
+
+/**
+ * Holds if an entry definition is needed for captured variable `v` at index
+ * `i` in entry block `bb`.
+ */
+predicate capturedEntryWrite(EntryBasicBlock bb, int i, LocalVariable v) {
+ hasCapturedVariableRead(bb.getASuccessor*(), v) and
+ i = -1
+}
+
+/** Holds if `bb` contains a captured write to variable `v`. */
+pragma[noinline]
+private predicate writesCapturedVariable(BasicBlock bb, LocalVariable v) {
+ exists(LocalVariableWriteAccess write |
+ write = bb.getANode().getNode() and
+ write.isCapturedAccess() and
+ write.getVariable() = v
+ )
+}
+
+/**
+ * Holds if a pseudo read of captured variable `v` should be inserted
+ * at index `i` in exit block `bb`.
+ */
+private predicate capturedExitRead(AnnotatedExitBasicBlock bb, int i, LocalVariable v) {
+ bb.isNormal() and
+ writesCapturedVariable(bb.getAPredecessor*(), v) and
+ i = bb.length()
+}
+
+private CfgScope getCaptureOuterCfgScope(CfgScope scope) {
+ result = scope.getOuterCfgScope() and
+ (
+ scope instanceof Block
+ or
+ scope instanceof Lambda
+ )
+}
+
+/** Holds if captured variable `v` is read inside `scope` or inside a block or lambda nested within `scope`. */
+pragma[noinline]
+private predicate hasCapturedRead(Variable v, CfgScope scope) {
+ any(LocalVariableReadAccess read |
+ read.getVariable() = v and scope = getCaptureOuterCfgScope*(read.getCfgScope())
+ ).isCapturedAccess()
+}
+
+pragma[noinline]
+private predicate hasVariableWriteWithCapturedRead(BasicBlock bb, LocalVariable v, CfgScope scope) {
+ hasCapturedRead(v, scope) and
+ exists(VariableWriteAccess write |
+ write = bb.getANode().getNode() and
+ write.getVariable() = v and
+ bb.getScope() = scope.getOuterCfgScope()
+ )
+}
+
+/**
+ * Holds if the call at index `i` in basic block `bb` may reach a callable
+ * that reads captured variable `v`.
+ */
+private predicate capturedCallRead(BasicBlock bb, int i, LocalVariable v) {
+ exists(CfgScope scope |
+ hasVariableWriteWithCapturedRead(bb.getAPredecessor*(), v, scope) and
+ bb.getNode(i).getNode() instanceof Call
+ |
+ not scope instanceof Block
+ or
+ // If the read happens inside a block, we restrict to the call that
+ // contains the block
+ scope = any(MethodCall c | bb.getNode(i) = c.getAControlFlowNode()).getBlock()
+ )
+}
+
+/** Holds if captured variable `v` is written inside `scope` or inside a block or lambda nested within `scope`. */
+pragma[noinline]
+private predicate hasCapturedWrite(Variable v, CfgScope scope) {
+ any(LocalVariableWriteAccess write |
+ write.getVariable() = v and scope = getCaptureOuterCfgScope*(write.getCfgScope())
+ ).isCapturedAccess()
+}
+
+/** Holds if `v` is read at index `i` in basic block `bb`. */
+private predicate variableReadActual(BasicBlock bb, int i, LocalVariable v) {
+ exists(VariableReadAccess read |
+ read.getVariable() = v and
+ read = bb.getNode(i).getNode()
+ )
+}
+
+predicate variableRead(BasicBlock bb, int i, LocalVariable v, boolean certain) {
+ variableReadActual(bb, i, v) and
+ certain = true
+ or
+ capturedCallRead(bb, i, v) and
+ certain = false
+ or
+ capturedExitRead(bb, i, v) and
+ certain = false
+}
+
+pragma[noinline]
+private predicate hasVariableReadWithCapturedWrite(BasicBlock bb, LocalVariable v, CfgScope scope) {
+ hasCapturedWrite(v, scope) and
+ exists(VariableReadAccess read |
+ read = bb.getANode().getNode() and
+ read.getVariable() = v and
+ bb.getScope() = scope.getOuterCfgScope()
+ )
+}
+
+cached
+private module Cached {
+ /**
+ * Holds if the call at index `i` in basic block `bb` may reach a callable
+ * that writes captured variable `v`.
+ */
+ cached
+ predicate capturedCallWrite(BasicBlock bb, int i, LocalVariable v) {
+ exists(CfgScope scope |
+ hasVariableReadWithCapturedWrite(bb.getASuccessor*(), v, scope) and
+ bb.getNode(i).getNode() instanceof Call
+ |
+ not scope instanceof Block
+ or
+ // If the write happens inside a block, we restrict to the call that
+ // contains the block
+ scope = any(MethodCall c | bb.getNode(i) = c.getAControlFlowNode()).getBlock()
+ )
+ }
+
+ /**
+ * Holds if `v` is written at index `i` in basic block `bb`, and the corresponding
+ * AST write access is `write`.
+ */
+ cached
+ predicate variableWriteActual(BasicBlock bb, int i, LocalVariable v, VariableWriteAccess write) {
+ exists(AstNode n |
+ write.getVariable() = v and
+ n = bb.getNode(i).getNode()
+ |
+ write.isExplicitWrite(n)
+ or
+ write.isImplicitWrite() and
+ n = write
+ )
+ }
+
+ cached
+ VariableReadAccessCfgNode getARead(Definition def) {
+ exists(LocalVariable v, BasicBlock bb, int i |
+ ssaDefReachesRead(v, def, bb, i) and
+ variableReadActual(bb, i, v) and
+ result = bb.getNode(i)
+ )
+ }
+
+ /**
+ * Holds if there is flow for a captured variable from the enclosing scope into a block.
+ * ```rb
+ * foo = 0
+ * bar {
+ * puts foo
+ * }
+ * ```
+ */
+ cached
+ predicate captureFlowIn(Definition def, Definition entry) {
+ exists(LocalVariable v, BasicBlock bb, int i |
+ ssaDefReachesRead(v, def, bb, i) and
+ capturedCallRead(bb, i, v) and
+ exists(BasicBlock bb2, int i2 |
+ capturedEntryWrite(bb2, i2, v) and
+ entry.definesAt(v, bb2, i2)
+ )
+ )
+ }
+
+ /**
+ * Holds if there is outgoing flow for a captured variable that is updated in a block.
+ * ```rb
+ * foo = 0
+ * bar {
+ * foo += 10
+ * }
+ * puts foo
+ * ```
+ */
+ cached
+ predicate captureFlowOut(Definition def, Definition exit) {
+ exists(LocalVariable v, BasicBlock bb, int i |
+ ssaDefReachesRead(v, def, bb, i) and
+ capturedExitRead(bb, i, v) and
+ exists(BasicBlock bb2, int i2 |
+ capturedCallWrite(bb2, i2, v) and
+ exit.definesAt(v, bb2, i2)
+ )
+ )
+ }
+
+ cached
+ Definition phiHasInputFromBlock(PhiNode phi, BasicBlock bb) {
+ phiHasInputFromBlock(phi, result, bb)
+ }
+
+ /**
+ * Holds if the value defined at SSA definition `def` can reach a read at `read`,
+ * without passing through any other non-pseudo read.
+ */
+ cached
+ predicate firstRead(Definition def, VariableReadAccessCfgNode read) {
+ exists(BasicBlock bb1, int i1, BasicBlock bb2, int i2 |
+ def.definesAt(_, bb1, i1) and
+ adjacentDefNoUncertainReads(def, bb1, i1, bb2, i2) and
+ read = bb2.getNode(i2)
+ )
+ }
+
+ /**
+ * Holds if the read at `read2` is a read of the same SSA definition `def`
+ * as the read at `read1`, and `read2` can be reached from `read1` without
+ * passing through another non-pseudo read.
+ */
+ cached
+ predicate adjacentReadPair(
+ Definition def, VariableReadAccessCfgNode read1, VariableReadAccessCfgNode read2
+ ) {
+ exists(BasicBlock bb1, int i1, BasicBlock bb2, int i2 |
+ read1 = bb1.getNode(i1) and
+ variableReadActual(bb1, i1, _) and
+ adjacentDefNoUncertainReads(def, bb1, i1, bb2, i2) and
+ read2 = bb2.getNode(i2)
+ )
+ }
+
+ /**
+ * Holds if the read of `def` at `read` may be a last read. That is, `read`
+ * can either reach another definition of the underlying source variable or
+ * the end of the CFG scope, without passing through another non-pseudo read.
+ */
+ cached
+ predicate lastRead(Definition def, VariableReadAccessCfgNode read) {
+ exists(BasicBlock bb, int i |
+ lastRefNoUncertainReads(def, bb, i) and
+ variableReadActual(bb, i, _) and
+ read = bb.getNode(i)
+ )
+ }
+
+ /**
+ * Holds if the reference to `def` at index `i` in basic block `bb` can reach
+ * another definition `next` of the same underlying source variable, without
+ * passing through another write or non-pseudo read.
+ *
+ * The reference is either a read of `def` or `def` itself.
+ */
+ cached
+ predicate lastRefBeforeRedef(Definition def, BasicBlock bb, int i, Definition next) {
+ lastRefRedefNoUncertainReads(def, bb, i, next)
+ }
+
+ cached
+ Definition uncertainWriteDefinitionInput(UncertainWriteDefinition def) {
+ uncertainWriteDefinitionInput(def, result)
+ }
+}
+
+import Cached
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImplCommon.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImplCommon.qll
new file mode 100644
index 00000000000..884f4406d01
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImplCommon.qll
@@ -0,0 +1,637 @@
+/**
+ * Provides a language-independent implementation of static single assignment
+ * (SSA) form.
+ */
+
+private import SsaImplSpecific
+
+private BasicBlock getABasicBlockPredecessor(BasicBlock bb) { getABasicBlockSuccessor(result) = bb }
+
+/**
+ * Liveness analysis (based on source variables) to restrict the size of the
+ * SSA representation.
+ */
+private module Liveness {
+ /**
+ * A classification of variable references into reads (of a given kind) and
+ * (certain or uncertain) writes.
+ */
+ private newtype TRefKind =
+ Read(boolean certain) { certain in [false, true] } or
+ Write(boolean certain) { certain in [false, true] }
+
+ private class RefKind extends TRefKind {
+ string toString() {
+ exists(boolean certain | this = Read(certain) and result = "read (" + certain + ")")
+ or
+ exists(boolean certain | this = Write(certain) and result = "write (" + certain + ")")
+ }
+
+ int getOrder() {
+ this = Read(_) and
+ result = 0
+ or
+ this = Write(_) and
+ result = 1
+ }
+ }
+
+ /**
+ * Holds if the `i`th node of basic block `bb` is a reference to `v` of kind `k`.
+ */
+ private predicate ref(BasicBlock bb, int i, SourceVariable v, RefKind k) {
+ exists(boolean certain | variableRead(bb, i, v, certain) | k = Read(certain))
+ or
+ exists(boolean certain | variableWrite(bb, i, v, certain) | k = Write(certain))
+ }
+
+ private newtype OrderedRefIndex =
+ MkOrderedRefIndex(int i, int tag) {
+ exists(RefKind rk | ref(_, i, _, rk) | tag = rk.getOrder())
+ }
+
+ private OrderedRefIndex refOrd(BasicBlock bb, int i, SourceVariable v, RefKind k, int ord) {
+ ref(bb, i, v, k) and
+ result = MkOrderedRefIndex(i, ord) and
+ ord = k.getOrder()
+ }
+
+ /**
+ * Gets the (1-based) rank of the reference to `v` at the `i`th node of
+ * basic block `bb`, which has the given reference kind `k`.
+ *
+ * Reads are considered before writes when they happen at the same index.
+ */
+ private int refRank(BasicBlock bb, int i, SourceVariable v, RefKind k) {
+ refOrd(bb, i, v, k, _) =
+ rank[result](int j, int ord, OrderedRefIndex res |
+ res = refOrd(bb, j, v, _, ord)
+ |
+ res order by j, ord
+ )
+ }
+
+ private int maxRefRank(BasicBlock bb, SourceVariable v) {
+ result = refRank(bb, _, v, _) and
+ not result + 1 = refRank(bb, _, v, _)
+ }
+
+ /**
+ * Gets the (1-based) rank of the first reference to `v` inside basic block `bb`
+ * that is either a read or a certain write.
+ */
+ private int firstReadOrCertainWrite(BasicBlock bb, SourceVariable v) {
+ result =
+ min(int r, RefKind k |
+ r = refRank(bb, _, v, k) and
+ k != Write(false)
+ |
+ r
+ )
+ }
+
+ /**
+ * Holds if source variable `v` is live at the beginning of basic block `bb`.
+ */
+ predicate liveAtEntry(BasicBlock bb, SourceVariable v) {
+ // The first read or certain write to `v` inside `bb` is a read
+ refRank(bb, _, v, Read(_)) = firstReadOrCertainWrite(bb, v)
+ or
+ // There is no read or certain write to `v` inside `bb`, but `v` is live at
+ // exit from `bb`, that is, at entry to a successor basic block of `bb`
+ not exists(firstReadOrCertainWrite(bb, v)) and
+ liveAtExit(bb, v)
+ }
+
+ /**
+ * Holds if source variable `v` is live at the end of basic block `bb`.
+ */
+ predicate liveAtExit(BasicBlock bb, SourceVariable v) {
+ liveAtEntry(getABasicBlockSuccessor(bb), v)
+ }
+
+ /**
+ * Holds if variable `v` is live in basic block `bb` at index `i`.
+ * The rank of `i` is `rnk` as defined by `refRank()`.
+ */
+ private predicate liveAtRank(BasicBlock bb, int i, SourceVariable v, int rnk) {
+ exists(RefKind kind | rnk = refRank(bb, i, v, kind) |
+ rnk = maxRefRank(bb, v) and
+ liveAtExit(bb, v)
+ or
+ ref(bb, i, v, kind) and
+ kind = Read(_)
+ or
+ exists(RefKind nextKind |
+ liveAtRank(bb, _, v, rnk + 1) and
+ rnk + 1 = refRank(bb, _, v, nextKind) and
+ nextKind != Write(true)
+ )
+ )
+ }
+
+ /**
+ * Holds if variable `v` is live after the (certain or uncertain) write at
+ * index `i` inside basic block `bb`.
+ */
+ predicate liveAfterWrite(BasicBlock bb, int i, SourceVariable v) {
+ exists(int rnk | rnk = refRank(bb, i, v, Write(_)) | liveAtRank(bb, i, v, rnk))
+ }
+}
+
+private import Liveness
+
+/** Holds if `bb1` strictly dominates `bb2`. */
+private predicate strictlyDominates(BasicBlock bb1, BasicBlock bb2) {
+ bb1 = getImmediateBasicBlockDominator+(bb2)
+}
+
+/** Holds if `bb1` dominates a predecessor of `bb2`. */
+private predicate dominatesPredecessor(BasicBlock bb1, BasicBlock bb2) {
+ exists(BasicBlock pred | pred = getABasicBlockPredecessor(bb2) |
+ bb1 = pred
+ or
+ strictlyDominates(bb1, pred)
+ )
+}
+
+/** Holds if `df` is in the dominance frontier of `bb`. */
+private predicate inDominanceFrontier(BasicBlock bb, BasicBlock df) {
+ dominatesPredecessor(bb, df) and
+ not strictlyDominates(bb, df)
+}
+
+/**
+ * Holds if `bb` is in the dominance frontier of a block containing a
+ * definition of `v`.
+ */
+pragma[noinline]
+private predicate inDefDominanceFrontier(BasicBlock bb, SourceVariable v) {
+ exists(BasicBlock defbb, Definition def |
+ def.definesAt(v, defbb, _) and
+ inDominanceFrontier(defbb, bb)
+ )
+}
+
+cached
+newtype TDefinition =
+ TWriteDef(SourceVariable v, BasicBlock bb, int i) {
+ variableWrite(bb, i, v, _) and
+ liveAfterWrite(bb, i, v)
+ } or
+ TPhiNode(SourceVariable v, BasicBlock bb) {
+ inDefDominanceFrontier(bb, v) and
+ liveAtEntry(bb, v)
+ }
+
+private module SsaDefReaches {
+ newtype TSsaRefKind =
+ SsaRead() or
+ SsaDef()
+
+ /**
+ * A classification of SSA variable references into reads and definitions.
+ */
+ class SsaRefKind extends TSsaRefKind {
+ string toString() {
+ this = SsaRead() and
+ result = "SsaRead"
+ or
+ this = SsaDef() and
+ result = "SsaDef"
+ }
+
+ int getOrder() {
+ this = SsaRead() and
+ result = 0
+ or
+ this = SsaDef() and
+ result = 1
+ }
+ }
+
+ /**
+ * Holds if the `i`th node of basic block `bb` is a reference to `v`,
+ * either a read (when `k` is `SsaRead()`) or an SSA definition (when `k`
+ * is `SsaDef()`).
+ *
+ * Unlike `Liveness::ref`, this includes `phi` nodes.
+ */
+ predicate ssaRef(BasicBlock bb, int i, SourceVariable v, SsaRefKind k) {
+ variableRead(bb, i, v, _) and
+ k = SsaRead()
+ or
+ exists(Definition def | def.definesAt(v, bb, i)) and
+ k = SsaDef()
+ }
+
+ private newtype OrderedSsaRefIndex =
+ MkOrderedSsaRefIndex(int i, SsaRefKind k) { ssaRef(_, i, _, k) }
+
+ private OrderedSsaRefIndex ssaRefOrd(BasicBlock bb, int i, SourceVariable v, SsaRefKind k, int ord) {
+ ssaRef(bb, i, v, k) and
+ result = MkOrderedSsaRefIndex(i, k) and
+ ord = k.getOrder()
+ }
+
+ /**
+ * Gets the (1-based) rank of the reference to `v` at the `i`th node of basic
+ * block `bb`, which has the given reference kind `k`.
+ *
+ * For example, if `bb` is a basic block with a phi node for `v` (considered
+ * to be at index -1), reads `v` at node 2, and defines it at node 5, we have:
+ *
+ * ```ql
+ * ssaRefRank(bb, -1, v, SsaDef()) = 1 // phi node
+ * ssaRefRank(bb, 2, v, SsaRead()) = 2 // read at node 2
+ * ssaRefRank(bb, 5, v, SsaDef()) = 3 // definition at node 5
+ * ```
+ *
+ * Reads are considered before writes when they happen at the same index.
+ */
+ int ssaRefRank(BasicBlock bb, int i, SourceVariable v, SsaRefKind k) {
+ ssaRefOrd(bb, i, v, k, _) =
+ rank[result](int j, int ord, OrderedSsaRefIndex res |
+ res = ssaRefOrd(bb, j, v, _, ord)
+ |
+ res order by j, ord
+ )
+ }
+
+ int maxSsaRefRank(BasicBlock bb, SourceVariable v) {
+ result = ssaRefRank(bb, _, v, _) and
+ not result + 1 = ssaRefRank(bb, _, v, _)
+ }
+
+ /**
+ * Holds if the SSA definition `def` reaches rank index `rnk` in its own
+ * basic block `bb`.
+ */
+ predicate ssaDefReachesRank(BasicBlock bb, Definition def, int rnk, SourceVariable v) {
+ exists(int i |
+ rnk = ssaRefRank(bb, i, v, SsaDef()) and
+ def.definesAt(v, bb, i)
+ )
+ or
+ ssaDefReachesRank(bb, def, rnk - 1, v) and
+ rnk = ssaRefRank(bb, _, v, SsaRead())
+ }
+
+ /**
+ * Holds if the SSA definition of `v` at `def` reaches index `i` in the same
+ * basic block `bb`, without crossing another SSA definition of `v`.
+ */
+ predicate ssaDefReachesReadWithinBlock(SourceVariable v, Definition def, BasicBlock bb, int i) {
+ exists(int rnk |
+ ssaDefReachesRank(bb, def, rnk, v) and
+ rnk = ssaRefRank(bb, i, v, SsaRead())
+ )
+ }
+
+ /**
+ * Holds if the SSA definition of `v` at `def` reaches uncertain SSA definition
+ * `redef` in the same basic block, without crossing another SSA definition of `v`.
+ */
+ predicate ssaDefReachesUncertainDefWithinBlock(
+ SourceVariable v, Definition def, UncertainWriteDefinition redef
+ ) {
+ exists(BasicBlock bb, int rnk, int i |
+ ssaDefReachesRank(bb, def, rnk, v) and
+ rnk = ssaRefRank(bb, i, v, SsaDef()) - 1 and
+ redef.definesAt(v, bb, i)
+ )
+ }
+
+ /**
+ * Same as `ssaRefRank()`, but restricted to a particular SSA definition `def`.
+ */
+ int ssaDefRank(Definition def, SourceVariable v, BasicBlock bb, int i, SsaRefKind k) {
+ v = def.getSourceVariable() and
+ result = ssaRefRank(bb, i, v, k) and
+ (
+ ssaDefReachesRead(_, def, bb, i)
+ or
+ def.definesAt(_, bb, i)
+ )
+ }
+
+ /**
+ * Holds if the reference to `def` at index `i` in basic block `bb` is the
+ * last reference to `v` inside `bb`.
+ */
+ pragma[noinline]
+ predicate lastSsaRef(Definition def, SourceVariable v, BasicBlock bb, int i) {
+ ssaDefRank(def, v, bb, i, _) = maxSsaRefRank(bb, v)
+ }
+
+ predicate defOccursInBlock(Definition def, BasicBlock bb, SourceVariable v) {
+ exists(ssaDefRank(def, v, bb, _, _))
+ }
+
+ pragma[noinline]
+ private predicate ssaDefReachesThroughBlock(Definition def, BasicBlock bb) {
+ ssaDefReachesEndOfBlock(bb, def, _) and
+ not defOccursInBlock(_, bb, def.getSourceVariable())
+ }
+
+ /**
+ * Holds if `def` is accessed in basic block `bb1` (either a read or a write),
+ * `bb2` is a transitive successor of `bb1`, `def` is live at the end of `bb1`,
+ * and the underlying variable for `def` is neither read nor written in any block
+ * on the path between `bb1` and `bb2`.
+ */
+ predicate varBlockReaches(Definition def, BasicBlock bb1, BasicBlock bb2) {
+ defOccursInBlock(def, bb1, _) and
+ bb2 = getABasicBlockSuccessor(bb1)
+ or
+ exists(BasicBlock mid |
+ varBlockReaches(def, bb1, mid) and
+ ssaDefReachesThroughBlock(def, mid) and
+ bb2 = getABasicBlockSuccessor(mid)
+ )
+ }
+
+ /**
+ * Holds if `def` is accessed in basic block `bb1` (either a read or a write),
+ * `def` is read at index `i2` in basic block `bb2`, `bb2` is in a transitive
+ * successor block of `bb1`, and `def` is neither read nor written in any block
+ * on a path between `bb1` and `bb2`.
+ */
+ predicate defAdjacentRead(Definition def, BasicBlock bb1, BasicBlock bb2, int i2) {
+ varBlockReaches(def, bb1, bb2) and
+ ssaRefRank(bb2, i2, def.getSourceVariable(), SsaRead()) = 1
+ }
+}
+
+private import SsaDefReaches
+
+pragma[nomagic]
+predicate liveThrough(BasicBlock bb, SourceVariable v) {
+ liveAtExit(bb, v) and
+ not ssaRef(bb, _, v, SsaDef())
+}
+
+/**
+ * NB: If this predicate is exposed, it should be cached.
+ *
+ * Holds if the SSA definition of `v` at `def` reaches the end of basic
+ * block `bb`, at which point it is still live, without crossing another
+ * SSA definition of `v`.
+ */
+pragma[nomagic]
+predicate ssaDefReachesEndOfBlock(BasicBlock bb, Definition def, SourceVariable v) {
+ exists(int last | last = maxSsaRefRank(bb, v) |
+ ssaDefReachesRank(bb, def, last, v) and
+ liveAtExit(bb, v)
+ )
+ or
+ // The construction of SSA form ensures that each read of a variable is
+ // dominated by its definition. An SSA definition therefore reaches a
+ // control flow node if it is the _closest_ SSA definition that dominates
+ // the node. If two definitions dominate a node then one must dominate the
+ // other, so therefore the definition of _closest_ is given by the dominator
+ // tree. Thus, reaching definitions can be calculated in terms of dominance.
+ ssaDefReachesEndOfBlock(getImmediateBasicBlockDominator(bb), def, pragma[only_bind_into](v)) and
+ liveThrough(bb, pragma[only_bind_into](v))
+}
+
+/**
+ * NB: If this predicate is exposed, it should be cached.
+ *
+ * Holds if `inp` is an input to the phi node `phi` along the edge originating in `bb`.
+ */
+pragma[nomagic]
+predicate phiHasInputFromBlock(PhiNode phi, Definition inp, BasicBlock bb) {
+ exists(SourceVariable v, BasicBlock bbDef |
+ phi.definesAt(v, bbDef, _) and
+ getABasicBlockPredecessor(bbDef) = bb and
+ ssaDefReachesEndOfBlock(bb, inp, v)
+ )
+}
+
+/**
+ * NB: If this predicate is exposed, it should be cached.
+ *
+ * Holds if the SSA definition of `v` at `def` reaches a read at index `i` in
+ * basic block `bb`, without crossing another SSA definition of `v`. The read
+ * may be either certain or uncertain.
+ */
+pragma[nomagic]
+predicate ssaDefReachesRead(SourceVariable v, Definition def, BasicBlock bb, int i) {
+ ssaDefReachesReadWithinBlock(v, def, bb, i)
+ or
+ variableRead(bb, i, v, _) and
+ ssaDefReachesEndOfBlock(getABasicBlockPredecessor(bb), def, v) and
+ not ssaDefReachesReadWithinBlock(v, _, bb, i)
+}
+
+/**
+ * NB: If this predicate is exposed, it should be cached.
+ *
+ * Holds if `def` is accessed at index `i1` in basic block `bb1` (either a read
+ * or a write), `def` is read at index `i2` in basic block `bb2`, and there is a
+ * path between them without any read of `def`.
+ */
+pragma[nomagic]
+predicate adjacentDefRead(Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2) {
+ exists(int rnk |
+ rnk = ssaDefRank(def, _, bb1, i1, _) and
+ rnk + 1 = ssaDefRank(def, _, bb1, i2, SsaRead()) and
+ variableRead(bb1, i2, _, _) and
+ bb2 = bb1
+ )
+ or
+ lastSsaRef(def, _, bb1, i1) and
+ defAdjacentRead(def, bb1, bb2, i2)
+}
+
+pragma[noinline]
+private predicate adjacentDefRead(
+ Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2, SourceVariable v
+) {
+ adjacentDefRead(def, bb1, i1, bb2, i2) and
+ v = def.getSourceVariable()
+}
+
+private predicate adjacentDefReachesRead(
+ Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2
+) {
+ exists(SourceVariable v | adjacentDefRead(def, bb1, i1, bb2, i2, v) |
+ ssaRef(bb1, i1, v, SsaDef())
+ or
+ variableRead(bb1, i1, v, true)
+ )
+ or
+ exists(BasicBlock bb3, int i3 |
+ adjacentDefReachesRead(def, bb1, i1, bb3, i3) and
+ variableRead(bb3, i3, _, false) and
+ adjacentDefRead(def, bb3, i3, bb2, i2)
+ )
+}
+
+/**
+ * NB: If this predicate is exposed, it should be cached.
+ *
+ * Same as `adjacentDefRead`, but ignores uncertain reads.
+ */
+pragma[nomagic]
+predicate adjacentDefNoUncertainReads(Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2) {
+ adjacentDefReachesRead(def, bb1, i1, bb2, i2) and
+ variableRead(bb2, i2, _, true)
+}
+
+/**
+ * NB: If this predicate is exposed, it should be cached.
+ *
+ * Holds if the node at index `i` in `bb` is a last reference to SSA definition
+ * `def`. The reference is last because it can reach another write `next`,
+ * without passing through another read or write.
+ */
+pragma[nomagic]
+predicate lastRefRedef(Definition def, BasicBlock bb, int i, Definition next) {
+ exists(SourceVariable v |
+ // Next reference to `v` inside `bb` is a write
+ exists(int rnk, int j |
+ rnk = ssaDefRank(def, v, bb, i, _) and
+ next.definesAt(v, bb, j) and
+ rnk + 1 = ssaRefRank(bb, j, v, SsaDef())
+ )
+ or
+ // Can reach a write using one or more steps
+ lastSsaRef(def, v, bb, i) and
+ exists(BasicBlock bb2 |
+ varBlockReaches(def, bb, bb2) and
+ 1 = ssaDefRank(next, v, bb2, _, SsaDef())
+ )
+ )
+}
+
+/**
+ * NB: If this predicate is exposed, it should be cached.
+ *
+ * Holds if `inp` is an immediately preceding definition of uncertain definition
+ * `def`. Since `def` is uncertain, the value from the preceding definition might
+ * still be valid.
+ */
+pragma[nomagic]
+predicate uncertainWriteDefinitionInput(UncertainWriteDefinition def, Definition inp) {
+ lastRefRedef(inp, _, _, def)
+}
+
+private predicate adjacentDefReachesUncertainRead(
+ Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2
+) {
+ adjacentDefReachesRead(def, bb1, i1, bb2, i2) and
+ variableRead(bb2, i2, _, false)
+}
+
+/**
+ * NB: If this predicate is exposed, it should be cached.
+ *
+ * Same as `lastRefRedef`, but ignores uncertain reads.
+ */
+pragma[nomagic]
+predicate lastRefRedefNoUncertainReads(Definition def, BasicBlock bb, int i, Definition next) {
+ lastRefRedef(def, bb, i, next) and
+ not variableRead(bb, i, def.getSourceVariable(), false)
+ or
+ exists(BasicBlock bb0, int i0 |
+ lastRefRedef(def, bb0, i0, next) and
+ adjacentDefReachesUncertainRead(def, bb, i, bb0, i0)
+ )
+}
+
+/**
+ * NB: If this predicate is exposed, it should be cached.
+ *
+ * Holds if the node at index `i` in `bb` is a last reference to SSA
+ * definition `def`.
+ *
+ * That is, the node can reach the end of the enclosing callable, or another
+ * SSA definition for the underlying source variable, without passing through
+ * another read.
+ */
+pragma[nomagic]
+predicate lastRef(Definition def, BasicBlock bb, int i) {
+ lastRefRedef(def, bb, i, _)
+ or
+ lastSsaRef(def, _, bb, i) and
+ (
+ // Can reach exit directly
+ bb instanceof ExitBasicBlock
+ or
+ // Can reach a block using one or more steps, where `def` is no longer live
+ exists(BasicBlock bb2 | varBlockReaches(def, bb, bb2) |
+ not defOccursInBlock(def, bb2, _) and
+ not ssaDefReachesEndOfBlock(bb2, def, _)
+ )
+ )
+}
+
+/**
+ * NB: If this predicate is exposed, it should be cached.
+ *
+ * Same as `lastRef`, but ignores uncertain reads.
+ */
+pragma[nomagic]
+predicate lastRefNoUncertainReads(Definition def, BasicBlock bb, int i) {
+ lastRef(def, bb, i) and
+ not variableRead(bb, i, def.getSourceVariable(), false)
+ or
+ exists(BasicBlock bb0, int i0 |
+ lastRef(def, bb0, i0) and
+ adjacentDefReachesUncertainRead(def, bb, i, bb0, i0)
+ )
+}
+
+/** A static single assignment (SSA) definition. */
+class Definition extends TDefinition {
+ /** Gets the source variable underlying this SSA definition. */
+ SourceVariable getSourceVariable() { this.definesAt(result, _, _) }
+
+ /**
+ * Holds if this SSA definition defines `v` at index `i` in basic block `bb`.
+ * Phi nodes are considered to be at index `-1`, while normal variable writes
+ * are at the index of the control flow node they wrap.
+ */
+ final predicate definesAt(SourceVariable v, BasicBlock bb, int i) {
+ this = TWriteDef(v, bb, i)
+ or
+ this = TPhiNode(v, bb) and i = -1
+ }
+
+ /** Gets the basic block to which this SSA definition belongs. */
+ final BasicBlock getBasicBlock() { this.definesAt(_, result, _) }
+
+ /** Gets a textual representation of this SSA definition. */
+ string toString() { none() }
+}
+
+/** An SSA definition that corresponds to a write. */
+class WriteDefinition extends Definition, TWriteDef {
+ private SourceVariable v;
+ private BasicBlock bb;
+ private int i;
+
+ WriteDefinition() { this = TWriteDef(v, bb, i) }
+
+ override string toString() { result = "WriteDef" }
+}
+
+/** A phi node. */
+class PhiNode extends Definition, TPhiNode {
+ override string toString() { result = "Phi" }
+}
+
+/**
+ * An SSA definition that represents an uncertain update of the underlying
+ * source variable.
+ */
+class UncertainWriteDefinition extends WriteDefinition {
+ UncertainWriteDefinition() {
+ exists(SourceVariable v, BasicBlock bb, int i |
+ this.definesAt(v, bb, i) and
+ variableWrite(bb, i, v, false)
+ )
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImplSpecific.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImplSpecific.qll
new file mode 100644
index 00000000000..76646f17e8d
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/SsaImplSpecific.qll
@@ -0,0 +1,34 @@
+/** Provides the Ruby specific parameters for `SsaImplCommon.qll`. */
+
+private import SsaImpl as SsaImpl
+private import codeql.ruby.AST
+private import codeql.ruby.ast.Parameter
+private import codeql.ruby.ast.Variable
+private import codeql.ruby.controlflow.BasicBlocks as BasicBlocks
+private import codeql.ruby.controlflow.ControlFlowGraph
+
+class BasicBlock = BasicBlocks::BasicBlock;
+
+BasicBlock getImmediateBasicBlockDominator(BasicBlock bb) { result = bb.getImmediateDominator() }
+
+BasicBlock getABasicBlockSuccessor(BasicBlock bb) { result = bb.getASuccessor() }
+
+class ExitBasicBlock = BasicBlocks::ExitBasicBlock;
+
+class SourceVariable = LocalVariable;
+
+predicate variableWrite(BasicBlock bb, int i, SourceVariable v, boolean certain) {
+ (
+ SsaImpl::uninitializedWrite(bb, i, v)
+ or
+ SsaImpl::capturedEntryWrite(bb, i, v)
+ or
+ SsaImpl::variableWriteActual(bb, i, v, _)
+ ) and
+ certain = true // uninitialized, captured-entry and actual writes are all reported as certain
+ or
+ SsaImpl::capturedCallWrite(bb, i, v) and
+ certain = false // writes via captured calls are the only ones reported as uncertain
+}
+
+predicate variableRead = SsaImpl::variableRead/4;
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/TaintTrackingPrivate.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/TaintTrackingPrivate.qll
new file mode 100755
index 00000000000..86c8ffb7f50
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/TaintTrackingPrivate.qll
@@ -0,0 +1,41 @@
+private import ruby
+private import TaintTrackingPublic
+private import codeql.ruby.CFG
+private import codeql.ruby.DataFlow
+private import FlowSummaryImpl as FlowSummaryImpl
+
+/**
+ * Holds if `node` should be a sanitizer in all global taint flow configurations
+ * but not in local taint.
+ */
+predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
+
+/**
+ * Holds if default `TaintTracking::Configuration`s should allow implicit reads
+ * of `c` at sinks and inputs to additional taint steps.
+ */
+bindingset[node]
+predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::Content c) { none() }
+
+/**
+ * Holds if the additional step from `nodeFrom` to `nodeTo` should be included
+ * in all global taint flow configurations.
+ */
+cached
+predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+ // operation involving `nodeFrom`
+ exists(CfgNodes::ExprNodes::OperationCfgNode op |
+ op = nodeTo.asExpr() and
+ op.getAnOperand() = nodeFrom.asExpr() and
+ not op.getExpr() instanceof AssignExpr
+ )
+ or
+ // string interpolation of `nodeFrom` into `nodeTo`
+ nodeFrom.asExpr() =
+ nodeTo.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent()
+ or
+ // element reference from nodeFrom
+ nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::ElementReferenceCfgNode).getReceiver()
+ or
+ FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, false)
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/TaintTrackingPublic.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/TaintTrackingPublic.qll
new file mode 100755
index 00000000000..3fe5659bdc7
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/TaintTrackingPublic.qll
@@ -0,0 +1,31 @@
+private import ruby
+private import TaintTrackingPrivate
+private import codeql.ruby.CFG
+private import codeql.ruby.DataFlow
+private import FlowSummaryImpl as FlowSummaryImpl
+
+/**
+ * Holds if taint propagates from `source` to `sink` in zero or more local
+ * (intra-procedural) steps.
+ */
+predicate localTaint(DataFlow::Node source, DataFlow::Node sink) { localTaintStep*(source, sink) }
+
+/**
+ * Holds if taint can flow from `e1` to `e2` in zero or more
+ * local (intra-procedural) steps.
+ */
+predicate localExprTaint(CfgNodes::ExprCfgNode e1, CfgNodes::ExprCfgNode e2) {
+ localTaint(DataFlow::exprNode(e1), DataFlow::exprNode(e2))
+}
+
+/**
+ * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
+ * (intra-procedural) step.
+ */
+predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+ defaultAdditionalTaintStep(nodeFrom, nodeTo)
+ or
+ // Simple flow through library code is included in the exposed local
+ // step relation, even though flow is technically inter-procedural
+ FlowSummaryImpl::Private::Steps::summaryThroughStep(nodeFrom, nodeTo, false)
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTrackingImpl.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTrackingImpl.qll
new file mode 100644
index 00000000000..f4f73b8247c
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTrackingImpl.qll
@@ -0,0 +1,120 @@
+/**
+ * Provides an implementation of global (interprocedural) taint tracking.
+ * This file re-exports the local (intraprocedural) taint-tracking analysis
+ * from `TaintTrackingParameter::Public` and adds a global analysis, mainly
+ * exposed through the `Configuration` class. For some languages, this file
+ * exists in several identical copies, allowing queries to use multiple
+ * `Configuration` classes that depend on each other without introducing
+ * mutual recursion among those configurations.
+ */
+
+import TaintTrackingParameter::Public
+private import TaintTrackingParameter::Private
+
+/**
+ * A configuration of interprocedural taint tracking analysis. This defines
+ * sources, sinks, and any other configurable aspect of the analysis. Each
+ * use of the taint tracking library must define its own unique extension of
+ * this abstract class.
+ *
+ * A taint-tracking configuration is a special data flow configuration
+ * (`DataFlow::Configuration`) that allows for flow through nodes that do not
+ * necessarily preserve values but are still relevant from a taint tracking
+ * perspective. (For example, string concatenation, where one of the operands
+ * is tainted.)
+ *
+ * To create a configuration, extend this class with a subclass whose
+ * characteristic predicate is a unique singleton string. For example, write
+ *
+ * ```ql
+ * class MyAnalysisConfiguration extends TaintTracking::Configuration {
+ * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
+ * // Override `isSource` and `isSink`.
+ * // Optionally override `isSanitizer`.
+ * // Optionally override `isSanitizerIn`.
+ * // Optionally override `isSanitizerOut`.
+ * // Optionally override `isSanitizerGuard`.
+ * // Optionally override `isAdditionalTaintStep`.
+ * }
+ * ```
+ *
+ * Then, to query whether there is flow between some `source` and `sink`,
+ * write
+ *
+ * ```ql
+ * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
+ * ```
+ *
+ * Multiple configurations can coexist, but it is unsupported to depend on
+ * another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
+ * overridden predicates that define sources, sinks, or additional steps.
+ * Instead, the dependency should go to a `TaintTracking2::Configuration` or a
+ * `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
+ */
+abstract class Configuration extends DataFlow::Configuration {
+ bindingset[this]
+ Configuration() { any() }
+
+ /**
+ * Holds if `source` is a relevant taint source.
+ *
+ * The smaller this predicate is, the faster `hasFlow()` will converge.
+ */
+ // overridden to provide taint-tracking specific qldoc
+ abstract override predicate isSource(DataFlow::Node source);
+
+ /**
+ * Holds if `sink` is a relevant taint sink.
+ *
+ * The smaller this predicate is, the faster `hasFlow()` will converge.
+ */
+ // overridden to provide taint-tracking specific qldoc
+ abstract override predicate isSink(DataFlow::Node sink);
+
+ /** Holds if the node `node` is a taint sanitizer. */
+ predicate isSanitizer(DataFlow::Node node) { none() }
+
+ final override predicate isBarrier(DataFlow::Node node) {
+ isSanitizer(node) or
+ defaultTaintSanitizer(node)
+ }
+
+ /** Holds if taint propagation into `node` is prohibited. */
+ predicate isSanitizerIn(DataFlow::Node node) { none() }
+
+ final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
+
+ /** Holds if taint propagation out of `node` is prohibited. */
+ predicate isSanitizerOut(DataFlow::Node node) { none() }
+
+ final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
+
+ /** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
+ predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
+
+ final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }
+
+ /**
+ * Holds if the additional taint propagation step from `node1` to `node2`
+ * must be taken into account in the analysis.
+ */
+ predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
+
+ final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
+ isAdditionalTaintStep(node1, node2) or
+ defaultAdditionalTaintStep(node1, node2)
+ }
+
+ override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
+ (this.isSink(node) or this.isAdditionalTaintStep(node, _)) and
+ defaultImplicitTaintRead(node, c)
+ }
+
+ /**
+ * Holds if taint may flow from `source` to `sink` for this configuration.
+ */
+ // overridden to provide taint-tracking specific qldoc
+ override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
+ super.hasFlow(source, sink)
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTrackingParameter.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTrackingParameter.qll
new file mode 100644
index 00000000000..ce6f5ed1c48
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTrackingParameter.qll
@@ -0,0 +1,6 @@
+import codeql.ruby.dataflow.internal.TaintTrackingPublic as Public
+
+module Private {
+ import codeql.ruby.DataFlow::DataFlow as DataFlow
+ import codeql.ruby.dataflow.internal.TaintTrackingPrivate
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/filters/GeneratedCode.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/filters/GeneratedCode.qll
new file mode 100644
index 00000000000..18d12be3aac
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/filters/GeneratedCode.qll
@@ -0,0 +1,43 @@
+/** Provides classes for detecting generated code. */
+
+private import ruby
+private import codeql.ruby.ast.internal.TreeSitter
+
+/** A source file that contains generated code. */
+abstract class GeneratedCodeFile extends RubyFile { }
+
+/** A file containing comments suggesting it contains generated code. */
+class GeneratedCommentFile extends GeneratedCodeFile {
+ GeneratedCommentFile() { this = any(GeneratedCodeComment c).getLocation().getFile() }
+}
+
+/** A comment line that indicates generated code. */
+abstract class GeneratedCodeComment extends Ruby::Comment { }
+
+/**
+ * A generic comment line that suggests that the file is generated.
+ */
+class GenericGeneratedCodeComment extends GeneratedCodeComment {
+ GenericGeneratedCodeComment() {
+ exists(string line, string entity, string was, string automatically | line = getValue() |
+ entity = "file|class|art[ei]fact|module|script" and
+ was = "was|is|has been" and
+ automatically = "automatically |mechanically |auto[- ]?" and
+ line.regexpMatch("(?i).*\\bThis (" + entity + ") (" + was + ") (" + automatically +
+ ")?generated\\b.*")
+ )
+ }
+}
+
+/** A comment warning against modifications. */
+class DontModifyMarkerComment extends GeneratedCodeComment {
+ DontModifyMarkerComment() {
+ exists(string line | line = getValue() |
+ line.regexpMatch("(?i).*\\bGenerated by\\b.*\\bDo not edit\\b.*") or
+ line.regexpMatch("(?i).*\\bAny modifications to this file will be lost\\b.*")
+ )
+ }
+}
+
+/** Holds if `file` looks like it contains generated code. */
+predicate isGeneratedCode(GeneratedCodeFile file) { any() }
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll
new file mode 100644
index 00000000000..0eec1e15f58
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll
@@ -0,0 +1,259 @@
+private import codeql.ruby.AST
+private import codeql.ruby.Concepts
+private import codeql.ruby.controlflow.CfgNodes
+private import codeql.ruby.DataFlow
+private import codeql.ruby.dataflow.RemoteFlowSources
+private import codeql.ruby.ast.internal.Module
+private import ActionView
+
+private class ActionControllerBaseAccess extends ConstantReadAccess {
+ ActionControllerBaseAccess() {
+ this.getName() = "Base" and
+ this.getScopeExpr().(ConstantAccess).getName() = "ActionController"
+ }
+}
+
+// ApplicationController extends ActionController::Base, but we
+// treat it separately in case the ApplicationController definition
+// is not in the database
+private class ApplicationControllerAccess extends ConstantReadAccess {
+ ApplicationControllerAccess() { this.getName() = "ApplicationController" }
+}
+
+/**
+ * A `ClassDeclaration` for a class that extends `ActionController::Base`, `ApplicationController`, or another such controller class.
+ * For example,
+ *
+ * ```rb
+ * class FooController < ActionController::Base
+ * def delete_handler
+ * uid = params[:id]
+ * User.delete_by("id = ?", uid)
+ * end
+ * end
+ * ```
+ */
+class ActionControllerControllerClass extends ClassDeclaration {
+ ActionControllerControllerClass() {
+ // class FooController < ActionController::Base
+ this.getSuperclassExpr() instanceof ActionControllerBaseAccess
+ or
+ // class FooController < ApplicationController
+ this.getSuperclassExpr() instanceof ApplicationControllerAccess
+ or
+ // class BarController < FooController
+ exists(ActionControllerControllerClass other |
+ other.getModule() = resolveScopeExpr(this.getSuperclassExpr())
+ )
+ }
+
+ /**
+ * Gets an `ActionControllerActionMethod` defined in this class.
+ */
+ ActionControllerActionMethod getAnAction() { result = this.getAMethod() }
+}
+
+/**
+ * An instance method defined within an `ActionController` controller class.
+ * This may be the target of a route handler, if such a route is defined.
+ */
+class ActionControllerActionMethod extends Method, HTTP::Server::RequestHandler::Range {
+ private ActionControllerControllerClass controllerClass;
+
+ ActionControllerActionMethod() { this = controllerClass.getAMethod() }
+
+ /**
+ * Establishes a mapping between a method within the file
+ * `<sourcePrefix>app/controllers/<subPath>_controller.rb` and the
+ * corresponding template file at
+ * `<sourcePrefix>app/views/<subPath>/<method name>.html.erb`.
+ */
+ ErbFile getDefaultTemplateFile() {
+ controllerTemplateFile(this.getControllerClass(), result) and
+ result.getBaseName() = this.getName() + ".html.erb"
+ }
+
+ // params come from `params` method rather than a method parameter
+ override Parameter getARoutedParameter() { none() }
+
+ override string getFramework() { result = "ActionController" }
+
+ /** Gets a call to render from within this method. */
+ RenderCall getARenderCall() { result.getParent+() = this }
+
+ // TODO: model the implicit render call when a path through the method does
+ // not end at an explicit render or redirect
+ /** Gets the controller class containing this method. */
+ ActionControllerControllerClass getControllerClass() { result = controllerClass }
+}
+
+// A method call with a `self` receiver from within a controller class
+private class ActionControllerContextCall extends MethodCall {
+ private ActionControllerControllerClass controllerClass;
+
+ ActionControllerContextCall() {
+ this.getReceiver() instanceof Self and
+ this.getEnclosingModule() = controllerClass
+ }
+
+ ActionControllerControllerClass getControllerClass() { result = controllerClass }
+}
+
+/**
+ * A call to the `params` method to fetch the request parameters.
+ */
+abstract class ParamsCall extends MethodCall {
+ ParamsCall() { this.getMethodName() = "params" }
+}
+
+/**
+ * A `RemoteFlowSource::Range` to represent accessing the
+ * ActionController parameters available via the `params` method.
+ */
+class ParamsSource extends RemoteFlowSource::Range {
+ ParamsCall call;
+
+ ParamsSource() { this.asExpr().getExpr() = call }
+
+ override string getSourceType() { result = "ActionController::Metal#params" }
+}
+
+// A call to `params` from within a controller.
+private class ActionControllerParamsCall extends ActionControllerContextCall, ParamsCall { }
+
+// A call to `render` from within a controller.
+private class ActionControllerRenderCall extends ActionControllerContextCall, RenderCall { }
+
+// A call to `render_to` from within a controller.
+private class ActionControllerRenderToCall extends ActionControllerContextCall, RenderToCall { }
+
+// A call to `html_safe` from within a controller.
+private class ActionControllerHtmlSafeCall extends HtmlSafeCall {
+ ActionControllerHtmlSafeCall() {
+ this.getEnclosingModule() instanceof ActionControllerControllerClass
+ }
+}
+
+// A call to `html_escape` from within a controller.
+private class ActionControllerHtmlEscapeCall extends HtmlEscapeCall {
+ ActionControllerHtmlEscapeCall() {
+ this.getEnclosingModule() instanceof ActionControllerControllerClass
+ }
+}
+
+/**
+ * A call to the `redirect_to` method, used in an action to redirect to a
+ * specific URL/path or to a different action in this controller.
+ */
+class RedirectToCall extends ActionControllerContextCall {
+ RedirectToCall() { this.getMethodName() = "redirect_to" }
+
+ /** Gets the first argument of this call, representing the URL to redirect to, if any. */
+ Expr getRedirectUrl() { result = this.getArgument(0) }
+
+ /** Gets the method of this controller class named by the `action` keyword argument, if any. */
+ ActionControllerActionMethod getRedirectActionMethod() {
+ exists(string methodName |
+ methodName = this.getKeywordArgument("action").(StringlikeLiteral).getValueText() and
+ methodName = result.getName() and
+ result.getEnclosingModule() = this.getControllerClass()
+ )
+ }
+}
+
+/**
+ * A call to the `redirect_to` method, as an `HttpRedirectResponse`.
+ */
+class ActionControllerRedirectResponse extends HTTP::Server::HttpRedirectResponse::Range {
+ RedirectToCall redirectToCall;
+
+ ActionControllerRedirectResponse() { this.asExpr().getExpr() = redirectToCall }
+
+ override DataFlow::Node getBody() { none() }
+
+ override DataFlow::Node getMimetypeOrContentTypeArg() { none() }
+
+ override string getMimetypeDefault() { none() }
+
+ override DataFlow::Node getRedirectLocation() {
+ result.asExpr().getExpr() = redirectToCall.getRedirectUrl()
+ }
+}
+
+/**
+ * A method in an `ActionController` class that is accessible from within a
+ * Rails view as a helper method. For instance, in:
+ *
+ * ```rb
+ * class FooController < ActionController::Base
+ * helper_method :logged_in?
+ * def logged_in?
+ * @current_user != nil
+ * end
+ * end
+ * ```
+ *
+ * the `logged_in?` method is a helper method.
+ * See also https://api.rubyonrails.org/classes/AbstractController/Helpers/ClassMethods.html#method-i-helper_method
+ */
+class ActionControllerHelperMethod extends Method {
+ private ActionControllerControllerClass controllerClass;
+
+ ActionControllerHelperMethod() {
+ this.getEnclosingModule() = controllerClass and
+ exists(MethodCall helperMethodMarker |
+ helperMethodMarker.getMethodName() = "helper_method" and
+ helperMethodMarker.getAnArgument().(StringlikeLiteral).getValueText() = this.getName() and
+ helperMethodMarker.getEnclosingModule() = controllerClass
+ )
+ }
+
+ /** Gets the class containing this helper method. */
+ ActionControllerControllerClass getControllerClass() { result = controllerClass }
+}
+
+/**
+ * Gets an `ActionControllerControllerClass` associated with the given `ErbFile`
+ * according to Rails path conventions.
+ * For instance, a template file at `app/views/foo/bar/baz.html.erb` will be
+ * mapped to a controller class in `app/controllers/foo/bar/baz_controller.rb`,
+ * if such a controller class exists.
+ */
+ActionControllerControllerClass getAssociatedControllerClass(ErbFile f) {
+ // There is a direct mapping from template file to controller class
+ controllerTemplateFile(result, f)
+ or
+ // The template `f` is a partial, and it is rendered from within another
+ // template file, `fp`. In this case, `f` inherits the associated
+ // controller classes from `fp`.
+ f.isPartial() and
+ exists(RenderCall r, ErbFile fp |
+ r.getLocation().getFile() = fp and
+ r.getTemplateFile() = f and
+ result = getAssociatedControllerClass(fp)
+ )
+}
+
+// TODO: improve layout support, e.g. for `layout` method
+// https://guides.rubyonrails.org/layouts_and_rendering.html
+/**
+ * Holds if `templateFile` is a viable file "belonging" to the given
+ * `ActionControllerControllerClass`, according to Rails conventions.
+ *
+ * This handles mappings between controllers in `app/controllers/`, and
+ * templates in `app/views/` and `app/views/layouts/`.
+ */
+predicate controllerTemplateFile(ActionControllerControllerClass cls, ErbFile templateFile) {
+ exists(string templatesPath, string sourcePrefix, string subPath, string controllerPath |
+ controllerPath = cls.getLocation().getFile().getRelativePath() and
+ templatesPath = templateFile.getParentContainer().getRelativePath() and
+ // `sourcePrefix` is either a prefix path ending in a slash, or empty if the rails app is at the source root;
+ // the subdirectory below `app/controllers/` is optional so controllers placed directly in it also get a prefix
+ sourcePrefix = [controllerPath.regexpCapture("^(.*/)app/controllers/(?:.*/)?[^/]*$", 1), ""] and
+ controllerPath = sourcePrefix + "app/controllers/" + subPath + "_controller.rb" and
+ (
+ templatesPath = sourcePrefix + "app/views/" + subPath or
+ templateFile.getRelativePath().matches(sourcePrefix + "app/views/layouts/" + subPath + "%")
+ )
+ )
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActionView.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActionView.qll
new file mode 100644
index 00000000000..55638ab6584
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActionView.qll
@@ -0,0 +1,138 @@
+private import codeql.ruby.AST
+private import codeql.ruby.Concepts
+private import codeql.ruby.controlflow.CfgNodes
+private import codeql.ruby.DataFlow
+private import codeql.ruby.dataflow.RemoteFlowSources
+private import codeql.ruby.ast.internal.Module
+private import ActionController
+
+predicate inActionViewContext(AstNode n) {
+ // Holds if `n` is located in an ERB template file; this is the only context recognised here
+ n.getLocation().getFile() instanceof ErbFile
+}
+
+/**
+ * A method call on a string to mark it as HTML safe for Rails.
+ * Strings marked as such will not be automatically escaped when inserted into
+ * HTML.
+ */
+abstract class HtmlSafeCall extends MethodCall {
+ HtmlSafeCall() { this.getMethodName() = "html_safe" }
+}
+
+// A call to `html_safe` from within a template.
+private class ActionViewHtmlSafeCall extends HtmlSafeCall {
+ ActionViewHtmlSafeCall() { inActionViewContext(this) }
+}
+
+/**
+ * A call to a method named "html_escape", "html_escape_once", or "h".
+ */
+abstract class HtmlEscapeCall extends MethodCall {
+ // "h" is aliased to "html_escape" in ActiveSupport
+ HtmlEscapeCall() { this.getMethodName() = ["html_escape", "html_escape_once", "h"] }
+}
+
+class RailsHtmlEscaping extends Escaping::Range, DataFlow::CallNode {
+ RailsHtmlEscaping() { this.asExpr().getExpr() instanceof HtmlEscapeCall }
+
+ override DataFlow::Node getAnInput() { result = this.getArgument(0) }
+
+ override DataFlow::Node getOutput() { result = this }
+
+ override string getKind() { result = Escaping::getHtmlKind() }
+}
+
+// A call to `html_escape` from within a template.
+private class ActionViewHtmlEscapeCall extends HtmlEscapeCall {
+ ActionViewHtmlEscapeCall() { inActionViewContext(this) }
+}
+
+// A call with a `self` receiver in a context where some commonly used `ActionView` methods are available.
+private class ActionViewContextCall extends MethodCall {
+ ActionViewContextCall() {
+ this.getReceiver() instanceof Self and
+ inActionViewContext(this)
+ }
+
+ predicate isInErbFile() { this.getLocation().getFile() instanceof ErbFile } // same check as `inActionViewContext`, which the characteristic predicate already requires
+}
+
+/** A call to the `raw` method to output a value without HTML escaping. */
+class RawCall extends ActionViewContextCall {
+ RawCall() { this.getMethodName() = "raw" }
+}
+
+// A call to the `params` method within the context of a template.
+private class ActionViewParamsCall extends ActionViewContextCall, ParamsCall { }
+
+/**
+ * A call to a `render` method that will populate the response body with the
+ * rendered content.
+ */
+abstract class RenderCall extends MethodCall {
+ RenderCall() { this.getMethodName() = "render" }
+
+ private Expr getTemplatePathArgument() {
+ // TODO: support other ways of specifying paths (e.g. `file`)
+ result = [this.getKeywordArgument(["partial", "template", "action"]), this.getArgument(0)]
+ }
+
+ private string getTemplatePathValue() { result = this.getTemplatePathArgument().getValueText() }
+
+ // everything up to and including the final slash, but ignoring any leading slash (NOTE(review): capture group 1 is optional; confirm a path without a slash still yields a result)
+ private string getSubPath() {
+ result = this.getTemplatePathValue().regexpCapture("^/?(.*/)?(?:[^/]*?)$", 1)
+ }
+
+ // everything after the final slash, or the whole string if there is no slash
+ private string getBaseName() {
+ result = this.getTemplatePathValue().regexpCapture("^/?(?:.*/)?([^/]*?)$", 1)
+ }
+
+ /**
+ * Gets the template file to be rendered by this call, if any.
+ */
+ ErbFile getTemplateFile() {
+ result.getTemplateName() = this.getBaseName() and
+ result.getRelativePath().matches("%app/views/" + this.getSubPath() + "%")
+ }
+
+ /**
+ * Gets the hash literal passed as the `locals` keyword argument, i.e. the local variables given to the renderer.
+ */
+ HashLiteral getLocals() { result = this.getKeywordArgument("locals") }
+ // TODO: implicit renders in controller actions
+}
+
+// A call to the `render` method within the context of a template.
+private class ActionViewRenderCall extends RenderCall, ActionViewContextCall { }
+
+/**
+ * A render call that does not automatically set the HTTP response body.
+ */
+abstract class RenderToCall extends MethodCall {
+ RenderToCall() { this.getMethodName() = ["render_to_body", "render_to_string"] }
+}
+
+// A call to `render_to` from within a template.
+private class ActionViewRenderToCall extends ActionViewContextCall, RenderToCall { }
+
+/**
+ * A call to the ActionView `link_to` helper method.
+ *
+ * This generates an HTML anchor tag. The method is not designed to expect
+ * user-input, so provided paths are not automatically HTML escaped.
+ */
+class LinkToCall extends ActionViewContextCall {
+ LinkToCall() { this.getMethodName() = "link_to" }
+
+ Expr getPathArgument() {
+ // When `link_to` is called with a block, it uses the first argument as the
+ // path, and otherwise the second argument.
+ exists(this.getBlock()) and result = this.getArgument(0)
+ or
+ not exists(this.getBlock()) and result = this.getArgument(1)
+ }
+}
+// TODO: model flow in/out of template files properly.
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll
new file mode 100644
index 00000000000..2a13b51acfb
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll
@@ -0,0 +1,319 @@
+private import codeql.ruby.AST
+private import codeql.ruby.Concepts
+private import codeql.ruby.controlflow.CfgNodes
+private import codeql.ruby.DataFlow
+private import codeql.ruby.dataflow.internal.DataFlowDispatch
+private import codeql.ruby.ast.internal.Module
+private import codeql.ruby.ApiGraphs
+private import codeql.ruby.frameworks.StandardLibrary
+
+// A read of a constant named `Base` whose scope is a constant named `ActiveRecord`.
+private class ActiveRecordBaseAccess extends ConstantReadAccess {
+ ActiveRecordBaseAccess() {
+ this.getScopeExpr().(ConstantAccess).getName() = "ActiveRecord" and this.getName() = "Base"
+ }
+}
+
+// `ApplicationRecord` extends `ActiveRecord::Base`, but it is matched by name
+// here so that models are still recognised when the `ApplicationRecord`
+// definition itself is not in the database.
+private class ApplicationRecordAccess extends ConstantReadAccess {
+ ApplicationRecordAccess() { "ApplicationRecord" = this.getName() }
+}
+
+// Gets the name of an `ActiveRecord::Persistence` instance method.
+// See https://api.rubyonrails.org/classes/ActiveRecord/Persistence.html
+private string activeRecordPersistenceInstanceMethodName() {
+ result in [
+ "becomes", "becomes!", "decrement", "decrement!", "delete", "delete!", "destroy", "destroy!",
+ "destroyed?", "increment", "increment!", "new_record?", "persisted?",
+ "previously_new_record?", "reload", "save", "save!", "toggle", "toggle!", "touch", "update",
+ "update!", "update_attribute", "update_column", "update_columns"
+ ]
+}
+
+// Holds if `methodName` names a method that is defined for every active record
+// model instance, and is therefore unlikely to refer to a database field.
+private predicate isBuiltInMethodForActiveRecordModelInstance(string methodName) {
+ methodName =
+ [activeRecordPersistenceInstanceMethodName(), basicObjectInstanceMethodName(),
+ objectInstanceMethodName()]
+}
+
+/**
+ * A `ClassDeclaration` for a class that extends `ActiveRecord::Base`. For example,
+ *
+ * ```rb
+ * class UserGroup < ActiveRecord::Base
+ * has_many :users
+ * end
+ * ```
+ */
+class ActiveRecordModelClass extends ClassDeclaration {
+ ActiveRecordModelClass() {
+ // class Foo < ActiveRecord::Base
+ this.getSuperclassExpr() instanceof ActiveRecordBaseAccess
+ or
+ // class Foo < ApplicationRecord
+ this.getSuperclassExpr() instanceof ApplicationRecordAccess
+ or
+ // class Bar < Foo, where Foo is itself an `ActiveRecordModelClass`
+ exists(ActiveRecordModelClass other |
+ other.getModule() = resolveScopeExpr(this.getSuperclassExpr())
+ )
+ }
+
+ // Gets a declaration of this class's module or of any of its (transitive) super classes
+ private ModuleBase getAllClassDeclarations() {
+ result = this.getModule().getSuperClass*().getADeclaration()
+ }
+
+ /**
+ * Gets a method of this class (or a super class) that may return data from a database field.
+ */
+ Method getAPotentialFieldAccessMethod() {
+ // It's a method on this class or one of its super classes
+ result = this.getAllClassDeclarations().getAMethod() and
+ // A call on an active record instance flows to a value returned from this method
+ exists(DataFlow::Node returned, ActiveRecordInstanceMethodCall cNode, MethodCall c |
+ exprNodeReturnedFrom(returned, result) and
+ cNode.flowsTo(returned) and
+ c = cNode.asExpr().getExpr()
+ |
+ // The referenced method is not built-in, and...
+ not isBuiltInMethodForActiveRecordModelInstance(c.getMethodName()) and
+ (
+ // ...the receiver's class (and super classes) declare no method of that name, or...
+ not exists(
+ cNode.getInstance().getClass().getAllClassDeclarations().getMethod(c.getMethodName())
+ )
+ or
+ // ...the call targets a method that can itself access a field (recursive case)
+ c.getATarget() = cNode.getInstance().getClass().getAPotentialFieldAccessMethod()
+ )
+ )
+ }
+}
+
+/** A call to a class method on a receiver that is an `ActiveRecordModelClass`. */
+class ActiveRecordModelClassMethodCall extends MethodCall {
+ private ActiveRecordModelClass modelClass;
+
+ ActiveRecordModelClassMethodCall() {
+ // `self.where(...)` inside the body of an `ActiveRecordModelClass`
+ this.getReceiver() instanceof Self and
+ modelClass = this.getEnclosingModule()
+ or
+ // chained calls such as `Foo.joins(:bars).where(...)`
+ modelClass = this.getReceiver().(ActiveRecordModelClassMethodCall).getReceiverClass()
+ or
+ // direct calls such as `Foo.where(...)`
+ resolveScopeExpr(this.getReceiver()) = modelClass.getModule()
+ }
+
+ /** Gets the `ActiveRecordModelClass` that the receiver of this call refers to. */
+ ActiveRecordModelClass getReceiverClass() { result = modelClass }
+}
+
+private Expr sqlFragmentArgument(MethodCall call) {
+ exists(string name | name = call.getMethodName() |
+ // Methods whose first positional argument is treated as the SQL fragment
+ result = call.getArgument(0) and
+ name in [
+ "delete_all", "delete_by", "destroy_all", "destroy_by", "exists?", "find_by", "find_by!",
+ "find_or_create_by", "find_or_create_by!", "find_or_initialize_by", "find_by_sql", "from",
+ "group", "having", "joins", "lock", "not", "order", "pluck", "where", "rewhere", "select",
+ "reselect", "update_all"
+ ]
+ or
+ // The aggregate methods: again the first positional argument
+ result = call.getArgument(0) and
+ name = ["average", "count", "maximum", "minimum", "sum"]
+ or
+ // `calculate`: the second positional argument
+ result = call.getArgument(1) and name = "calculate"
+ or
+ // This format was supported until Rails 2.3.8
+ result = call.getKeywordArgument("conditions") and
+ name = ["all", "find", "first", "last"]
+ or
+ // `reload`: the `lock` keyword argument
+ result = call.getKeywordArgument("lock") and
+ name = "reload"
+ )
+}
+
+// Holds if `sqlFragmentExpr` is an expression that, if tainted by unsanitized
+// input, should not be used as part of an argument to an SQL executing method
+private predicate unsafeSqlExpr(Expr sqlFragmentExpr) {
+ // Method call
+ sqlFragmentExpr instanceof MethodCall
+ or
+ // Variable reads
+ sqlFragmentExpr instanceof VariableReadAccess
+ or
+ // String concatenations
+ sqlFragmentExpr instanceof AddExpr
+ or
+ // Literals containing an interpolated value
+ exists(StringlikeLiteral lit | lit = sqlFragmentExpr |
+ lit.getComponent(_) instanceof StringInterpolationComponent
+ )
+}
+
+/**
+ * A method call that may result in executing unintended user-controlled SQL
+ * queries if the `getSqlFragmentSinkArgument()` expression is tainted by
+ * unsanitized user-controlled input. For example, supposing that `User` is an
+ * `ActiveRecord` model class, then
+ *
+ * ```rb
+ * User.where("name = '#{user_name}'")
+ * ```
+ *
+ * may be unsafe if `user_name` is from unsanitized user input, as a value such
+ * as `"') OR 1=1 --"` could result in the application looking up all users
+ * rather than just one with a matching name.
+ */
+class PotentiallyUnsafeSqlExecutingMethodCall extends ActiveRecordModelClassMethodCall {
+ // The expression that acts as the SQL fragment sink for this call
+ private Expr sqlFragmentExpr;
+
+ PotentiallyUnsafeSqlExecutingMethodCall() {
+ // The receiver class must not define its own singleton method of this
+ // name, i.e. the method has not been overridden
+ not exists(SingletonMethod redefinition |
+ redefinition.getOuterScope() = this.getReceiverClass() and
+ redefinition.getName() = this.getMethodName()
+ ) and
+ unsafeSqlExpr(sqlFragmentExpr) and
+ exists(Expr arg | arg = sqlFragmentArgument(this) |
+ // the fragment is the argument itself...
+ sqlFragmentExpr = arg
+ or
+ // ...or the first element of an array literal argument
+ sqlFragmentExpr = arg.(ArrayLiteral).getElement(0)
+ )
+ }
+
+ Expr getSqlFragmentSinkArgument() { sqlFragmentExpr = result }
+}
+
+/**
+ * An `SqlExecution::Range` for an argument to a
+ * `PotentiallyUnsafeSqlExecutingMethodCall` that may be vulnerable to being
+ * controlled by user input.
+ */
+class ActiveRecordSqlExecutionRange extends SqlExecution::Range {
+ ActiveRecordSqlExecutionRange() {
+ // This node corresponds to the sink argument of some potentially unsafe call
+ this.asExpr().getNode() =
+ any(PotentiallyUnsafeSqlExecutingMethodCall call).getSqlFragmentSinkArgument()
+ }
+
+ override DataFlow::Node getSql() { this = result }
+}
+
+// TODO: model `ActiveRecord` sanitizers
+// https://api.rubyonrails.org/classes/ActiveRecord/Sanitization/ClassMethods.html
+/**
+ * A node that may evaluate to one or more `ActiveRecordModelClass` instances.
+ */
+abstract class ActiveRecordModelInstantiation extends OrmInstantiation::Range,
+ DataFlow::LocalSourceNode {
+ abstract ActiveRecordModelClass getClass();
+
+ bindingset[methodName]
+ override predicate methodCallMayAccessField(string methodName) {
+ // Built-in methods are never treated as field accesses.
+ not isBuiltInMethodForActiveRecordModelInstance(methodName) and
+ (
+ // Either a method of this name on the class can itself access a field...
+ exists(Method accessor | accessor.getName() = methodName |
+ accessor = this.getClass().getAPotentialFieldAccessMethod()
+ )
+ or
+ // ...or the class has no method of this name at all.
+ not exists(this.getClass().getMethod(methodName))
+ )
+ }
+}
+
+// Names of class methods on ActiveRecord models that may return one or more
+// instances of that model, together with `new`, which is listed alongside them.
+// See https://api.rubyonrails.org/classes/ActiveRecord/FinderMethods.html
+private string finderMethodName() {
+ result = "new"
+ or
+ exists(string finder |
+ finder in [
+ "fifth", "find", "find_by", "find_or_initialize_by", "find_or_create_by", "first",
+ "forty_two", "fourth", "last", "second", "second_to_last", "take", "third", "third_to_last"
+ ]
+ |
+ result = finder + ["", "!"]
+ )
+}
+
+// Gets the "final" receiver in a chain of method calls.
+// For example, in `Foo.bar`, this would give the `Foo` access, and in
+// `foo.bar.baz("arg")` it would give the `foo` variable access
+private Expr getUltimateReceiver(MethodCall call) {
+ // End of the chain: the receiver is something other than a method call,
+ // so it is the ultimate receiver.
+ result = call.getReceiver() and
+ not result instanceof MethodCall
+ or
+ // Otherwise the receiver is another call in the chain; continue from
+ // that call.
+ result = getUltimateReceiver(call.getReceiver())
+}
+
+// A call to `find`, `first`, `new`, etc. that may return active record model object(s)
+private class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode {
+ private MethodCall call;
+ private ActiveRecordModelClass cls;
+ private Expr recv;
+
+ ActiveRecordModelFinderCall() {
+ this.asExpr().getExpr() = call and
+ finderMethodName() = call.getMethodName() and
+ getUltimateReceiver(call) = recv and
+ cls.getQualifiedName() = resolveConstant(recv)
+ }
+
+ final override ActiveRecordModelClass getClass() { cls = result }
+}
+
+// A `self` reference that may resolve to an active record model object
+private class ActiveRecordModelClassSelfReference extends ActiveRecordModelInstantiation {
+ private ActiveRecordModelClass cls;
+
+ ActiveRecordModelClassSelfReference() {
+ // `self` must occur inside a method of the model class
+ exists(Self selfRef | selfRef = this.asExpr().getExpr() |
+ cls.getAMethod() = selfRef.getEnclosingMethod() and
+ cls = selfRef.getEnclosingModule()
+ )
+ }
+
+ final override ActiveRecordModelClass getClass() { cls = result }
+}
+
+// A node that is, or is reached by flow from, an active record model instantiation
+private class ActiveRecordInstance extends DataFlow::Node {
+ private ActiveRecordModelInstantiation source;
+
+ ActiveRecordInstance() { source.flowsTo(this) or source = this }
+
+ ActiveRecordModelClass getClass() { source.getClass() = result }
+}
+
+// A call node whose receiver is an `ActiveRecordInstance`
+private class ActiveRecordInstanceMethodCall extends DataFlow::CallNode {
+ private ActiveRecordInstance recvInstance;
+
+ ActiveRecordInstanceMethodCall() { recvInstance = this.getReceiver() }
+
+ ActiveRecordInstance getInstance() { recvInstance = result }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/Files.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/Files.qll
new file mode 100644
index 00000000000..a7a963eb8a9
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/Files.qll
@@ -0,0 +1,299 @@
+/**
+ * Provides classes for working with file system libraries.
+ */
+
+private import ruby
+private import codeql.ruby.Concepts
+private import codeql.ruby.ApiGraphs
+private import codeql.ruby.DataFlow
+private import codeql.ruby.frameworks.StandardLibrary
+
+private DataFlow::Node ioInstanceInstantiation() {
+ result = API::getTopLevelMember("IO").getAMethodCall(["for_fd", "open", "try_convert"]) or
+ result = API::getTopLevelMember("IO").getAnInstantiation()
+}
+
+private DataFlow::Node ioInstance() {
+ // Any node that a known `IO` instance flows to is also an instance
+ exists(DataFlow::LocalSourceNode source |
+ source = ioInstance() and source.flowsTo(result)
+ )
+ or
+ result = ioInstanceInstantiation()
+}
+
+// Match some simple cases where a path argument specifies a shell command to
+// be executed. For example, the `"|date"` argument in `IO.read("|date")`, which
+// will execute a shell command and read its output rather than reading from the
+// filesystem.
+private predicate pathArgSpawnsSubprocess(Expr arg) {
+ exists(StringlikeLiteral lit | lit = arg and "|" = lit.getValueText().charAt(0))
+}
+
+private DataFlow::Node fileInstanceInstantiation() {
+ // Calls to `Kernel.open` can yield `File` instances: assume that calls that
+ // don't invoke shell commands will instead open a file.
+ exists(KernelMethodCall open | open = result and open.getMethodName() = "open" |
+ not pathArgSpawnsSubprocess(open.getArgument(0).asExpr().getExpr())
+ )
+ or
+ result = API::getTopLevelMember("File").getAMethodCall("open")
+ or
+ result = API::getTopLevelMember("File").getAnInstantiation()
+}
+
+private DataFlow::Node fileInstance() {
+ // Any node that a known `File` instance flows to is also an instance
+ exists(DataFlow::LocalSourceNode source |
+ source = fileInstance() and source.flowsTo(result)
+ )
+ or
+ result = fileInstanceInstantiation()
+}
+
+private string ioFileReaderClassMethodName() {
+ result in ["binread", "foreach", "read", "readlines", "try_convert"]
+}
+
+private string ioFileReaderInstanceMethodName() {
+ // Names of the instance methods that are treated as reads
+ result in [
+ "getbyte", "getc", "gets", "pread", "read", "read_nonblock", "readbyte", "readchar",
+ "readline", "readlines", "readpartial", "sysread"
+ ]
+}
+
+private string ioFileReaderMethodName(boolean classMethodCall) {
+ result = ioFileReaderInstanceMethodName() and classMethodCall = false
+ or
+ result = ioFileReaderClassMethodName() and classMethodCall = true
+}
+
+/**
+ * Classes and predicates for modeling the core `IO` module.
+ */
+module IO {
+ /**
+ * An instance of the `IO` class, for example in
+ *
+ * ```rb
+ * rand = IO.new(IO.sysopen("/dev/random", "r"), "r")
+ * rand_data = rand.read(32)
+ * ```
+ *
+ * there are 3 `IOInstance`s - the call to `IO.new`, the assignment
+ * `rand = ...`, and the read access to `rand` on the second line.
+ */
+ class IOInstance extends DataFlow::Node {
+ IOInstance() {
+ // `File` instances are included alongside direct `IO` instances
+ this = [fileInstance(), ioInstance()]
+ }
+ }
+
+ // An `IO` instance obtained directly from `IO`, i.e. a case where there is
+ // no more specific subtype such as `File`
+ private class IOInstanceStrict extends IOInstance {
+ IOInstanceStrict() { ioInstance() = this }
+ }
+
+ /**
+ * A `DataFlow::CallNode` that reads data using the `IO` class. For example,
+ * the `IO.read` call in:
+ *
+ * ```rb
+ * IO.read("|date")
+ * ```
+ *
+ * returns the output of the `date` shell command, invoked as a subprocess.
+ *
+ * This class includes reads both from shell commands and reads from the
+ * filesystem. For working with filesystem accesses specifically, see
+ * `IOFileReader` or the `FileSystemReadAccess` concept.
+ */
+ class IOReader extends DataFlow::CallNode {
+ private boolean classMethodCall;
+ private string api;
+
+ IOReader() {
+ // Class methods, called on either `File` or `IO`
+ classMethodCall = true and
+ api in ["File", "IO"] and
+ this = API::getTopLevelMember(api).getAMethodCall(ioFileReaderMethodName(classMethodCall))
+ or
+ // Instance methods called on a direct `IO` instance
+ api = "IO" and
+ classMethodCall = false and
+ exists(MethodCall call | call = this.asExpr().getExpr() |
+ this.getReceiver() instanceof IOInstanceStrict and
+ call.getMethodName() =
+ ioFileReaderMethodName(classMethodCall)
+ )
+ or
+ // Instance methods called on a `File` instance
+ api = "File" and
+ classMethodCall = false and
+ exists(MethodCall call | call = this.asExpr().getExpr() |
+ this.getReceiver() instanceof File::FileInstance and
+ call.getMethodName() =
+ ioFileReaderMethodName(classMethodCall)
+ )
+ // TODO: enumeration style methods such as `each`, `foreach`, etc.
+ }
+
+ /**
+ * Gets the most specific core class used for this read, `IO` or `File`
+ */
+ string getAPI() { api = result }
+
+ predicate isClassMethodCall() { true = classMethodCall }
+ }
+
+ /**
+ * A `DataFlow::CallNode` that reads data from the filesystem using the `IO`
+ * class. For example, the `IO.read` call in:
+ *
+ * ```rb
+ * IO.read("foo.txt")
+ * ```
+ *
+ * reads the file `foo.txt` and returns its contents as a string.
+ */
+ class IOFileReader extends IOReader, FileSystemReadAccess::Range {
+ IOFileReader() {
+ // Assume that class method calls that don't invoke shell commands will
+ // instead read from a file.
+ this.isClassMethodCall() and
+ not pathArgSpawnsSubprocess(this.getArgument(0).asExpr().getExpr())
+ or
+ this.getAPI() = "File"
+ }
+
+ // TODO: can we infer a path argument for instance method calls?
+ // e.g. by tracing back to the instantiation of that instance
+ override DataFlow::Node getAPathArgument() {
+ this.isClassMethodCall() and result = this.getArgument(0)
+ }
+
+ // This class represents calls that return data
+ override DataFlow::Node getADataNode() { this = result }
+ }
+}
+
+/**
+ * Classes and predicates for modeling the core `File` module.
+ *
+ * Because `File` is a subclass of `IO`, all `FileInstance`s and
+ * `FileModuleReader`s are also `IOInstance`s and `IOFileReader`s
+ * respectively.
+ */
+module File {
+ /**
+ * An instance of the `File` class, for example in
+ *
+ * ```rb
+ * f = File.new("foo.txt")
+ * puts f.read()
+ * ```
+ *
+ * there are 3 `FileInstance`s - the call to `File.new`, the assignment
+ * `f = ...`, and the read access to `f` on the second line.
+ */
+ class FileInstance extends IO::IOInstance {
+ FileInstance() { fileInstance() = this }
+ }
+
+ /**
+ * A read using the `File` module, e.g. the `f.read` call in
+ *
+ * ```rb
+ * f = File.new("foo.txt")
+ * puts f.read()
+ * ```
+ */
+ class FileModuleReader extends IO::IOFileReader {
+ FileModuleReader() { "File" = this.getAPI() }
+ }
+
+ /**
+ * A call to a `File` method that may return one or more filenames.
+ */
+ class FileModuleFilenameSource extends FileNameSource, DataFlow::CallNode {
+ FileModuleFilenameSource() {
+ // Instance methods
+ this.getReceiver() instanceof FileInstance and
+ this.asExpr().getExpr().(MethodCall).getMethodName() in ["path", "to_path"]
+ or
+ // Class methods
+ exists(string name |
+ name in [
+ "absolute_path", "basename", "expand_path", "join", "path", "readlink",
+ "realdirpath", "realpath"
+ ]
+ |
+ this = API::getTopLevelMember("File").getAMethodCall(name)
+ )
+ }
+ }
+
+ private class FileModulePermissionModification extends FileSystemPermissionModification::Range,
+ DataFlow::CallNode {
+ private DataFlow::Node permissionArg;
+
+ FileModulePermissionModification() {
+ exists(string name | this = API::getTopLevelMember("File").getAMethodCall(name) |
+ permissionArg = this.getArgument(2) and name = ["new", "open"]
+ or
+ permissionArg = this.getArgument(1) and name = "mkfifo"
+ or
+ permissionArg = this.getArgument(0) and name = ["chmod", "lchmod"]
+ // TODO: defaults for optional args? This may depend on the umask
+ )
+ }
+
+ override DataFlow::Node getAPermissionNode() { permissionArg = result }
+ }
+}
+
+/**
+ * Classes and predicates for modeling the `FileUtils` module from the standard
+ * library.
+ */
+module FileUtils {
+ /**
+ * A call to a FileUtils method that may return one or more filenames.
+ */
+ class FileUtilsFilenameSource extends FileNameSource {
+ FileUtilsFilenameSource() {
+ // Note that many methods in FileUtils accept a `noop` option that will
+ // perform a dry run of the command. This means that, for instance, `rm`
+ // and similar methods may not actually delete/unlink a file when called.
+ exists(string name | this = API::getTopLevelMember("FileUtils").getAMethodCall(name) |
+ name in [
+ "chmod", "chmod_R", "chown", "chown_R", "getwd", "makedirs", "mkdir", "mkdir_p",
+ "mkpath", "remove", "remove_dir", "remove_entry", "rm", "rm_f", "rm_r", "rm_rf",
+ "rmdir", "rmtree", "safe_unlink", "touch"
+ ]
+ )
+ }
+ }
+
+ private class FileUtilsPermissionModification extends FileSystemPermissionModification::Range,
+ DataFlow::CallNode {
+ private DataFlow::Node permissionArg;
+
+ FileUtilsPermissionModification() {
+ exists(string name |
+ this = API::getTopLevelMember("FileUtils").getAMethodCall(name)
+ |
+ permissionArg = this.getKeywordArgument("mode") and
+ name = ["install", "makedirs", "mkdir", "mkdir_p", "mkpath"]
+ or
+ permissionArg = this.getArgument(0) and name = ["chmod", "chmod_R"]
+ // TODO: defaults for optional args? This may depend on the umask
+ )
+ }
+
+ override DataFlow::Node getAPermissionNode() { permissionArg = result }
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/HttpClients.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/HttpClients.qll
new file mode 100644
index 00000000000..acb902694fe
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/HttpClients.qll
@@ -0,0 +1,12 @@
+/**
+ * Helper file that imports all HTTP clients.
+ */
+
+private import codeql.ruby.frameworks.http_clients.NetHttp
+private import codeql.ruby.frameworks.http_clients.Excon
+private import codeql.ruby.frameworks.http_clients.Faraday
+private import codeql.ruby.frameworks.http_clients.RestClient
+private import codeql.ruby.frameworks.http_clients.Httparty
+private import codeql.ruby.frameworks.http_clients.HttpClient
+private import codeql.ruby.frameworks.http_clients.OpenURI
+private import codeql.ruby.frameworks.http_clients.Typhoeus
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/StandardLibrary.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/StandardLibrary.qll
new file mode 100644
index 00000000000..f6d883cd6b7
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/StandardLibrary.qll
@@ -0,0 +1,337 @@
+private import codeql.ruby.AST
+private import codeql.ruby.Concepts
+private import codeql.ruby.DataFlow
+private import codeql.ruby.ApiGraphs
+
+/**
+ * The `Kernel` module is included by the `Object` class, so its methods are available
+ * in every Ruby object. In addition, its module methods can be called by
+ * providing a specific receiver as in `Kernel.exit`.
+ */
+class KernelMethodCall extends DataFlow::CallNode {
+ private MethodCall methodCall;
+
+ KernelMethodCall() {
+ this.asExpr().getExpr() = methodCall and
+ (
+ methodCall instanceof UnknownMethodCall and
+ (
+ isPublicKernelMethod(methodCall.getMethodName())
+ or
+ isPrivateKernelMethod(methodCall.getMethodName()) and
+ this.getReceiver().asExpr().getExpr() instanceof Self
+ )
+ or
+ this = API::getTopLevelMember("Kernel").getAMethodCall(_)
+ )
+ }
+
+ string getMethodName() { methodCall.getMethodName() = result }
+
+ int getNumberOfArguments() { methodCall.getNumberOfArguments() = result }
+}
+
+/**
+ * Public methods in the `Kernel` module. These can be invoked on any object via the usual dot syntax.
+ * ```ruby
+ * arr = []
+ * arr.send("push", 5) # => [5]
+ * ```
+ */
+private predicate isPublicKernelMethod(string method) {
+ method = ["class", "clone", "frozen?", "tap", "then", "yield_self", "send"]
+}
+
+/**
+ * Private methods in the `Kernel` module.
+ * These can be invoked on `self`, on `Kernel`, or using a low-level primitive like `send` or `instance_eval`.
+ * ```ruby
+ * puts "hello world"
+ * Kernel.puts "hello world"
+ * 5.instance_eval { puts "hello world" }
+ * 5.send("puts", "hello world")
+ * ```
+ */
+private predicate isPrivateKernelMethod(string method) {
+ method = [
+ "Array", "Complex", "Float", "Hash", "Integer", "Rational", "String", "__callee__", "__dir__",
+ "__method__", "`", "abort", "at_exit", "autoload", "autoload?", "binding", "block_given?",
+ "callcc", "caller", "caller_locations", "catch", "chomp", "chop", "eval", "exec", "exit",
+ "exit!", "fail", "fork", "format", "gets", "global_variables", "gsub", "iterator?", "lambda",
+ "load", "local_variables", "loop", "open", "p", "pp", "print", "printf", "proc", "putc",
+ "puts", "raise", "rand", "readline", "readlines", "require", "require_relative", "select",
+ "set_trace_func", "sleep", "spawn", "sprintf", "srand", "sub", "syscall", "system", "test",
+ "throw", "trace_var", "trap", "untrace_var", "warn"
+ ]
+}
+
+string basicObjectInstanceMethodName() {
+ result = [
+ "equal?", "instance_eval", "instance_exec", "method_missing", "singleton_method_added",
+ "singleton_method_removed", "singleton_method_undefined"
+ ]
+}
+
+/**
+ * Instance methods on `BasicObject`, which are available to all classes.
+ */
+class BasicObjectInstanceMethodCall extends UnknownMethodCall {
+ BasicObjectInstanceMethodCall() { basicObjectInstanceMethodName() = this.getMethodName() }
+}
+
+string objectInstanceMethodName() {
+ result = [
+ "!~", "<=>", "===", "=~", "callable_methods", "define_singleton_method", "display",
+ "do_until", "do_while", "dup", "enum_for", "eql?", "extend", "f", "freeze", "h", "hash",
+ "inspect", "instance_of?", "instance_variable_defined?", "instance_variable_get",
+ "instance_variable_set", "instance_variables", "is_a?", "itself", "kind_of?",
+ "matching_methods", "method", "method_missing", "methods", "nil?", "object_id",
+ "private_methods", "protected_methods", "public_method", "public_methods", "public_send",
+ "remove_instance_variable", "respond_to?", "respond_to_missing?", "send",
+ "shortest_abbreviation", "singleton_class", "singleton_method", "singleton_methods", "taint",
+ "tainted?", "to_enum", "to_s", "trust", "untaint", "untrust", "untrusted?"
+ ]
+}
+
+/**
+ * Instance methods on `Object`, which are available to all classes except `BasicObject`.
+ */
+class ObjectInstanceMethodCall extends UnknownMethodCall {
+ ObjectInstanceMethodCall() { objectInstanceMethodName() = this.getMethodName() }
+}
+
+/**
+ * Method calls which have no known target.
+ * These will typically be calls to methods inherited from a superclass.
+ */
+class UnknownMethodCall extends MethodCall {
+ UnknownMethodCall() { not exists(Call call | call = this and exists(call.getATarget())) }
+}
+
+/**
+ * A system command executed via subshell literal syntax.
+ * E.g.
+ * ```ruby
+ * `cat foo.txt`
+ * %x(cat foo.txt)
+ * %x[cat foo.txt]
+ * %x{cat foo.txt}
+ * %x/cat foo.txt/
+ * ```
+ */
+class SubshellLiteralExecution extends SystemCommandExecution::Range {
+ SubshellLiteral literal;
+
+ SubshellLiteralExecution() { literal = this.asExpr().getExpr() }
+
+ override DataFlow::Node getAnArgument() { literal.getComponent(_) = result.asExpr().getExpr() }
+
+ override predicate isShellInterpreted(DataFlow::Node arg) { this.getAnArgument() = arg }
+}
+
+/**
+ * A system command executed via shell heredoc syntax.
+ * E.g.
+ * ```ruby
+ * <<`EOF`
+ * cat foo.text
+ * EOF
+ * ```
+ */
+class SubshellHeredocExecution extends SystemCommandExecution::Range {
+ HereDoc heredoc;
+
+ SubshellHeredocExecution() { heredoc.isSubShell() and heredoc = this.asExpr().getExpr() }
+
+ override DataFlow::Node getAnArgument() { heredoc.getComponent(_) = result.asExpr().getExpr() }
+
+ override predicate isShellInterpreted(DataFlow::Node arg) { this.getAnArgument() = arg }
+}
+
+/**
+ * A system command executed via the `Kernel.system` method.
+ * `Kernel.system` accepts three argument forms:
+ * - A single string. If it contains no shell meta characters, keywords or
+ * builtins, it is executed directly in a subprocess.
+ * Otherwise, it is executed in a subshell.
+ * ```ruby
+ * system("cat foo.txt | tail")
+ * ```
+ * - A command and one or more arguments.
+ * The command is executed in a subprocess.
+ * ```ruby
+ * system("cat", "foo.txt")
+ * ```
+ * - An array containing the command name and argv[0], followed by zero or more arguments.
+ * The command is executed in a subprocess.
+ * ```ruby
+ * system(["cat", "cat"], "foo.txt")
+ * ```
+ * In addition, `Kernel.system` accepts an optional environment hash as the
+ * first argument and an optional options hash as the last argument.
+ * We don't yet distinguish between these arguments and the command arguments.
+ * ```ruby
+ * system({"FOO" => "BAR"}, "cat foo.txt | tail", {unsetenv_others: true})
+ * ```
+ * Ruby documentation: https://docs.ruby-lang.org/en/3.0.0/Kernel.html#method-i-system
+ */
+class KernelSystemCall extends SystemCommandExecution::Range, KernelMethodCall {
+ KernelSystemCall() { "system" = this.getMethodName() }
+
+ override DataFlow::Node getAnArgument() { this.getArgument(_) = result }
+
+ override predicate isShellInterpreted(DataFlow::Node arg) {
+ // With exactly one argument, Kernel.system may run it through a subshell
+ arg = this.getAnArgument() and this.getNumberOfArguments() = 1
+ }
+}
+
+/**
+ * A system command executed via the `Kernel.exec` method.
+ * `Kernel.exec` takes the same argument forms as `Kernel.system`. See `KernelSystemCall` for details.
+ * Ruby documentation: https://docs.ruby-lang.org/en/3.0.0/Kernel.html#method-i-exec
+ */
+class KernelExecCall extends SystemCommandExecution::Range, KernelMethodCall {
+ KernelExecCall() { "exec" = this.getMethodName() }
+
+ override DataFlow::Node getAnArgument() { this.getArgument(_) = result }
+
+ override predicate isShellInterpreted(DataFlow::Node arg) {
+ // With exactly one argument, Kernel.exec may run it through a subshell
+ arg = this.getAnArgument() and this.getNumberOfArguments() = 1
+ }
+}
+
+/**
+ * A system command executed via the `Kernel.spawn` method.
+ * `Kernel.spawn` takes the same argument forms as `Kernel.system`.
+ * See `KernelSystemCall` for details.
+ * Ruby documentation: https://docs.ruby-lang.org/en/3.0.0/Kernel.html#method-i-spawn
+ * TODO: document and handle the env and option arguments.
+ * ```
+ * spawn([env,] command... [,options]) -> pid
+ * ```
+ */
+class KernelSpawnCall extends SystemCommandExecution::Range, KernelMethodCall {
+ KernelSpawnCall() { "spawn" = this.getMethodName() }
+
+ override DataFlow::Node getAnArgument() { this.getArgument(_) = result }
+
+ override predicate isShellInterpreted(DataFlow::Node arg) {
+ // With exactly one argument, Kernel.spawn may run it through a subshell
+ arg = this.getAnArgument() and this.getNumberOfArguments() = 1
+ }
+}
+
+/**
+ * A system command executed via one of the `Open3` methods.
+ * These methods take the same argument forms as `Kernel.system`.
+ * See `KernelSystemCall` for details.
+ */
+class Open3Call extends SystemCommandExecution::Range {
+ MethodCall methodCall;
+
+ Open3Call() {
+ exists(string name | name in ["popen3", "popen2", "popen2e", "capture3", "capture2", "capture2e"] |
+ this = API::getTopLevelMember("Open3").getAMethodCall(name)
+ ) and
+ methodCall = this.asExpr().getExpr()
+ }
+
+ override DataFlow::Node getAnArgument() { methodCall.getAnArgument() = result.asExpr().getExpr() }
+
+ override predicate isShellInterpreted(DataFlow::Node arg) {
+ // These Open3 methods invoke a subshell if you provide a single string as argument
+ arg.asExpr().getExpr() = methodCall.getAnArgument() and methodCall.getNumberOfArguments() = 1
+ }
+}
+
+/**
+ * A pipeline of system commands constructed via one of the `Open3` methods, which accept a
+ * variable argument list of commands in any form supported by `Kernel.system` (see `KernelSystemCall`).
+ * ```ruby
+ * Open3.pipeline("cat foo.txt", "tail")
+ * Open3.pipeline(["cat", "foo.txt"], "tail")
+ * Open3.pipeline([{}, "cat", "foo.txt"], "tail")
+ * Open3.pipeline([["cat", "cat"], "foo.txt"], "tail")
+ * ```
+ */
+class Open3PipelineCall extends SystemCommandExecution::Range {
+ MethodCall methodCall;
+
+ Open3PipelineCall() {
+ exists(string name | name in ["pipeline_rw", "pipeline_r", "pipeline_w", "pipeline_start", "pipeline"] |
+ this = API::getTopLevelMember("Open3").getAMethodCall(name)
+ ) and
+ methodCall = this.asExpr().getExpr()
+ }
+
+ override DataFlow::Node getAnArgument() { methodCall.getAnArgument() = result.asExpr().getExpr() }
+
+ override predicate isShellInterpreted(DataFlow::Node arg) {
+ // A command in the pipeline is executed in a subshell if it is given as a single string argument.
+ arg.asExpr().getExpr() = methodCall.getAnArgument() and
+ arg.asExpr().getExpr() instanceof StringlikeLiteral
+ }
+}
+
+/**
+ * A call to `Kernel.eval`, which executes its first argument as Ruby code.
+ * ```ruby
+ * a = 1
+ * Kernel.eval("a = 2")
+ * a # => 2
+ * ```
+ */
+class EvalCallCodeExecution extends CodeExecution::Range, KernelMethodCall {
+ EvalCallCodeExecution() { "eval" = this.getMethodName() }
+
+ override DataFlow::Node getCode() { this.getArgument(0) = result }
+}
+
+/**
+ * A call to `Kernel#send`, which executes its first argument as a Ruby method call.
+ * ```ruby
+ * arr = []
+ * arr.send("push", 1)
+ * arr # => [1]
+ * ```
+ */
+class SendCallCodeExecution extends CodeExecution::Range, KernelMethodCall {
+ SendCallCodeExecution() { "send" = this.getMethodName() }
+
+ override DataFlow::Node getCode() { this.getArgument(0) = result }
+}
+
+/**
+ * A call to `BasicObject#instance_eval`, which executes its first argument as Ruby code.
+ */
+class InstanceEvalCallCodeExecution extends CodeExecution::Range, DataFlow::CallNode {
+ InstanceEvalCallCodeExecution() {
+ exists(UnknownMethodCall c | c = this.asExpr().getExpr() | c.getMethodName() = "instance_eval")
+ }
+
+ override DataFlow::Node getCode() { this.getArgument(0) = result }
+}
+
+/**
+ * A call to `Module#class_eval`, which executes its first argument as Ruby code.
+ */
+class ClassEvalCallCodeExecution extends CodeExecution::Range, DataFlow::CallNode {
+ ClassEvalCallCodeExecution() {
+ exists(UnknownMethodCall c | c = this.asExpr().getExpr() | c.getMethodName() = "class_eval")
+ }
+
+ override DataFlow::Node getCode() { this.getArgument(0) = result }
+}
+
+/**
+ * A call to `Module#module_eval`, which executes its first argument as Ruby code.
+ */
+class ModuleEvalCallCodeExecution extends CodeExecution::Range, DataFlow::CallNode {
+ ModuleEvalCallCodeExecution() {
+ exists(UnknownMethodCall c | c = this.asExpr().getExpr() | c.getMethodName() = "module_eval")
+ }
+
+ override DataFlow::Node getCode() { this.getArgument(0) = result }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/XmlParsing.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/XmlParsing.qll
new file mode 100644
index 00000000000..3e37ec6a514
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/XmlParsing.qll
@@ -0,0 +1,182 @@
+private import codeql.ruby.Concepts
+private import codeql.ruby.AST
+private import codeql.ruby.DataFlow
+private import codeql.ruby.typetracking.TypeTracker
+private import codeql.ruby.ApiGraphs
+private import codeql.ruby.controlflow.CfgNodes as CfgNodes
+
+/**
+ * A call to `parse` on `Nokogiri::XML`, `Nokogiri::XML::Document`, or an instance of
+ * `Nokogiri::XML::SAX::Parser`.
+ */
+private class NokogiriXmlParserCall extends XmlParserCall::Range, DataFlow::CallNode {
+  NokogiriXmlParserCall() {
+    exists(API::Node xml | xml = API::getTopLevelMember("Nokogiri").getMember("XML") |
+      this =
+        [xml, xml.getMember("Document"), xml.getMember("SAX").getMember("Parser").getInstance()]
+            .getAMethodCall("parse")
+    )
+  }
+
+  override DataFlow::Node getInput() { this.getArgument(0) = result }
+
+  override predicate externalEntitiesEnabled() {
+    exists(DataFlow::Node options | options = this.getArgument(3) |
+      options = trackEnableFeature(TNOENT())
+      or
+      options = trackEnableFeature(TDTDLOAD())
+      or
+      options = trackDisableFeature(TNONET())
+    )
+    or
+    // Alternatively, a block passed to this parser call contains a call to one of the
+    // feature-toggling methods, for example:
+    // ```ruby
+    // doc.parse(...) { |options| options.nononet; options.noent }
+    // ```
+    exists(MethodCall toggle |
+      toggle = this.asExpr().getExpr().(MethodCall).getBlock().getAStmt().getAChild*() and
+      toggle.getMethodName() = ["noent", "dtdload", "nononet"]
+    )
+  }
+}
+
+/** A call to `file`, `io` or `string` on `[LibXML::]XML::Document` or `[LibXML::]XML::Parser`. */
+private class LibXmlRubyXmlParserCall extends XmlParserCall::Range, DataFlow::CallNode {
+  LibXmlRubyXmlParserCall() {
+    exists(API::Node xml |
+      xml = [API::getTopLevelMember("LibXML").getMember("XML"), API::getTopLevelMember("XML")] and
+      this = xml.getMember(["Document", "Parser"]).getAMethodCall(["file", "io", "string"])
+    )
+  }
+
+  override DataFlow::Node getInput() { this.getArgument(0) = result }
+
+  override predicate externalEntitiesEnabled() {
+    exists(Pair optionsPair, DataFlow::Node optionsValue |
+      optionsPair = this.getArgument(1).asExpr().getExpr().(HashLiteral).getAKeyValuePair() and
+      optionsPair.getKey().(Literal).getValueText() = "options" and
+      optionsValue.asExpr().getExpr() = optionsPair.getValue()
+    |
+      optionsValue = [trackEnableFeature(TNOENT()), trackEnableFeature(TDTDLOAD())] or
+      optionsValue = trackDisableFeature(TNONET())
+    )
+  }
+}
+
+private newtype TFeature =
+ TNOENT() or // wrapped by `FeatureNOENT`
+ TNONET() or // wrapped by `FeatureNONET`
+ TDTDLOAD() // wrapped by `FeatureDTDLOAD`
+
+class Feature extends TFeature {
+  abstract int getValue(); // the integer value (bit) of this feature
+
+  string toString() { this.getConstantName() = result }
+
+  abstract string getConstantName(); // the name of the feature's constant, e.g. `NOENT`
+}
+
+private class FeatureNOENT extends Feature, TNOENT {
+ override int getValue() { result = 2 } // 1 << 1
+
+ override string getConstantName() { result = "NOENT" }
+}
+
+private class FeatureNONET extends Feature, TNONET {
+ override int getValue() { result = 2048 } // 1 << 11
+
+ override string getConstantName() { result = "NONET" }
+}
+
+private class FeatureDTDLOAD extends Feature, TDTDLOAD {
+ override int getValue() { result = 4 } // 1 << 2
+
+ override string getConstantName() { result = "DTDLOAD" }
+}
+
+/** Gets an API node for a module in which the parse-option constants are looked up. */
+private API::Node parseOptionsModule() {
+  result = API::getTopLevelMember("Nokogiri").getMember("XML").getMember("ParseOptions")
+  or
+  result =
+    [API::getTopLevelMember("LibXML").getMember("XML"), API::getTopLevelMember("XML")]
+        .getMember("Parser").getMember("Options")
+}
+
+private predicate bitWiseAndOr(CfgNodes::ExprNodes::OperationCfgNode operation) {
+  exists(Expr e | e = operation.getExpr() |
+    e instanceof BitwiseAndExpr or e instanceof AssignBitwiseAndExpr or
+    e instanceof BitwiseOrExpr or e instanceof AssignBitwiseOrExpr
+  )
+}
+
+private DataFlow::LocalSourceNode trackFeature(Feature f, boolean enable, TypeTracker t) {
+ t.start() and // base cases: `result` itself establishes the on/off state `enable` of `f`
+ (
+ // An integer literal: `enable` is true iff the bit `f.getValue()` is set in its value
+ exists(int bitValue |
+ bitValue = result.asExpr().getExpr().(IntegerLiteral).getValue().bitAnd(f.getValue())
+ |
+ if bitValue = 0 then enable = false else enable = true
+ )
+ or
+ // A use of the constant named `f.getConstantName()` from one of the parse-options modules
+ enable = true and
+ result = parseOptionsModule().getMember(f.getConstantName()).getAUse()
+ or
+ // Treat `&`, `&=`, `|` and `|=` operators as if they preserve the on/off states
+ // of their operands. This is an overapproximation but likely to work well in practice
+ // because it makes little sense to explicitly set a feature to both `on` and `off` in the
+ // same code.
+ exists(CfgNodes::ExprNodes::OperationCfgNode operation |
+ bitWiseAndOr(operation) and
+ operation = result.asExpr().(CfgNodes::ExprNodes::OperationCfgNode) and
+ operation.getAnOperand() = trackFeature(f, enable).asExpr()
+ )
+ or
+ // The complement operator toggles a feature from enabled to disabled and vice-versa
+ result.asExpr().getExpr() instanceof ComplementExpr and
+ result.asExpr().(CfgNodes::ExprNodes::OperationCfgNode).getAnOperand() =
+ trackFeature(f, enable.booleanNot()).asExpr()
+ or
+ // Nokogiri has a ParseOptions class that is a wrapper around the bit-fields and provides
+ // methods for querying and updating them; an instance takes the state of its first argument.
+ result =
+ API::getTopLevelMember("Nokogiri")
+ .getMember("XML")
+ .getMember("ParseOptions")
+ .getAnInstantiation() and
+ result.asExpr().(CfgNodes::ExprNodes::CallCfgNode).getArgument(0) =
+ trackFeature(f, enable).asExpr()
+ or
+ // The Nokogiri ParseOptions class has methods for setting/unsetting features.
+ // The method names are the lowercase variants of the constant names, with a "no"
+ // prefix for unsetting a feature (e.g. `noent` sets NOENT and `nonoent` unsets it).
+ exists(CfgNodes::ExprNodes::CallCfgNode call |
+ enable = true and
+ call.getExpr().(MethodCall).getMethodName() = f.getConstantName().toLowerCase()
+ or
+ enable = false and
+ call.getExpr().(MethodCall).getMethodName() = "no" + f.getConstantName().toLowerCase()
+ |
+ (
+ // these methods update the receiver
+ result.flowsTo(any(DataFlow::Node n | n.asExpr() = call.getReceiver()))
+ or
+ // in addition they return the (updated) receiver to allow chaining calls.
+ result.asExpr() = call
+ )
+ )
+ )
+ or // recursive case: extend an already tracked node by one type-tracking step
+ exists(TypeTracker t2 | result = trackFeature(f, enable, t2).track(t2, t))
+}
+
+private DataFlow::Node trackFeature(Feature f, boolean enable) {
+  exists(TypeTracker t | t = TypeTracker::end() | trackFeature(f, enable, t).flowsTo(result))
+}
+
+private DataFlow::Node trackEnableFeature(Feature f) { trackFeature(f, true) = result }
+
+private DataFlow::Node trackDisableFeature(Feature f) { trackFeature(f, false) = result }
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll
new file mode 100644
index 00000000000..efb9d7be66c
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll
@@ -0,0 +1,130 @@
+private import ruby
+private import codeql.ruby.Concepts
+private import codeql.ruby.ApiGraphs
+
+/**
+ * An HTTP request issued through the `Excon` library, either as a one-off request or
+ * on a re-used connection:
+ * ```ruby
+ * # one-off request
+ * Excon.get("http://example.com").body
+ *
+ * # connection re-use
+ * connection = Excon.new("http://example.com")
+ * connection.get(path: "/").body
+ * connection.request(method: :get, path: "/")
+ * ```
+ *
+ * TODO: pipelining, streaming responses
+ * https://github.com/excon/excon/blob/master/README.md
+ */
+class ExconHttpRequest extends HTTP::Client::Request::Range {
+  DataFlow::Node requestUse;
+  API::Node requestNode;
+  API::Node connectionNode;
+
+  ExconHttpRequest() {
+    exists(API::Node excon | excon = API::getTopLevelMember("Excon") |
+      // one-off requests
+      connectionNode = excon
+      or
+      // connection re-use
+      connectionNode = [excon.getInstance(), excon.getMember("Connection").getInstance()]
+    ) and
+    // Excon#request exists but Excon.request doesn't.
+    // This shouldn't be a problem - in real code the latter would raise NoMethodError anyway.
+    requestNode =
+      connectionNode
+          .getReturn([
+              "get", "head", "delete", "options", "post", "put", "patch", "trace", "request"
+            ]) and
+    requestUse = requestNode.getAnImmediateUse() and
+    this = requestUse.asExpr().getExpr()
+  }
+
+  override DataFlow::Node getResponseBody() { requestNode.getAMethodCall("body") = result }
+
+  override predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
+    // An explicit `ssl_verify_peer: false` among the arguments after the first.
+    exists(DataFlow::CallNode connectionCall, int i |
+      connectionCall = connectionNode.getAUse() and i > 0
+    |
+      argSetsVerifyPeer(connectionCall.getArgument(i), false, disablingNode)
+    )
+    or
+    // Alternatively, the default is switched off (e.g. `Excon.defaults[:ssl_verify_peer] = false`)
+    // at a point that precedes the request, and no explicit `ssl_verify_peer: true`
+    // appears in the options hash.
+    exists(DataFlow::CallNode disableCall |
+      disablingNode = disableCall and
+      setsDefaultVerification(disableCall, false) and
+      disableCall.asExpr().getASuccessor+() = requestUse.asExpr() and
+      not exists(DataFlow::CallNode connectionCall, int i |
+        connectionCall = connectionNode.getAUse() and i > 0
+      |
+        argSetsVerifyPeer(connectionCall.getArgument(i), true, _)
+      )
+    )
+  }
+
+  override string getFramework() { result = "Excon" }
+}
+
+/**
+ * Holds if `arg` is, or receives, an options hash in which the key
+ * `:ssl_verify_peer` is paired with `value`. `kvNode` is the data-flow node of
+ * that key-value pair.
+ */
+predicate argSetsVerifyPeer(DataFlow::Node arg, boolean value, DataFlow::Node kvNode) {
+  // Case 1: the pair is itself the argument, e.g.:
+  //   Excon.get(..., ssl_verify_peer: false)
+  kvNode = arg and
+  isSslVerifyPeerPair(arg.asExpr().getExpr(), value)
+  or
+  // Case 2: the pair is an entry of a hash literal that flows to the argument, e.g.:
+  //   Excon.get(..., { ssl_verify_peer: false, ... })
+  exists(DataFlow::LocalSourceNode hashNode, Pair entry |
+    hashNode.flowsTo(arg) and
+    entry = hashNode.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and
+    isSslVerifyPeerPair(entry, value) and
+    kvNode.asExpr().getExpr() = entry
+  )
+}
+
+/**
+ * Holds if `callNode` assigns `value` to `Excon.defaults[:ssl_verify_peer]` or
+ * to `Excon.ssl_verify_peer`.
+ */
+private predicate setsDefaultVerification(DataFlow::CallNode callNode, boolean value) {
+  isSslVerifyPeerLiteral(callNode.getArgument(0)) and
+  hasBooleanValue(callNode.getArgument(1), value) and
+  callNode = API::getTopLevelMember("Excon").getReturn("defaults").getAMethodCall("[]=")
+  or
+  hasBooleanValue(callNode.getArgument(0), value) and
+  callNode = API::getTopLevelMember("Excon").getAMethodCall("ssl_verify_peer=")
+}
+
+private predicate isSslVerifyPeerLiteral(DataFlow::Node node) {
+  exists(DataFlow::LocalSourceNode src, SymbolLiteral sym | sym = src.asExpr().getExpr() |
+    src.flowsTo(node) and
+    sym.getValueText() = "ssl_verify_peer"
+  )
+}
+
+/** Holds if a Boolean literal whose value is `value` may flow to `node`. */
+private predicate hasBooleanValue(DataFlow::Node node, boolean value) {
+  exists(DataFlow::LocalSourceNode src, BooleanLiteral lit | lit = src.asExpr().getExpr() |
+    src.flowsTo(node) and
+    lit.getValue() = value
+  )
+}
+
+/** Holds if `p` is the pair `ssl_verify_peer: value`. */
+private predicate isSslVerifyPeerPair(Pair p, boolean value) {
+  exists(DataFlow::Node keyNode, DataFlow::Node valueNode |
+    isSslVerifyPeerLiteral(keyNode) and
+    hasBooleanValue(valueNode, value)
+  |
+    keyNode.asExpr().getExpr() = p.getKey() and valueNode.asExpr().getExpr() = p.getValue()
+  )
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Faraday.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Faraday.qll
new file mode 100644
index 00000000000..de3f6f5f811
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Faraday.qll
@@ -0,0 +1,140 @@
+private import ruby
+private import codeql.ruby.Concepts
+private import codeql.ruby.ApiGraphs
+
+/**
+ * An HTTP request issued through `Faraday`, on the module itself or on a connection:
+ * ```ruby
+ * # one-off request
+ * Faraday.get("http://example.com").body
+ *
+ * # connection re-use
+ * connection = Faraday.new("http://example.com")
+ * connection.get("/").body
+ * ```
+ */
+class FaradayHttpRequest extends HTTP::Client::Request::Range {
+  DataFlow::Node requestUse;
+  API::Node requestNode;
+  API::Node connectionNode;
+
+  FaradayHttpRequest() {
+    exists(API::Node faraday | faraday = API::getTopLevelMember("Faraday") |
+      // one-off requests
+      connectionNode = faraday
+      or
+      // connection re-use
+      connectionNode = faraday.getInstance()
+    ) and
+    requestNode =
+      connectionNode.getReturn(["get", "head", "delete", "post", "put", "patch", "trace"]) and
+    requestUse = requestNode.getAnImmediateUse() and
+    this = requestUse.asExpr().getExpr()
+  }
+
+  override DataFlow::Node getResponseBody() { requestNode.getAMethodCall("body") = result }
+
+  override predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
+    // `Faraday::new` takes its options after the first argument. Among them we
+    // look for
+    //   `{ ssl: { verify: false } }`
+    // or
+    //   `{ ssl: { verify_mode: OpenSSL::SSL::VERIFY_NONE } }`
+    exists(DataFlow::CallNode connectionCall, DataFlow::Node arg, int i |
+      connectionCall = connectionNode.getAUse() and i > 0 and arg = connectionCall.getArgument(i)
+    |
+      // Case 1: the pair is itself the argument, e.g.:
+      //   Faraday.new(..., ssl: {...})
+      disablingNode = arg and
+      isSslOptionsPairDisablingValidation(arg.asExpr().getExpr())
+      or
+      // Case 2: the pair is an entry of a hash literal that flows to the argument, e.g.:
+      //   Faraday.new(..., { ssl: {...} })
+      exists(DataFlow::LocalSourceNode hashNode, Pair entry |
+        hashNode.flowsTo(arg) and
+        entry = hashNode.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and
+        isSslOptionsPairDisablingValidation(entry) and
+        disablingNode.asExpr().getExpr() = entry
+      )
+    )
+  }
+
+  override string getFramework() { result = "Faraday" }
+}
+
+/**
+ * Holds if `p` is a pair whose key is `:ssl` and whose value is a hash that
+ * contains `verify: false` or
+ * `verify_mode: OpenSSL::SSL::VERIFY_NONE`.
+ */
+private predicate isSslOptionsPairDisablingValidation(Pair p) {
+  exists(DataFlow::Node keyNode, DataFlow::Node valueNode |
+    isSymbolLiteral(keyNode, "ssl") and
+    (isHashWithVerifyModeNone(valueNode) or isHashWithVerifyFalse(valueNode))
+  |
+    keyNode.asExpr().getExpr() = p.getKey() and valueNode.asExpr().getExpr() = p.getValue()
+  )
+}
+
+/** Holds if a symbol literal whose text is `valueText` may flow to `node`. */
+private predicate isSymbolLiteral(DataFlow::Node node, string valueText) {
+  exists(DataFlow::LocalSourceNode src, SymbolLiteral sym | sym = src.asExpr().getExpr() |
+    src.flowsTo(node) and
+    sym.getValueText() = valueText
+  )
+}
+
+/**
+ * Holds if a hash literal containing the key-value pair `verify: false` may
+ * flow to `node`.
+ */
+private predicate isHashWithVerifyFalse(DataFlow::Node node) {
+  exists(DataFlow::LocalSourceNode src, HashLiteral lit | lit = src.asExpr().getExpr() |
+    src.flowsTo(node) and
+    isVerifyFalsePair(lit.getAKeyValuePair())
+  )
+}
+
+/**
+ * Holds if a hash literal containing the key-value pair
+ * `verify_mode: OpenSSL::SSL::VERIFY_NONE` may flow to `node`.
+ */
+private predicate isHashWithVerifyModeNone(DataFlow::Node node) {
+  exists(DataFlow::LocalSourceNode src, HashLiteral lit | lit = src.asExpr().getExpr() |
+    src.flowsTo(node) and
+    isVerifyModeNonePair(lit.getAKeyValuePair())
+  )
+}
+
+/**
+ * Holds if `p` is the pair `verify_mode: OpenSSL::SSL::VERIFY_NONE`.
+ */
+private predicate isVerifyModeNonePair(Pair p) {
+  exists(DataFlow::Node k, DataFlow::Node v |
+    k.asExpr().getExpr() = p.getKey() and
+    v.asExpr().getExpr() = p.getValue()
+  |
+    v = API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse() and
+    isSymbolLiteral(k, "verify_mode")
+  )
+}
+
+/**
+ * Holds if `p` is the pair `verify: false`.
+ */
+private predicate isVerifyFalsePair(Pair p) {
+  exists(DataFlow::Node keyNode, DataFlow::Node valueNode |
+    isSymbolLiteral(keyNode, "verify") and
+    isFalse(valueNode)
+  |
+    keyNode.asExpr().getExpr() = p.getKey() and valueNode.asExpr().getExpr() = p.getValue()
+  )
+}
+
+/** Holds if the Boolean literal `false` may flow to `node`. */
+private predicate isFalse(DataFlow::Node node) {
+  exists(DataFlow::LocalSourceNode src, BooleanLiteral lit | lit = src.asExpr().getExpr() |
+    src.flowsTo(node) and
+    lit.isFalse()
+  )
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/HttpClient.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/HttpClient.qll
new file mode 100644
index 00000000000..3db9c653a5c
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/HttpClient.qll
@@ -0,0 +1,55 @@
+private import ruby
+private import codeql.ruby.Concepts
+private import codeql.ruby.ApiGraphs
+
+/**
+ * A call that makes an HTTP request using `HTTPClient`.
+ * ```ruby
+ * HTTPClient.get("http://example.com").body
+ * HTTPClient.get_content("http://example.com")
+ * ```
+ */
+class HttpClientRequest extends HTTP::Client::Request::Range {
+  API::Node requestNode;
+  API::Node connectionNode;
+  DataFlow::Node requestUse;
+  string method;
+
+  HttpClientRequest() {
+    connectionNode =
+      [
+        // One-off requests
+        API::getTopLevelMember("HTTPClient"),
+        // Connection re-use
+        API::getTopLevelMember("HTTPClient").getInstance()
+      ] and
+    requestNode = connectionNode.getReturn(method) and
+    requestUse = requestNode.getAnImmediateUse() and
+    method in [
+        "get", "head", "delete", "options", "post", "put", "trace", "get_content", "post_content"
+      ] and
+    this = requestUse.asExpr().getExpr()
+  }
+
+  override DataFlow::Node getResponseBody() {
+    // The `get_content` and `post_content` methods return the response body as
+    // a string. The other methods return a `HTTPClient::Message` object which
+    // has various methods that return the response body. The two cases are disjoint.
+    method in ["get_content", "post_content"] and result = requestUse
+    or
+    not method in ["get_content", "post_content"] and
+    result = requestNode.getAMethodCall(["body", "http_body", "content", "dump"])
+  }
+
+  override predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
+    // Look for calls to set
+    //   `c.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE`
+    // on an HTTPClient connection object `c`.
+    disablingNode =
+      connectionNode.getReturn("ssl_config").getReturn("verify_mode=").getAnImmediateUse() and
+    disablingNode.(DataFlow::CallNode).getArgument(0) =
+      API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse()
+  }
+
+  override string getFramework() { result = "HTTPClient" }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Httparty.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Httparty.qll
new file mode 100644
index 00000000000..b1746692bf7
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Httparty.qll
@@ -0,0 +1,95 @@
+private import ruby
+private import codeql.ruby.Concepts
+private import codeql.ruby.ApiGraphs
+
+/**
+ * An HTTP request issued through the `HTTParty` library.
+ * ```ruby
+ * # one-off request - returns the response body
+ * HTTParty.get("http://example.com")
+ *
+ * # TODO: module inclusion
+ * class MyClass
+ *   include HTTParty
+ * end
+ *
+ * MyClass.new("http://example.com")
+ * ```
+ */
+class HttpartyRequest extends HTTP::Client::Request::Range {
+  API::Node requestNode;
+  DataFlow::Node requestUse;
+
+  HttpartyRequest() {
+    exists(string method |
+      method = ["get", "head", "delete", "options", "post", "put", "patch"] and
+      requestNode = API::getTopLevelMember("HTTParty").getReturn(method)
+    ) and
+    requestUse = requestNode.getAnImmediateUse() and
+    this = requestUse.asExpr().getExpr()
+  }
+
+  override DataFlow::Node getResponseBody() {
+    // When HTTParty recognises the response type it parses the response and returns it
+    // directly from the request call; otherwise it returns a `HTTParty::Response`
+    // object that has a `#body` method. So a call to `#body` on the response, when
+    // there is one, is the response body; failing that, the response itself is.
+    if exists(requestNode.getAMethodCall("body"))
+    then result = requestNode.getAMethodCall("body")
+    else result = requestUse
+  }
+
+  override predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
+    // The request methods take their options after the first argument; among
+    // them we look for `{ verify: false }` or `{ verify_peer: false }`.
+    exists(MethodCall requestCall, DataFlow::Node arg, int i |
+      requestCall = requestUse.asExpr().getExpr() and
+      arg.asExpr().getExpr() = requestCall.getArgument(i) and
+      i > 0
+    |
+      // Case 1: the pair is itself the argument, e.g.:
+      //   HTTParty.get(..., verify: false)
+      disablingNode = arg and
+      isVerifyFalsePair(arg.asExpr().getExpr())
+      or
+      // Case 2: the pair is an entry of a hash literal that flows to the argument, e.g.:
+      //   HTTParty.get(..., { verify: false, ... })
+      exists(DataFlow::LocalSourceNode hashNode, Pair entry |
+        hashNode.flowsTo(arg) and
+        entry = hashNode.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and
+        isVerifyFalsePair(entry) and
+        disablingNode.asExpr().getExpr() = entry
+      )
+    )
+  }
+
+  override string getFramework() { result = "HTTParty" }
+}
+
+/** Holds if the symbol literal `verify` or `verify_peer` may flow to `node`. */
+private predicate isVerifyLiteral(DataFlow::Node node) {
+  exists(DataFlow::LocalSourceNode src, SymbolLiteral sym | sym = src.asExpr().getExpr() |
+    src.flowsTo(node) and
+    sym.getValueText() = ["verify", "verify_peer"]
+  )
+}
+
+/** Holds if the Boolean literal `false` may flow to `node`. */
+private predicate isFalse(DataFlow::Node node) {
+  exists(DataFlow::LocalSourceNode src, BooleanLiteral lit | lit = src.asExpr().getExpr() |
+    src.flowsTo(node) and
+    lit.isFalse()
+  )
+}
+
+/**
+ * Holds if `p` is the pair `verify: false` or the pair `verify_peer: false`.
+ */
+private predicate isVerifyFalsePair(Pair p) {
+  exists(DataFlow::Node keyNode, DataFlow::Node valueNode |
+    isVerifyLiteral(keyNode) and
+    isFalse(valueNode)
+  |
+    keyNode.asExpr().getExpr() = p.getKey() and valueNode.asExpr().getExpr() = p.getValue()
+  )
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/NetHttp.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/NetHttp.qll
new file mode 100644
index 00000000000..9d9c6f7aff3
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/NetHttp.qll
@@ -0,0 +1,69 @@
+private import codeql.ruby.AST
+private import codeql.ruby.Concepts
+private import codeql.ruby.dataflow.RemoteFlowSources
+private import codeql.ruby.ApiGraphs
+private import codeql.ruby.dataflow.internal.DataFlowPublic
+
+/**
+ * A call on `Net::HTTP`, or on an instance of it, that initiates an HTTP request.
+ * ```ruby
+ * Net::HTTP.get("http://example.com/")
+ * Net::HTTP.post("http://example.com/", "some_data")
+ * req = Net::HTTP.new("example.com")
+ * response = req.get("/")
+ * ```
+ */
+class NetHttpRequest extends HTTP::Client::Request::Range {
+  private DataFlow::CallNode request;
+  private DataFlow::Node responseBody;
+
+  NetHttpRequest() {
+    exists(API::Node netHttp, API::Node requestNode, string method |
+      netHttp = API::getTopLevelMember("Net").getMember("HTTP") and
+      request = requestNode.getAnImmediateUse() and
+      this = request.asExpr().getExpr()
+    |
+      // Net::HTTP.get(...)
+      method = "get" and
+      requestNode = netHttp.getReturn(method) and
+      responseBody = request
+      or
+      // Net::HTTP.post(...).body
+      method = ["post", "post_form"] and
+      requestNode = netHttp.getReturn(method) and
+      responseBody = requestNode.getAMethodCall(["body", "read_body", "entity"])
+      or
+      // Net::HTTP.new(..).get(..).body
+      method = [
+          "get", "get2", "request_get", "head", "head2", "request_head", "delete", "put", "patch",
+          "post", "post2", "request_post", "request"
+        ] and
+      requestNode = netHttp.getInstance().getReturn(method) and
+      responseBody = requestNode.getAMethodCall(["body", "read_body", "entity"])
+    )
+  }
+
+  /**
+   * Gets the node for the first argument of the request call, which represents the URL.
+   * Not used at present; it may prove useful later, e.g. to filter out certain requests.
+   */
+  DataFlow::Node getURLArgument() { request.getArgument(0) = result }
+
+  override DataFlow::Node getResponseBody() { responseBody = result }
+
+  override predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
+    // Certificate validation is bypassed when there is a setter call such as
+    //   foo.verify_mode = OpenSSL::SSL::VERIFY_NONE
+    // and the receiver of that setter flows to the receiver of the request:
+    //   foo.request(...)
+    exists(DataFlow::CallNode setter |
+      setter.asExpr().getExpr().(SetterMethodCall).getMethodName() = "verify_mode=" and
+      localFlow(setter.getReceiver(), request.getReceiver()) and
+      disablingNode = setter.getArgument(0) and
+      disablingNode =
+        API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse()
+    )
+  }
+
+  override string getFramework() { result = "Net::HTTP" }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/OpenURI.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/OpenURI.qll
new file mode 100644
index 00000000000..54a2c180fec
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/OpenURI.qll
@@ -0,0 +1,113 @@
+private import ruby
+private import codeql.ruby.Concepts
+private import codeql.ruby.ApiGraphs
+private import codeql.ruby.frameworks.StandardLibrary
+
+/**
+ * An HTTP request made with `OpenURI`, through `URI.open` or
+ * `URI.parse(...).open`.
+ *
+ * ```ruby
+ * URI.open("http://example.com").readlines
+ * URI.parse("http://example.com").open.read
+ * ```
+ */
+class OpenUriRequest extends HTTP::Client::Request::Range {
+  API::Node requestNode;
+  DataFlow::Node requestUse;
+
+  OpenUriRequest() {
+    exists(API::Node uri | uri = API::getTopLevelMember("URI") |
+      requestNode = [uri, uri.getReturn("parse")].getReturn("open")
+    ) and
+    requestUse = requestNode.getAnImmediateUse() and
+    this = requestUse.asExpr().getExpr()
+  }
+
+  override DataFlow::Node getResponseBody() {
+    requestNode.getAMethodCall(["read", "readlines"]) = result
+  }
+
+  override predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
+    // Every argument of the request call is considered, whatever its position.
+    exists(DataFlow::Node arg, MethodCall call | call = requestUse.asExpr().getExpr() |
+      arg.asExpr().getExpr() = call.getArgument(_) and
+      argumentDisablesValidation(arg, disablingNode)
+    )
+  }
+
+  override string getFramework() { result = "OpenURI" }
+}
+
+/**
+ * An HTTP request made with `OpenURI` through its `Kernel.open`
+ * interface.
+ *
+ * ```ruby
+ * Kernel.open("http://example.com").read
+ * ```
+ */
+class OpenUriKernelOpenRequest extends HTTP::Client::Request::Range {
+  DataFlow::Node requestUse;
+
+  OpenUriKernelOpenRequest() {
+    this = requestUse.asExpr().getExpr() and
+    this.getMethodName() = "open" and
+    requestUse instanceof KernelMethodCall
+  }
+
+  override DataFlow::CallNode getResponseBody() {
+    exists(DataFlow::LocalSourceNode src | src = requestUse | src.flowsTo(result.getReceiver())) and
+    result.asExpr().getExpr().(MethodCall).getMethodName() = ["read", "readlines"]
+  }
+
+  override predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
+    exists(DataFlow::Node arg, MethodCall call, int i |
+      call = requestUse.asExpr().getExpr() and i > 0
+    |
+      arg.asExpr().getExpr() = call.getArgument(i) and
+      argumentDisablesValidation(arg, disablingNode)
+    )
+  }
+
+  override string getFramework() { result = "OpenURI" }
+}
+
+/**
+ * Holds if the argument `arg` is an option, or an options hash, that disables
+ * certificate validation. `disablingNode` is the node of the responsible
+ * `ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE` pair.
+ */
+private predicate argumentDisablesValidation(DataFlow::Node arg, DataFlow::Node disablingNode) {
+  // Case 1: the pair is itself the argument, e.g.:
+  //   URI.open(..., ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE)
+  disablingNode = arg and
+  isSslVerifyModeNonePair(arg.asExpr().getExpr())
+  or
+  // Case 2: the pair is an entry of a hash literal that flows to the argument, e.g.:
+  //   URI.open(..., { ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, ... })
+  exists(DataFlow::LocalSourceNode hashNode, Pair entry |
+    hashNode.flowsTo(arg) and
+    entry = hashNode.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and
+    isSslVerifyModeNonePair(entry) and
+    disablingNode.asExpr().getExpr() = entry
+  )
+}
+
+/** Holds if `p` is the pair `ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE`. */
+private predicate isSslVerifyModeNonePair(Pair p) {
+  exists(DataFlow::Node k, DataFlow::Node v |
+    v = API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse() and
+    isSslVerifyModeLiteral(k)
+  |
+    k.asExpr().getExpr() = p.getKey() and v.asExpr().getExpr() = p.getValue()
+  )
+}
+
+/** Holds if the symbol literal `:ssl_verify_mode` may flow to `node`. */
+private predicate isSslVerifyModeLiteral(DataFlow::Node node) {
+  exists(DataFlow::LocalSourceNode src, SymbolLiteral sym | sym = src.asExpr().getExpr() |
+    src.flowsTo(node) and
+    sym.getValueText() = "ssl_verify_mode"
+  )
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/RestClient.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/RestClient.qll
new file mode 100644
index 00000000000..3b6ff318b66
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/RestClient.qll
@@ -0,0 +1,71 @@
+private import ruby
+private import codeql.ruby.Concepts
+private import codeql.ruby.ApiGraphs
+
+/**
+ * An HTTP request issued through the `RestClient` library.
+ * ```ruby
+ * RestClient.get("http://example.com").body
+ * ```
+ */
+class RestClientHttpRequest extends HTTP::Client::Request::Range {
+  DataFlow::Node requestUse;
+  API::Node requestNode;
+  API::Node connectionNode;
+
+  RestClientHttpRequest() {
+    exists(API::Node restClient | restClient = API::getTopLevelMember("RestClient") |
+      connectionNode = [restClient, restClient.getMember("Resource").getInstance()]
+    ) and
+    exists(string method |
+      method = ["get", "head", "delete", "options", "post", "put", "patch"] and
+      requestNode = connectionNode.getReturn(method)
+    ) and
+    requestUse = requestNode.getAnImmediateUse() and
+    this = requestUse.asExpr().getExpr()
+  }
+
+  override DataFlow::Node getResponseBody() { requestNode.getAMethodCall("body") = result }
+
+  override predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
+    // `RestClient::Resource::new` takes an options hash argument; among the arguments
+    // after the first we look for `{ verify_ssl: OpenSSL::SSL::VERIFY_NONE }`.
+    exists(DataFlow::CallNode connectionCall, DataFlow::Node arg, int i |
+      connectionCall = connectionNode.getAUse() and i > 0 and arg = connectionCall.getArgument(i)
+    |
+      // Case 1: the pair is itself the argument, e.g.:
+      //   RestClient::Resource.new(..., verify_ssl: OpenSSL::SSL::VERIFY_NONE)
+      disablingNode = arg and
+      isVerifySslNonePair(arg.asExpr().getExpr())
+      or
+      // Case 2: the pair is an entry of a hash literal that flows to the argument, e.g.:
+      //   RestClient::Resource.new(..., { verify_ssl: OpenSSL::SSL::VERIFY_NONE })
+      exists(DataFlow::LocalSourceNode hashNode, Pair entry |
+        hashNode.flowsTo(arg) and
+        entry = hashNode.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and
+        isVerifySslNonePair(entry) and
+        disablingNode.asExpr().getExpr() = entry
+      )
+    )
+  }
+
+  override string getFramework() { result = "RestClient" }
+}
+
+/** Holds if `p` is the pair `verify_ssl: OpenSSL::SSL::VERIFY_NONE`. */
+private predicate isVerifySslNonePair(Pair p) {
+  exists(DataFlow::Node k, DataFlow::Node v |
+    v = API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse() and
+    isSslVerifyModeLiteral(k)
+  |
+    k.asExpr().getExpr() = p.getKey() and v.asExpr().getExpr() = p.getValue()
+  )
+}
+
+/** Holds if the symbol literal `:verify_ssl` may flow to `node`. */
+private predicate isSslVerifyModeLiteral(DataFlow::Node node) {
+  exists(DataFlow::LocalSourceNode src, SymbolLiteral sym | sym = src.asExpr().getExpr() |
+    src.flowsTo(node) and
+    sym.getValueText() = "verify_ssl"
+  )
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Typhoeus.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Typhoeus.qll
new file mode 100644
index 00000000000..38fa5288079
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/frameworks/http_clients/Typhoeus.qll
@@ -0,0 +1,74 @@
+private import ruby
+private import codeql.ruby.Concepts
+private import codeql.ruby.ApiGraphs
+
+/**
+ * An HTTP request made by calling one of the verb methods of `Typhoeus`, e.g.
+ * ```ruby
+ * Typhoeus.get("http://example.com").body
+ * ```
+ */
+class TyphoeusHttpRequest extends HTTP::Client::Request::Range {
+  DataFlow::Node requestUse;
+  API::Node requestNode;
+
+  TyphoeusHttpRequest() {
+    exists(string verb | verb = ["get", "head", "delete", "options", "post", "put", "patch"] |
+      requestNode = API::getTopLevelMember("Typhoeus").getReturn(verb)
+    ) and
+    requestUse = requestNode.getAnImmediateUse() and
+    this = requestUse.asExpr().getExpr()
+  }
+
+  override DataFlow::Node getResponseBody() { result = requestNode.getAMethodCall("body") }
+
+  override predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
+    // Look for `ssl_verifypeer: false` among the arguments after the first one.
+    exists(DataFlow::Node optionArg, int idx |
+      idx > 0 and requestUse.asExpr().getExpr().(MethodCall).getArgument(idx) = optionArg.asExpr().getExpr()
+    |
+      // Case 1: the pair is itself an argument, e.g.:
+      // Typhoeus.get(..., ssl_verifypeer: false)
+      disablingNode = optionArg and
+      isSslVerifyPeerFalsePair(disablingNode.asExpr().getExpr())
+      or
+      // Case 2: the pair is in a hash literal that flows to the argument, e.g.:
+      // Typhoeus.get(..., { ssl_verifypeer: false, ... })
+      exists(DataFlow::LocalSourceNode hashSource, Pair entry |
+        hashSource.flowsTo(optionArg) and
+        entry = hashSource.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and
+        isSslVerifyPeerFalsePair(entry) and
+        entry = disablingNode.asExpr().getExpr()
+      )
+    )
+  }
+
+  override string getFramework() { result = "Typhoeus" }
+}
+
+/** Holds if `p` has an `ssl_verifypeer` key and a value that may be `false`. */
+private predicate isSslVerifyPeerFalsePair(Pair p) {
+  // The key and the value are matched independently of each other.
+  exists(DataFlow::Node keyNode | p.getKey() = keyNode.asExpr().getExpr() |
+    isSslVerifyPeerLiteral(keyNode)
+  ) and
+  exists(DataFlow::Node valueNode | p.getValue() = valueNode.asExpr().getExpr() |
+    isFalse(valueNode)
+  )
+}
+
+/** Holds if the symbol literal `:ssl_verifypeer` may flow to `node`. */
+private predicate isSslVerifyPeerLiteral(DataFlow::Node node) {
+  exists(DataFlow::LocalSourceNode symbolSource, SymbolLiteral sym |
+    sym = symbolSource.asExpr().getExpr() and sym.getValueText() = "ssl_verifypeer" and
+    symbolSource.flowsTo(node)
+  )
+}
+
+/** Holds if the Boolean literal `false` may flow to `node`. */
+private predicate isFalse(DataFlow::Node node) {
+  exists(DataFlow::LocalSourceNode falseSource, BooleanLiteral lit |
+    lit = falseSource.asExpr().getExpr() and lit.isFalse() and
+    falseSource.flowsTo(node)
+  )
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/printAst.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/printAst.qll
new file mode 100644
index 00000000000..0b5604dc670
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/printAst.qll
@@ -0,0 +1,203 @@
+/**
+ * Provides queries to pretty-print a Ruby abstract syntax tree as a graph.
+ *
+ * By default, this will print the AST for all nodes in the database. To change
+ * this behavior, extend `PrintAstConfiguration` and override `shouldPrintNode`
+ * to hold for only the AST nodes you wish to view.
+ */
+
+private import AST
+private import codeql.ruby.regexp.RegExpTreeView as RETV
+
+/** Holds if `n` is the desugaring of some node, or is nested inside such a desugaring. */
+predicate isDesugared(AstNode n) {
+  exists(AstNode sugar | n = sugar.getDesugared())
+  or
+  exists(AstNode enclosing | enclosing = n.getParent() | isDesugared(enclosing))
+}
+
+/**
+ * The configuration of the AST printer. A query can extend this class to
+ * control which nodes and edges are printed.
+ */
+class PrintAstConfiguration extends string {
+  PrintAstConfiguration() { this = "PrintAstConfiguration" }
+
+  /** Holds if the given node should be printed. */
+  predicate shouldPrintNode(AstNode n) {
+    not isDesugared(n)
+    or
+    not n.isSynthesized()
+    or
+    // A synthesized node that is not itself a desugaring and that sits
+    // directly below a non-synthesized parent.
+    n.isSynthesized() and
+    not exists(AstNode sugar | n = sugar.getDesugared()) and
+    exists(AstNode parent | parent = n.getParent() |
+      not parent.isSynthesized() and not n = parent.getDesugared()
+    )
+  }
+
+  /** Holds if the edge named `edgeName` from `parent` to `child` should be printed. */
+  predicate shouldPrintAstEdge(AstNode parent, string edgeName, AstNode child) {
+    not child = parent.getDesugared() and
+    child = parent.getAChild(edgeName)
+  }
+}
+
+private predicate shouldPrintNode(AstNode n) {
+  exists(PrintAstConfiguration config | config.shouldPrintNode(n))
+}
+
+private predicate shouldPrintAstEdge(AstNode parent, string edgeName, AstNode child) {
+  exists(PrintAstConfiguration config | config.shouldPrintAstEdge(parent, edgeName, child))
+}
+
+newtype TPrintNode =
+  TPrintRegularAstNode(AstNode n) { shouldPrintNode(n) } or
+  TPrintRegExpNode(RETV::RegExpTerm term) {
+    // Only terms of regexp literals that are themselves printed get a node.
+    exists(RegExpLiteral printedLiteral | shouldPrintNode(printedLiteral) |
+      printedLiteral.getParsed() = term.getRootTerm()
+    )
+  }
+
+/**
+ * A node of the tree that the AST printer outputs.
+ */
+class PrintAstNode extends TPrintNode {
+  /** Gets the label shown for this node in the PrintAst output tree. */
+  string toString() { none() }
+
+  /**
+   * Gets the child reached through the edge named `edgeName`. The edge name
+   * is typically the name of the predicate that accesses the child.
+   */
+  PrintAstNode getChild(string edgeName) { none() }
+
+  /** Gets any child of this node, regardless of the edge name. */
+  final PrintAstNode getAChild() { result = this.getChild(_) }
+
+  /** Gets the node that has this node as a child, if any. */
+  final PrintAstNode getParent() { this = result.getAChild() }
+
+  /**
+   * Holds if this node spans from column `startcolumn` of line `startline`
+   * to column `endcolumn` of line `endline` in file `filepath`.
+   * For more information, see
+   * [LGTM locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+   */
+  predicate hasLocationInfo(
+    string filepath, int startline, int startcolumn, int endline, int endcolumn
+  ) {
+    none()
+  }
+
+  /** Gets the value by which this node is ordered amongst its siblings. */
+  int getOrder() { none() }
+
+  /**
+   * Gets the value of the property named `key` of this node: its label
+   * (`semmle.label`) or its sibling order (`semmle.order`).
+   */
+  final string getProperty(string key) {
+    key = "semmle.order" and result = this.getOrder().toString()
+    or
+    key = "semmle.label" and
+    result = this.toString()
+  }
+}
+
+/** An output-tree node that wraps an `AstNode`. */
+class PrintRegularAstNode extends PrintAstNode, TPrintRegularAstNode {
+  AstNode astNode;
+
+  PrintRegularAstNode() { this = TPrintRegularAstNode(astNode) }
+
+  override string toString() {
+    result = "[" + concat(astNode.getAPrimaryQlClass(), ", ") + "] " + astNode.toString()
+  }
+
+  override PrintAstNode getChild(string edgeName) {
+    // A regexp literal gets its parsed regexp tree as an extra child, under
+    // the fixed edge name "getParsed".
+    edgeName = "getParsed" and
+    result = TPrintRegExpNode(astNode.(RegExpLiteral).getParsed())
+    or
+    // Ordinary children: the AST children whose edge is to be printed.
+    exists(AstNode printedChild | result = TPrintRegularAstNode(printedChild) |
+      shouldPrintAstEdge(astNode, edgeName, printedChild)
+    )
+  }
+
+  override int getOrder() {
+    this =
+      rank[result](PrintRegularAstNode n, Location loc, File f |
+        f = loc.getFile() and
+        loc = n.getLocation()
+      |
+        n order by f.getBaseName(), f.getAbsolutePath(), loc.getStartLine(), loc.getStartColumn()
+      )
+  }
+
+  /** Gets the location of the wrapped AST node. */
+  Location getLocation() { result = astNode.getLocation() }
+
+  override predicate hasLocationInfo(
+    string filepath, int startline, int startcolumn, int endline, int endcolumn
+  ) {
+    astNode.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+  }
+}
+
+/** An output-tree node that wraps a term of a parsed regexp. */
+class PrintRegExpNode extends PrintAstNode, TPrintRegExpNode {
+  RETV::RegExpTerm regexNode;
+
+  PrintRegExpNode() { this = TPrintRegExpNode(regexNode) }
+
+  override string toString() {
+    result = "[" + concat(regexNode.getAPrimaryQlClass(), ", ") + "] " + regexNode.toString()
+  }
+
+  override PrintAstNode getChild(string edgeName) {
+    // Each child is labelled with its index in this term.
+    exists(int i | edgeName = i.toString() and result = TPrintRegExpNode(regexNode.getChild(i)))
+  }
+
+  override int getOrder() { regexNode = any(RETV::RegExpTerm parentTerm).getChild(result) }
+
+  override predicate hasLocationInfo(
+    string filepath, int startline, int startcolumn, int endline, int endcolumn
+  ) {
+    regexNode.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+  }
+}
+
+/**
+ * Holds if `node` is part of the output tree, and the property named `key`
+ * of that node has the given `value`.
+ */
+query predicate nodes(PrintAstNode node, string key, string value) { node.getProperty(key) = value }
+
+/**
+ * Holds if `target` is a child of `source` in the output tree, and the
+ * property `key` of the edge between them has the given `value`.
+ */
+query predicate edges(PrintAstNode source, PrintAstNode target, string key, string value) {
+  source = target.getParent() and
+  (
+    key = "semmle.order" and
+    value = target.getProperty("semmle.order")
+    or
+    key = "semmle.label" and
+    value = strictconcat(string name | target = source.getChild(name) | name, "/")
+  )
+}
+
+/**
+ * Holds if the graph-level property named `key` has the given `value`.
+ */
+query predicate graphProperties(string key, string value) {
+  value = "tree" and key = "semmle.graphKind"
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ExponentialBackTracking.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ExponentialBackTracking.qll
new file mode 100644
index 00000000000..a805366bab8
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ExponentialBackTracking.qll
@@ -0,0 +1,343 @@
+private import ReDoSUtil
+private import RegExpTreeView
+private import codeql.Locations
+
+/*
+ * This query implements the analysis described in the following two papers:
+ *
+ * James Kirrage, Asiri Rathnayake, Hayo Thielecke: Static Analysis for
+ * Regular Expression Denial-of-Service Attacks. NSS 2013.
+ * (http://www.cs.bham.ac.uk/~hxt/research/reg-exp-sec.pdf)
+ * Asiri Rathnayake, Hayo Thielecke: Static Analysis for Regular Expression
+ * Exponential Runtime via Substructural Logics. 2014.
+ * (https://www.cs.bham.ac.uk/~hxt/research/redos_full.pdf)
+ *
+ * The basic idea is to search for overlapping cycles in the NFA, that is,
+ * states `q` such that there are two distinct paths from `q` to itself
+ * that consume the same word `w`.
+ *
+ * For any such state `q`, an attack string can be constructed as follows:
+ * concatenate a prefix `v` that takes the NFA to `q` with `n` copies of
+ * the word `w` that leads back to `q` along two different paths, followed
+ * by a suffix `x` that is _not_ accepted in state `q`. A backtracking
+ * implementation will need to explore at least 2^n different ways of going
+ * from `q` back to itself while trying to match the `n` copies of `w`
+ * before finally giving up.
+ *
+ * Now in order to identify overlapping cycles, all we have to do is find
+ * pumpable forks, that is, states `q` that can transition to two different
+ * states `r1` and `r2` on the same input symbol `c`, such that there are
+ * paths from both `r1` and `r2` to `q` that consume the same word. The latter
+ * condition is equivalent to saying that `(q, q)` is reachable from `(r1, r2)`
+ * in the product NFA.
+ *
+ * This is what the query does. It makes a simple attempt to construct a
+ * prefix `v` leading into `q`, but only to improve the alert message.
+ * And the query tries to prove the existence of a suffix that ensures
+ * rejection. This check might fail, which can cause false positives.
+ *
+ * Finally, sometimes it depends on the translation whether the NFA generated
+ * for a regular expression has a pumpable fork or not. We implement one
+ * particular translation, which may result in false positives or negatives
+ * relative to some particular regular expression engine.
+ *
+ * More precisely, the query constructs an NFA from a regular expression `r`
+ * as follows:
+ *
+ * * Every sub-term `t` gives rise to an NFA state `Match(t,i)`, representing
+ * the state of the automaton before attempting to match the `i`th character in `t`.
+ * * There is one accepting state `Accept(r)`.
+ * * There is a special `AcceptAnySuffix(r)` state, which accepts any suffix string
+ * by using an epsilon transition to `Accept(r)` and an any transition to itself.
+ * * Transitions between states may be labelled with epsilon, or an abstract
+ * input symbol.
+ * * Each abstract input symbol represents a set of concrete input characters:
+ * either a single character, a set of characters represented by a
+ * character class, or the set of all characters.
+ * * The product automaton is constructed lazily, starting with pair states
+ * `(q, q)` where `q` is a fork, and proceeding along an over-approximate
+ * step relation.
+ * * The over-approximate step relation allows transitions along pairs of
+ * abstract input symbols where the symbols have overlap in the characters they accept.
+ * * Once a trace of pairs of abstract input symbols that leads from a fork
+ * back to itself has been identified, we attempt to construct a concrete
+ * string corresponding to it, which may fail.
+ * * Lastly we ensure that any state reached by repeating `n` copies of `w` has
+ * a suffix `x` (possibly empty) that is most likely __not__ accepted.
+ */
+
+/**
+ * Holds if the term of state `s`, or one of its ancestors, is a repetition that might backtrack.
+ */
+pragma[noinline]
+private predicate stateInsideBacktracking(State s) {
+  exists(MaybeBacktrackingRepetition rep | rep = s.getRepr().getParent*())
+}
+
+/**
+ * An infinitely repeating quantifier that might backtrack.
+ */
+private class MaybeBacktrackingRepetition extends InfiniteRepetitionQuantifier {
+  MaybeBacktrackingRepetition() {
+    // Some strict descendant is an alternation or another quantifier.
+    exists(RegExpTerm descendant | this = descendant.getParent+() |
+      descendant instanceof RegExpQuantifier
+      or
+      descendant instanceof RegExpAlt
+    )
+  }
+}
+
+/**
+ * A state in the product automaton.
+ *
+ * Product states are constructed lazily: `(q, q)` for every fork state
+ * `q`, plus every pair that is a `step` successor of an already
+ * constructed pair. To cut down on the number of states, a pair
+ * `(q1, q2)` is only represented in the orientation where the rank of
+ * `q1` (see `rankState`) is no bigger than the rank of `q2`.
+ *
+ * Both `isFork` and `step` require the states involved to be inside a
+ * repetition that might backtrack, so that holds for every pair
+ * constructed here as well.
+ */
+private newtype TStatePair =
+  MkStatePair(State q1, State q2) {
+    q1 = q2 and isFork(q2, _, _, _, _)
+    or
+    rankState(q1) <= rankState(q2) and
+    (step(_, _, _, q2, q1) or step(_, _, _, q1, q2))
+  }
+
+/**
+ * Gets the rank of `state` when all states are ordered by the start line and
+ * start column of their term, and then by their `toString()`.
+ */
+private int rankState(State state) {
+  state =
+    rank[result](State candidate, Location loc |
+      candidate.getRepr().getLocation() = loc
+    |
+      candidate order by loc.getStartLine(), loc.getStartColumn(), candidate.toString()
+    )
+}
+
+/**
+ * A pair of NFA states, i.e. a state of the product automaton.
+ */
+private class StatePair extends TStatePair {
+  State left;
+  State right;
+
+  StatePair() { this = MkStatePair(left, right) }
+
+  /** Gets a textual representation of this element. */
+  string toString() { result = "(" + left + ", " + right + ")" }
+
+  /** Gets the first component of the state pair. */
+  State getLeft() { result = left }
+
+  /** Gets the second component of the state pair. */
+  State getRight() { result = right }
+}
+
+/**
+ * Holds for every constructed state pair `p`; the type of `p` is the only constraint.
+ *
+ * Passed as the first argument of `shortestDistances` in `statePairDist`.
+ */
+private predicate isStatePair(StatePair p) { any() }
+
+/**
+ * Holds if there are transitions from the components of `q` to the corresponding
+ * components of `r`, whatever the input symbols on those transitions are.
+ *
+ * Passed as the second argument of `shortestDistances` in `statePairDist`.
+ */
+private predicate delta2(StatePair q, StatePair r) { step(q, _, _, r) }
+
+/**
+ * Gets the minimum length of a path from `q` to `r` in the
+ * product automaton, computed over `isStatePair` and `delta2`.
+ */
+private int statePairDist(StatePair q, StatePair r) =
+ shortestDistances(isStatePair/1, delta2/2)(q, r, result)
+
+/**
+ * Holds if there are transitions from `q` to `r1` and from `q` to `r2`
+ * labelled with `s1` and `s2`, respectively, where `s1` and `s2` do not
+ * trivially have an empty intersection.
+ *
+ * This predicate only holds for states associated with regular expressions
+ * that have at least one repetition quantifier in them (otherwise the
+ * expression cannot be vulnerable to ReDoS attacks anyway).
+ */
+pragma[noopt]
+private predicate isFork(State q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
+ stateInsideBacktracking(q) and
+ exists(State q1, State q2 |
+ q1 = epsilonSucc*(q) and
+ delta(q1, s1, r1) and
+ q2 = epsilonSucc*(q) and
+ delta(q2, s2, r2) and
+ // Use pragma[noopt] to prevent intersect(s1,s2) from being the starting point of the join.
+ // From (s1,s2) it would find a huge number of intermediate state pairs (q1,q2) originating from different literals,
+ // and discover at the end that no `q` can reach both `q1` and `q2` by epsilon transitions.
+ exists(intersect(s1, s2))
+ |
+ s1 != s2
+ or
+ r1 != r2
+ or
+ r1 = r2 and q1 != q2
+ or
+ // If q can reach itself by epsilon transitions, then there are two distinct paths to the q1/q2 state:
+ // one that uses the loop and one that doesn't. The engine will separately attempt to match with each path,
+ // despite ending in the same state. The "fork" thus arises from the choice of whether to use the loop or not.
+ // To avoid every state in the loop becoming a fork state,
+ // we arbitrarily pick the InfiniteRepetitionQuantifier state as the canonical fork state for the loop
+ // (every epsilon-loop must contain such a state).
+ //
+ // We additionally require that there exists another InfiniteRepetitionQuantifier `mid` on the path from `q` to itself.
+ // This is done to avoid flagging regular expressions such as `/(a?)*b/` - that only has polynomial runtime, and is detected by `js/polynomial-redos`.
+ // The below code is therefore a heuristic, that only flags regular expressions such as `/(a*)*b/`,
+ // and does not flag regular expressions such as `/(a?b?)c/`, but the latter pattern is not used frequently.
+ r1 = r2 and
+ q1 = q2 and
+ epsilonSucc+(q) = q and
+ exists(RegExpTerm term | term = q.getRepr() | term instanceof InfiniteRepetitionQuantifier) and
+ // One of the mid states is an infinite quantifier itself
+ exists(State mid, RegExpTerm term |
+ mid = epsilonSucc+(q) and
+ term = mid.getRepr() and
+ term instanceof InfiniteRepetitionQuantifier and
+ q = epsilonSucc+(mid) and
+ not mid = q
+ )
+ ) and
+ stateInsideBacktracking(r1) and
+ stateInsideBacktracking(r2)
+}
+
+/** Gets whichever of `(q1, q2)` and `(q2, q1)` is constructed as a state pair. */
+private StatePair mkStatePair(State q1, State q2) {
+  // Pairs are only constructed in one orientation, so try both.
+  result = MkStatePair(q2, q1)
+  or
+  result = MkStatePair(q1, q2)
+}
+
+/**
+ * Holds if the product automaton can go from `q` to `r`, with the two
+ * components following transitions labelled `s1` and `s2`, respectively.
+ */
+private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, StatePair r) {
+  exists(State succ1, State succ2 | r = mkStatePair(succ1, succ2) | step(q, s1, s2, succ1, succ2))
+}
+
+/**
+ * Holds if there are transitions from the components of `q` to `r1` and `r2`
+ * labelled with `s1` and `s2`, respectively, and `intersect(s1, s2)` exists.
+ *
+ * We only consider transitions where both resulting states `r1` and `r2` are
+ * inside a repetition that might backtrack.
+ */
+pragma[noopt]
+private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
+ exists(State q1, State q2 | q.getLeft() = q1 and q.getRight() = q2 |
+ deltaClosed(q1, s1, r1) and
+ deltaClosed(q2, s2, r2) and
+ // `pragma[noopt]` is used to force the join on `intersect` to happen last.
+ exists(intersect(s1, s2))
+ ) and
+ stateInsideBacktracking(r1) and
+ stateInsideBacktracking(r2)
+}
+
+private newtype TTrace =
+  Nil() or
+  Step(InputSymbol s1, InputSymbol s2, TTrace t) {
+    // Either the first step of a trace (the symbol pair of some fork), or
+    // a step that extends a trace already known to be reachable from a fork.
+    isFork(_, s1, s2, _, _) and t = Nil()
+    or
+    exists(StatePair p | isReachableFromFork(_, p, t, _) | step(p, s1, s2, _))
+  }
+
+
+/**
+ * A sequence of input-symbol pairs labelling a path in the product automaton
+ * that starts at some fork state. The most recent step is outermost.
+ */
+private class Trace extends TTrace {
+  /** Gets a textual representation of this element. */
+  string toString() {
+    exists(InputSymbol s1, InputSymbol s2, Trace tail | this = Step(s1, s2, tail) |
+      result = "Step(" + s1 + ", " + s2 + ", " + tail + ")"
+    )
+    or
+    this = Nil() and result = "Nil()"
+  }
+}
+
+/**
+ * Gets a string built by concatenating `intersect(s1, s2)` for each step of `t`, oldest step first.
+ */
+private string concretise(Trace t) {
+  exists(InputSymbol s1, InputSymbol s2, Trace earlier | t = Step(s1, s2, earlier) |
+    result = concretise(earlier) + intersect(s1, s2)
+  )
+  or
+  t = Nil() and result = ""
+}
+
+/**
+ * Holds if the pair `r` is reachable from `(fork, fork)` by consuming the trace `w`,
+ * and `(fork, fork)` can be reached again from `r` within `rem` further steps.
+ */
+private predicate isReachableFromFork(State fork, StatePair r, Trace w, int rem) {
+  // base case: `r` is entered directly by the two transitions of the fork
+  exists(InputSymbol sym1, InputSymbol sym2, State succ1, State succ2 |
+    isFork(fork, sym1, sym2, succ1, succ2) and
+    w = Step(sym1, sym2, Nil()) and
+    r = MkStatePair(succ1, succ2) and
+    rem = statePairDist(r, MkStatePair(fork, fork))
+  )
+  or
+  // recursive case: extend a trace that reaches `prev` by one more step
+  exists(StatePair prev, Trace prefix, InputSymbol sym1, InputSymbol sym2 |
+    step(prev, sym1, sym2, r) and
+    isReachableFromFork(fork, prev, prefix, rem + 1) and
+    w = Step(sym1, sym2, prefix) and
+    statePairDist(r, MkStatePair(fork, fork)) <= rem
+  )
+}
+
+/**
+ * Gets a product state whose two components each reach `fork` in zero or more epsilon transitions.
+ */
+private StatePair getAForkPair(State fork) {
+  exists(State left, State right | left = epsilonPred*(fork) and right = epsilonPred*(fork) |
+    isFork(fork, _, _, _, _) and result = MkStatePair(left, right)
+  )
+}
+
+/**
+ * Holds if `fork` is a pumpable fork with word `w`.
+ */
+private predicate isPumpable(State fork, string w) {
+  // Some trace that starts at `fork` ends in a pair returned by
+  // `getAForkPair(fork)`; `w` is that trace turned into a concrete string.
+  exists(Trace loop | isReachableFromFork(fork, getAForkPair(fork), loop, _) |
+    w = concretise(loop)
+  )
+}
+
+/**
+ * An instantiation of `ReDoSConfiguration` for exponential backtracking: a state is a candidate if it is a pumpable fork (see `isPumpable`).
+ */
+class ExponentialReDoSConfiguration extends ReDoSConfiguration {
+ ExponentialReDoSConfiguration() { this = "ExponentialReDoSConfiguration" }
+
+ override predicate isReDoSCandidate(State state, string pump) { isPumpable(state, pump) }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ParseRegExp.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ParseRegExp.qll
new file mode 100644
index 00000000000..da7a7917307
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ParseRegExp.qll
@@ -0,0 +1,891 @@
+/**
+ * Library for parsing Ruby regular expressions.
+ *
+ * N.B. does not yet handle stripping whitespace and comments in regexes with
+ * the `x` (free-spacing) flag.
+ */
+
+private import codeql.ruby.ast.Literal as AST
+private import codeql.Locations
+
+class RegExp extends AST::RegExpLiteral {
+ /**
+ * Helper predicate for `charSetStart(int start, int end)`.
+ *
+ * In order to identify left brackets ('[') which actually start a character class,
+ * we perform a left to right scan of the string.
+ *
+ * To avoid negative recursion we return a boolean. See `escaping`,
+ * the helper for `escapingChar`, for a clean use of this pattern.
+ *
+ * result is true for those start chars that actually mark a start of a char set.
+ */
+ boolean charSetStart(int pos) {
+ exists(int index |
+ // is opening bracket
+ this.charSetDelimiter(index, pos) = true and
+ (
+ // if this is the first bracket, `pos` starts a char set
+ index = 1 and result = true
+ or
+ // if the previous char set delimiter was not a closing bracket, `pos` does
+ // not start a char set. This is needed to handle cases such as `[[]` (a
+ // char set that matches the `[` char)
+ index > 1 and
+ not this.charSetDelimiter(index - 1, _) = false and
+ result = false
+ or
+ // special handling of cases such as `[][]` (the character-set of the characters `]` and `[`).
+ exists(int prevClosingBracketPos |
+ // previous bracket is a closing bracket
+ this.charSetDelimiter(index - 1, prevClosingBracketPos) = false and
+ if
+ // check if the character that comes before the previous closing bracket
+ // is an opening bracket (taking `^` into account),
+ // i.e. whether the previous closing bracket directly follows the
+ // delimiter before it, as in `[]` or `[^]`
+ exists(int posBeforePrevClosingBracket |
+ if this.getChar(prevClosingBracketPos - 1) = "^"
+ then posBeforePrevClosingBracket = prevClosingBracketPos - 2
+ else posBeforePrevClosingBracket = prevClosingBracketPos - 1
+ |
+ this.charSetDelimiter(index - 2, posBeforePrevClosingBracket) = true
+ )
+ then
+ // brackets without anything in between are not a valid character set, so
+ // the first closing bracket in `[]]` and `[^]]` does not count,
+ //
+ // and we should _not_ mark the second opening bracket in `[][]` and `[^][]`
+ // as starting a new char set. ^ ^
+ exists(int posBeforePrevClosingBracket |
+ this.charSetDelimiter(index - 2, posBeforePrevClosingBracket) = true
+ |
+ result = this.charSetStart(posBeforePrevClosingBracket).booleanNot()
+ )
+ else
+ // if not, `pos` does in fact mark a real start of a character range
+ result = true
+ )
+ )
+ )
+ }
+
+ /**
+  * Holds if the non-escaped char at `pos` is the (one-based) `index`th bracket (`[` or `]`) in the
+  * string, not counting brackets that belong to POSIX bracket expressions such as `[:alpha:]`.
+  * Result is `true` if the char is `[`, and `false` if the char is `]`.
+  */
+ boolean charSetDelimiter(int index, int pos) {
+   pos =
+     rank[index](int p |
+       (this.nonEscapedCharAt(p) = "[" or this.nonEscapedCharAt(p) = "]") and
+       // Brackets that are part of POSIX expressions do not count as char-set
+       // delimiters. The test must be on the ranked position `p`, not on `pos`.
+       not exists(int x, int y |
+         this.posixStyleNamedCharacterProperty(x, y, _) and p >= x and p < y
+       )
+     ) and
+   (
+     this.nonEscapedCharAt(pos) = "[" and result = true
+     or
+     this.nonEscapedCharAt(pos) = "]" and result = false
+   )
+ }
+
+ /**
+  * Holds if a char set starts at `start` and its contents begin at `end`,
+  * i.e. after the `[` and a `^` that may directly follow it.
+  */
+ predicate charSetStart(int start, int end) {
+   this.charSetStart(start) = true and
+   (if this.getChar(start + 1) = "^" then end = start + 2 else end = start + 1)
+ }
+
+ /** Holds if a character class spans from `start` (inclusive) to `end` (exclusive). */
+ predicate charSet(int start, int end) {
+   exists(int contentStart, int closingBracket |
+     not this.charSetStart(_, start) and
+     this.charSetStart(start, contentStart)
+   |
+     // The class is closed by the first unescaped `]` strictly after the first
+     // content position that is not part of a POSIX bracket expression.
+     closingBracket =
+       min(int e |
+         this.nonEscapedCharAt(e) = "]" and
+         e > contentStart and
+         not exists(int x, int y |
+           this.posixStyleNamedCharacterProperty(x, y, _) and e in [x .. y - 1]
+         )
+       ) and
+     end = closingBracket + 1
+   )
+ }
+
+ predicate charSetToken(int charsetStart, int index, int tokenStart, int tokenEnd) {
+   this.charSetToken(charsetStart, tokenStart, tokenEnd) and
+   tokenStart =
+     rank[index](int tokStart, int tokEnd | this.charSetToken(charsetStart, tokStart, tokEnd) | tokStart)
+ }
+
+ /**
+  * Holds if `start`..`end` is a token of the char set starting at
+  * `charsetStart`: an escape, a named character property, or a single char
+  * (which may be a `-`).
+  */
+ predicate charSetToken(int charsetStart, int start, int end) {
+   // Multi-char tokens: allowed as the first token and after any other token.
+   (
+     this.charSetStart(charsetStart, start)
+     or
+     this.charSetToken(charsetStart, _, start)
+   ) and
+   (this.escapedCharacter(start, end) or this.namedCharacterProperty(start, end, _))
+   or
+   // Single-char tokens: a `]` is only accepted as the very first token.
+   exists(this.nonEscapedCharAt(start)) and
+   end = start + 1 and
+   (
+     this.charSetStart(charsetStart, start)
+     or
+     this.charSetToken(charsetStart, _, start) and not this.getChar(start) = "]"
+   )
+ }
+
+ predicate charSetChild(int charsetStart, int start, int end) {
+   // A whole range is a child ...
+   this.charRange(charsetStart, start, _, _, end)
+   or
+   // ... and so is every token that is not covered by a range.
+   this.charSetToken(charsetStart, start, end) and
+   not exists(int lo, int hi | this.charRange(charsetStart, lo, _, _, hi) |
+     lo <= start and end <= hi
+   )
+ }
+
+ predicate charRange(int charsetStart, int start, int lowerEnd, int upperStart, int end) {
+   // Tokens `upper - 2`, `upper - 1` (the `-`) and `upper` make up the range.
+   exists(int upper | this.charRangeEnd(charsetStart, upper) = true |
+     this.charSetToken(charsetStart, upper, upperStart, end) and
+     this.charSetToken(charsetStart, upper - 2, start, lowerEnd)
+   )
+ }
+
+ private boolean charRangeEnd(int charsetStart, int index) { // `true` if token `index` is the upper end of a range
+ this.charSetToken(charsetStart, index, _, _) and
+ (
+ index in [1, 2] and result = false // a range needs a lower token and a `-` before it
+ or
+ index > 2 and
+ exists(int connectorStart |
+ this.charSetToken(charsetStart, index - 1, connectorStart, _) and
+ this.nonEscapedCharAt(connectorStart) = "-" and // the previous token is an unescaped `-`
+ result = // ... and neither of the two previous tokens already ends a range
+ this.charRangeEnd(charsetStart, index - 2)
+ .booleanNot()
+ .booleanAnd(this.charRangeEnd(charsetStart, index - 1).booleanNot())
+ )
+ or
+ not exists(int connectorStart |
+ this.charSetToken(charsetStart, index - 1, connectorStart, _) and
+ this.nonEscapedCharAt(connectorStart) = "-"
+ ) and
+ result = false // no unescaped `-` directly before this token
+ )
+ }
+
+ predicate escapingChar(int pos) { this.escaping(pos) = true } // the char at `pos` is a backslash that is not itself escaped
+
+ private boolean escaping(int pos) { // boolean-valued so that the recursion below avoids negation
+ pos = -1 and result = false // base case, just before the first char
+ or
+ this.getChar(pos) = "\\" and result = this.escaping(pos - 1).booleanNot() // a backslash escapes unless it is escaped itself
+ or
+ this.getChar(pos) != "\\" and result = false
+ }
+
+ /** Gets the text of this regex, as given by `getValueText()`. */
+ string getText() { result = this.getValueText() }
+
+ string getChar(int i) { result = this.getText().charAt(i) } // the char at index `i` of the regex text
+
+ string nonEscapedCharAt(int i) {
+   not exists(int x, int y | this.escapedCharacter(x, y) | x <= i and i < y) and
+   result = this.getText().charAt(i)
+ }
+
+ private predicate isOptionDivider(int i) { this.nonEscapedCharAt(i) = "|" } // a non-escaped `|` at `i`
+
+ private predicate isGroupEnd(int i) { this.nonEscapedCharAt(i) = ")" and not this.inCharSet(i) } // a non-escaped `)` outside char sets
+
+ private predicate isGroupStart(int i) { this.nonEscapedCharAt(i) = "(" and not this.inCharSet(i) } // a non-escaped `(` outside char sets
+
+ /** Holds if the char at index `i` is not covered by any top-level term. */
+ predicate failedToParse(int i) {
+   exists(this.getChar(i)) and
+   // `end` is exclusive, so a term covers the indices `start .. end - 1`.
+   not exists(int start, int end | this.topLevel(start, end) |
+     i in [start .. end - 1]
+   )
+ }
+
+ /** Matches a named character property, in `\p{Word}` style or in POSIX `[[:digit:]]` style. */
+ predicate namedCharacterProperty(int start, int end, string name) {
+   this.posixStyleNamedCharacterProperty(start, end, name) or
+   this.pStyleNamedCharacterProperty(start, end, name)
+ }
+
+ /** Gets the name of the named character property spanning `start` (inclusive) to `end` (exclusive). */
+ string getCharacterPropertyName(int start, int end) {
+ this.namedCharacterProperty(start, end, result)
+ }
+
+ /** Matches a POSIX bracket expression such as `[:alnum:]` within a character class. */
+ private predicate posixStyleNamedCharacterProperty(int start, int end, string name) {
+   this.getChar(start) = "[" and
+   this.getChar(start + 1) = ":" and
+   // `end` is just past the first `:]` that closes the expression.
+   end =
+     min(int stop |
+       this.getChar(stop - 1) = "]" and
+       this.getChar(stop - 2) = ":" and
+       stop > start
+     ) and
+   // The name starts after the optional negating `^`.
+   exists(int nameStart |
+     if this.getChar(start + 2) = "^"
+     then nameStart = start + 3
+     else nameStart = start + 2
+   |
+     name = this.getText().substring(nameStart, end - 2)
+   )
+ }
+
+ /**
+  * Matches a named character property in `\p` style. For example:
+  * - `\p{Space}`
+  * - `\P{Digit}` upper-case P means inverted
+  * - `\p{^Word}` caret also means inverted
+  *
+  * These can occur both inside and outside of character classes.
+  */
+ private predicate pStyleNamedCharacterProperty(int start, int end, string name) {
+   this.escapingChar(start) and
+   this.getChar(start + 1) in ["p", "P"] and
+   this.getChar(start + 2) = "{" and
+   // `end` is just past the first `}` after the opening brace.
+   end > start and
+   this.getChar(end - 1) = "}" and
+   not this.getChar([start + 3 .. end - 2]) = "}" and
+   // The name starts after the optional `^`.
+   exists(int nameStart |
+     if this.getChar(start + 3) = "^" then nameStart = start + 4 else nameStart = start + 3
+   |
+     name = this.getText().substring(nameStart, end - 1)
+   )
+ }
+
+ /**
+  * Holds if the named character property is inverted. Examples for which it holds:
+  * - `\P{Digit}` upper-case P means inverted
+  * - `\p{^Word}` caret also means inverted
+  * - `[[:^digit:]]`
+  *
+  * Examples for which it doesn't hold:
+  * - `\p{Word}`
+  * - `\P{^Space}` - upper-case P and caret cancel each other out
+  * - `[[:alnum:]]`
+  */
+ predicate namedCharacterPropertyIsInverted(int start, int end) {
+   this.pStyleNamedCharacterProperty(start, end, _) and
+   exists(boolean upperP, boolean caret |
+     (if this.getChar(start + 1) = "P" then upperP = true else upperP = false) and
+     (if this.getChar(start + 3) = "^" then caret = true else caret = false)
+   |
+     upperP.booleanXor(caret) = true
+   )
+   or
+   // In `[:^name:]` the caret directly follows `[:`, i.e. it is at `start + 2`.
+   this.posixStyleNamedCharacterProperty(start, end, _) and this.getChar(start + 2) = "^"
+ }
+
+ /**
+ * Holds if the range `start,end` is an escape sequence that is neither a
+ * backreference nor a `\p{...}`-style named character property.
+ *
+ * `\x` escapes are taken to span four characters, `\u` escapes six, and any other
+ * escape whose second character does not parse as an integer spans two.
+ */
+ predicate escapedCharacter(int start, int end) {
+ this.escapingChar(start) and
+ not this.numberedBackreference(start, _, _) and
+ not this.namedBackreference(start, _, _) and
+ not this.pStyleNamedCharacterProperty(start, _, _) and
+ (
+ // hex char \xhh
+ this.getChar(start + 1) = "x" and end = start + 4
+ or
+ // wide hex char \uhhhh
+ this.getChar(start + 1) = "u" and end = start + 6
+ or
+ // escape not handled above; update when adding a new case
+ not this.getChar(start + 1) in ["x", "u"] and
+ not exists(this.getChar(start + 1).toInt()) and
+ end = start + 2
+ )
+ }
+
+ /**
+ * Holds if `index` falls in the range `setStart + 1 .. setEnd - 2` of some
+ * character set `setStart,setEnd`.
+ */
+ predicate inCharSet(int index) {
+ exists(int setStart, int setEnd | this.charSet(setStart, setEnd) |
+ index in [setStart + 1 .. setEnd - 2]
+ )
+ }
+
+ /**
+ * Holds if `index` falls in the range `open + 1 .. close - 2` of some POSIX
+ * bracket expression `open,close`.
+ */
+ predicate inPosixBracket(int index) {
+ exists(int open, int close |
+ index in [open + 1 .. close - 2] and
+ this.posixStyleNamedCharacterProperty(open, close, _)
+ )
+ }
+
+ /**
+ * 'Simple' characters are any that don't alter the parsing of the regex.
+ *
+ * Holds for the single-character range `start,end` when it is neither the first nor
+ * the last character of a character set, is not covered by a POSIX bracket
+ * expression, and satisfies one of the two position-dependent cases below.
+ */
+ private predicate simpleCharacter(int start, int end) {
+ end = start + 1 and
+ // Not the opening character of a character set ...
+ not this.charSet(start, _) and
+ // ... and not its closing character either.
+ not this.charSet(_, start + 1) and
+ // Not anywhere within a POSIX bracket expression such as `[:alnum:]`.
+ not exists(int x, int y |
+ this.posixStyleNamedCharacterProperty(x, y, _) and
+ start >= x and
+ end <= y
+ ) and
+ exists(string c | c = this.getChar(start) |
+ // Case 1: positions relative to a character set `x,z` whose start marker ends at `y`.
+ exists(int x, int y, int z |
+ this.charSet(x, z) and
+ this.charSetStart(x, y)
+ |
+ // first position after the set's start marker
+ start = y
+ or
+ // last position before the set's closing character
+ start = z - 2
+ or
+ // any position in between for which `charRange` does not report `start,end`
+ // as its third and fourth arguments
+ start > y and start < z - 2 and not this.charRange(_, _, start, end, _)
+ )
+ or
+ // Case 2: outside character sets, anything except grouping, set-opening and
+ // alternation characters, and anything that starts a qualifier.
+ not this.inCharSet(start) and
+ not c = "(" and
+ not c = "[" and
+ not c = ")" and
+ not c = "|" and
+ not this.qualifier(start, _, _, _)
+ )
+ }
+
+ /**
+ * Holds if the range `start,end` is a character: either a simple character that is
+ * not covered by an escape sequence, or an escape sequence itself. Ranges covered by
+ * a group start, a backreference or a `\p{...}`-style property are excluded.
+ */
+ predicate character(int start, int end) {
+ (
+ this.simpleCharacter(start, end) and
+ // A simple character inside an escape sequence is reported via the escape instead.
+ not exists(int x, int y | this.escapedCharacter(x, y) and x <= start and y >= end)
+ or
+ this.escapedCharacter(start, end)
+ ) and
+ not exists(int x, int y | this.groupStart(x, y) and x <= start and y >= end) and
+ not exists(int x, int y | this.backreference(x, y) and x <= start and y >= end) and
+ not exists(int x, int y |
+ this.pStyleNamedCharacterProperty(x, y, _) and x <= start and y >= end
+ )
+ }
+
+ /** Holds if `start,end` is a character that is not reported by `specialCharacter`. */
+ predicate normalCharacter(int start, int end) {
+ not this.specialCharacter(start, end, _) and
+ this.character(start, end)
+ }
+
+ /**
+ * Holds if `start,end` is a character outside of any character set whose text
+ * `char` is one of `$`, `^`, `.`, `\A`, `\Z` or `\z`.
+ */
+ predicate specialCharacter(int start, int end, string char) {
+ not this.inCharSet(start) and
+ this.character(start, end) and
+ (
+ // two-character escape forms
+ char = ["\\A", "\\Z", "\\z"] and
+ this.escapingChar(start) and
+ end = start + 2 and
+ char = this.getText().substring(start, end)
+ or
+ // single-character forms
+ char = ["$", "^", "."] and
+ end = start + 1 and
+ char = this.getChar(start)
+ )
+ }
+
+ /** Holds if the range `start,end` is a group, either empty or with contents. */
+ predicate group(int start, int end) {
+ this.emptyGroup(start, end)
+ or
+ this.groupContents(start, end, _, _)
+ }
+
+ /**
+ * Gets the number of the group in start,end.
+ *
+ * The number is one more than the count of group start positions before `start`
+ * that are not `(?:` group starts.
+ */
+ int getGroupNumber(int start, int end) {
+ this.group(start, end) and
+ // NOTE(review): only `(?:` groups are skipped below; other groups that start
+ // earlier (e.g. lookarounds) are counted as well. Confirm this is intended.
+ result =
+ count(int i | this.group(i, _) and i < start and not this.nonCapturingGroupStart(i, _)) + 1
+ }
+
+ /**
+ * Gets the name, if it has one, of the group in start,end.
+ *
+ * Has no result for groups that `namedGroupStart` does not recognise.
+ */
+ string getGroupName(int start, int end) {
+ this.group(start, end) and
+ exists(int nameEnd |
+ this.namedGroupStart(start, nameEnd) and
+ // `namedGroupStart` places the opening `<` or `'` at `start + 2`, so the name
+ // begins at `start + 3`; `nameEnd - 1` is the index of the closing delimiter.
+ result = this.getText().substring(start + 3, nameEnd - 1)
+ )
+ }
+
+ /**
+ * Holds if the range `start,end` is a group that can match the empty string:
+ * an empty group, a negative assertion, or a positive lookahead/lookbehind.
+ */
+ predicate zeroWidthMatch(int start, int end) {
+ // empty group, negative assertion, or positive lookahead
+ this.emptyMatchAtStartGroup(start, end)
+ or
+ // empty group, negative assertion, or positive lookbehind
+ this.emptyMatchAtEndGroup(start, end)
+ }
+
+ /** Holds if `start,end` is a group whose start marker is directly followed by its end. */
+ predicate emptyGroup(int start, int end) {
+ exists(int closeIndex |
+ this.isGroupEnd(closeIndex) and
+ this.groupStart(start, closeIndex) and
+ end = closeIndex + 1
+ )
+ }
+
+ /**
+ * Holds if `start,end` is an empty group, a negative assertion group or a
+ * positive lookahead group.
+ */
+ private predicate emptyMatchAtStartGroup(int start, int end) {
+ this.positiveLookaheadAssertionGroup(start, end)
+ or
+ this.negativeAssertionGroup(start, end)
+ or
+ this.emptyGroup(start, end)
+ }
+
+ /**
+ * Holds if `start,end` is an empty group, a negative assertion group or a
+ * positive lookbehind group.
+ */
+ private predicate emptyMatchAtEndGroup(int start, int end) {
+ this.positiveLookbehindAssertionGroup(start, end)
+ or
+ this.negativeAssertionGroup(start, end)
+ or
+ this.emptyGroup(start, end)
+ }
+
+ /** Holds if `start,end` is a negative lookahead or negative lookbehind group. */
+ private predicate negativeAssertionGroup(int start, int end) {
+ this.negativeLookaheadAssertionGroup(start, end)
+ or
+ this.negativeLookbehindAssertionGroup(start, end)
+ }
+
+ /** Holds if `start,end` is a group that opens with a negative lookahead marker. */
+ predicate negativeLookaheadAssertionGroup(int start, int end) {
+ exists(int bodyStart |
+ this.groupContents(start, end, bodyStart, _) and
+ this.negativeLookaheadAssertionStart(start, bodyStart)
+ )
+ }
+
+ /** Holds if `start,end` is a group that opens with a negative lookbehind marker. */
+ predicate negativeLookbehindAssertionGroup(int start, int end) {
+ exists(int bodyStart |
+ this.groupContents(start, end, bodyStart, _) and
+ this.negativeLookbehindAssertionStart(start, bodyStart)
+ )
+ }
+
+ /** Holds if `start,end` is a group that opens with a positive lookahead marker. */
+ predicate positiveLookaheadAssertionGroup(int start, int end) {
+ exists(int bodyStart |
+ this.groupContents(start, end, bodyStart, _) and
+ this.lookaheadAssertionStart(start, bodyStart)
+ )
+ }
+
+ /** Holds if `start,end` is a group that opens with a positive lookbehind marker. */
+ predicate positiveLookbehindAssertionGroup(int start, int end) {
+ exists(int bodyStart |
+ this.groupContents(start, end, bodyStart, _) and
+ this.lookbehindAssertionStart(start, bodyStart)
+ )
+ }
+
+ /**
+ * Holds if `start,end` is the opening marker of a group of any supported kind:
+ * simple, non-capturing, named, comment, or one of the four lookaround forms.
+ */
+ private predicate groupStart(int start, int end) {
+ this.simpleGroupStart(start, end)
+ or
+ this.nonCapturingGroupStart(start, end)
+ or
+ this.namedGroupStart(start, end)
+ or
+ this.commentGroupStart(start, end)
+ or
+ this.lookaheadAssertionStart(start, end)
+ or
+ this.lookbehindAssertionStart(start, end)
+ or
+ this.negativeLookaheadAssertionStart(start, end)
+ or
+ this.negativeLookbehindAssertionStart(start, end)
+ }
+
+ /**
+ * Matches the three-character opening marker `(?:` of a non-capturing group;
+ * `end` is the index just past the `:`.
+ */
+ private predicate nonCapturingGroupStart(int start, int end) {
+ end = start + 3 and
+ this.getChar(start + 2) = ":" and
+ this.getChar(start + 1) = "?" and
+ this.isGroupStart(start)
+ }
+
+ /**
+ * Matches the opening of a simple group such as `(a+)`: a group start whose
+ * next character exists and is not `?`. `end` is the index just past the start.
+ */
+ private predicate simpleGroupStart(int start, int end) {
+ end = start + 1 and
+ this.getChar(start + 1) != "?" and
+ this.isGroupStart(start)
+ }
+
+ /**
+ * Matches the start of a named group, such as:
+ * - `(?<name>\w+)`
+ * - `(?'name'\w+)`
+ *
+ * `end` is the index just past the delimiter (`>` or `'`) that closes the name.
+ */
+ private predicate namedGroupStart(int start, int end) {
+ this.isGroupStart(start) and
+ this.getChar(start + 1) = "?" and
+ (
+ this.getChar(start + 2) = "<" and
+ not this.getChar(start + 3) = "=" and // (?<=foo) is a positive lookbehind assertion
+ not this.getChar(start + 3) = "!" and // (?<!foo) is a negative lookbehind assertion
+ exists(int nameEnd |
+ // the first `>` after at least one name character closes the name
+ nameEnd = min(int i | i > start + 3 and this.getChar(i) = ">") and
+ end = nameEnd + 1
+ )
+ or
+ this.getChar(start + 2) = "'" and
+ exists(int nameEnd |
+ // the next `'` after the opening one closes the name
+ nameEnd = min(int i | i > start + 2 and this.getChar(i) = "'") and end = nameEnd + 1
+ )
+ )
+ }
+
+ /**
+ * Matches the three-character opening marker `(?=` of a positive lookahead
+ * assertion; `end` is the index just past the `=`.
+ */
+ private predicate lookaheadAssertionStart(int start, int end) {
+ end = start + 3 and
+ this.getChar(start + 2) = "=" and
+ this.getChar(start + 1) = "?" and
+ this.isGroupStart(start)
+ }
+
+ /**
+ * Matches the three-character opening marker `(?!` of a negative lookahead
+ * assertion; `end` is the index just past the `!`.
+ */
+ private predicate negativeLookaheadAssertionStart(int start, int end) {
+ end = start + 3 and
+ this.getChar(start + 2) = "!" and
+ this.getChar(start + 1) = "?" and
+ this.isGroupStart(start)
+ }
+
+ /**
+ * Matches the four-character opening marker `(?<=` of a positive lookbehind
+ * assertion; `end` is the index just past the `=`.
+ */
+ private predicate lookbehindAssertionStart(int start, int end) {
+ end = start + 4 and
+ this.getChar(start + 3) = "=" and
+ this.getChar(start + 2) = "<" and
+ this.getChar(start + 1) = "?" and
+ this.isGroupStart(start)
+ }
+
+ /** Matches the start of a negative lookbehind assertion, i.e. `(?<!`. */
+ private predicate negativeLookbehindAssertionStart(int start, int end) {
+ this.isGroupStart(start) and
+ this.getChar(start + 1) = "?" and
+ this.getChar(start + 2) = "<" and
+ this.getChar(start + 3) = "!" and
+ end = start + 4
+ }
+
+ /** Matches the start of a comment group, i.e. `(?#`. */
+ private predicate commentGroupStart(int start, int end) {
+ this.isGroupStart(start) and
+ this.getChar(start + 1) = "?" and
+ this.getChar(start + 2) = "#" and
+ end = start + 3
+ }
+
+ /**
+ * Holds if `start,end` is a group whose contents span `inStart,inEnd`, where
+ * `inStart` is the end of the group's opening marker and `inEnd` is the index of
+ * the character that closes the group.
+ */
+ predicate groupContents(int start, int end, int inStart, int inEnd) {
+ this.groupStart(start, inStart) and
+ end = inEnd + 1 and
+ this.topLevel(inStart, inEnd) and
+ this.isGroupEnd(inEnd)
+ }
+
+ /**
+ * Matches a named backreference, e.g. `\k<foo>`.
+ *
+ * `end` is the index just past the closing `>` and `name` is the text between
+ * the angle brackets.
+ */
+ predicate namedBackreference(int start, int end, string name) {
+ this.escapingChar(start) and
+ this.getChar(start + 1) = "k" and
+ this.getChar(start + 2) = "<" and
+ // the first `>` after at least one name character closes the name
+ exists(int nameEnd | nameEnd = min(int i | i > start + 3 and this.getChar(i) = ">") |
+ end = nameEnd + 1 and
+ name = this.getText().substring(start + 3, nameEnd)
+ )
+ }
+
+ /**
+ * Matches a numbered backreference, e.g. `\1`.
+ *
+ * The reference consists of the escaping character followed by one or two
+ * characters that parse as a positive integer `value`; a leading `0` is rejected.
+ */
+ predicate numberedBackreference(int start, int end, int value) {
+ this.escapingChar(start) and
+ not this.getChar(start + 1) = "0" and
+ exists(string text, string svalue, int len |
+ end = start + len and
+ text = this.getText() and
+ // total length including the escaping character
+ len in [2 .. 3]
+ |
+ svalue = text.substring(start + 1, start + len) and
+ value = svalue.toInt() and
+ // Take the longest candidate: reject this length if one more character
+ // would still parse as an integer.
+ not exists(text.substring(start + 1, start + len + 1).toInt()) and
+ value > 0
+ )
+ }
+
+ /** Holds if the range `start,end` is a backreference, named or numbered. */
+ predicate backreference(int start, int end) {
+ this.namedBackreference(start, end, _)
+ or
+ this.numberedBackreference(start, end, _)
+ }
+
+ /** Gets the group number referred to by the numbered backreference at `start,end`. */
+ int getBackRefNumber(int start, int end) {
+ exists(int number | this.numberedBackreference(start, end, number) | result = number)
+ }
+
+ /** Gets the group name referred to by the named backreference at `start,end`, if any. */
+ string getBackRefName(int start, int end) {
+ exists(string name | this.namedBackreference(start, end, name) | result = name)
+ }
+
+ /**
+ * Holds if `start,end` is an unqualified item: a character that is not covered by
+ * a character set, a group, a character set, a backreference, or a `\p{...}`-style
+ * named character property.
+ */
+ private predicate baseItem(int start, int end) {
+ this.character(start, end) and
+ // Characters inside a character set are part of the set, not items themselves.
+ not exists(int x, int y | this.charSet(x, y) and x <= start and y >= end)
+ or
+ this.group(start, end)
+ or
+ this.charSet(start, end)
+ or
+ this.backreference(start, end)
+ or
+ this.pStyleNamedCharacterProperty(start, end, _)
+ }
+
+ /**
+ * Holds if `start,end` is a qualifier: a short qualifier, extended by one
+ * character when it is directly followed by `?`.
+ */
+ private predicate qualifier(int start, int end, boolean maybeEmpty, boolean mayRepeatForever) {
+ exists(int shortEnd | this.shortQualifier(start, shortEnd, maybeEmpty, mayRepeatForever) |
+ // a trailing `?` belongs to the qualifier
+ this.getChar(shortEnd) = "?" and end = shortEnd + 1
+ or
+ // otherwise the qualifier ends where the short form ends
+ not this.getChar(shortEnd) = "?" and end = shortEnd
+ )
+ }
+
+ /**
+ * Holds if `start,end` is one of `+`, `*`, `?` or a `{...}` repetition as matched
+ * by `multiples`. `maybeEmpty` tells whether zero repetitions are allowed and
+ * `mayRepeatForever` whether there is no upper bound.
+ */
+ private predicate shortQualifier(int start, int end, boolean maybeEmpty, boolean mayRepeatForever) {
+ (
+ this.getChar(start) = "+" and maybeEmpty = false and mayRepeatForever = true
+ or
+ this.getChar(start) = "*" and maybeEmpty = true and mayRepeatForever = true
+ or
+ this.getChar(start) = "?" and maybeEmpty = true and mayRepeatForever = false
+ ) and
+ end = start + 1
+ or
+ exists(string lower, string upper |
+ this.multiples(start, end, lower, upper) and
+ // A missing or zero lower bound permits zero repetitions.
+ (if lower = "" or lower.toInt() = 0 then maybeEmpty = true else maybeEmpty = false) and
+ // A missing upper bound means the repetition is unbounded.
+ if upper = "" then mayRepeatForever = true else mayRepeatForever = false
+ )
+ }
+
+ /**
+ * Holds if `start,end` is a brace repetition of the form `{n}` or `{n,m}`, where
+ * either bound of the second form may be omitted. `lower` and `upper` are the
+ * bounds as written; an omitted bound is the empty string, and for `{n}` both
+ * are `n`.
+ */
+ predicate multiples(int start, int end, string lower, string upper) {
+ exists(string text, string match, string inner |
+ text = this.getText() and
+ end = start + match.length() and
+ // the matched text without its surrounding braces
+ inner = match.substring(1, match.length() - 1)
+ |
+ match = text.regexpFind("\\{[0-9]+\\}", _, start) and
+ lower = inner and
+ upper = lower
+ or
+ match = text.regexpFind("\\{[0-9]*,[0-9]*\\}", _, start) and
+ exists(int commaIndex |
+ commaIndex = inner.indexOf(",") and
+ lower = inner.prefix(commaIndex) and
+ upper = inner.suffix(commaIndex + 1)
+ )
+ )
+ }
+
+ /**
+ * Holds if the range `start,end` consists of a base item directly followed by a
+ * qualifier. `maybeEmpty` and `mayRepeatForever` describe that qualifier.
+ */
+ predicate qualifiedItem(int start, int end, boolean maybeEmpty, boolean mayRepeatForever) {
+ exists(int itemEnd | this.qualifiedPart(start, itemEnd, end, maybeEmpty, mayRepeatForever))
+ }
+
+ /**
+ * Holds if `start,partEnd` is a base item and `partEnd,end` is the qualifier that
+ * directly follows it.
+ */
+ predicate qualifiedPart(
+ int start, int partEnd, int end, boolean maybeEmpty, boolean mayRepeatForever
+ ) {
+ this.qualifier(partEnd, end, maybeEmpty, mayRepeatForever) and
+ this.baseItem(start, partEnd)
+ }
+
+ /**
+ * Holds if `start,end` is an item: a base item together with its qualifier when
+ * one follows, or a bare base item when none does.
+ */
+ predicate item(int start, int end) {
+ // bare base item, nothing qualifying it
+ not this.qualifier(end, _, _, _) and this.baseItem(start, end)
+ or
+ // base item plus qualifier
+ this.qualifiedItem(start, end, _, _)
+ }
+
+ /**
+ * Holds if `start,end` is a run of one or more consecutive items that begins at
+ * the start of the regex, right after a group's opening marker, or right after
+ * an option divider.
+ */
+ private predicate subsequence(int start, int end) {
+ // base case: a single item at a permitted starting position
+ (
+ start = 0 or
+ this.groupStart(_, start) or
+ this.isOptionDivider(start - 1)
+ ) and
+ this.item(start, end)
+ or
+ // recursive case: extend an existing run by one more item
+ exists(int mid |
+ this.subsequence(start, mid) and
+ this.item(mid, end)
+ )
+ }
+
+ /**
+ * Holds if the range `start,end` is a maximal run of one or more items (characters,
+ * character sets, groups, ...) that is not just a single qualified item.
+ */
+ predicate sequence(int start, int end) {
+ not this.qualifiedItem(start, end, _, _) and
+ this.sequenceOrQualified(start, end)
+ }
+
+ /** Holds if `start,end` is a run of items that no further item continues at `end`. */
+ private predicate sequenceOrQualified(int start, int end) {
+ not this.itemStart(end) and
+ this.subsequence(start, end)
+ }
+
+ /**
+ * Holds if an item may begin at `start`: a character, a group, a character set,
+ * a backreference or a named character property.
+ */
+ private predicate itemStart(int start) {
+ this.isGroupStart(start)
+ or
+ this.charSet(start, _)
+ or
+ this.character(start, _)
+ or
+ this.backreference(start, _)
+ or
+ this.namedCharacterProperty(start, _, _)
+ }
+
+ /**
+ * Holds if an item may end at `end`: a character, a group, a character set,
+ * a backreference, a `\p{...}`-style named character property, or a qualifier.
+ *
+ * `subalternation` uses this to decide whether an empty alternative can start at a
+ * position, so every kind of base item has to be covered here.
+ */
+ private predicate itemEnd(int end) {
+ this.character(_, end)
+ or
+ exists(int endm1 | this.isGroupEnd(endm1) and end = endm1 + 1)
+ or
+ this.charSet(_, end)
+ or
+ // Backreferences and named character properties are base items too (see
+ // `baseItem`), but are excluded from `character`, so list them explicitly.
+ this.backreference(_, end)
+ or
+ this.pStyleNamedCharacterProperty(_, end, _)
+ or
+ this.qualifier(_, end, _, _)
+ }
+
+ /**
+ * Holds if `start,end` is a chain of alternatives (possibly a single one) that is
+ * not continued by an option divider at `end`.
+ */
+ private predicate topLevel(int start, int end) {
+ not this.isOptionDivider(end) and
+ exists(int lastOptionStart | this.subalternation(start, end, lastOptionStart))
+ }
+
+ /**
+ * Holds if `start,end` is a chain of one or more alternatives separated by option
+ * dividers, and `itemStart` is where the last alternative in the chain begins.
+ * Alternatives may be empty.
+ */
+ private predicate subalternation(int start, int end, int itemStart) {
+ // a single non-empty alternative that does not itself follow a divider
+ this.sequenceOrQualified(start, end) and
+ not this.isOptionDivider(start - 1) and
+ itemStart = start
+ or
+ // a single empty alternative: a divider at a position where no item ends
+ start = end and
+ not this.itemEnd(start) and
+ this.isOptionDivider(end) and
+ itemStart = start
+ or
+ // an existing chain, a divider at `mid`, then one more alternative
+ exists(int mid |
+ this.subalternation(start, mid, _) and
+ this.isOptionDivider(mid) and
+ itemStart = mid + 1
+ |
+ this.sequenceOrQualified(itemStart, end)
+ or
+ // the added alternative is empty
+ not this.itemStart(end) and end = itemStart
+ )
+ }
+
+ /**
+ * Holds if the range `start,end` is an alternation, i.e. a complete chain of
+ * alternatives that has a strictly shorter chain as a prefix.
+ */
+ predicate alternation(int start, int end) {
+ exists(int prefixEnd | prefixEnd < end and this.subalternation(start, prefixEnd, _)) and
+ this.topLevel(start, end)
+ }
+
+ /**
+ * Holds if the range `start,end` is an alternation and `partStart,partEnd` is one
+ * of the options of that alternation.
+ */
+ predicate alternationOption(int start, int end, int partStart, int partEnd) {
+ this.subalternation(start, partEnd, partStart) and
+ this.alternation(start, end)
+ }
+
+ /**
+ * A part of the regex that may match the start of the string.
+ *
+ * Starts from the whole regex and is closed under the four steps commented below.
+ */
+ private predicate firstPart(int start, int end) {
+ // the whole regex
+ start = 0 and end = this.getText().length()
+ or
+ // skip a leading piece `x,start` that can match without consuming input
+ exists(int x | this.firstPart(x, end) |
+ this.emptyMatchAtStartGroup(x, start)
+ or
+ this.qualifiedItem(x, start, true, _)
+ or
+ // ^ and \A match the start of the string
+ this.specialCharacter(x, start, ["^", "\\A"])
+ )
+ or
+ // the leading item of a first part, or the base item of a leading qualified item
+ exists(int y | this.firstPart(start, y) |
+ this.item(start, end)
+ or
+ this.qualifiedPart(start, end, y, _, _)
+ )
+ or
+ // descend into the contents of a group or into an option of an alternation
+ exists(int x, int y | this.firstPart(x, y) |
+ this.groupContents(x, y, start, end)
+ or
+ this.alternationOption(x, y, start, end)
+ )
+ }
+
+ /**
+ * A part of the regex that may match the end of the string.
+ *
+ * Starts from the whole regex and is closed under the steps commented below.
+ */
+ private predicate lastPart(int start, int end) {
+ // the whole regex
+ start = 0 and end = this.getText().length()
+ or
+ // skip a trailing piece `end,y` that can match without consuming input
+ exists(int y | this.lastPart(start, y) |
+ this.emptyMatchAtEndGroup(end, y)
+ or
+ this.qualifiedItem(end, y, true, _)
+ or
+ // $, \Z, and \z match the end of the string.
+ this.specialCharacter(end, y, ["$", "\\Z", "\\z"])
+ )
+ or
+ // the trailing item of a last part
+ exists(int x |
+ this.lastPart(x, end) and
+ this.item(start, end)
+ )
+ or
+ // the base item of a qualified item that is itself a last part
+ exists(int y | this.lastPart(start, y) | this.qualifiedPart(start, end, y, _, _))
+ or
+ // descend into the contents of a group or into an option of an alternation
+ exists(int x, int y | this.lastPart(x, y) |
+ this.groupContents(x, y, start, end)
+ or
+ this.alternationOption(x, y, start, end)
+ )
+ }
+
+ /**
+ * Holds if the character, character set or qualified item at [start, end) is
+ * among the first items to be matched.
+ */
+ predicate firstItem(int start, int end) {
+ this.firstPart(start, end) and
+ (
+ this.charSet(start, end)
+ or
+ this.character(start, end)
+ or
+ this.qualifiedItem(start, end, _, _)
+ )
+ }
+
+ /**
+ * Holds if the character, character set or qualified item at [start, end) is
+ * among the last items to be matched.
+ */
+ predicate lastItem(int start, int end) {
+ this.lastPart(start, end) and
+ (
+ this.charSet(start, end)
+ or
+ this.character(start, end)
+ or
+ this.qualifiedItem(start, end, _, _)
+ )
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/PolynomialReDoSCustomizations.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/PolynomialReDoSCustomizations.qll
new file mode 100644
index 00000000000..3d3655ad3a9
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/PolynomialReDoSCustomizations.qll
@@ -0,0 +1,131 @@
+/**
+ * Provides default sources, sinks and sanitizers for reasoning about
+ * polynomial regular expression denial-of-service attacks, as well
+ * as extension points for adding your own.
+ */
+
+private import codeql.ruby.AST as AST
+private import codeql.ruby.CFG
+private import codeql.ruby.DataFlow
+private import codeql.ruby.dataflow.RemoteFlowSources
+private import codeql.ruby.regexp.ParseRegExp as RegExp
+private import codeql.ruby.regexp.RegExpTreeView
+private import codeql.ruby.regexp.SuperlinearBackTracking
+
+/**
+ * Sources, sinks, sanitizers and sanitizer guards for the polynomial ReDoS analysis.
+ * The abstract classes are the extension points; the concrete classes below them
+ * are the defaults.
+ */
+module PolynomialReDoS {
+ /**
+ * A data flow source node for polynomial regular expression denial-of-service vulnerabilities.
+ */
+ abstract class Source extends DataFlow::Node { }
+
+ /**
+ * A data flow sink node for polynomial regular expression denial-of-service vulnerabilities.
+ */
+ abstract class Sink extends DataFlow::Node {
+ /** Gets the regex that is being executed by this node. */
+ abstract RegExpTerm getRegExp();
+
+ /** Gets the node to highlight in the alert message. Defaults to this node. */
+ DataFlow::Node getHighlight() { result = this }
+ }
+
+ /**
+ * A sanitizer for polynomial regular expression denial-of-service vulnerabilities.
+ */
+ abstract class Sanitizer extends DataFlow::Node { }
+
+ /**
+ * A sanitizer guard for polynomial regular expression denial of service
+ * vulnerabilities.
+ */
+ abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
+
+ /**
+ * A source of remote user input, considered as a flow source.
+ */
+ class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+
+ /**
+ * Gets the AST of a regular expression object that can flow to `node`.
+ *
+ * Only regular expression literals that are local sources of `node` are considered.
+ */
+ RegExpTerm getRegExpObjectFromNode(DataFlow::Node node) {
+ exists(DataFlow::LocalSourceNode regexp |
+ regexp.flowsTo(node) and
+ result = regexp.asExpr().(CfgNodes::ExprNodes::RegExpLiteralCfgNode).getExpr().getParsed()
+ )
+ }
+
+ /**
+ * A regexp match against a superlinear backtracking term, seen as a sink for
+ * polynomial regular expression denial-of-service vulnerabilities.
+ *
+ * The sink itself is the string being matched; `matchNode` is the match
+ * operation or call, which is what gets highlighted.
+ */
+ class PolynomialBackTrackingTermMatch extends Sink {
+ PolynomialBackTrackingTerm term;
+ DataFlow::ExprNode matchNode;
+
+ PolynomialBackTrackingTermMatch() {
+ exists(DataFlow::Node regexp |
+ // `term` must belong to a regular expression that flows to the `regexp` operand.
+ term.getRootTerm() = getRegExpObjectFromNode(regexp) and
+ (
+ // `=~` or `!~`
+ exists(CfgNodes::ExprNodes::BinaryOperationCfgNode op |
+ matchNode.asExpr() = op and
+ (
+ op.getExpr() instanceof AST::RegExpMatchExpr or
+ op.getExpr() instanceof AST::NoRegExpMatchExpr
+ ) and
+ // The regexp may be on either side of the operator.
+ (
+ this.asExpr() = op.getLeftOperand() and regexp.asExpr() = op.getRightOperand()
+ or
+ this.asExpr() = op.getRightOperand() and regexp.asExpr() = op.getLeftOperand()
+ )
+ )
+ or
+ // Any of the methods on `String` that take a regexp.
+ exists(CfgNodes::ExprNodes::MethodCallCfgNode call |
+ matchNode.asExpr() = call and
+ call.getExpr().getMethodName() =
+ [
+ "[]", "gsub", "gsub!", "index", "match", "match?", "partition", "rindex",
+ "rpartition", "scan", "slice!", "split", "sub", "sub!"
+ ] and
+ this.asExpr() = call.getReceiver() and
+ regexp.asExpr() = call.getArgument(0)
+ )
+ or
+ // A call to `match` or `match?` where the regexp is the receiver.
+ exists(CfgNodes::ExprNodes::MethodCallCfgNode call |
+ matchNode.asExpr() = call and
+ call.getExpr().getMethodName() = ["match", "match?"] and
+ regexp.asExpr() = call.getReceiver() and
+ this.asExpr() = call.getArgument(0)
+ )
+ )
+ )
+ }
+
+ override RegExpTerm getRegExp() { result = term }
+
+ override DataFlow::Node getHighlight() { result = matchNode }
+ }
+
+ /**
+ * A check on the length of a string, seen as a sanitizer guard.
+ *
+ * This is a relational comparison in which one operand receives the result of a
+ * call to `length`; the receiver of that call is the guarded `input`.
+ */
+ class LengthGuard extends SanitizerGuard, CfgNodes::ExprNodes::RelationalOperationCfgNode {
+ private DataFlow::Node input;
+
+ LengthGuard() {
+ exists(DataFlow::CallNode length, DataFlow::ExprNode operand |
+ length.asExpr().getExpr().(AST::MethodCall).getMethodName() = "length" and
+ length.getReceiver() = input and
+ length.flowsTo(operand) and
+ operand.getExprNode() = this.getAnOperand()
+ )
+ }
+
+ // NOTE(review): `branch` is always `true`, whatever the comparison operator or
+ // operand order; confirm that treating the true branch as the guarded one is
+ // intended for every relational form.
+ override predicate checks(CfgNode node, boolean branch) {
+ node = input.asExpr() and branch = true
+ }
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/PolynomialReDoSQuery.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/PolynomialReDoSQuery.qll
new file mode 100644
index 00000000000..db7269d7fdb
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/PolynomialReDoSQuery.qll
@@ -0,0 +1,37 @@
+/**
+ * Provides a taint tracking configuration for reasoning about polynomial
+ * regular expression denial-of-service attacks.
+ *
+ * Note, for performance reasons: only import this file if `Configuration` is
+ * needed. Otherwise, `PolynomialReDoSCustomizations` should be imported
+ * instead.
+ */
+
+private import codeql.ruby.DataFlow
+private import codeql.ruby.TaintTracking
+
+/**
+ * Wraps the polynomial ReDoS sources, sinks and sanitizers together with the
+ * taint-tracking configuration that connects them.
+ */
+module PolynomialReDoS {
+ import PolynomialReDoSCustomizations::PolynomialReDoS
+
+ /**
+ * The taint-tracking configuration used to find flow from a `Source` to a `Sink`
+ * that is not interrupted by a `Sanitizer` or a `SanitizerGuard`.
+ */
+ class Configuration extends TaintTracking::Configuration {
+ Configuration() { this = "PolynomialReDoS" }
+
+ override predicate isSanitizerGuard(DataFlow::BarrierGuard node) {
+ node instanceof SanitizerGuard
+ }
+
+ override predicate isSanitizer(DataFlow::Node node) {
+ node instanceof Sanitizer
+ }
+
+ override predicate isSink(DataFlow::Node sink) {
+ sink instanceof Sink
+ }
+
+ override predicate isSource(DataFlow::Node source) {
+ source instanceof Source
+ }
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ReDoSUtil.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ReDoSUtil.qll
new file mode 100644
index 00000000000..496983ea849
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/ReDoSUtil.qll
@@ -0,0 +1,1186 @@
+/**
+ * Provides classes for working with regular expressions that can
+ * perform backtracking in superlinear/exponential time.
+ *
+ * This module contains a number of utility predicates for compiling a regular expression into a NFA and reasoning about this NFA.
+ *
+ * The `ReDoSConfiguration` contains an `isReDoSCandidate` predicate that is used
+ * to determine which states the prefix/suffix search should happen on.
+ * There is only meant to exist one `ReDoSConfiguration` at a time.
+ *
+ * The predicate `hasReDoSResult` outputs a de-duplicated set of
+ * states that will cause backtracking (a rejecting suffix exists).
+ */
+
+import RegExpTreeView
+private import codeql.Locations
+
+/**
+ * A configuration for which parts of a regular expression should be considered relevant for
+ * the different predicates in `ReDoS.qll`.
+ * Used to adjust the computations for either superlinear or exponential backtracking.
+ *
+ * Subclasses pick their own string value and implement `isReDoSCandidate`.
+ */
+abstract class ReDoSConfiguration extends string {
+ // The characteristic predicate accepts any string, so `this` must be bound by
+ // the subclass; `bindingset[this]` declares exactly that.
+ bindingset[this]
+ ReDoSConfiguration() { any() }
+
+ /**
+ * Holds if `state` with the pump string `pump` is a candidate for a
+ * ReDoS vulnerable state.
+ * This is used to determine which states are considered for the prefix/suffix construction.
+ */
+ abstract predicate isReDoSCandidate(State state, string pump);
+}
+
+/**
+ * Holds if repeating `pump` starting at `state` is a candidate for causing backtracking.
+ * No check whether a rejected suffix exists has been made.
+ *
+ * Candidates reported by the configuration are filtered so that, among states
+ * connected through `epsilonSucc+`, only one is kept.
+ */
+private predicate isReDoSCandidate(State state, string pump) {
+ any(ReDoSConfiguration conf).isReDoSCandidate(state, pump) and
+ (
+ // Keep `state` if nothing reachable through `epsilonSucc+` is also a candidate.
+ not any(ReDoSConfiguration conf).isReDoSCandidate(epsilonSucc+(state), _)
+ or
+ // Otherwise `state` must reach itself through `epsilonSucc+`, and must be the
+ // candidate infinite-repetition state on that path that sorts last by location.
+ epsilonSucc+(state) = state and
+ state =
+ max(State s, Location l |
+ s = epsilonSucc+(state) and
+ l = s.getRepr().getLocation() and
+ any(ReDoSConfiguration conf).isReDoSCandidate(s, _) and
+ s.getRepr() instanceof InfiniteRepetitionQuantifier
+ |
+ s order by l.getStartLine(), l.getStartColumn(), l.getEndColumn(), l.getEndLine()
+ )
+ )
+}
+
+/**
+ * Gets the char whose code in the simplified ASCII table (see `ascii`) is one
+ * higher than the code of `c`.
+ */
+private string nextChar(string c) { ascii(result) = ascii(c) + 1 }
+
+/**
+ * Gets an approximation for the ASCII code for `char`.
+ * Only the easily printable chars are included (so no newline, tab, null, etc).
+ *
+ * The result is the rank of `char` among the characters of the string literal
+ * below, not the real ASCII code point.
+ */
+private int ascii(string char) {
+ char =
+ // `rank` orders the characters itself, so their order in the literal is irrelevant.
+ rank[result](string c |
+ c =
+ "! \"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
+ .charAt(_)
+ )
+}
+
+/**
+ * Holds if `t` matches at least an epsilon symbol.
+ *
+ * That is, this term does not restrict the language of the enclosing regular expression.
+ *
+ * This is implemented as an under-approximation, and this predicate does not hold for sub-patterns in particular.
+ */
+predicate matchesEpsilon(RegExpTerm t) {
+ // `*` and `?` always admit zero repetitions
+ t instanceof RegExpStar
+ or
+ t instanceof RegExpOpt
+ or
+ // a range quantifier with a lower bound of zero
+ t.(RegExpRange).getLowerBound() = 0
+ or
+ // an alternation, group, `+` or range quantifier with a child that matches epsilon
+ exists(RegExpTerm child |
+ child = t.getAChild() and
+ matchesEpsilon(child)
+ |
+ t instanceof RegExpAlt or
+ t instanceof RegExpGroup or
+ t instanceof RegExpPlus or
+ t instanceof RegExpRange
+ )
+ or
+ // a backreference to a group that matches epsilon
+ matchesEpsilon(t.(RegExpBackRef).getGroup())
+ or
+ // a sequence with at least one child, all of whose children match epsilon
+ forex(RegExpTerm child | child = t.(RegExpSequence).getAChild() | matchesEpsilon(child))
+}
+
+/**
+ * A positive lookahead or positive lookbehind whose operand matches the empty string.
+ */
+class EmptyPositiveSubPatttern extends RegExpSubPattern {
+ EmptyPositiveSubPatttern() {
+ matchesEpsilon(this.getOperand()) and
+ (
+ this instanceof RegExpPositiveLookbehind
+ or
+ this instanceof RegExpPositiveLookahead
+ )
+ }
+}
+
+/**
+ * A branch in a disjunction that is the root node in a literal, or a literal
+ * whose root node is not a disjunction.
+ */
+class RegExpRoot extends RegExpTerm {
+ // The parent of the root alternation (first case) or of this root term (second case).
+ RegExpParent parent;
+
+ RegExpRoot() {
+ // a branch of a root-level alternation
+ exists(RegExpAlt alt |
+ alt.isRootTerm() and
+ this = alt.getAChild() and
+ parent = alt.getParent()
+ )
+ or
+ // a root term that is not an alternation
+ this.isRootTerm() and
+ not this instanceof RegExpAlt and
+ parent = this.getParent()
+ }
+
+ /**
+ * Holds if this root term is relevant to the ReDoS analysis.
+ */
+ predicate isRelevant() {
+ // there is at least one repetition
+ getRoot(any(InfiniteRepetitionQuantifier q)) = this and
+ // there are no lookbehinds
+ not exists(RegExpLookbehind lbh | getRoot(lbh) = this) and
+ // is actually used as a RegExp
+ isUsedAsRegExp() //and
+ // The minified-file filter below is commented out and therefore not applied.
+ // // pragmatic performance optimization: ignore minified files.
+ // not getRootTerm().getParent().(Expr).getTopLevel().isMinified()
+ }
+}
+
+/**
+ * A regular expression constant for which `isCharacter()` holds, i.e. one that
+ * represents valid Unicode character(s).
+ */
+private class RegexpCharacterConstant extends RegExpConstant {
+ RegexpCharacterConstant() {
+ this.isCharacter()
+ }
+}
+
+/**
+ * Holds if `term` is the chosen canonical representative for all terms with string representation `str`.
+ *
+ * Using canonical representatives gives a huge performance boost when working with tuples containing multiple `InputSymbol`s.
+ * The number of `InputSymbol`s is decreased by 3 orders of magnitude or more in some larger benchmarks.
+ *
+ * The representative is the term that comes first when ordering by relative file
+ * path, then start line, then start column.
+ */
+private predicate isCanonicalTerm(RegExpTerm term, string str) {
+ term =
+ rank[1](RegExpTerm t, Location loc, File file |
+ loc = t.getLocation() and
+ file = t.getFile() and
+ str = t.getRawValue()
+ |
+ t order by t.getFile().getRelativePath(), loc.getStartLine(), loc.getStartColumn()
+ )
+}
+
+/**
+ * An abstract input symbol, representing a set of concrete characters.
+ *
+ * `Char` and `CharClass` symbols are only created for regular expressions whose
+ * root is relevant (see `RegExpRoot::isRelevant`).
+ */
+private newtype TInputSymbol =
+ /** An input symbol corresponding to character `c`. */
+ Char(string c) {
+ c = any(RegexpCharacterConstant cc | getRoot(cc).isRelevant()).getValue().charAt(_)
+ } or
+ /**
+ * An input symbol representing all characters matched by
+ * a (non-universal) character class that has string representation `charClassString`.
+ */
+ CharClass(string charClassString) {
+ // Some term with this text occurs under a relevant root ...
+ exists(RegExpTerm term | term.getRawValue() = charClassString | getRoot(term).isRelevant()) and
+ // ... and the canonical term for this text is a character class, a character
+ // class escape or a named character property.
+ exists(RegExpTerm recc | isCanonicalTerm(recc, charClassString) |
+ recc instanceof RegExpCharacterClass and
+ not recc.(RegExpCharacterClass).isUniversalClass()
+ or
+ recc instanceof RegExpCharacterClassEscape
+ or
+ recc instanceof RegExpNamedCharacterProperty
+ )
+ } or
+ /** An input symbol representing all characters matched by `.`. */
+ Dot() or
+ /** An input symbol representing all characters. */
+ Any() or
+ /** An epsilon transition in the automaton. */
+ Epsilon()
+
+/**
+ * Gets the `CharClass` symbol for `term`, provided `term` is the canonical
+ * representative of its string representation.
+ */
+CharClass getCanonicalCharClass(RegExpTerm term) {
+ exists(string repr |
+ result = CharClass(repr) and
+ isCanonicalTerm(term, repr)
+ )
+}
+
+/**
+ * Holds if the input symbols `a` and `b` both belong to one and the same regexp root.
+ */
+private predicate sharesRoot(TInputSymbol a, TInputSymbol b) {
+ exists(RegExpRoot common | belongsTo(b, common) and belongsTo(a, common))
+}
+
+/**
+ * Holds if `a` is an input symbol on a transition into or out of a state whose
+ * representative term has root `root`.
+ */
+private predicate belongsTo(TInputSymbol a, RegExpRoot root) {
+ exists(State st |
+ (
+ delta(_, a, st)
+ or
+ delta(st, a, _)
+ ) and
+ getRoot(st.getRepr()) = root
+ )
+}
+
+/**
+ * An abstract input symbol, representing a set of concrete characters.
+ * Epsilon is not an input symbol.
+ */
+class InputSymbol extends TInputSymbol {
+ InputSymbol() { not this instanceof Epsilon }
+
+ /**
+ * Gets a textual form of this input symbol: the character or character class
+ * text itself, `.` for the dot symbol and `[^]` for the any-character symbol.
+ */
+ string toString() {
+ this = Any() and result = "[^]"
+ or
+ this = Dot() and result = "."
+ or
+ this = CharClass(result)
+ or
+ this = Char(result)
+ }
+}
+
+/**
+ * An abstract input symbol that stands for a character class.
+ */
+abstract private class CharacterClass extends InputSymbol {
+ /**
+ * Holds if `char` is matched by this character class.
+ */
+ bindingset[char]
+ abstract predicate matches(string char);
+
+ /**
+ * Gets a character that matters when testing whether this character class
+ * intersects another one.
+ *
+ * These are the characters mentioned explicitly in the character class, offset
+ * by one if it is inverted. A character class escape is treated as if it had
+ * been written out as a series of intervals.
+ *
+ * The set is large enough to guarantee that, of any two intersecting character
+ * classes, one contains a relevant character of the other.
+ */
+ abstract string getARelevantChar();
+
+ /**
+ * Gets a relevant character that this character class matches.
+ */
+ string choose() { matches(result) and result = getARelevantChar() }
+}
+
+/**
+ * Provides implementations for `CharacterClass`.
+ */
+private module CharacterClasses {
+ /**
+ * Holds if the character class `cc` has a child (constant or range) that matches `char`.
+ *
+ * Only character classes that have a canonical `CharClass` symbol are considered.
+ */
+ pragma[noinline]
+ predicate hasChildThatMatches(RegExpCharacterClass cc, string char) {
+ exists(getCanonicalCharClass(cc)) and
+ exists(RegExpTerm child | child = cc.getAChild() |
+ // a constant child
+ char = child.(RegexpCharacterConstant).getValue()
+ or
+ // a range over letters or digits, handled by the dedicated helper
+ rangeMatchesOnLetterOrDigits(child, char)
+ or
+ // any other range: test the relevant chars against its bounds
+ not rangeMatchesOnLetterOrDigits(child, _) and
+ char = getARelevantChar() and
+ exists(string lo, string hi | child.(RegExpCharacterRange).isRange(lo, hi) |
+ lo <= char and
+ char <= hi
+ )
+ or
+ // a character class escape: lower-case escapes match directly, upper-case
+ // escapes match the relevant chars their lower-case counterpart does not
+ exists(RegExpCharacterClassEscape escape | escape = child |
+ escape.getValue() = escape.getValue().toLowerCase() and
+ classEscapeMatches(escape.getValue(), char)
+ or
+ char = getARelevantChar() and
+ escape.getValue() = escape.getValue().toUpperCase() and
+ not classEscapeMatches(escape.getValue().toLowerCase(), char)
+ )
+ or
+ // a named character property, possibly inverted
+ exists(RegExpNamedCharacterProperty charProp | charProp = child |
+ not charProp.isInverted() and
+ namedCharacterPropertyMatches(charProp.getName(), char)
+ or
+ char = getARelevantChar() and
+ charProp.isInverted() and
+ not namedCharacterPropertyMatches(charProp.getName(), char)
+ )
+ )
+ }
+
+ /**
+ * Holds if `range` is a range on lower-case, upper-case, or digits, and matches `char`.
+ * This predicate is used to restrict the search space for ranges, so that only the
+ * remaining ranges have to be joined with `getARelevantChar` (see `hasChildThatMatches`).
+ */
+ private predicate rangeMatchesOnLetterOrDigits(RegExpCharacterRange range, string char) {
+ exists(string lo, string hi |
+ range.isRange(lo, hi) and lo = lowercaseLetter() and hi = lowercaseLetter()
+ |
+ lo <= char and
+ char <= hi and
+ char = lowercaseLetter()
+ )
+ or
+ exists(string lo, string hi |
+ range.isRange(lo, hi) and lo = upperCaseLetter() and hi = upperCaseLetter()
+ |
+ lo <= char and
+ char <= hi and
+ char = upperCaseLetter()
+ )
+ or
+ exists(string lo, string hi | range.isRange(lo, hi) and lo = digit() and hi = digit() |
+ lo <= char and
+ char <= hi and
+ char = digit()
+ )
+ }
+
+ private string lowercaseLetter() { result = "abcdefghijklmnopqrstuvwxyz".charAt(_) } // any lower-case ASCII letter
+
+ private string upperCaseLetter() { result = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".charAt(_) } // any upper-case ASCII letter
+
+ private string digit() { result = [0 .. 9].toString() } // any decimal digit, "0" through "9"
+
+ /**
+ * Gets a char that could be matched by a regular expression.
+ * Includes all printable ascii chars, all constants mentioned in a regexp, and all chars matched by the regexp `/\s|\d|\w/`.
+ */
+ string getARelevantChar() {
+ exists(ascii(result))
+ or
+ exists(RegexpCharacterConstant c | result = c.getValue().charAt(_))
+ or
+ classEscapeMatches(_, result)
+ }
+
+ /**
+ * Gets a char that is mentioned in the character class `c`.
+ */
+ private string getAMentionedChar(RegExpCharacterClass c) {
+ exists(RegExpTerm child | child = c.getAChild() |
+ result = child.(RegexpCharacterConstant).getValue()
+ or
+ child.(RegExpCharacterRange).isRange(result, _) // lower bound of a range
+ or
+ child.(RegExpCharacterRange).isRange(_, result) // upper bound of a range
+ or
+ exists(RegExpCharacterClassEscape escape | child = escape |
+ // only the smallest and largest char of the (lower-cased) escape are treated as mentioned
+ result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
+ or
+ result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
+ )
+ or
+ exists(RegExpNamedCharacterProperty charProp | child = charProp |
+ result = min(string s | namedCharacterPropertyMatches(charProp.getName(), s))
+ or
+ result = max(string s | namedCharacterPropertyMatches(charProp.getName(), s))
+ )
+ )
+ }
+
+ /**
+ * An implementation of `CharacterClass` for positive (non-inverted) character classes.
+ */
+ private class PositiveCharacterClass extends CharacterClass {
+ RegExpCharacterClass cc;
+
+ PositiveCharacterClass() { this = getCanonicalCharClass(cc) and not cc.isInverted() }
+
+ override string getARelevantChar() { result = getAMentionedChar(cc) }
+
+ override predicate matches(string char) { hasChildThatMatches(cc, char) }
+ }
+
+ /**
+ * An implementation of `CharacterClass` for inverted character classes.
+ */
+ private class InvertedCharacterClass extends CharacterClass {
+ RegExpCharacterClass cc;
+
+ InvertedCharacterClass() { this = getCanonicalCharClass(cc) and cc.isInverted() }
+
+ override string getARelevantChar() {
+ result = nextChar(getAMentionedChar(cc)) or // a mentioned char offset by one via `nextChar`
+ nextChar(result) = getAMentionedChar(cc) // a char whose `nextChar` is a mentioned char
+ }
+
+ bindingset[char]
+ override predicate matches(string char) { not hasChildThatMatches(cc, char) }
+ }
+
+ /**
+ * Holds if the character class escape `clazz` (\d, \s, or \w) matches `char`.
+ */
+ pragma[noinline]
+ private predicate classEscapeMatches(string clazz, string char) {
+ clazz = "d" and
+ char = "0123456789".charAt(_)
+ or
+ clazz = "s" and
+ char = [" ", "\t", "\r", "\n", 11.toUnicode(), 12.toUnicode()] // 11.toUnicode() = \v, 12.toUnicode() = \f
+ or
+ clazz = "w" and
+ char = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_".charAt(_)
+ }
+
+ /**
+ * Holds if the named character property (e.g. from a POSIX bracket
+ * expression) `propName` matches `char`. For example, it holds when `propName` is
+ * `"word"` and `char` is `"a"`.
+ *
+ * TODO: expand to cover more properties.
+ */
+ private predicate namedCharacterPropertyMatches(string propName, string char) {
+ propName = ["digit", "Digit"] and
+ char = "0123456789".charAt(_)
+ or
+ propName = ["space", "Space"] and
+ (
+ char = [" ", "\t", "\r", "\n"]
+ or
+ char = getARelevantChar() and
+ char.regexpMatch("\\u000b|\\u000c") // \v|\f (vertical tab | form feed)
+ )
+ or
+ propName = ["word", "Word"] and
+ char = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_".charAt(_)
+ }
+
+ /**
+ * An implementation of `CharacterClass` for \d, \s, and \w.
+ */
+ private class PositiveCharacterClassEscape extends CharacterClass {
+ RegExpCharacterClassEscape cc;
+
+ PositiveCharacterClassEscape() {
+ this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"]
+ }
+
+ override string getARelevantChar() {
+ cc.getValue() = "d" and
+ result = ["0", "9"]
+ or
+ cc.getValue() = "s" and
+ result = [" "]
+ or
+ cc.getValue() = "w" and
+ result = ["a", "Z", "_", "0", "9"]
+ }
+
+ override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) }
+
+ override string choose() { // a single fixed witness per escape instead of the default `choose`
+ cc.getValue() = "d" and
+ result = "9"
+ or
+ cc.getValue() = "s" and
+ result = [" "]
+ or
+ cc.getValue() = "w" and
+ result = "a"
+ }
+ }
+
+ /**
+ * An implementation of `CharacterClass` for \D, \S, and \W.
+ */
+ private class NegativeCharacterClassEscape extends CharacterClass {
+ RegExpCharacterClassEscape cc;
+
+ NegativeCharacterClassEscape() {
+ this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"]
+ }
+
+ override string getARelevantChar() {
+ cc.getValue() = "D" and
+ result = ["a", "Z", "!"]
+ or
+ cc.getValue() = "S" and
+ result = ["a", "9", "!"]
+ or
+ cc.getValue() = "W" and
+ result = [" ", "!"]
+ }
+
+ bindingset[char]
+ override predicate matches(string char) {
+ not classEscapeMatches(cc.getValue().toLowerCase(), char) // negation of the corresponding lower-case escape
+ }
+ }
+
+ /**
+ * An implementation of `CharacterClass` for positive (non-inverted)
+ * named character properties.
+ */
+ private class PositiveNamedCharacterProperty extends CharacterClass {
+ RegExpNamedCharacterProperty cp;
+
+ PositiveNamedCharacterProperty() { this = getCanonicalCharClass(cp) and not cp.isInverted() }
+
+ override string getARelevantChar() {
+ exists(string lowerName | lowerName = cp.getName().toLowerCase() |
+ lowerName = "digit" and
+ result = ["0", "9"]
+ or
+ lowerName = "space" and
+ result = [" "]
+ or
+ lowerName = "word" and
+ result = ["a", "Z", "_", "0", "9"]
+ )
+ }
+
+ override predicate matches(string char) { namedCharacterPropertyMatches(cp.getName(), char) }
+
+ override string choose() {
+ exists(string lowerName | lowerName = cp.getName().toLowerCase() |
+ lowerName = "digit" and
+ result = "9"
+ or
+ lowerName = "space" and
+ result = " "
+ or
+ lowerName = "word" and
+ result = "a"
+ )
+ }
+ }
+
+ private class InvertedNamedCharacterProperty extends CharacterClass { // `CharacterClass` for inverted named character properties
+ RegExpNamedCharacterProperty cp;
+
+ InvertedNamedCharacterProperty() { this = getCanonicalCharClass(cp) and cp.isInverted() }
+
+ override string getARelevantChar() {
+ exists(string lowerName | lowerName = cp.getName().toLowerCase() |
+ lowerName = "digit" and
+ result = ["a", "Z", "!"]
+ or
+ lowerName = "space" and
+ result = ["a", "9", "!"]
+ or
+ lowerName = "word" and
+ result = [" ", "!"]
+ )
+ }
+
+ bindingset[char]
+ override predicate matches(string char) {
+ not namedCharacterPropertyMatches(cp.getName(), char) // negation of the positive property
+ }
+ }
+}
+
+private class EdgeLabel extends TInputSymbol { // a label on an NFA edge: `Epsilon()` or an `InputSymbol`
+ string toString() {
+ this = Epsilon() and result = "" // epsilon edges are rendered as the empty string
+ or
+ exists(InputSymbol s | this = s and result = s.toString())
+ }
+}
+
+/**
+ * Gets the state before matching `t`, that is, `Match(t, 0)`.
+ */
+pragma[inline]
+private State before(RegExpTerm t) { result = Match(t, 0) }
+
+/**
+ * Gets a state the NFA may be in after matching `t`.
+ */
+private State after(RegExpTerm t) {
+ exists(RegExpAlt alt | t = alt.getAChild() | result = after(alt))
+ or
+ exists(RegExpSequence seq, int i | t = seq.getChild(i) |
+ result = before(seq.getChild(i + 1)) // continue with the next element of the sequence
+ or
+ i + 1 = seq.getNumChild() and result = after(seq) // `t` is the last element
+ )
+ or
+ exists(RegExpGroup grp | t = grp.getAChild() | result = after(grp))
+ or
+ exists(RegExpStar star | t = star.getAChild() | result = before(star)) // back to the start of the star
+ or
+ exists(RegExpPlus plus | t = plus.getAChild() |
+ result = before(plus) or // repeat the body
+ result = after(plus) // or leave the repetition
+ )
+ or
+ exists(RegExpOpt opt | t = opt.getAChild() | result = after(opt))
+ or
+ exists(RegExpRoot root | t = root | result = AcceptAnySuffix(root))
+}
+
+/**
+ * Holds if the NFA has a transition from `q1` to `q2` labelled with `lbl`.
+ */
+predicate delta(State q1, EdgeLabel lbl, State q2) {
+ exists(RegexpCharacterConstant s, int i |
+ q1 = Match(s, i) and
+ lbl = Char(s.getValue().charAt(i)) and
+ (
+ q2 = Match(s, i + 1)
+ or
+ s.getValue().length() = i + 1 and // `i` is the index of the last char of the constant
+ q2 = after(s)
+ )
+ )
+ or
+ exists(RegExpDot dot | q1 = before(dot) and q2 = after(dot) |
+ if dot.getLiteral().isDotAll() then lbl = Any() else lbl = Dot()
+ )
+ or
+ exists(RegExpCharacterClass cc |
+ cc.isUniversalClass() and q1 = before(cc) and lbl = Any() and q2 = after(cc)
+ or
+ q1 = before(cc) and
+ lbl = CharClass(cc.getRawValue()) and
+ q2 = after(cc)
+ )
+ or
+ exists(RegExpCharacterClassEscape cc |
+ q1 = before(cc) and
+ lbl = CharClass(cc.getRawValue()) and
+ q2 = after(cc)
+ )
+ or
+ exists(RegExpNamedCharacterProperty cp |
+ q1 = before(cp) and
+ lbl = CharClass(cp.getRawValue()) and
+ q2 = after(cp)
+ )
+ or
+ exists(RegExpAlt alt | lbl = Epsilon() | q1 = before(alt) and q2 = before(alt.getAChild()))
+ or
+ exists(RegExpSequence seq | lbl = Epsilon() | q1 = before(seq) and q2 = before(seq.getChild(0)))
+ or
+ exists(RegExpGroup grp | lbl = Epsilon() | q1 = before(grp) and q2 = before(grp.getChild(0)))
+ or
+ exists(RegExpStar star | lbl = Epsilon() |
+ q1 = before(star) and q2 = before(star.getChild(0))
+ or
+ q1 = before(star) and q2 = after(star) // zero iterations
+ )
+ or
+ exists(RegExpPlus plus | lbl = Epsilon() | q1 = before(plus) and q2 = before(plus.getChild(0)))
+ or
+ exists(RegExpOpt opt | lbl = Epsilon() |
+ q1 = before(opt) and q2 = before(opt.getChild(0))
+ or
+ q1 = before(opt) and q2 = after(opt) // the optional part is absent
+ )
+ or
+ exists(RegExpRoot root | q1 = AcceptAnySuffix(root) |
+ lbl = Any() and q2 = q1
+ or
+ lbl = Epsilon() and q2 = Accept(root)
+ )
+ or
+ exists(RegExpRoot root | q1 = Match(root, 0) | lbl = Any() and q2 = q1) // `Any()` self-loop on the state before the root
+ or
+ exists(RegExpDollar dollar | q1 = before(dollar) |
+ lbl = Epsilon() and q2 = Accept(getRoot(dollar))
+ )
+ or
+ exists(EmptyPositiveSubPatttern empty | q1 = before(empty) |
+ lbl = Epsilon() and q2 = after(empty)
+ )
+}
+
+/**
+ * Gets a state that `q` has an epsilon transition to (a single `delta` step labelled `Epsilon()`).
+ */
+State epsilonSucc(State q) { delta(q, Epsilon(), result) }
+
+/**
+ * Gets a state from which a single epsilon transition leads to `q`.
+ */
+State epsilonPred(State q) { delta(result, Epsilon(), q) }
+
+/**
+ * Holds if there is a state `q` that can be reached from `q1`
+ * along zero or more epsilon edges, such that there is a transition from
+ * `q` to `q2` that consumes symbol `s`.
+ */
+predicate deltaClosed(State q1, InputSymbol s, State q2) { delta(epsilonSucc*(q1), s, q2) }
+
+/**
+ * Gets the root containing the given term, that is, the root of the literal,
+ * or a branch of the root disjunction.
+ */
+RegExpRoot getRoot(RegExpTerm term) {
+ result = term or // `term` itself, when it is a `RegExpRoot`
+ result = getRoot(term.getParent()) // or a root found by walking up the parents
+}
+
+private newtype TState =
+ Match(RegExpTerm t, int i) {
+ getRoot(t).isRelevant() and
+ (
+ i = 0 // every term of a relevant root gets a state at index 0
+ or
+ exists(t.(RegexpCharacterConstant).getValue().charAt(i)) // one state per char index of a constant
+ )
+ } or
+ Accept(RegExpRoot l) { l.isRelevant() } or
+ AcceptAnySuffix(RegExpRoot l) { l.isRelevant() }
+
+/**
+ * Gets the state in which the NFA is about to match the regular expression term `t`.
+ */
+State mkMatch(RegExpTerm t) { result = before(t) }
+
+/**
+ * A state in the NFA corresponding to a regular expression.
+ *
+ * Each regular expression literal `l` has one accepting state
+ * `Accept(l)`, one state that accepts all suffixes `AcceptAnySuffix(l)`,
+ * and a state `Match(t, i)` for every subterm `t`,
+ * which represents the state of the NFA before starting to
+ * match `t`, or the `i`th character in `t` if `t` is a constant.
+ */
+class State extends TState {
+ RegExpTerm repr;
+
+ State() {
+ this = Match(repr, _) or
+ this = Accept(repr) or
+ this = AcceptAnySuffix(repr)
+ }
+
+ /**
+ * Gets a string representation for this state in a regular expression.
+ */
+ string toString() {
+ exists(int i | this = Match(repr, i) | result = "Match(" + repr + "," + i + ")")
+ or
+ this instanceof Accept and
+ result = "Accept(" + repr + ")"
+ or
+ this instanceof AcceptAnySuffix and
+ result = "AcceptAny(" + repr + ")" // note: rendered as "AcceptAny", not "AcceptAnySuffix"
+ }
+
+ /**
+ * Gets the location for this state, which is the location of the term it represents.
+ */
+ Location getLocation() { result = repr.getLocation() }
+
+ /**
+ * Gets the term represented by this state.
+ */
+ RegExpTerm getRepr() { result = repr }
+}
+
+/**
+ * Gets the minimum char that is matched by both of the (distinct) character classes `c` and `d`.
+ */
+private string getMinOverlapBetweenCharacterClasses(CharacterClass c, CharacterClass d) {
+ result = min(getAOverlapBetweenCharacterClasses(c, d))
+}
+
+/**
+ * Gets a char that is matched by both the character classes `c` and `d`,
+ * where `c` and `d` share a root and are not the same character class.
+ */
+private string getAOverlapBetweenCharacterClasses(CharacterClass c, CharacterClass d) {
+ sharesRoot(c, d) and
+ result = [c.getARelevantChar(), d.getARelevantChar()] and
+ c.matches(result) and
+ d.matches(result) and
+ not c = d
+}
+
+/**
+ * Gets a character that is represented by both `c` and `d`. The relation is symmetric in `c` and `d`.
+ */
+string intersect(InputSymbol c, InputSymbol d) {
+ (sharesRoot(c, d) or [c, d] = Any()) and
+ (
+ c = Char(result) and
+ d = getAnInputSymbolMatching(result)
+ or
+ result = getMinOverlapBetweenCharacterClasses(c, d)
+ or
+ result = c.(CharacterClass).choose() and
+ (
+ d = c
+ or
+ d = Dot() and
+ not (result = "\n" or result = "\r") // `Dot()` is not intersected on line terminators
+ or
+ d = Any()
+ )
+ or
+ (c = Dot() or c = Any()) and
+ (d = Dot() or d = Any()) and
+ result = "a" // fixed witness for two wildcard symbols
+ )
+ or
+ result = intersect(d, c) // symmetric case
+}
+
+/**
+ * Gets a symbol that matches `char`.
+ */
+bindingset[char]
+InputSymbol getAnInputSymbolMatching(string char) {
+ result = Char(char)
+ or
+ result.(CharacterClass).matches(char)
+ or
+ result = Dot() and
+ not (char = "\n" or char = "\r") // `Dot()` matches everything except "\n" and "\r"
+ or
+ result = Any()
+}
+
+/**
+ * Predicates for constructing a prefix string that leads to a given state.
+ */
+private module PrefixConstruction {
+ /**
+ * Holds if `state` starts the string matched by the regular expression.
+ */
+ private predicate isStartState(State state) {
+ state instanceof StateInPumpableRegexp and
+ (
+ state = Match(any(RegExpRoot r), _)
+ or
+ exists(RegExpCaret car | state = after(car)) // the state right after a `RegExpCaret` term
+ )
+ }
+
+ /**
+ * Holds if `state` is the textually last start state for the regular expression.
+ */
+ private predicate lastStartState(State state) {
+ exists(RegExpRoot root |
+ state =
+ max(State s, Location l |
+ isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation()
+ |
+ s
+ order by
+ l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
+ l.getEndLine() // the keys after the start position only break ties
+ )
+ )
+ }
+
+ /**
+ * Holds if there exists any transition (`Epsilon()` or other) from `a` to `b`.
+ */
+ private predicate existsTransition(State a, State b) { delta(a, _, b) }
+
+ /**
+ * Gets the minimum number of transitions (of any kind) it takes to reach `state` from the `start` state, where `start` is a `lastStartState`.
+ */
+ int prefixLength(State start, State state) =
+ shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
+
+ /**
+ * Gets the minimum number of transitions it takes to reach `state` from some start state (see `prefixLength`).
+ */
+ private int lengthFromStart(State state) { result = prefixLength(_, state) }
+
+ /**
+ * Gets a string for which the regular expression will reach `state`.
+ *
+ * Has at most one result for any given `state`.
+ * This predicate will not always have a result even if there is a ReDoS issue in
+ * the regular expression.
+ */
+ string prefix(State state) {
+ lastStartState(state) and
+ result = ""
+ or
+ // the search stops past the last redos candidate state.
+ lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
+ exists(State prev |
+ // select a unique predecessor (by an arbitrary measure)
+ prev =
+ min(State s, Location loc |
+ lengthFromStart(s) = lengthFromStart(state) - 1 and
+ loc = s.getRepr().getLocation() and
+ delta(s, _, state)
+ |
+ s
+ order by
+ loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
+ s.getRepr().toString()
+ )
+ |
+ // greedy search for the shortest prefix: epsilon edges add no character
+ result = prefix(prev) and delta(prev, Epsilon(), state)
+ or
+ not delta(prev, Epsilon(), state) and
+ result = prefix(prev) + getCanonicalEdgeChar(prev, state)
+ )
+ }
+
+ /**
+ * Gets a canonical char (the minimum one) for which there exists a transition from `prev` to `next` in the NFA.
+ */
+ private string getCanonicalEdgeChar(State prev, State next) {
+ result =
+ min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
+ }
+
+ /**
+ * A state within a regular expression that has a pumpable state, that is,
+ * a state sharing its root with some ReDoS candidate state.
+ */
+ class StateInPumpableRegexp extends State {
+ pragma[noinline]
+ StateInPumpableRegexp() {
+ exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(this.getRepr()))
+ }
+ }
+}
+
+/**
+ * Predicates for testing the presence of a rejecting suffix.
+ *
+ * These predicates are used to ensure that all states reached from the fork
+ * by repeating `w` have a rejecting suffix.
+ *
+ * For example, a regexp like `/^(a+)+/` will accept any string as long as the prefix is
+ * some number of `"a"`s, and it is therefore not possible to construct a rejecting suffix.
+ *
+ * A regexp like `/(a+)+$/` or `/(a+)+b/` trivially has a rejecting suffix,
+ * as the suffix "X" will cause both the regular expressions to be rejected.
+ *
+ * The string `w` is repeated any number of times because it needs to be
+ * infinitely repeatable for the attack to work.
+ * For the regular expression `/((ab)+)*abab/` the accepting state is not reachable from the fork
+ * using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
+ */
+private module SuffixConstruction {
+ import PrefixConstruction
+
+ /**
+ * Holds if all states (at least one) reachable from `fork` by repeating `w`
+ * are likely rejectable by appending some suffix.
+ */
+ predicate reachesOnlyRejectableSuffixes(State fork, string w) {
+ isReDoSCandidate(fork, w) and
+ forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next))
+ }
+
+ /**
+ * Holds if there likely exists a suffix starting from `s` that leads to the regular expression being rejected.
+ * This predicate might find impossible suffixes when searching for suffixes of length > 1, which can cause FPs (false positives).
+ */
+ pragma[noinline]
+ private predicate isLikelyRejectable(StateInPumpableRegexp s) {
+ // exists a reject edge with some char.
+ hasRejectEdge(s)
+ or
+ hasEdgeToLikelyRejectable(s)
+ or
+ // stopping here is rejection
+ isRejectState(s)
+ }
+
+ /**
+ * Holds if `s` is not an accept state, and no sequence of epsilon transitions leads from `s` to an accept state.
+ */
+ predicate isRejectState(StateInPumpableRegexp s) { not epsilonSucc*(s) = Accept(_) }
+
+ /**
+ * Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
+ */
+ pragma[noopt]
+ predicate hasEdgeToLikelyRejectable(StateInPumpableRegexp s) {
+ // all edges (at least one) with some char lead to another state that is rejectable.
+ // the `next` states might not share a common suffix, which can cause FPs.
+ exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
+ // noopt to force `hasEdgeToLikelyRejectableHelper` to be first in the join-order.
+ exists(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next)) and
+ forall(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next))
+ )
+ }
+
+ /**
+ * Gets a char for which there exists a transition away from `s`,
+ * where `s` has not been found to be rejectable by `hasRejectEdge` or `isRejectState`.
+ */
+ pragma[noinline]
+ private string hasEdgeToLikelyRejectableHelper(StateInPumpableRegexp s) {
+ not hasRejectEdge(s) and
+ not isRejectState(s) and
+ deltaClosedChar(s, result, _)
+ }
+
+ /**
+ * Holds if there is a state that can be reached from `prev` along epsilon
+ * edges and that has a transition to `next` labelled with an input symbol
+ * matching the (relevant) character `char`.
+ */
+ predicate deltaClosedChar(StateInPumpableRegexp prev, string char, StateInPumpableRegexp next) {
+ deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
+ }
+
+ pragma[noinline]
+ InputSymbol getAnInputSymbolMatchingRelevant(string char) { // `getAnInputSymbolMatching`, restricted to chars from `relevant`
+ char = relevant(_) and
+ result = getAnInputSymbolMatching(char)
+ }
+
+ /**
+ * Gets a char used for finding possible suffixes inside `root`.
+ */
+ pragma[noinline]
+ private string relevant(RegExpRoot root) {
+ exists(ascii(result))
+ or
+ exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _)) // the only disjunct that depends on `root`
+ or
+ // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
+ // The three chars must be kept in sync with `hasSimpleRejectEdge`.
+ result = ["|", "\n", "Z"]
+ }
+
+ /**
+ * Holds if there exists a `char` such that there is no edge from `s` labeled `char` in our NFA.
+ * The NFA does not model reject states, so the above is the same as saying there is a reject edge.
+ */
+ private predicate hasRejectEdge(State s) {
+ hasSimpleRejectEdge(s) // cheap check on three fixed chars first
+ or
+ not hasSimpleRejectEdge(s) and
+ exists(string char | char = relevant(getRoot(s.getRepr())) | not deltaClosedChar(s, char, _))
+ }
+
+ /**
+ * Holds if, for at least one of the chars "|", "\n", or "Z", there is no edge from `s` labeled with that char in our NFA.
+ * This predicate is used as a cheap pre-processing to speed up `hasRejectEdge`.
+ */
+ private predicate hasSimpleRejectEdge(State s) {
+ // The three chars were chosen arbitrarily. The three chars must be kept in sync with `relevant`.
+ exists(string char | char = ["|", "\n", "Z"] | not deltaClosedChar(s, char, _))
+ }
+
+ /**
+ * Gets a state that can be reached from pumpable `fork` consuming all
+ * chars in `w` any number of times followed by the first `i+1` characters of `w`.
+ */
+ pragma[noopt]
+ private State process(State fork, string w, int i) {
+ exists(State prev | prev = getProcessPrevious(fork, i, w) |
+ exists(string char, InputSymbol sym |
+ char = w.charAt(i) and // the char consumed in this step
+ deltaClosed(prev, sym, result) and
+ // noopt to prevent joining `prev` with all possible `chars` that could transition away from `prev`.
+ // Instead only join with the set of `chars` where a relevant `InputSymbol` has already been found.
+ sym = getAProcessInputSymbol(char)
+ )
+ )
+ }
+
+ /**
+ * Gets a state that can be reached from pumpable `fork` consuming all
+ * chars in `w` any number of times followed by the first `i` characters of `w`.
+ */
+ private State getProcessPrevious(State fork, int i, string w) {
+ isReDoSCandidate(fork, w) and
+ (
+ i = 0 and result = fork // nothing consumed yet
+ or
+ result = process(fork, w, i - 1)
+ or
+ // repeat until fixpoint: a state reached after a full pass over `w` starts the next pass
+ i = 0 and
+ result = process(fork, w, w.length() - 1)
+ )
+ }
+
+ /**
+ * Gets an InputSymbol that matches `char`.
+ * The predicate is specialized to only have a result for the `char`s that are relevant for the `process` predicate (see `getAProcessChar`).
+ */
+ private InputSymbol getAProcessInputSymbol(string char) {
+ char = getAProcessChar() and
+ result = getAnInputSymbolMatching(char)
+ }
+
+ /**
+ * Gets a `char` that occurs in a `pump` string, that is, in the string of some ReDoS candidate.
+ */
+ private string getAProcessChar() { result = any(string s | isReDoSCandidate(_, s)).charAt(_) }
+}
+
+/**
+ * Gets the result of backslash-escaping backslashes, newlines,
+ * carriage-returns and tabs in `s`.
+ */
+bindingset[s]
+private string escape(string s) {
+ result =
+ s.replaceAll("\\", "\\\\") // backslashes first, so the escapes added below are not doubled
+ .replaceAll("\n", "\\n")
+ .replaceAll("\r", "\\r")
+ .replaceAll("\t", "\\t")
+}
+
+/**
+ * Gets `str` with its last `i` characters moved to the front.
+ *
+ * This is used to align the pump string with the beginning of
+ * a RegExpTerm, so that it does not start in the middle of a constant.
+ */
+bindingset[str, i]
+private string rotate(string str, int i) {
+ exists(int cut | cut = str.length() - i | result = str.suffix(cut) + str.prefix(cut))
+}
+
+/**
+ * Holds if `term` may cause superlinear backtracking on strings containing many repetitions of `pump`.
+ * `pump` is the shortest candidate string for the state `s`, rotated by the offset of `s` within `term` and escaped.
+ */
+private predicate isReDoSAttackable(RegExpTerm term, string pump, State s) {
+ exists(int i, string c | s = Match(term, i) |
+ c =
+ min(string w |
+ any(ReDoSConfiguration conf).isReDoSCandidate(s, w) and
+ SuffixConstruction::reachesOnlyRejectableSuffixes(s, w)
+ |
+ w order by w.length(), w
+ ) and
+ pump = escape(rotate(c, i))
+ )
+}
+
+/**
+ * Holds if the state `s` (represented by the term `t`) can have backtracking with repetitions of `pump`.
+ *
+ * `prefixMsg` contains a friendly message for a prefix that reaches `s` (or `prefixMsg` is the empty string if the prefix is empty or if no prefix could be found).
+ */
+predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
+ not t.getRegExp().hasFreeSpacingFlag() and // exclude free-spacing mode regexes
+ isReDoSAttackable(t, pump, s) and
+ (
+ prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
+ not PrefixConstruction::prefix(s) = ""
+ or
+ PrefixConstruction::prefix(s) = "" and prefixMsg = "" // empty prefix: no message
+ or
+ not exists(PrefixConstruction::prefix(s)) and prefixMsg = "" // no prefix found: no message
+ )
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/RegExpTreeView.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/RegExpTreeView.qll
new file mode 100644
index 00000000000..11fd0836ce1
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/regexp/RegExpTreeView.qll
@@ -0,0 +1,724 @@
+private import codeql.ruby.ast.Literal as AST
+private import codeql.Locations
+private import ParseRegExp
+
+/**
+ * An element that can contain a regular expression term: either a string
+ * literal that is parsed as a regular expression, or a regular expression
+ * term itself.
+ */
+class RegExpParent extends TRegExpParent {
+  string toString() { result = "RegExpParent" }
+  /** Gets the `i`th child term; this base implementation has no children. */
+  RegExpTerm getChild(int i) { none() }
+  /** Gets any child term of this element. */
+  RegExpTerm getAChild() { result = this.getChild(_) }
+  /** Gets the number of distinct child terms of this element. */
+  int getNumChild() { result = count(this.getAChild()) }
+
+  /**
+   * Gets the name of one primary CodeQL class that this element
+   * belongs to.
+   */
+  string getAPrimaryQlClass() { result = "RegExpParent" }
+
+  /**
+   * Gets the names of all primary CodeQL classes that this element
+   * belongs to, joined with commas.
+   */
+  final string getPrimaryQlClasses() { result = concat(this.getAPrimaryQlClass(), ",") }
+}
+/** A regular expression literal as a whole; its only child is the root term of the regex. */
+class RegExpLiteral extends TRegExpLiteral, RegExpParent {
+  RegExp re;
+
+  RegExpLiteral() { this = TRegExpLiteral(re) }
+
+  override RegExpTerm getChild(int i) { result.isRootTerm() and result.getRegExp() = re and i = 0 }
+  /** Holds if the underlying regex has the multiline flag set. */
+  predicate isDotAll() { re.hasMultilineFlag() }
+
+  override string getAPrimaryQlClass() { result = "RegExpLiteral" }
+}
+
+class RegExpTerm extends RegExpParent {
+ RegExp re;
+ int start;
+ int end;
+ // `start` and `end` are offsets into the text of `re` (see `toString()`).
+ RegExpTerm() {
+ this = TRegExpAlt(re, start, end)
+ or
+ this = TRegExpBackRef(re, start, end)
+ or
+ this = TRegExpCharacterClass(re, start, end)
+ or
+ this = TRegExpCharacterRange(re, start, end)
+ or
+ this = TRegExpNormalChar(re, start, end)
+ or
+ this = TRegExpGroup(re, start, end)
+ or
+ this = TRegExpQuantifier(re, start, end)
+ or
+ this = TRegExpSequence(re, start, end) and
+ exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
+ or
+ this = TRegExpSpecialChar(re, start, end)
+ or
+ this = TRegExpNamedCharacterProperty(re, start, end)
+ }
+ /** Gets the root term of the regular expression that contains this term. */
+ RegExpTerm getRootTerm() {
+ this.isRootTerm() and result = this
+ or
+ result = getParent().(RegExpTerm).getRootTerm()
+ }
+ /** Holds for every term (the body is `any()`). */
+ predicate isUsedAsRegExp() { any() }
+ /** Holds if this term spans the entire text of `re`. */
+ predicate isRootTerm() { start = 0 and end = re.getText().length() }
+ /** Gets the `i`th child of this term, as defined by the concrete term class. */
+ override RegExpTerm getChild(int i) {
+ result = this.(RegExpAlt).getChild(i)
+ or
+ result = this.(RegExpBackRef).getChild(i)
+ or
+ result = this.(RegExpCharacterClass).getChild(i)
+ or
+ result = this.(RegExpCharacterRange).getChild(i)
+ or
+ result = this.(RegExpNormalChar).getChild(i)
+ or
+ result = this.(RegExpGroup).getChild(i)
+ or
+ result = this.(RegExpQuantifier).getChild(i)
+ or
+ result = this.(RegExpSequence).getChild(i)
+ or
+ result = this.(RegExpSpecialChar).getChild(i)
+ or
+ result = this.(RegExpNamedCharacterProperty).getChild(i)
+ }
+ /** Gets the element that has this term as a child. */
+ RegExpParent getParent() { result.getAChild() = this }
+ /** Gets the regular expression that this term is part of. */
+ RegExp getRegExp() { result = re }
+ /** Gets the offset in the regex text at which this term starts. */
+ int getStart() { result = start }
+ /** Gets the offset in the regex text at which this term ends. */
+ int getEnd() { result = end }
+ /** Gets the text of this term, i.e. the regex text between `start` and `end`. */
+ override string toString() { result = re.getText().substring(start, end) }
+
+ override string getAPrimaryQlClass() { result = "RegExpTerm" }
+ /** Gets the location of the regular expression as a whole (not of this individual term). */
+ Location getLocation() { result = re.getLocation() }
+ /** Holds if this term is at the given position; columns are `start`/`end` offsets added to the first component's start column. */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ exists(int re_start, int re_end |
+ re.getComponent(0).getLocation().hasLocationInfo(filepath, startline, re_start, _, _) and
+ re.getComponent(re.getNumberOfComponents() - 1)
+ .getLocation()
+ .hasLocationInfo(filepath, _, _, endline, re_end)
+ |
+ startcolumn = re_start + start and
+ endcolumn = re_start + end - 1
+ )
+ }
+ /** Gets the file containing this term. */
+ File getFile() { result = this.getLocation().getFile() }
+ /** Gets the source text of this term; the same as `toString()`. */
+ string getRawValue() { result = this.toString() }
+ /** Gets the regular expression literal that contains this term. */
+ RegExpLiteral getLiteral() { result = TRegExpLiteral(re) }
+
+ /** Gets the regular expression term that is matched (textually) before this one, if any. */
+ RegExpTerm getPredecessor() {
+ exists(RegExpTerm parent | parent = getParent() |
+ result = parent.(RegExpSequence).previousElement(this)
+ or
+ not exists(parent.(RegExpSequence).previousElement(this)) and
+ not parent instanceof RegExpSubPattern and
+ result = parent.getPredecessor()
+ )
+ }
+
+ /** Gets the regular expression term that is matched (textually) after this one, if any. */
+ RegExpTerm getSuccessor() {
+ exists(RegExpTerm parent | parent = getParent() |
+ result = parent.(RegExpSequence).nextElement(this)
+ or
+ not exists(parent.(RegExpSequence).nextElement(this)) and
+ not parent instanceof RegExpSubPattern and
+ result = parent.getSuccessor()
+ )
+ }
+}
+
+newtype TRegExpParent = // one branch per kind of element; `start`/`end` are offsets into the text of `re`
+ TRegExpLiteral(RegExp re) or // the regular expression as a whole
+ TRegExpQuantifier(RegExp re, int start, int end) { re.qualifiedItem(start, end, _, _) } or
+ TRegExpSequence(RegExp re, int start, int end) { re.sequence(start, end) } or
+ TRegExpAlt(RegExp re, int start, int end) { re.alternation(start, end) } or
+ TRegExpCharacterClass(RegExp re, int start, int end) { re.charSet(start, end) } or
+ TRegExpCharacterRange(RegExp re, int start, int end) { re.charRange(_, start, _, _, end) } or
+ TRegExpGroup(RegExp re, int start, int end) { re.group(start, end) } or
+ TRegExpSpecialChar(RegExp re, int start, int end) { re.specialCharacter(start, end, _) } or
+ TRegExpNormalChar(RegExp re, int start, int end) { re.normalCharacter(start, end) } or
+ TRegExpBackRef(RegExp re, int start, int end) { re.backreference(start, end) } or
+ TRegExpNamedCharacterProperty(RegExp re, int start, int end) {
+ re.namedCharacterProperty(start, end, _)
+ }
+
+class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
+  int part_end;
+  boolean maybe_empty;
+  boolean may_repeat_forever;
+
+  RegExpQuantifier() {
+    re.qualifiedPart(start, part_end, end, maybe_empty, may_repeat_forever) and
+    this = TRegExpQuantifier(re, start, end)
+  }
+  /** Gets the quantified sub-term (index 0), which spans `start` to `part_end`. */
+  override RegExpTerm getChild(int i) {
+    result.getStart() = start and
+    result.getEnd() = part_end and
+    result.getRegExp() = re and
+    i = 0
+  }
+  /** Holds if `may_repeat_forever`, as reported by `qualifiedPart`, is `true`. */
+  predicate mayRepeatForever() { may_repeat_forever = true }
+  /** Gets the quantifier text itself, i.e. the regex text from `part_end` to `end`. */
+  string getQualifier() { exists(string text | text = re.getText() | result = text.substring(part_end, end)) }
+
+  override string getAPrimaryQlClass() { result = "RegExpQuantifier" }
+}
+/** A quantifier for which `mayRepeatForever()` holds. */
+class InfiniteRepetitionQuantifier extends RegExpQuantifier {
+ InfiniteRepetitionQuantifier() { this.mayRepeatForever() }
+
+ override string getAPrimaryQlClass() { result = "InfiniteRepetitionQuantifier" }
+}
+/** An infinite-repetition quantifier whose quantifier text starts with `*`. */
+class RegExpStar extends InfiniteRepetitionQuantifier {
+ RegExpStar() { this.getQualifier().charAt(0) = "*" }
+
+ override string getAPrimaryQlClass() { result = "RegExpStar" }
+}
+/** An infinite-repetition quantifier whose quantifier text starts with `+`. */
+class RegExpPlus extends InfiniteRepetitionQuantifier {
+ RegExpPlus() { this.getQualifier().charAt(0) = "+" }
+
+ override string getAPrimaryQlClass() { result = "RegExpPlus" }
+}
+/** A quantifier whose quantifier text starts with `?`. */
+class RegExpOpt extends RegExpQuantifier {
+ RegExpOpt() { this.getQualifier().charAt(0) = "?" }
+
+ override string getAPrimaryQlClass() { result = "RegExpOpt" }
+}
+
+class RegExpRange extends RegExpQuantifier {
+  string upper;
+  string lower;
+
+  RegExpRange() { re.multiples(part_end, end, lower, upper) }
+  /** Gets the upper bound as a string, as reported by `multiples`. */
+  string getUpper() { result = upper }
+  /** Gets the lower bound as a string, as reported by `multiples`. */
+  string getLower() { result = lower }
+
+  /**
+   * Gets the upper bound of the range as an integer, if any.
+   *
+   * A range without an upper bound allows any number of repetitions.
+   * For a term of the form `r{lo}`, the lower and the upper bound are
+   * both `lo`.
+   */
+  int getUpperBound() { exists(string bound | bound = this.getUpper() | result = bound.toInt()) }
+
+  /** Gets the lower bound of the range as an integer. */
+  int getLowerBound() { exists(string bound | bound = this.getLower() | result = bound.toInt()) }
+
+  override string getAPrimaryQlClass() { result = "RegExpRange" }
+}
+
+class RegExpSequence extends RegExpTerm, TRegExpSequence {
+  RegExpSequence() {
+    // a sequence with fewer than two elements is treated as that single element instead
+    exists(seqChild(re, start, end, 1)) and
+    this = TRegExpSequence(re, start, end)
+  }
+
+  override RegExpTerm getChild(int i) { result = seqChild(re, start, end, i) }
+
+  /** Gets the element that comes directly before `element` in this sequence. */
+  RegExpTerm previousElement(RegExpTerm element) { this.nextElement(result) = element }
+
+  /** Gets the element that comes directly after `element` in this sequence. */
+  RegExpTerm nextElement(RegExpTerm element) {
+    exists(int pos |
+      this.getChild(pos) = element and result = this.getChild(pos + 1)
+    )
+  }
+
+  override string getAPrimaryQlClass() { result = "RegExpSequence" }
+}
+/** Gets the end offset of the `i`th element of the sequence from `start` to `end` in `re`. */
+pragma[nomagic]
+private int seqChildEnd(RegExp re, int start, int end, int i) {
+  exists(RegExpTerm child | child = seqChild(re, start, end, i) | result = child.getEnd())
+}
+
+/**
+ * Gets the `i`th element of the sequence from `start` to `end` in `re`; moved out so we can use it in the charpred.
+ */
+private RegExpTerm seqChild(RegExp re, int start, int end, int i) {
+  re.sequence(start, end) and
+  result.getRegExp() = re and
+  (
+    // first element: the item that begins where the sequence begins
+    i = 0 and
+    result.getStart() = start and
+    re.item(start, result.getEnd())
+    or
+    // later elements: the item that begins where the previous element ended
+    i > 0 and
+    exists(int prevEnd | prevEnd = seqChildEnd(re, start, end, i - 1) |
+      result.getStart() = prevEnd and
+      re.item(prevEnd, result.getEnd())
+    )
+  )
+}
+
+class RegExpAlt extends RegExpTerm, TRegExpAlt {
+  RegExpAlt() { this = TRegExpAlt(re, start, end) }
+
+  /**
+   * Gets the `i`th alternative. The first alternative begins at `start`; each
+   * later one begins one character after the previous alternative ends.
+   */
+  override RegExpTerm getChild(int i) {
+    result.getRegExp() = re and
+    (
+      i = 0 and
+      result.getStart() = start and
+      re.alternationOption(start, end, start, result.getEnd())
+      or
+      i > 0 and
+      exists(int optStart | optStart = this.getChild(i - 1).getEnd() + 1 | // allow for the |
+        result.getStart() = optStart and
+        re.alternationOption(start, end, optStart, result.getEnd())
+      )
+    )
+  }
+
+  override string getAPrimaryQlClass() { result = "RegExpAlt" }
+}
+
+class RegExpEscape extends RegExpNormalChar {
+  RegExpEscape() { re.escapedCharacter(start, end) }
+
+  /**
+   * Gets the denoted character for `\n`, `\r`, `\t` and unicode escapes; otherwise the escaped text, e.g. `w` for `\w`.
+   * TODO: Handle named escapes.
+   */
+  override string getValue() {
+    // a unicode escape must only yield its decoded character, not also its raw text (e.g. `u0061`)
+    not this.isUnicode() and this.isIdentityEscape() and result = this.getUnescaped()
+    or
+    this.getUnescaped() = "n" and result = "\n"
+    or
+    this.getUnescaped() = "r" and result = "\r"
+    or
+    this.getUnescaped() = "t" and result = "\t"
+    or
+    this.isUnicode() and result = this.getUnicode()
+  }
+  /** Holds if the escaped text is not one of `n`, `r` or `t`; note that this includes unicode escapes. */
+  predicate isIdentityEscape() { not this.getUnescaped() in ["n", "r", "t"] }
+
+  /**
+   * Gets the text for this escape. That is e.g. "\w".
+   */
+  private string getText() { result = re.getText().substring(start, end) }
+
+  /**
+   * Holds if this is a unicode escape, i.e. its text starts with `\u` or `\U`.
+   */
+  private predicate isUnicode() { this.getText().prefix(2) = ["\\u", "\\U"] }
+
+  /**
+   * Gets the unicode char for this escape.
+   * E.g. for `\u0061` this returns "a".
+   */
+  private string getUnicode() {
+    exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
+      result = codepoint.toUnicode()
+    )
+  }
+
+  /**
+   * Gets int value for the `index`th char in the hex number of the unicode escape.
+   * E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
+   */
+  private int getHexValueFromUnicode(int index) {
+    this.isUnicode() and
+    exists(string hex, string char | hex = this.getText().suffix(2) |
+      char = hex.charAt(index) and
+      result = 16.pow(hex.length() - index - 1) * toHex(char)
+    )
+  }
+  /** Gets the text of this escape without its first character (the backslash), e.g. `w` for `\w`. */
+  string getUnescaped() { result = this.getText().suffix(1) }
+
+  override string getAPrimaryQlClass() { result = "RegExpEscape" }
+}
+
+/**
+ * Gets the numeric value of the single hexadecimal digit `hex`.
+ *
+ * Decimal digits map to 0-9 and the letters `a`-`f` (in either case) map
+ * to 10-15. There is no result for any other string.
+ */
+private int toHex(string hex) {
+  result = [0 .. 9] and
+  hex = result.toString()
+  or
+  // letters: the position within "abcdef" is the offset from 10
+  exists(int offset, string letters |
+    offset = [0 .. 5] and
+    letters = ["abcdef", "ABCDEF"]
+  |
+    hex = letters.charAt(offset) and
+    result = 10 + offset
+  )
+}
+
+/**
+ * An escape sequence that stands for a whole class of characters rather
+ * than for a single character.
+ *
+ * Examples:
+ *
+ * ```
+ * \w
+ * \S
+ * ```
+ */
+class RegExpCharacterClassEscape extends RegExpEscape {
+  RegExpCharacterClassEscape() { this.getValue() = ["d", "D", "s", "S", "w", "W", "h", "H"] }
+
+  // The name of the class (e.g. `w` for `\w`) is given by the inherited `getValue()`.
+  /** Gets nothing: a character class escape has no child terms. */
+  override RegExpTerm getChild(int i) { none() }
+
+  override string getAPrimaryQlClass() { result = "RegExpCharacterClassEscape" }
+}
+
+/**
+ * A bracketed character class.
+ *
+ * Examples:
+ *
+ * ```rb
+ * /[a-fA-F0-9]/
+ * /[^abc]/
+ * ```
+ */
+class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
+  RegExpCharacterClass() { this = TRegExpCharacterClass(re, start, end) }
+
+  /** Holds if the character at offset `start + 1` is `^`, as in `[^abc]`. */
+  predicate isInverted() { re.getChar(start + 1) = "^" }
+
+  /** Holds if this class is `[^]`, or contains two class escapes that differ only in case. */
+  predicate isUniversalClass() {
+    // [^]
+    this.isInverted() and not exists(this.getAChild())
+    or
+    // [\w\W] and similar
+    not this.isInverted() and
+    exists(string lhs, string rhs |
+      lhs = this.getAChild().(RegExpCharacterClassEscape).getValue() and
+      rhs = this.getAChild().(RegExpCharacterClassEscape).getValue()
+    |
+      lhs.toLowerCase() = rhs.toLowerCase() and lhs != rhs
+    )
+  }
+
+  /**
+   * Gets the `i`th item: the first starts at the offset given by `charSetStart`, each later one where its predecessor ends.
+   */
+  override RegExpTerm getChild(int i) {
+    result.getRegExp() = re and
+    exists(int childStart | childStart = result.getStart() |
+      i = 0 and
+      re.charSetStart(start, childStart) and
+      re.charSetChild(start, childStart, result.getEnd())
+      or
+      i > 0 and
+      childStart = this.getChild(i - 1).getEnd() and
+      re.charSetChild(start, childStart, result.getEnd())
+    )
+  }
+
+  override string getAPrimaryQlClass() { result = "RegExpCharacterClass" }
+}
+
+class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
+  int lower_end;
+  int upper_start;
+
+  RegExpCharacterRange() {
+    re.charRange(_, start, lower_end, upper_start, end) and
+    this = TRegExpCharacterRange(re, start, end)
+  }
+
+  /** Holds if `lo` and `hi` are the texts of the lower and upper end of this range. */
+  predicate isRange(string lo, string hi) {
+    exists(string text | text = re.getText() |
+      lo = text.substring(start, lower_end) and hi = text.substring(upper_start, end)
+    )
+  }
+
+  /** Gets the lower end (`i = 0`) or the upper end (`i = 1`) of this range. */
+  override RegExpTerm getChild(int i) {
+    result.getRegExp() = re and
+    (
+      i = 0 and result.getStart() = start and result.getEnd() = lower_end
+      or
+      i = 1 and result.getStart() = upper_start and result.getEnd() = end
+    )
+  }
+
+  override string getAPrimaryQlClass() { result = "RegExpCharacterRange" }
+}
+
+class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar {
+  RegExpNormalChar() { this = TRegExpNormalChar(re, start, end) }
+  /** Holds unconditionally. */
+  predicate isCharacter() { any() }
+  /** Gets the raw text of this term, i.e. the regex text from `start` to `end`. */
+  string getValue() { exists(string text | text = re.getText() | result = text.substring(start, end)) }
+  /** Gets nothing: a normal character has no child terms. */
+  override RegExpTerm getChild(int i) { none() }
+
+  override string getAPrimaryQlClass() { result = "RegExpNormalChar" }
+}
+
+class RegExpConstant extends RegExpTerm {
+  string value;
+
+  RegExpConstant() {
+    this = TRegExpNormalChar(re, start, end) and
+    value = this.(RegExpNormalChar).getValue() and
+    not this instanceof RegExpCharacterClassEscape and
+    // exclude chars that lie inside a quantifier's own text (TODO: push this into regex library)
+    not exists(int qstart, int qend | re.qualifiedPart(_, qstart, qend, _, _) |
+      start >= qstart and qend >= end
+    )
+    or
+    // a special character that occurs inside a character set
+    this = TRegExpSpecialChar(re, start, end) and
+    value = this.(RegExpSpecialChar).getChar() and
+    re.inCharSet(start)
+  }
+  /** Holds unconditionally. */
+  predicate isCharacter() { any() }
+  /** Gets the constant value: the normal character's value or the special character's text. */
+  string getValue() { result = value }
+  /** Gets nothing: a constant has no child terms. */
+  override RegExpTerm getChild(int i) { none() }
+
+  override string getAPrimaryQlClass() { result = "RegExpConstant" }
+}
+
+class RegExpGroup extends RegExpTerm, TRegExpGroup {
+  RegExpGroup() { this = TRegExpGroup(re, start, end) }
+
+  /**
+   * Gets the number of this capture group within the enclosing regular
+   * expression literal, if it is a capture group.
+   *
+   * For example, in the regular expression `/((a?).)(?:b)/`, the
+   * group `((a?).)` has index 1, the group `(a?)` nested inside it
+   * has index 2, and the group `(?:b)` has no index, since it is
+   * not a capture group.
+   */
+  int getNumber() { result = re.getGroupNumber(start, end) }
+
+  /** Holds if this capture group has a name. */
+  predicate isNamed() { exists(string name | name = this.getName()) }
+
+  /** Gets the name of this capture group, if it has one. */
+  string getName() { result = re.getGroupName(start, end) }
+  /** Holds unconditionally. */
+  predicate isCharacter() { any() }
+  /** Gets the regex text from `start` to `end`, i.e. the text of this group. */
+  string getValue() { exists(string text | text = re.getText() | result = text.substring(start, end)) }
+
+  override RegExpTerm getChild(int i) {
+    re.groupContents(start, end, result.getStart(), result.getEnd()) and
+    result.getRegExp() = re and
+    i = 0
+  }
+
+  override string getAPrimaryQlClass() { result = "RegExpGroup" }
+}
+
+class RegExpSpecialChar extends RegExpTerm, TRegExpSpecialChar {
+  string char;
+
+  RegExpSpecialChar() {
+    re.specialCharacter(start, end, char) and
+    this = TRegExpSpecialChar(re, start, end)
+  }
+  /** Holds unconditionally. */
+  predicate isCharacter() { any() }
+  /** Gets the text of this special character, as reported by `specialCharacter`. */
+  string getChar() { result = char }
+  /** Gets nothing: a special character has no child terms. */
+  override RegExpTerm getChild(int i) { none() }
+
+  override string getAPrimaryQlClass() { result = "RegExpSpecialChar" }
+}
+/** A special character whose text is `.`. */
+class RegExpDot extends RegExpSpecialChar {
+ RegExpDot() { this.getChar() = "." }
+
+ override string getAPrimaryQlClass() { result = "RegExpDot" }
+}
+/** A special character whose text is `$`, `\Z` or `\z`. */
+class RegExpDollar extends RegExpSpecialChar {
+ RegExpDollar() { this.getChar() = ["$", "\\Z", "\\z"] }
+
+ override string getAPrimaryQlClass() { result = "RegExpDollar" }
+}
+/** A special character whose text is `^` or `\A`. */
+class RegExpCaret extends RegExpSpecialChar {
+ RegExpCaret() { this.getChar() = ["^", "\\A"] }
+
+ override string getAPrimaryQlClass() { result = "RegExpCaret" }
+}
+/** A group for which `zeroWidthMatch` holds; it exposes no child terms. */
+class RegExpZeroWidthMatch extends RegExpGroup {
+ RegExpZeroWidthMatch() { re.zeroWidthMatch(start, end) }
+
+ override predicate isCharacter() { any() }
+
+ override RegExpTerm getChild(int i) { none() }
+
+ override string getAPrimaryQlClass() { result = "RegExpZeroWidthMatch" }
+}
+
+/**
+ * A zero-width lookahead or lookbehind assertion.
+ *
+ * Examples:
+ *
+ * ```
+ * (?=\w)
+ * (?!\n)
+ * (?<=\.)
+ * (? (d,e,f)` in the product automaton
+ * iff there exists three transitions in the NFA `a->d, b->e, c->f` where those three
+ * transitions all match a shared character `char`. (see `getAThreewayIntersect`)
+ *
+ * We start a search in the product automaton at `(pivot, pivot, succ)`,
+ * and search for a series of transitions (a `Trace`), such that we end
+ * at `(pivot, succ, succ)` (see `isReachableFromStartTuple`).
+ *
+ * For example, consider the regular expression `/^\d*5\w*$/`.
+ * The search will start at the tuple `(\d*, \d*, \w*)` and search
+ * for a path to `(\d*, \w*, \w*)`.
+ * This path exists, and consists of a single transition in the product automaton,
+ * where the three corresponding NFA edges all match the character `"5"`.
+ *
+ * The start-state in the NFA has an any-transition to itself, this allows us to
+ * flag regular expressions such as `/a*$/` - which does not have a start anchor -
+ * and can thus start matching anywhere.
+ *
+ * The implementation is not perfect.
+ * It has the same suffix detection issue as the `js/redos` query, which can cause false positives.
+ * It also doesn't find all transitions in the product automaton, which can cause false negatives.
+ */
+
+/**
+ * An instantiation of `ReDoSConfiguration` for superlinear ReDoS.
+ */
+class SuperLinearReDoSConfiguration extends ReDoSConfiguration {
+ SuperLinearReDoSConfiguration() { this = "SuperLinearReDoSConfiguration" }
+ /** Holds if `isPumpable` holds for some pivot together with `state` and `pump`. */
+ override predicate isReDoSCandidate(State state, string pump) { isPumpable(_, state, pump) }
+}
+
+/**
+ * Gets the start state of some regular expression, i.e. the match state of a `RegExpRoot`.
+ */
+private State getRootState() { exists(RegExpRoot root | result = mkMatch(root)) }
+
+private newtype TStateTuple =
+ MkStateTuple(State q1, State q2, State q3) {
+ // starts at (pivot, pivot, succ)
+ isStartLoops(q1, q3) and q1 = q2
+ or // ...or a feasible tuple that is the target of a step in the product automaton
+ step(_, _, _, _, q1, q2, q3) and FeasibleTuple::isFeasibleTuple(q1, q2, q3)
+ }
+
+/**
+ * A state in the product automaton.
+ * The product automaton contains 3-tuples of states.
+ *
+ * We lazily only construct those states that we are actually
+ * going to need.
+ * Either a start state `(pivot, pivot, succ)`, or a state
+ * where there exists a transition from an already existing state.
+ *
+ * The exponential variant of this query (`js/redos`) uses an optimization
+ * trick where `q1 <= q2`. This trick cannot be used here as the order
+ * of the elements matters.
+ */
+class StateTuple extends TStateTuple {
+ State q1;
+ State q2;
+ State q3;
+
+ StateTuple() { this = MkStateTuple(q1, q2, q3) }
+
+ /**
+ * Gets a string representation of this tuple.
+ */
+ string toString() { result = "(" + q1 + ", " + q2 + ", " + q3 + ")" }
+
+ /**
+ * Holds if this tuple is `(r1, r2, r3)`.
+ */
+ pragma[noinline]
+ predicate isTuple(State r1, State r2, State r3) { r1 = q1 and r2 = q2 and r3 = q3 }
+}
+
+/**
+ * A module for determining feasible tuples for the product automaton.
+ *
+ * The implementation is split into many predicates for performance reasons.
+ */
+private module FeasibleTuple {
+ /**
+ * Holds if the tuple `(r1, r2, r3)` might be on a path from a start-state to an end-state in the product automaton.
+ */
+ pragma[inline]
+ predicate isFeasibleTuple(State r1, State r2, State r3) {
+ // The first element is either inside a repetition (or the start state itself)
+ isRepetitionOrStart(r1) and
+ // The last element is inside a repetition
+ stateInsideRepetition(r3) and
+ // The states are reachable in the NFA in the order r1 -> r2 -> r3
+ delta+(r1) = r2 and
+ delta+(r2) = r3 and
+ // The first element can reach a beginning (the "pivot" state in a `(pivot, succ)` pair).
+ canReachABeginning(r1) and
+ // The last element can reach a target (the "succ" state in a `(pivot, succ)` pair).
+ canReachATarget(r3)
+ }
+
+ /**
+ * Holds if `s` is either inside a repetition, or is the start state (which is a repetition).
+ */
+ pragma[noinline]
+ private predicate isRepetitionOrStart(State s) { stateInsideRepetition(s) or s = getRootState() }
+
+ /**
+ * Holds if state `s` might be inside a backtracking repetition, i.e. its term or one of its ancestors is an `InfiniteRepetitionQuantifier`.
+ */
+ pragma[noinline]
+ private predicate stateInsideRepetition(State s) {
+ s.getRepr().getParent*() instanceof InfiniteRepetitionQuantifier
+ }
+
+ /**
+ * Holds if there exists a path in the NFA from `s` to a "pivot" state
+ * (from a `(pivot, succ)` pair that starts the search).
+ */
+ pragma[noinline]
+ private predicate canReachABeginning(State s) {
+ delta+(s) = any(State pivot | isStartLoops(pivot, _))
+ }
+
+ /**
+ * Holds if there exists a path in the NFA from `s` to a "succ" state
+ * (from a `(pivot, succ)` pair that starts the search).
+ */
+ pragma[noinline]
+ private predicate canReachATarget(State s) { delta+(s) = any(State succ | isStartLoops(_, succ)) }
+}
+
+/**
+ * Holds if `pivot` and `succ` are a pair of loops that could be the beginning of a quadratic blowup.
+ *
+ * Unlike in the paper, this predicate requires `pivot != succ`: the case `pivot = succ`
+ * causes exponential backtracking and is handled by the `js/redos` query.
+ */
+predicate isStartLoops(State pivot, State succ) {
+  (
+    pivot = mkMatch(any(RegExpRoot root))
+    or
+    pivot.getRepr() instanceof InfiniteRepetitionQuantifier
+  ) and
+  succ.getRepr() instanceof InfiniteRepetitionQuantifier and
+  succ = delta+(pivot) and
+  pivot != succ
+}
+
+/**
+ * Gets a state for which there exists a transition in the NFA from `s`.
+ */
+State delta(State s) { delta(s, _, result) }
+
+/**
+ * Holds if the product automaton has a transition from tuple `q` to tuple `r` whose
+ * three component transitions are labelled with `s1`, `s2`, and `s3`, respectively.
+ */
+pragma[noinline]
+predicate step(StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, StateTuple r) {
+  exists(State t1, State t2, State t3 |
+    r = MkStateTuple(t1, t2, t3) and step(q, s1, s2, s3, t1, t2, t3)
+  )
+}
+
+/**
+ * Holds if there are transitions from the components of `q` to `r1`, `r2`, and `r3`
+ * labelled with `s1`, `s2`, and `s3`, respectively, and the three labels share a character.
+ */
+pragma[noopt]
+predicate step(
+ StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, State r1, State r2, State r3
+) {
+ exists(State q1, State q2, State q3 | q.isTuple(q1, q2, q3) |
+ deltaClosed(q1, s1, r1) and
+ deltaClosed(q2, s2, r2) and
+ deltaClosed(q3, s3, r3) and
+ // use noopt to force the join on `getAThreewayIntersect` to happen last.
+ exists(getAThreewayIntersect(s1, s2, s3))
+ )
+}
+
+/**
+ * Gets a char that is matched by all the edges `s1`, `s2`, and `s3`.
+ *
+ * Only the smallest and largest shared char of one pair of edges are tried, so some combinations of edges that share a char can be missed.
+ */
+pragma[noinline]
+string getAThreewayIntersect(InputSymbol s1, InputSymbol s2, InputSymbol s3) {
+  result = [intersect(s2, s3), intersect(s1, s3)] and result = minAndMaxIntersect(s1, s2)
+  or
+  result = [intersect(s2, s3), intersect(s1, s2)] and result = minAndMaxIntersect(s1, s3)
+  or
+  result = [intersect(s1, s2), intersect(s1, s3)] and result = minAndMaxIntersect(s2, s3)
+}
+
+/**
+ * Gets the smallest or the largest character matched by both `a` and `b`.
+ * Restricting to these two keeps `getAThreewayIntersect` small.
+ */
+pragma[noinline]
+string minAndMaxIntersect(InputSymbol a, InputSymbol b) {
+  result = min(intersect(a, b)) or result = max(intersect(a, b))
+}
+
+private newtype TTrace =
+ Nil() or // the empty trace
+ Step(InputSymbol s1, InputSymbol s2, InputSymbol s3, TTrace t) {
+ exists(StateTuple p | // extend a trace `t` that already reaches some tuple `p`
+ isReachableFromStartTuple(_, _, p, t, _) and
+ step(p, s1, s2, s3, _)
+ )
+ or
+ exists(State pivot, State succ | isStartLoops(pivot, succ) | // first step out of a start tuple
+ t = Nil() and step(MkStateTuple(pivot, pivot, succ), s1, s2, s3, _)
+ )
+ }
+
+/**
+ * A list of triples of input symbols describing a path in the product automaton
+ * that begins at some start state.
+ */
+class Trace extends TTrace {
+  /**
+   * Gets a string representation of this trace, intended for debugging.
+   */
+  string toString() {
+    result = "Nil()" and this = Nil()
+    or
+    exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace tail | this = Step(s1, s2, s3, tail) |
+      result = "Step(" + s1 + ", " + s2 + ", " + s3 + ", " + tail + ")"
+    )
+  }
+}
+
+/**
+ * Gets a string for the trace `t`: one shared character per step, with the characters of earlier steps first.
+ */
+string concretise(Trace t) {
+  result = "" and t = Nil()
+  or
+  exists(InputSymbol a, InputSymbol b, InputSymbol c, Trace earlier | t = Step(a, b, c, earlier) |
+    result = concretise(earlier) + getAThreewayIntersect(a, b, c)
+  )
+}
+
+/**
+ * Holds if the product automaton has a transition from `r` to `q`.
+ * Note that the arguments are flipped, so this relation runs backwards.
+ */
+pragma[noinline]
+predicate tupleDeltaBackwards(StateTuple q, StateTuple r) { exists(InputSymbol a, InputSymbol b, InputSymbol c | step(r, a, b, c, q)) }
+
+/**
+ * Holds if `tuple` is an end state in our search, i.e. there is a pair
+ * of loops `(pivot, succ)` such that `tuple = (pivot, succ, succ)`.
+ */
+predicate isEndTuple(StateTuple tuple) { exists(State pivot, State succ | tuple = getAnEndTuple(pivot, succ)) }
+
+/**
+ * Gets the minimum length of a path from `r` to an end state `end`.
+ *
+ * The implementation searches backwards from the end-tuple.
+ * This approach was chosen because it is way more efficient if the first predicate given to `shortestDistances` is small.
+ * The `end` argument must always be an end state.
+ */
+int distBackFromEnd(StateTuple r, StateTuple end) =
+ shortestDistances(isEndTuple/1, tupleDeltaBackwards/2)(end, r, result)
+
+/**
+ * Holds if there exists a pair of repetitions `(pivot, succ)` in the regular expression such that:
+ * `tuple` is reachable from `(pivot, pivot, succ)` in the product automaton,
+ * and there is a distance of `dist` from `tuple` to the nearest end-tuple `(pivot, succ, succ)`,
+ * and a path from a start-state to `tuple` follows the transitions in `trace`.
+ */
+predicate isReachableFromStartTuple(State pivot, State succ, StateTuple tuple, Trace trace, int dist) {
+ // base case. The first step is inlined to start the search after all possible 1-steps, and not just the ones with the shortest path.
+ exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, State q1, State q2, State q3 |
+ isStartLoops(pivot, succ) and
+ step(MkStateTuple(pivot, pivot, succ), s1, s2, s3, tuple) and
+ tuple = MkStateTuple(q1, q2, q3) and // `q1`, `q2`, `q3` are not used further
+ trace = Step(s1, s2, s3, Nil()) and
+ dist = distBackFromEnd(tuple, MkStateTuple(pivot, succ, succ))
+ )
+ or
+ // recursive case: extend a trace reaching `p` by one step to `tuple`, which must be exactly one step closer to the end tuple than `p`
+ exists(StateTuple p, Trace v, InputSymbol s1, InputSymbol s2, InputSymbol s3 |
+ isReachableFromStartTuple(pivot, succ, p, v, dist + 1) and
+ dist = isReachableFromStartTupleHelper(pivot, succ, tuple, p, s1, s2, s3) and
+ trace = Step(s1, s2, s3, v)
+ )
+}
+
+/**
+ * Gets the distance from `r` to the end tuple `(pivot, succ, succ)`, where `r` is one step from `p`; helper for `isReachableFromStartTuple`.
+ */
+pragma[noinline]
+private int isReachableFromStartTupleHelper(
+  State pivot, State succ, StateTuple r, StateTuple p, InputSymbol s1, InputSymbol s2,
+  InputSymbol s3
+) {
+  step(p, s1, s2, s3, r) and
+  result = distBackFromEnd(r, MkStateTuple(pivot, succ, succ))
+}
+
+/**
+ * Gets the product-automaton tuple `(pivot, succ, succ)` for a pair of start loops `(pivot, succ)`.
+ */
+StateTuple getAnEndTuple(State pivot, State succ) {
+  result = MkStateTuple(pivot, succ, succ) and
+  isStartLoops(pivot, succ)
+}
+
+/**
+ * Holds if matching repetitions of `pump` can:
+ * 1) Transition from `pivot` back to `pivot`.
+ * 2) Transition from `pivot` to `succ`.
+ * 3) Transition from `succ` to `succ`.
+ *
+ * By theorem 3 of the paper linked at the top of this file, the regular
+ * expression then has polynomial backtracking - if a rejecting suffix exists.
+ *
+ * `SuperLinearReDoSConfiguration` uses this predicate, and the final results are
+ * available in the `hasReDoSResult` predicate.
+ */
+predicate isPumpable(State pivot, State succ, string pump) {
+  exists(StateTuple endTuple, Trace path | endTuple = getAnEndTuple(pivot, succ) |
+    // the pump is the string spelled out by a trace that reaches the end tuple
+    isReachableFromStartTuple(pivot, succ, endTuple, path, _) and
+    pump = concretise(path)
+  )
+}
+
+/**
+ * Holds if repetitions of `pump` at `t` will cause polynomial backtracking; `prev` is the term of a pivot loop paired with `t`'s state.
+ */
+predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
+  exists(State succ, State pivot |
+    prev = pivot.getRepr() and
+    isPumpable(pivot, succ, _) and
+    hasReDoSResult(t, pump, succ, prefixMsg)
+  )
+}
+
+/**
+ * Gets a message explaining why `term` can cause polynomial backtracking.
+ */
+string getReasonString(RegExpTerm term, string pump, string prefixMsg, RegExpTerm prev) {
+  result =
+    "Strings " + prefixMsg + "with many repetitions of '" + pump +
+      "' can start matching anywhere after the start of the preceeding " + prev and
+  polynimalReDoS(term, pump, prefixMsg, prev)
+}
+
+/**
+ * A term that may make a regular expression engine perform a number of
+ * match attempts that is polynomial in the length of the input.
+ */
+class PolynomialBackTrackingTerm extends InfiniteRepetitionQuantifier {
+  string reason;
+  string pump;
+  string prefixMsg;
+  RegExpTerm prev;
+
+  PolynomialBackTrackingTerm() {
+    // there might be many reasons for this term to have polynomial backtracking - we pick the shortest one.
+    reason = min(string msg | msg = getReasonString(this, _, _, _) | msg order by msg.length(), msg) and
+    reason = getReasonString(this, pump, prefixMsg, prev)
+  }
+
+  /**
+   * Holds if every successor of this term that cannot match the empty string is an end-of-line anchor.
+   */
+  predicate isAtEndLine() {
+    not exists(RegExpTerm succ |
+      succ = this.getSuccessor+() and not matchesEpsilon(succ) and not succ instanceof RegExpDollar
+    )
+  }
+
+  /**
+   * Gets the string whose repetition makes this regular expression backtrack polynomially.
+   */
+  string getPumpString() { result = pump }
+
+  /**
+   * Gets a message describing the prefix a string must start with for this term to backtrack polynomially.
+   */
+  string getPrefixMessage() { result = prefixMsg }
+
+  /**
+   * Gets a predecessor of `this` that also loops on the pump string, and thereby causes polynomial backtracking.
+   */
+  RegExpTerm getPreviousLoop() { result = prev }
+
+  /**
+   * Gets the message explaining the number of match attempts.
+   */
+  string getReason() { result = reason }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CodeInjectionCustomizations.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CodeInjectionCustomizations.qll
new file mode 100644
index 00000000000..4baceba42db
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CodeInjectionCustomizations.qll
@@ -0,0 +1,40 @@
+private import ruby
+private import codeql.ruby.DataFlow
+private import codeql.ruby.Concepts
+private import codeql.ruby.Frameworks
+private import codeql.ruby.dataflow.RemoteFlowSources
+private import codeql.ruby.dataflow.BarrierGuards
+
+/**
+ * Default sources, sinks and sanitizer guards used when looking for
+ * "Code injection" vulnerabilities, together with the abstract classes
+ * that can be extended to contribute additional ones.
+ */
+module CodeInjection {
+  /**
+   * A data-flow node from which "Code injection" flow may start.
+   */
+  abstract class Source extends DataFlow::Node { }
+
+  /**
+   * A data-flow node at which "Code injection" flow may end.
+   */
+  abstract class Sink extends DataFlow::Node { }
+
+  /**
+   * A barrier guard for "Code injection" flow.
+   */
+  abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
+
+  /**
+   * Remote user input, treated as a source.
+   */
+  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+
+  /**
+   * The code (`getCode()`) of some `CodeExecution`, treated as a sink.
+   */
+  class CodeExecutionAsSink extends Sink {
+    CodeExecutionAsSink() { exists(CodeExecution exec | this = exec.getCode()) }
+  }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CodeInjectionQuery.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CodeInjectionQuery.qll
new file mode 100644
index 00000000000..95e08a82dc3
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CodeInjectionQuery.qll
@@ -0,0 +1,29 @@
+/**
+ * Provides a taint-tracking configuration for detecting "Code injection" vulnerabilities.
+ *
+ * Note, for performance reasons: only import this file if `Configuration` is needed,
+ * otherwise `CodeInjectionCustomizations` should be imported instead.
+ */
+
+import codeql.ruby.DataFlow::DataFlow::PathGraph
+import codeql.ruby.DataFlow
+import codeql.ruby.TaintTracking
+import CodeInjectionCustomizations::CodeInjection
+import codeql.ruby.dataflow.BarrierGuards
+
+/**
+ * The taint-tracking configuration used to find "Code injection" vulnerabilities.
+ */
+class Configuration extends TaintTracking::Configuration {
+  Configuration() { this = "CodeInjection" }
+
+  override predicate isSource(DataFlow::Node source) { source instanceof Source }
+
+  override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
+
+  override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
+    guard instanceof StringConstArrayInclusionCall or
+    guard instanceof StringConstCompare or
+    guard instanceof SanitizerGuard
+  }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CommandInjectionCustomizations.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CommandInjectionCustomizations.qll
new file mode 100644
index 00000000000..b39455195be
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CommandInjectionCustomizations.qll
@@ -0,0 +1,54 @@
+/**
+ * Provides default sources, sinks and sanitizers for reasoning about
+ * command-injection vulnerabilities, as well as extension points for
+ * adding your own.
+ */
+
+private import codeql.ruby.DataFlow
+private import codeql.ruby.dataflow.RemoteFlowSources
+private import codeql.ruby.Concepts
+private import codeql.ruby.Frameworks
+private import codeql.ruby.ApiGraphs
+
+module CommandInjection {
+  /**
+   * A data-flow node from which command-injection flow may start.
+   */
+  abstract class Source extends DataFlow::Node {
+    /** Gets a human-readable description of the kind of source this is. */
+    abstract string getSourceType();
+  }
+
+  /**
+   * A data-flow node at which command-injection flow may end.
+   */
+  abstract class Sink extends DataFlow::Node { }
+
+  /**
+   * A data-flow node that acts as a sanitizer for command injection.
+   */
+  abstract class Sanitizer extends DataFlow::Node { }
+
+  /** Remote user input, treated as a command-injection source. */
+  class RemoteFlowSourceAsSource extends Source {
+    RemoteFlowSourceAsSource() { this instanceof RemoteFlowSource }
+
+    override string getSourceType() { "a user-provided value" = result }
+  }
+
+  /**
+   * An argument for which some `SystemCommandExecution` reports `isShellInterpreted`.
+   */
+  class SystemCommandExecutionSink extends Sink {
+    SystemCommandExecutionSink() { any(SystemCommandExecution cmd).isShellInterpreted(this) }
+  }
+
+  /**
+   * A call to `Shellwords.escape` or `Shellwords.shellescape`, treated as a sanitizer.
+   */
+  class ShellwordsEscapeAsSanitizer extends Sanitizer {
+    ShellwordsEscapeAsSanitizer() {
+      this = API::getTopLevelMember("Shellwords").getAMethodCall(["escape", "shellescape"])
+    }
+  }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CommandInjectionQuery.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CommandInjectionQuery.qll
new file mode 100644
index 00000000000..25460ad65df
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/CommandInjectionQuery.qll
@@ -0,0 +1,32 @@
+/**
+ * Provides a taint tracking configuration for reasoning about
+ * command-injection vulnerabilities (CWE-078).
+ *
+ * Note, for performance reasons: only import this file if
+ * `CommandInjection::Configuration` is needed, otherwise
+ * `CommandInjectionCustomizations` should be imported instead.
+ */
+
+import ruby
+import codeql.ruby.TaintTracking
+import CommandInjectionCustomizations::CommandInjection
+import codeql.ruby.DataFlow
+import codeql.ruby.dataflow.BarrierGuards
+
+/**
+ * The taint-tracking configuration used to find command-injection vulnerabilities.
+ */
+class Configuration extends TaintTracking::Configuration {
+  Configuration() { this = "CommandInjection" }
+
+  override predicate isSource(DataFlow::Node source) { source instanceof Source }
+
+  override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
+
+  override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
+
+  override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
+    guard instanceof StringConstArrayInclusionCall or
+    guard instanceof StringConstCompare
+  }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/ReflectedXSSQuery.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/ReflectedXSSQuery.qll
new file mode 100644
index 00000000000..60e152a06fc
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/ReflectedXSSQuery.qll
@@ -0,0 +1,39 @@
+/**
+ * Provides a taint-tracking configuration for detecting "reflected server-side cross-site scripting" vulnerabilities.
+ *
+ * Note, for performance reasons: only import this file if
+ * `ReflectedXSS::Configuration` is needed, otherwise
+ * `XSS::ReflectedXSS` should be imported instead.
+ */
+
+private import ruby
+import codeql.ruby.DataFlow
+import codeql.ruby.TaintTracking
+
+/**
+ * Provides a taint-tracking configuration for detecting "reflected server-side cross-site scripting" vulnerabilities, and re-exports `XSS::ReflectedXSS`.
+ */
+module ReflectedXSS {
+ import XSS::ReflectedXSS
+
+ /**
+ * A taint-tracking configuration for detecting "reflected server-side cross-site scripting" vulnerabilities; every predicate delegates to names imported from `XSS::ReflectedXSS`.
+ */
+ class Configuration extends TaintTracking::Configuration {
+ Configuration() { this = "ReflectedXSS" }
+
+ override predicate isSource(DataFlow::Node source) { source instanceof Source }
+
+ override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
+
+ override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
+
+ override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
+ guard instanceof SanitizerGuard
+ }
+
+ override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
+ isAdditionalXSSTaintStep(node1, node2)
+ }
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/StoredXSSQuery.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/StoredXSSQuery.qll
new file mode 100644
index 00000000000..2a089050e5a
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/StoredXSSQuery.qll
@@ -0,0 +1,40 @@
+/**
+ * Provides a taint-tracking configuration for reasoning about stored
+ * cross-site scripting vulnerabilities.
+ *
+ * Note, for performance reasons: only import this file if
+ * `StoredXSS::Configuration` is needed, otherwise
+ * `XSS::StoredXSS` should be imported instead.
+ */
+
+import ruby
+import codeql.ruby.DataFlow
+import codeql.ruby.TaintTracking
+
+module StoredXSS {
+  import XSS::StoredXSS
+
+  /**
+   * The taint-tracking configuration used to find stored XSS vulnerabilities.
+   */
+  class Configuration extends TaintTracking::Configuration {
+    Configuration() { this = "StoredXss" }
+
+    override predicate isSource(DataFlow::Node source) { source instanceof Source }
+
+    override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
+
+    override predicate isSanitizer(DataFlow::Node node) {
+      node instanceof Sanitizer or
+      super.isSanitizer(node)
+    }
+
+    override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
+      guard instanceof SanitizerGuard
+    }
+
+    override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
+      isAdditionalXSSTaintStep(node1, node2)
+    }
+  }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UnsafeDeserializationCustomizations.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UnsafeDeserializationCustomizations.qll
new file mode 100644
index 00000000000..0e39e053b2a
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UnsafeDeserializationCustomizations.qll
@@ -0,0 +1,190 @@
+/**
+ * Provides default sources, sinks and sanitizers for reasoning about unsafe
+ * deserialization, as well as extension points for adding your own.
+ */
+
+private import ruby
+private import codeql.ruby.ApiGraphs
+private import codeql.ruby.CFG
+private import codeql.ruby.DataFlow
+private import codeql.ruby.dataflow.RemoteFlowSources
+
+module UnsafeDeserialization {
+ /**
+ * A data flow source for unsafe deserialization vulnerabilities.
+ */
+ abstract class Source extends DataFlow::Node { }
+
+ /**
+ * A data flow sink for unsafe deserialization vulnerabilities.
+ */
+ abstract class Sink extends DataFlow::Node { }
+
+ /**
+ * A sanitizer for unsafe deserialization vulnerabilities.
+ */
+ abstract class Sanitizer extends DataFlow::Node { }
+
+ /**
+ * Additional taint steps for "unsafe deserialization" vulnerabilities; currently only Base64 decoding (`base64DecodeTaintStep`).
+ */
+ predicate isAdditionalTaintStep(DataFlow::Node fromNode, DataFlow::Node toNode) {
+ base64DecodeTaintStep(fromNode, toNode)
+ }
+
+ /** A source of remote user input, considered as a flow source for unsafe deserialization. */
+ class RemoteFlowSourceAsSource extends Source {
+ RemoteFlowSourceAsSource() { this instanceof RemoteFlowSource }
+ }
+
+ /**
+ * The first argument of a call to `Marshal.load` or `Marshal.restore`,
+ * considered a sink for unsafe deserialization.
+ */
+ class MarshalLoadOrRestoreArgument extends Sink {
+ MarshalLoadOrRestoreArgument() {
+ this = API::getTopLevelMember("Marshal").getAMethodCall(["load", "restore"]).getArgument(0)
+ }
+ }
+
+ /**
+ * The first argument of a call to `YAML.load`, considered a sink for unsafe
+ * deserialization.
+ */
+ class YamlLoadArgument extends Sink {
+ YamlLoadArgument() {
+ this = API::getTopLevelMember("YAML").getAMethodCall("load").getArgument(0)
+ }
+ }
+
+ /**
+ * The first argument of a call to `JSON.load` or `JSON.restore`, considered
+ * a sink for unsafe deserialization.
+ */
+ class JsonLoadArgument extends Sink {
+ JsonLoadArgument() {
+ this = API::getTopLevelMember("JSON").getAMethodCall(["load", "restore"]).getArgument(0)
+ }
+ }
+
+ private string getAKnownOjModeName(boolean isSafe) { // Gets the name of a known `Oj` mode; `isSafe` is false only for "object".
+ result = ["compat", "custom", "json", "null", "rails", "strict", "wab"] and isSafe = true
+ or
+ result = "object" and isSafe = false
+ }
+
+ private predicate isOjModePair(Pair p, string modeValue) { // Holds if `p` has key `mode` and a symbol literal with text `modeValue` flows to its value.
+ p.getKey().getValueText() = "mode" and
+ exists(DataFlow::LocalSourceNode symbolLiteral, DataFlow::Node value |
+ symbolLiteral.asExpr().getExpr().(SymbolLiteral).getValueText() = modeValue and
+ symbolLiteral.flowsTo(value) and
+ value.asExpr().getExpr() = p.getValue()
+ )
+ }
+
+ /**
+ * A node reached by a hash literal that contains a `mode` key whose value comes from a symbol literal.
+ */
+ private class OjOptionsHashWithModeKey extends DataFlow::Node {
+ private string modeValue;
+
+ OjOptionsHashWithModeKey() {
+ exists(DataFlow::LocalSourceNode options |
+ options.flowsTo(this) and
+ isOjModePair(options.asExpr().getExpr().(HashLiteral).getAKeyValuePair(), modeValue)
+ )
+ }
+
+ /**
+ * Holds if this hash node contains a `:mode` key whose value is one known
+ * to be `isSafe` with untrusted data.
+ */
+ predicate hasKnownMode(boolean isSafe) { modeValue = getAKnownOjModeName(isSafe) }
+
+ /**
+ * Holds if this hash node contains a `:mode` key whose value is one of the
+ * `Oj` modes known to be safe to use with untrusted data.
+ */
+ predicate hasSafeMode() { this.hasKnownMode(true) }
+ }
+
+ /**
+ * A call node that sets `Oj.default_options`.
+ *
+ * ```rb
+ * Oj.default_options = { allow_blank: true, mode: :compat }
+ * ```
+ */
+ private class SetOjDefaultOptionsCall extends DataFlow::CallNode {
+ SetOjDefaultOptionsCall() {
+ this = API::getTopLevelMember("Oj").getAMethodCall("default_options=")
+ }
+
+ /**
+ * Gets the value being assigned to `Oj.default_options`, i.e. the right-hand side of the assignment that is argument 0.
+ */
+ DataFlow::Node getValue() {
+ result.asExpr() =
+ this.getArgument(0).asExpr().(CfgNodes::ExprNodes::AssignExprCfgNode).getRhs()
+ }
+ }
+
+ /**
+ * A call to `Oj.load`.
+ */
+ private class OjLoadCall extends DataFlow::CallNode {
+ OjLoadCall() { this = API::getTopLevelMember("Oj").getAMethodCall("load") }
+
+ /**
+ * Holds if this call to `Oj.load` includes, at argument position 1 or later, an options hash
+ * or a bare `mode:` pair that sets the mode to one that is known to be `isSafe`.
+ */
+ predicate hasExplicitKnownMode(boolean isSafe) {
+ exists(DataFlow::Node arg, int i | i >= 1 and arg = this.getArgument(i) |
+ arg.(OjOptionsHashWithModeKey).hasKnownMode(isSafe)
+ or
+ isOjModePair(arg.asExpr().getExpr(), getAKnownOjModeName(isSafe))
+ )
+ }
+ }
+
+ /**
+ * The first argument of a call to `Oj.load` where the mode is `:object` (which is
+ * the default), considered a sink for unsafe deserialization.
+ */
+ class UnsafeOjLoadArgument extends Sink {
+ UnsafeOjLoadArgument() {
+ exists(OjLoadCall ojLoad |
+ this = ojLoad.getArgument(0) and
+ // Exclude calls that explicitly pass a safe mode option.
+ not ojLoad.hasExplicitKnownMode(true) and
+ (
+ // Sinks to include:
+ // - Calls with an explicit, unsafe mode option.
+ ojLoad.hasExplicitKnownMode(false)
+ or
+ // - Calls with no explicit mode option, unless there exists a call
+ // anywhere to set the default options to a known safe mode.
+ not ojLoad.hasExplicitKnownMode(_) and
+ not exists(SetOjDefaultOptionsCall setOpts |
+ setOpts.getValue().(OjOptionsHashWithModeKey).hasSafeMode()
+ )
+ )
+ )
+ }
+ }
+
+ /**
+ * `Base64.decode64`, `Base64.strict_decode64` and `Base64.urlsafe_decode64` propagate taint from their first argument to their return value.
+ */
+ predicate base64DecodeTaintStep(DataFlow::Node fromNode, DataFlow::Node toNode) {
+ exists(DataFlow::CallNode callNode |
+ callNode =
+ API::getTopLevelMember("Base64")
+ .getAMethodCall(["decode64", "strict_decode64", "urlsafe_decode64"])
+ |
+ fromNode = callNode.getArgument(0) and
+ toNode = callNode
+ )
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UnsafeDeserializationQuery.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UnsafeDeserializationQuery.qll
new file mode 100644
index 00000000000..d08b73da936
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UnsafeDeserializationQuery.qll
@@ -0,0 +1,34 @@
+/**
+ * Provides a taint-tracking configuration for reasoning about unsafe deserialization.
+ *
+ * Note, for performance reasons: only import this file if
+ * `UnsafeDeserialization::Configuration` is needed, otherwise
+ * `UnsafeDeserializationCustomizations` should be imported instead.
+ */
+
+private import ruby
+private import codeql.ruby.DataFlow
+private import codeql.ruby.TaintTracking
+import UnsafeDeserializationCustomizations
+
+/**
+ * The taint-tracking configuration used to find unsafe deserialization.
+ */
+class Configuration extends TaintTracking::Configuration {
+  Configuration() { this = "UnsafeDeserialization" }
+
+  override predicate isSource(DataFlow::Node source) {
+    source instanceof UnsafeDeserialization::Source
+  }
+
+  override predicate isSink(DataFlow::Node sink) { sink instanceof UnsafeDeserialization::Sink }
+
+  override predicate isSanitizer(DataFlow::Node node) {
+    node instanceof UnsafeDeserialization::Sanitizer or
+    super.isSanitizer(node)
+  }
+
+  override predicate isAdditionalTaintStep(DataFlow::Node fromNode, DataFlow::Node toNode) {
+    UnsafeDeserialization::isAdditionalTaintStep(fromNode, toNode)
+  }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UrlRedirectCustomizations.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UrlRedirectCustomizations.qll
new file mode 100644
index 00000000000..caaf2264018
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UrlRedirectCustomizations.qll
@@ -0,0 +1,127 @@
+/**
+ * Provides default sources, sinks and sanitizers for detecting "URL
+ * redirection" vulnerabilities, as well as extension points for adding your
+ * own.
+ */
+
+private import ruby
+private import codeql.ruby.DataFlow
+private import codeql.ruby.Concepts
+private import codeql.ruby.dataflow.RemoteFlowSources
+private import codeql.ruby.dataflow.BarrierGuards
+
+/**
+ * Provides default sources, sinks and sanitizers for detecting
+ * "URL redirection" vulnerabilities, as well as extension points for
+ * adding your own.
+ */
+module UrlRedirect {
+ /**
+ * A data flow source for "URL redirection" vulnerabilities.
+ */
+ abstract class Source extends DataFlow::Node { }
+
+ /**
+ * A data flow sink for "URL redirection" vulnerabilities.
+ */
+ abstract class Sink extends DataFlow::Node { }
+
+ /**
+ * A sanitizer for "URL redirection" vulnerabilities.
+ */
+ abstract class Sanitizer extends DataFlow::Node { }
+
+ /**
+ * A sanitizer guard for "URL redirection" vulnerabilities.
+ */
+ abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
+
+ /**
+ * Additional taint steps for "URL redirection" vulnerabilities; currently only `taintStepViaMethodCallReturnValue`.
+ */
+ predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
+ taintStepViaMethodCallReturnValue(node1, node2)
+ }
+
+ /**
+ * A source of remote user input, considered as a flow source.
+ */
+ class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+
+ /**
+ * The redirect location of an HTTP redirect response, considered as a flow sink.
+ */
+ class RedirectLocationAsSink extends Sink {
+ RedirectLocationAsSink() {
+ exists(HTTP::Server::HttpRedirectResponse e |
+ this = e.getRedirectLocation() and
+ // As a rough heuristic, assume that methods whose names contain `create`, `update` or `destroy` are handlers
+ // for POST/PUT/PATCH/DELETE requests, which are less exposed because browsers will not initiate them from clicking a link.
+ not this.getEnclosingCallable()
+ .asCallable()
+ .(Method)
+ .getName()
+ .regexpMatch(".*(create|update|destroy).*")
+ )
+ }
+ }
+
+ /**
+ * A comparison with a constant string, considered as a sanitizer-guard.
+ */
+ class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
+
+ /**
+ * Holds if `node2` is a call to one of a few taint-propagating methods (see `actionControllerTaintedMethod`
+ * and `hashTaintedMethod`) and `node1` is the receiver of that call.
+ * TODO: use ApiGraphs or something to restrict these method calls to the correct receiver, rather
+ * than matching on method name alone.
+ */
+ predicate taintStepViaMethodCallReturnValue(DataFlow::Node node1, DataFlow::Node node2) {
+ exists(MethodCall m | m = node2.asExpr().getExpr() |
+ m.getReceiver() = node1.asExpr().getExpr() and
+ (actionControllerTaintedMethod(m) or hashTaintedMethod(m))
+ )
+ }
+
+ /**
+ * String interpolation is considered safe, provided the string is prefixed by a non-tainted value.
+ * In most cases this will prevent the tainted value from controlling e.g. the host of the URL.
+ *
+ * For example:
+ *
+ * ```ruby
+ * redirect_to "/users/#{params[:key]}" # safe
+ * redirect_to "#{params[:key]}/users" # unsafe
+ * ```
+ *
+ * There are prefixed interpolations that are not safe, e.g.
+ *
+ * ```ruby
+ * redirect_to "foo#{params[:key]}/users" # => "foo-malicious-site.com/users"
+ * ```
+ *
+ * We currently don't catch these cases.
+ */
+ class StringInterpolationAsSanitizer extends Sanitizer {
+ StringInterpolationAsSanitizer() {
+ exists(StringlikeLiteral str, int n | str.getComponent(n) = this.asExpr().getExpr() and n > 0) // n > 0: any component other than the one at index 0
+ }
+ }
+
+ /**
+ * These methods return a new `ActionController::Parameters` or a `Hash` containing a subset of
+ * the original values. This may still contain user input, so the results are tainted.
+ * TODO: flesh this out to cover the whole API.
+ */
+ predicate actionControllerTaintedMethod(MethodCall m) {
+ m.getMethodName() in ["to_unsafe_hash", "to_unsafe_h", "permit", "require"]
+ }
+
+ /**
+ * These `Hash` methods preserve taint because they return a new hash which may still contain keys
+ * with user input. Calls are matched on method name only.
+ * TODO: flesh this out to cover the whole API.
+ */
+ predicate hashTaintedMethod(MethodCall m) { m.getMethodName() in ["merge", "fetch"] }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UrlRedirectQuery.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UrlRedirectQuery.qll
new file mode 100644
index 00000000000..5a984d1fd6e
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/UrlRedirectQuery.qll
@@ -0,0 +1,34 @@
+/**
+ * Provides a taint-tracking configuration for detecting "URL redirection" vulnerabilities.
+ *
+ * Note, for performance reasons: only import this file if `Configuration` is needed,
+ * otherwise `UrlRedirectCustomizations` should be imported instead.
+ */
+
+private import ruby
+import codeql.ruby.DataFlow::DataFlow::PathGraph
+import codeql.ruby.DataFlow
+import codeql.ruby.TaintTracking
+import UrlRedirectCustomizations
+import UrlRedirectCustomizations::UrlRedirect
+
+/**
+ * A taint-tracking configuration for detecting "URL redirection" vulnerabilities, built from the classes and predicate in `UrlRedirect`.
+ */
+class Configuration extends TaintTracking::Configuration {
+ Configuration() { this = "UrlRedirect" }
+
+ override predicate isSource(DataFlow::Node source) { source instanceof Source }
+
+ override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
+
+ override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
+
+ override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
+ guard instanceof SanitizerGuard
+ }
+
+ override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
+ UrlRedirect::isAdditionalTaintStep(node1, node2) // keep the `UrlRedirect::` qualifier: this calls the module-level predicate, not this member
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/XSS.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/XSS.qll
new file mode 100644
index 00000000000..8f8f15b630a
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/security/XSS.qll
@@ -0,0 +1,369 @@
+/**
+ * Provides classes and predicates used by the XSS queries.
+ */
+
+private import ruby
+private import codeql.ruby.DataFlow
+private import codeql.ruby.DataFlow2
+private import codeql.ruby.CFG
+private import codeql.ruby.Concepts
+private import codeql.ruby.Frameworks
+private import codeql.ruby.frameworks.ActionController
+private import codeql.ruby.frameworks.ActionView
+private import codeql.ruby.dataflow.RemoteFlowSources
+private import codeql.ruby.dataflow.BarrierGuards
+private import codeql.ruby.dataflow.internal.DataFlowDispatch
+
+/**
+ * Provides default sources, sinks and sanitizers for detecting
+ * "server-side cross-site scripting" vulnerabilities, as well as
+ * extension points for adding your own.
+ */
+private module Shared {
+ /**
+ * A data flow source for "server-side cross-site scripting" vulnerabilities.
+ */
+ abstract class Source extends DataFlow::Node { }
+
+ /**
+ * A data flow sink for "server-side cross-site scripting" vulnerabilities.
+ */
+ abstract class Sink extends DataFlow::Node { }
+
+ /**
+ * A sanitizer for "server-side cross-site scripting" vulnerabilities.
+ */
+ abstract class Sanitizer extends DataFlow::Node { }
+
+ /**
+ * A sanitizer guard for "server-side cross-site scripting" vulnerabilities.
+ */
+ abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
+
+ private class ErbOutputMethodCallArgumentNode extends DataFlow::Node { // an argument of the method call that is the terminal statement of an ERB output directive
+ private MethodCall call;
+
+ ErbOutputMethodCallArgumentNode() {
+ exists(ErbOutputDirective d |
+ call = d.getTerminalStmt() and
+ this.asExpr().getExpr() = call.getAnArgument()
+ )
+ }
+
+ MethodCall getCall() { result = call }
+ }
+
+ /**
+ * The receiver of an `html_safe` call that is the terminal statement of an ERB output directive;
+ * `html_safe` marks the output as not requiring HTML escaping. Considered as a flow sink.
+ */
+ class HtmlSafeCallAsSink extends Sink {
+ HtmlSafeCallAsSink() {
+ exists(HtmlSafeCall c, ErbOutputDirective d |
+ this.asExpr().getExpr() = c.getReceiver() and
+ c = d.getTerminalStmt()
+ )
+ }
+ }
+
+ /**
+ * An argument to a call to the `raw` method, considered as a flow sink.
+ */
+ class RawCallArgumentAsSink extends Sink, ErbOutputMethodCallArgumentNode {
+ RawCallArgumentAsSink() { this.getCall() instanceof RawCall }
+ }
+
+ /**
+ * The path argument of a call to the `link_to` method, which does not expect
+ * unsanitized user-input, considered as a flow sink.
+ */
+ class LinkToCallArgumentAsSink extends Sink, ErbOutputMethodCallArgumentNode {
+ LinkToCallArgumentAsSink() {
+ this.asExpr().getExpr() = this.getCall().(LinkToCall).getPathArgument()
+ }
+ }
+
+ /**
+ * The output of an HTML escaping, considered as a sanitizer.
+ */
+ class HtmlEscapingAsSanitizer extends Sanitizer {
+ HtmlEscapingAsSanitizer() { this = any(HtmlEscaping esc).getOutput() }
+ }
+
+ /**
+ * A comparison with a constant string, considered as a sanitizer-guard.
+ */
+ class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
+
+ /**
+ * An inclusion check against an array of constant strings, considered as a sanitizer-guard.
+ */
+ class StringConstArrayInclusionCallAsSanitizerGuard extends SanitizerGuard,
+ StringConstArrayInclusionCall { }
+
+ /**
+ * An `InstanceVariableWriteAccessCfgNode` that is not succeeded (locally) by another
+ * write to that variable.
+ */
+ private class FinalInstanceVarWrite extends CfgNodes::ExprNodes::InstanceVariableWriteAccessCfgNode {
+ private InstanceVariable var;
+
+ FinalInstanceVarWrite() {
+ var = this.getExpr().getVariable() and
+ not exists(CfgNodes::ExprNodes::InstanceVariableWriteAccessCfgNode succWrite |
+ succWrite.getExpr().getVariable() = var
+ |
+ succWrite = this.getASuccessor+()
+ )
+ }
+
+ InstanceVariable getVariable() { result = var }
+
+ AssignExpr getAnAssignExpr() { result.getLeftOperand() = this.getExpr() }
+ }
+
+ /**
+ * Holds if `call` is a method call located in ERB file `erb`, targeting a method
+ * named `name`.
+ */
+ pragma[noinline]
+ private predicate isMethodCall(MethodCall call, string name, ErbFile erb) {
+ name = call.getMethodName() and
+ erb = call.getLocation().getFile()
+ }
+
+ /**
+ * Holds if some render call whose template file is `erb` passes `value` for
+ * `hashKey` in its `locals` argument.
+ */
+ pragma[noinline]
+ private predicate renderCallLocals(string hashKey, Expr value, ErbFile erb) {
+ exists(RenderCall call, Pair kvPair |
+ call.getLocals().getAKeyValuePair() = kvPair and
+ kvPair.getValue() = value and
+ kvPair.getKey().getValueText() = hashKey and
+ call.getTemplateFile() = erb
+ )
+ }
+
+ pragma[noinline]
+ private predicate isFlowFromLocals0(
+ CfgNodes::ExprNodes::ElementReferenceCfgNode refNode, string hashKey, ErbFile erb
+ ) { // holds if `refNode`, in file `erb`, indexes a `local_assigns` call with a string-like literal whose text is `hashKey`
+ exists(DataFlow::Node argNode, CfgNodes::ExprNodes::StringlikeLiteralCfgNode strNode |
+ argNode.asExpr() = refNode.getArgument(0) and
+ refNode.getReceiver().getExpr().(MethodCall).getMethodName() = "local_assigns" and
+ argNode.getALocalSource() = DataFlow::exprNode(strNode) and
+ strNode.getExpr().getValueText() = hashKey and
+ erb = refNode.getFile()
+ )
+ }
+
+ private predicate isFlowFromLocals(DataFlow::Node node1, DataFlow::Node node2) {
+ exists(string hashKey, ErbFile erb |
+ // node1 is a `locals` argument to a render call...
+ renderCallLocals(hashKey, node1.asExpr().getExpr(), erb)
+ |
+ // node2 is an element reference against `local_assigns`
+ isFlowFromLocals0(node2.asExpr(), hashKey, erb)
+ or
+ // ...node2 is a "method call" to a "method" with `hashKey` as its name
+ // TODO: This may be a variable read in reality that we interpret as a method call
+ isMethodCall(node2.asExpr().getExpr(), hashKey, erb)
+ )
+ }
+
+ /**
+ * Holds if `action`, whose default template file is `erb`, contains a final assignment
+ * (see `FinalInstanceVarWrite`) of `value` to an instance variable named `name`.
+ */
+ pragma[noinline]
+ private predicate actionAssigns(
+ ActionControllerActionMethod action, string name, Expr value, ErbFile erb
+ ) {
+ exists(AssignExpr ae, FinalInstanceVarWrite controllerVarWrite |
+ action.getDefaultTemplateFile() = erb and
+ ae.getParent+() = action and
+ ae = controllerVarWrite.getAnAssignExpr() and
+ name = controllerVarWrite.getVariable().getName() and
+ value = ae.getRightOperand()
+ )
+ }
+
+ pragma[noinline]
+ private predicate isVariableReadAccess(VariableReadAccess viewVarRead, string name, ErbFile erb) {
+ erb = viewVarRead.getLocation().getFile() and
+ viewVarRead.getVariable().getName() = name
+ }
+
+ private predicate isFlowFromControllerInstanceVariable(DataFlow::Node node1, DataFlow::Node node2) {
+ // instance variables in the controller
+ exists(ActionControllerActionMethod action, string name, ErbFile template |
+ // match read to write on variable name
+ actionAssigns(action, name, node1.asExpr().getExpr(), template) and
+ // propagate taint from assignment RHS expr to variable read access in view
+ isVariableReadAccess(node2.asExpr().getExpr(), name, template)
+ )
+ }
+
+ /**
+ * Holds if `helperMethod` is a helper method named `name` whose controller class
+ * is the controller class associated with ERB file `erb`.
+ */
+ pragma[noinline]
+ private predicate isHelperMethod(
+ ActionControllerHelperMethod helperMethod, string name, ErbFile erb
+ ) {
+ helperMethod.getName() = name and
+ helperMethod.getControllerClass() = getAssociatedControllerClass(erb)
+ }
+
+ private predicate isFlowIntoHelperMethod(DataFlow::Node node1, DataFlow::Node node2) {
+ // flow from template into controller helper method
+ exists(
+ ErbFile template, ActionControllerHelperMethod helperMethod, string name,
+ CfgNodes::ExprNodes::MethodCallCfgNode helperMethodCall, int argIdx
+ |
+ isHelperMethod(helperMethod, name, template) and
+ isMethodCall(helperMethodCall.getExpr(), name, template) and
+ helperMethodCall.getArgument(pragma[only_bind_into](argIdx)) = node1.asExpr() and
+ helperMethod.getParameter(pragma[only_bind_into](argIdx)) = node2.asExpr().getExpr()
+ )
+ }
+
+ private predicate isFlowFromHelperMethod(DataFlow::Node node1, DataFlow::Node node2) {
+ // flow out of controller helper method into template
+ exists(ErbFile template, ActionControllerHelperMethod helperMethod, string name |
+ // `node1` is an expr node that may be returned by the helper method
+ exprNodeReturnedFrom(node1, helperMethod) and
+ // `node2` is a call to the helper method
+ isHelperMethod(helperMethod, name, template) and
+ isMethodCall(node2.asExpr().getExpr(), name, template)
+ )
+ }
+
+ /**
+ * An additional step that preserves dataflow in the context of XSS.
+ */
+ predicate isAdditionalXSSFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
+ isFlowFromLocals(node1, node2)
+ or
+ isFlowFromControllerInstanceVariable(node1, node2)
+ or
+ isFlowIntoHelperMethod(node1, node2)
+ or
+ isFlowFromHelperMethod(node1, node2)
+ }
+}
+
+/**
+ * Provides default sources, sinks and sanitizers for detecting
+ * "reflected cross-site scripting" vulnerabilities, as well as
+ * extension points for adding your own.
+ */
+module ReflectedXSS {
+ /** A data flow source for reflected XSS vulnerabilities. */
+ abstract class Source extends Shared::Source { }
+
+ /** A data flow sink for reflected XSS vulnerabilities. */
+ abstract class Sink extends Shared::Sink { }
+
+ /** A sanitizer for reflected XSS vulnerabilities. */
+ abstract class Sanitizer extends Shared::Sanitizer { }
+
+ /** A sanitizer guard for reflected XSS vulnerabilities. */
+ abstract class SanitizerGuard extends Shared::SanitizerGuard { }
+
+ // Consider all arbitrary XSS sinks to be reflected XSS sinks
+ private class AnySink extends Sink instanceof Shared::Sink { }
+
+ // Consider all arbitrary XSS sanitizers to be reflected XSS sanitizers
+ private class AnySanitizer extends Sanitizer instanceof Shared::Sanitizer { }
+
+ // Consider all arbitrary XSS sanitizer guards to be reflected XSS sanitizer guards
+ private class AnySanitizerGuard extends SanitizerGuard instanceof Shared::SanitizerGuard {
+ override predicate checks(CfgNode expr, boolean branch) {
+ Shared::SanitizerGuard.super.checks(expr, branch)
+ }
+ }
+
+ /**
+ * An additional step that preserves dataflow in the context of reflected XSS; an alias for `Shared::isAdditionalXSSFlowStep`.
+ */
+ predicate isAdditionalXSSTaintStep = Shared::isAdditionalXSSFlowStep/2;
+
+ /**
+ * A source of remote user input, considered as a flow source.
+ */
+ class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+}
+
+private module OrmTracking {
+ /**
+ * A data flow configuration to track flow from finder calls to field accesses.
+ */
+ class Configuration extends DataFlow2::Configuration {
+ Configuration() { this = "OrmTracking" }
+
+ override predicate isSource(DataFlow2::Node source) { source instanceof OrmInstantiation }
+
+ // Select any call node and narrow down later
+ override predicate isSink(DataFlow2::Node sink) { sink instanceof DataFlow2::CallNode }
+
+ override predicate isAdditionalFlowStep(DataFlow2::Node node1, DataFlow2::Node node2) {
+ Shared::isAdditionalXSSFlowStep(node1, node2)
+ or
+ // Propagate flow through arbitrary method calls
+ node2.(DataFlow2::CallNode).getReceiver() = node1
+ or
+ // Propagate flow through "or" expressions `or`/`||`
+ node2.asExpr().getExpr().(LogicalOrExpr).getAnOperand() = node1.asExpr().getExpr()
+ }
+ }
+}
+
+module StoredXSS {
+ /** A data flow source for stored XSS vulnerabilities. */
+ abstract class Source extends Shared::Source { }
+
+ /** A data flow sink for stored XSS vulnerabilities. */
+ abstract class Sink extends Shared::Sink { }
+
+ /** A sanitizer for stored XSS vulnerabilities. */
+ abstract class Sanitizer extends Shared::Sanitizer { }
+
+ /** A sanitizer guard for stored XSS vulnerabilities. */
+ abstract class SanitizerGuard extends Shared::SanitizerGuard { }
+
+ // Consider all arbitrary XSS sinks to be stored XSS sinks
+ private class AnySink extends Sink instanceof Shared::Sink { }
+
+ // Consider all arbitrary XSS sanitizers to be stored XSS sanitizers
+ private class AnySanitizer extends Sanitizer instanceof Shared::Sanitizer { }
+
+ // Consider all arbitrary XSS sanitizer guards to be stored XSS sanitizer guards
+ private class AnySanitizerGuard extends SanitizerGuard instanceof Shared::SanitizerGuard {
+ override predicate checks(CfgNode expr, boolean branch) {
+ Shared::SanitizerGuard.super.checks(expr, branch)
+ }
+ }
+
+ /**
+ * An additional step that preserves dataflow in the context of stored XSS.
+ */
+ predicate isAdditionalXSSTaintStep = Shared::isAdditionalXSSFlowStep/2;
+
+ private class OrmFieldAsSource extends Source instanceof DataFlow2::CallNode {
+ OrmFieldAsSource() {
+ exists(OrmTracking::Configuration subConfig, DataFlow2::CallNode subSrc, MethodCall call |
+ subConfig.hasFlow(subSrc, this) and
+ call = this.asExpr().getExpr() and
+ subSrc.(OrmInstantiation).methodCallMayAccessField(call.getMethodName())
+ )
+ }
+ }
+
+ /** A file read, considered as a flow source for stored XSS. */
+ private class FileSystemReadAccessAsSource extends Source instanceof FileSystemReadAccess { }
+ // TODO: Consider `FileNameSource` flowing to script tag `src` attributes and similar
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll
new file mode 100644
index 00000000000..6ced6a8206e
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll
@@ -0,0 +1,470 @@
+/** Step Summaries and Type Tracking */
+
+private import TypeTrackerSpecific
+
+/**
+ * Any string that may appear as the name of a piece of content. This will usually include things like:
+ * - Attribute names (in Python)
+ * - Property names (in JavaScript)
+ *
+ * In general, this can also be used to model things like stores to specific list indices. To ensure
+ * correctness, it is important that
+ *
+ * - different types of content do not have overlapping names, and
+ * - the empty string `""` is not a valid piece of content, as it is used to indicate the absence of
+ * content instead.
+ */
+class ContentName extends string {
+ ContentName() { this = getPossibleContentName() }
+}
+
+/** Either a content name, or the empty string (representing no content). */
+class OptionalContentName extends string {
+ OptionalContentName() { this instanceof ContentName or this = "" }
+}
+
+cached
+private module Cached {
+ /**
+ * A description of a step on an inter-procedural data flow path.
+ */
+ cached
+ newtype TStepSummary =
+ LevelStep() or
+ CallStep() or
+ ReturnStep() or
+ StoreStep(ContentName content) or
+ LoadStep(ContentName content)
+
+ /** Gets the summary resulting from appending `step` to type-tracking summary `tt`. */
+ cached
+ TypeTracker append(TypeTracker tt, StepSummary step) {
+ exists(Boolean hasCall, OptionalContentName content | tt = MkTypeTracker(hasCall, content) |
+ step = LevelStep() and result = tt
+ or
+ step = CallStep() and result = MkTypeTracker(true, content)
+ or
+ step = ReturnStep() and hasCall = false and result = tt
+ or
+ step = LoadStep(content) and result = MkTypeTracker(hasCall, "")
+ or
+ exists(string p | step = StoreStep(p) and content = "" and result = MkTypeTracker(hasCall, p))
+ )
+ }
+
+ /**
+ * Gets the summary that corresponds to having taken a forwards
+ * heap and/or intra-procedural step from `nodeFrom` to `nodeTo`.
+ *
+ * Steps contained in this predicate should _not_ depend on the call graph.
+ */
+ cached
+ predicate stepNoCall(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
+ exists(Node mid | nodeFrom.flowsTo(mid) and smallstepNoCall(mid, nodeTo, summary))
+ }
+
+ /**
+ * Gets the summary that corresponds to having taken a forwards
+ * inter-procedural step from `nodeFrom` to `nodeTo`.
+ */
+ cached
+ predicate stepCall(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
+ exists(Node mid | nodeFrom.flowsTo(mid) and smallstepCall(mid, nodeTo, summary))
+ }
+}
+
+private import Cached
+
+/**
+ * INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
+ *
+ * A description of a step on an inter-procedural data flow path.
+ */
+class StepSummary extends TStepSummary {
+ /** Gets a textual representation of this step summary. */
+ string toString() {
+ this instanceof LevelStep and result = "level"
+ or
+ this instanceof CallStep and result = "call"
+ or
+ this instanceof ReturnStep and result = "return"
+ or
+ exists(string content | this = StoreStep(content) | result = "store " + content)
+ or
+ exists(string content | this = LoadStep(content) | result = "load " + content)
+ }
+}
+
+pragma[noinline]
+private predicate smallstepNoCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
+ jumpStep(nodeFrom, nodeTo) and
+ summary = LevelStep()
+ or
+ exists(string content |
+ StepSummary::localSourceStoreStep(nodeFrom, nodeTo, content) and
+ summary = StoreStep(content)
+ or
+ basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content)
+ )
+}
+
+pragma[noinline]
+private predicate smallstepCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
+ callStep(nodeFrom, nodeTo) and summary = CallStep()
+ or
+ returnStep(nodeFrom, nodeTo) and
+ summary = ReturnStep()
+}
+
+/** Provides predicates for updating step summaries (`StepSummary`s). */
+module StepSummary {
+ /**
+ * Gets the summary that corresponds to having taken a forwards
+ * heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
+ *
+ * This predicate is inlined, which enables better join-orders when
+ * the call graph construction and type tracking are mutually recursive.
+ * In such cases, non-linear recursion involving `step` will be limited
+ * to non-linear recursion for the parts of `step` that involve the
+ * call graph.
+ */
+ pragma[inline]
+ predicate step(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
+ stepNoCall(nodeFrom, nodeTo, summary)
+ or
+ stepCall(nodeFrom, nodeTo, summary)
+ }
+
+ /**
+ * Gets the summary that corresponds to having taken a forwards
+ * local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
+ *
+ * Unlike `StepSummary::step`, this predicate does not compress
+ * type-preserving steps.
+ */
+ pragma[inline]
+ predicate smallstep(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
+ smallstepNoCall(nodeFrom, nodeTo, summary)
+ or
+ smallstepCall(nodeFrom, nodeTo, summary)
+ }
+
+ /**
+ * Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
+ *
+ * Note that `nodeTo` will always be a local source node that flows to the place where the content
+ * is written in `basicStoreStep`. This may lead to the flow of information going "back in time"
+ * from the point of view of the execution of the program.
+ *
+ * For instance, if we interpret attribute writes in Python as writing to content with the same
+ * name as the attribute and consider the following snippet
+ *
+ * ```python
+ * def foo(y):
+ * x = Foo()
+ * bar(x)
+ * x.attr = y
+ * baz(x)
+ *
+ * def bar(x):
+ * z = x.attr
+ * ```
+ * for the attribute write `x.attr = y`, we will have `content` being the literal string `"attr"`,
+ * `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
+ * function. This means we will track the fact that `x.attr` can have the type of `y` into the
+ * assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
+ */
+ predicate localSourceStoreStep(Node nodeFrom, TypeTrackingNode nodeTo, string content) {
+ exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
+ }
+}
+
+private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalContentName content)
+
+/**
+ * Summary of the steps needed to track a value to a given dataflow node.
+ *
+ * This can be used to track objects that implement a certain API in order to
+ * recognize calls to that API. Note that type-tracking does not by itself provide a
+ * source/sink relation, that is, it may determine that a node has a given type,
+ * but it won't determine where that type came from.
+ *
+ * It is recommended that all uses of this type are written in the following form,
+ * for tracking some type `myType`:
+ * ```ql
+ * DataFlow::TypeTrackingNode myType(DataFlow::TypeTracker t) {
+ * t.start() and
+ * result = < source of myType >
+ * or
+ * exists (DataFlow::TypeTracker t2 |
+ * result = myType(t2).track(t2, t)
+ * )
+ * }
+ *
+ * DataFlow::Node myType() { myType(DataFlow::TypeTracker::end()).flowsTo(result) }
+ * ```
+ *
+ * Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
+ * `t = t2.step(myType(t2), result)`. If you additionally want to track individual
+ * intra-procedural steps, use `t = t2.smallstep(myType(t2), result)`.
+ */
+class TypeTracker extends TTypeTracker {
+ Boolean hasCall;
+ OptionalContentName content;
+
+ TypeTracker() { this = MkTypeTracker(hasCall, content) }
+
+ /** Gets the summary resulting from appending `step` to this type-tracking summary. */
+ TypeTracker append(StepSummary step) { result = append(this, step) }
+
+ /** Gets a textual representation of this summary. */
+ string toString() {
+ exists(string withCall, string withContent |
+ (if hasCall = true then withCall = "with" else withCall = "without") and
+ (if content != "" then withContent = " with content " + content else withContent = "") and
+ result = "type tracker " + withCall + " call steps" + withContent
+ )
+ }
+
+ /**
+ * Holds if this is the starting point of type tracking.
+ */
+ predicate start() { hasCall = false and content = "" }
+
+ /**
+ * Holds if this is the starting point of type tracking, and the value starts in the content named `contentName`.
+ * The type tracking only ends after the content has been loaded.
+ */
+ predicate startInContent(ContentName contentName) { hasCall = false and content = contentName }
+
+ /**
+ * Holds if this is the starting point of type tracking
+ * when tracking a parameter into a call, but not out of it.
+ */
+ predicate call() { hasCall = true and content = "" }
+
+ /**
+ * Holds if this is the end point of type tracking.
+ */
+ predicate end() { content = "" }
+
+ /**
+ * INTERNAL. DO NOT USE.
+ *
+ * Holds if this type has been tracked into a call.
+ */
+ boolean hasCall() { result = hasCall }
+
+ /**
+ * INTERNAL. DO NOT USE.
+ *
+ * Gets the content associated with this type tracker.
+ */
+ string getContent() { result = content }
+
+ /**
+ * Gets a type tracker that starts where this one has left off to allow continued
+ * tracking.
+ *
+ * This predicate is only defined if the type is not associated to a piece of content.
+ */
+ TypeTracker continue() { content = "" and result = this }
+
+ /**
+ * Gets the summary that corresponds to having taken a forwards
+ * heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
+ */
+ pragma[inline]
+ TypeTracker step(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo) {
+ exists(StepSummary summary |
+ StepSummary::step(nodeFrom, pragma[only_bind_out](nodeTo), pragma[only_bind_into](summary)) and
+ result = this.append(pragma[only_bind_into](summary))
+ )
+ }
+
+ /**
+ * Gets the summary that corresponds to having taken a forwards
+ * local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
+ *
+ * Unlike `TypeTracker::step`, this predicate exposes all edges
+ * in the flow graph, and not just the edges between
+ * `TypeTrackingNode`s. It may therefore be less performant.
+ *
+ * Type tracking predicates using small steps typically take the following form:
+ * ```ql
+ * DataFlow::Node myType(DataFlow::TypeTracker t) {
+ * t.start() and
+ * result = < source of myType >
+ * or
+ * exists (DataFlow::TypeTracker t2 |
+ * t = t2.smallstep(myType(t2), result)
+ * )
+ * }
+ *
+ * DataFlow::Node myType() {
+ * result = myType(DataFlow::TypeTracker::end())
+ * }
+ * ```
+ */
+ pragma[inline]
+ TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
+ exists(StepSummary summary |
+ StepSummary::smallstep(nodeFrom, nodeTo, summary) and
+ result = this.append(summary)
+ )
+ or
+ simpleLocalFlowStep(nodeFrom, nodeTo) and
+ result = this
+ }
+}
+
+/** Provides predicates for implementing custom `TypeTracker`s. */
+module TypeTracker {
+ /**
+ * Gets a valid end point of type tracking.
+ */
+ TypeTracker end() { result.end() }
+}
+
+private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, OptionalContentName content)
+
+/**
+ * Summary of the steps needed to back-track a use of a value to a given dataflow node.
+ *
+ * This can for example be used to track callbacks that are passed to a certain API,
+ * so we can model specific parameters of that callback as having a certain type.
+ *
+ * Note that type back-tracking does not provide a source/sink relation, that is,
+ * it may determine that a node will be used in an API call somewhere, but it won't
+ * determine exactly where that use was, or the path that led to the use.
+ *
+ * It is recommended that all uses of this type are written in the following form,
+ * for back-tracking some callback type `myCallback`:
+ *
+ * ```ql
+ * DataFlow::TypeTrackingNode myCallback(DataFlow::TypeBackTracker t) {
+ * t.start() and
+ * result = (< some API call >).getArgument(< n >).getALocalSource()
+ * or
+ * exists (DataFlow::TypeBackTracker t2 |
+ * result = myCallback(t2).backtrack(t2, t)
+ * )
+ * }
+ *
+ * DataFlow::TypeTrackingNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) }
+ * ```
+ *
+ * Instead of `result = myCallback(t2).backtrack(t2, t)`, you can also use the equivalent
+ * `t2 = t.step(result, myCallback(t2))`. If you additionally want to track individual
+ * intra-procedural steps, use `t2 = t.smallstep(result, myCallback(t2))`.
+ */
+class TypeBackTracker extends TTypeBackTracker {
+ Boolean hasReturn;
+ string content;
+
+ TypeBackTracker() { this = MkTypeBackTracker(hasReturn, content) }
+
+ /** Gets the summary resulting from prepending `step` to this type-tracking summary. */
+ TypeBackTracker prepend(StepSummary step) {
+ step = LevelStep() and result = this
+ or
+ step = CallStep() and hasReturn = false and result = this
+ or
+ step = ReturnStep() and result = MkTypeBackTracker(true, content)
+ or
+ exists(string p |
+ step = LoadStep(p) and content = "" and result = MkTypeBackTracker(hasReturn, p)
+ )
+ or
+ step = StoreStep(content) and result = MkTypeBackTracker(hasReturn, "")
+ }
+
+ /** Gets a textual representation of this summary. */
+ string toString() {
+ exists(string withReturn, string withContent |
+ (if hasReturn = true then withReturn = "with" else withReturn = "without") and
+ (if content != "" then withContent = " with content " + content else withContent = "") and
+ result = "type back-tracker " + withReturn + " return steps" + withContent
+ )
+ }
+
+ /**
+ * Holds if this is the starting point of type tracking.
+ */
+ predicate start() { hasReturn = false and content = "" }
+
+ /**
+ * Holds if this is the end point of type tracking.
+ */
+ predicate end() { content = "" }
+
+ /**
+ * INTERNAL. DO NOT USE.
+ *
+ * Holds if this type has been back-tracked into a call through return edge.
+ */
+ boolean hasReturn() { result = hasReturn }
+
+ /**
+ * Gets a type tracker that starts where this one has left off to allow continued
+ * tracking.
+ *
+ * This predicate is only defined if the type has not been tracked into a piece of content.
+ */
+ TypeBackTracker continue() { content = "" and result = this }
+
+ /**
+ * Gets the summary that corresponds to having taken a backwards
+ * heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
+ */
+ pragma[inline]
+ TypeBackTracker step(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo) {
+ exists(StepSummary summary |
+ StepSummary::step(pragma[only_bind_out](nodeFrom), nodeTo, pragma[only_bind_into](summary)) and
+ this = result.prepend(pragma[only_bind_into](summary))
+ )
+ }
+
+ /**
+ * Gets the summary that corresponds to having taken a backwards
+ * local, heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
+ *
+ * Unlike `TypeBackTracker::step`, this predicate exposes all edges
+ * in the flowgraph, and not just the edges between
+ * `TypeTrackingNode`s. It may therefore be less performant.
+ *
+ * Type tracking predicates using small steps typically take the following form:
+ * ```ql
+ * DataFlow::Node myType(DataFlow::TypeBackTracker t) {
+ * t.start() and
+ * result = < some API call >.getArgument(< n >)
+ * or
+ * exists (DataFlow::TypeBackTracker t2 |
+ * t2 = t.smallstep(result, myType(t2))
+ * )
+ * }
+ *
+ * DataFlow::Node myType() {
+ * result = myType(DataFlow::TypeBackTracker::end())
+ * }
+ * ```
+ */
+ pragma[inline]
+ TypeBackTracker smallstep(Node nodeFrom, Node nodeTo) {
+ exists(StepSummary summary |
+ StepSummary::smallstep(nodeFrom, nodeTo, summary) and
+ this = result.prepend(summary)
+ )
+ or
+ simpleLocalFlowStep(nodeFrom, nodeTo) and
+ this = result
+ }
+}
+
+/** Provides predicates for implementing custom `TypeBackTracker`s. */
+module TypeBackTracker {
+ /**
+ * Gets a valid end point of type back-tracking.
+ */
+ TypeBackTracker end() { result.end() }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll
new file mode 100644
index 00000000000..40beb734d37
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll
@@ -0,0 +1,146 @@
+private import codeql.ruby.AST as AST
+private import codeql.ruby.CFG as CFG
+private import CFG::CfgNodes
+private import codeql.ruby.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon
+private import codeql.ruby.dataflow.internal.DataFlowPublic as DataFlowPublic
+private import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate
+private import codeql.ruby.dataflow.internal.DataFlowDispatch as DataFlowDispatch
+private import codeql.ruby.dataflow.internal.SsaImpl as SsaImpl
+
+class Node = DataFlowPublic::Node;
+
+class TypeTrackingNode = DataFlowPublic::LocalSourceNode;
+
+predicate simpleLocalFlowStep = DataFlowPrivate::localFlowStepTypeTracker/2;
+
+predicate jumpStep = DataFlowPrivate::jumpStep/2;
+
+/**
+ * Gets the name of a possible piece of content. This will usually include things like
+ *
+ * - Attribute names (in Python)
+ * - Property names (in JavaScript)
+ */
+string getPossibleContentName() { result = getSetterCallAttributeName(_) }
+
+/**
+ * Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call.
+ *
+ * Flow into summarized library methods is not included, as that will lead to negative
+ * recursion (or, at best, terrible performance), since identifying calls to library
+ * methods is done using API graphs (which uses type tracking).
+ */
+predicate callStep(Node nodeFrom, Node nodeTo) {
+ exists(ExprNodes::CallCfgNode call, CFG::CfgScope callable, int i |
+ DataFlowDispatch::getTarget(call) = callable and
+ nodeFrom.(DataFlowPrivate::ArgumentNode).sourceArgumentOf(call, i) and
+ nodeTo.(DataFlowPrivate::ParameterNodeImpl).isSourceParameterOf(callable, i)
+ )
+ or
+ // In normal data-flow, this will be a local flow step. But for type tracking
+ // we model it as a call step, in order to avoid computing a potential
+ // self-cross product of all calls to a function that returns one of its parameters
+ // (only to later filter that flow out using `TypeTracker::append`).
+ nodeTo =
+ DataFlowPrivate::LocalFlow::getParameterDefNode(nodeFrom
+ .(DataFlowPublic::ParameterNode)
+ .getParameter())
+}
+
+/**
+ * Holds if `nodeFrom` steps to `nodeTo` by being returned from a call.
+ *
+ * Flow out of summarized library methods is not included, as that will lead to negative
+ * recursion (or, at best, terrible performance), since identifying calls to library
+ * methods is done using API graphs (which uses type tracking).
+ */
+predicate returnStep(Node nodeFrom, Node nodeTo) {
+ exists(ExprNodes::CallCfgNode call |
+ nodeFrom instanceof DataFlowPrivate::ReturnNode and
+ nodeFrom.(DataFlowPrivate::NodeImpl).getCfgScope() = DataFlowDispatch::getTarget(call) and
+ nodeTo.asExpr().getNode() = call.getNode()
+ )
+ or
+ // In normal data-flow, this will be a local flow step. But for type tracking
+ // we model it as a returning flow step, in order to avoid computing a potential
+ // self-cross product of all calls to a function that returns one of its parameters
+ // (only to later filter that flow out using `TypeTracker::append`).
+ nodeTo.(DataFlowPrivate::SynthReturnNode).getAnInput() = nodeFrom
+}
+
+/**
+ * Holds if `nodeFrom` is being written to the `content` content of the object
+ * in `nodeTo`.
+ *
+ * Note that the choice of `nodeTo` does not have to make sense
+ * "chronologically". All we care about is whether the `content` content of
+ * `nodeTo` can have a specific type, and the assumption is that if a specific
+ * type appears here, then any access of that particular content can yield
+ * something of that particular type.
+ *
+ * Thus, in an example such as
+ *
+ * ```rb
+ * def foo(y)
+ * x = Foo.new
+ * bar(x)
+ * x.content = y
+ * baz(x)
+ * end
+ *
+ * def bar(x)
+ * z = x.content
+ * end
+ * ```
+ * for the content write `x.content = y`, we will have `content` being the
+ * literal string `"content"`, `nodeFrom` will be `y`, and `nodeTo` will be the
+ * `Foo` object created on the first line of the function. This means we will
+ * track the fact that `x.content` can have the type of `y` into the assignment
+ * to `z` inside `bar`, even though this content write happens _after_ `bar` is
+ * called.
+ */
+predicate basicStoreStep(Node nodeFrom, DataFlowPublic::LocalSourceNode nodeTo, string content) {
+ // TODO: support SetterMethodCall inside TuplePattern
+ exists(ExprNodes::MethodCallCfgNode call |
+ content = getSetterCallAttributeName(call.getExpr()) and
+ nodeTo.(DataFlowPublic::ExprNode).getExprNode() = call.getReceiver() and
+ call.getExpr() instanceof AST::SetterMethodCall and
+ call.getArgument(call.getNumberOfArguments() - 1) =
+ nodeFrom.(DataFlowPublic::ExprNode).getExprNode()
+ )
+}
+
+/**
+ * Gets the name of the attribute assigned by the setter method call `call`,
+ * that is, the setter method's name with its final character (the trailing
+ * `=`) dropped. For the call below, the result is `"bar"`.
+ *
+ * ```rb
+ * foo.bar = 1
+ * ```
+ */
+private string getSetterCallAttributeName(AST::SetterMethodCall call) {
+ // TODO: this should be exposed in `SetterMethodCall`
+ exists(string name, int len |
+ name = call.getMethodName() and len = name.length() - 1 and result = name.prefix(len)
+ )
+}
+
+/**
+ * Holds if `nodeTo` reads the `content` content of `nodeFrom`: `nodeTo` is a call,
+ * without arguments, to a method named `content` whose receiver is `nodeFrom`.
+ */
+predicate basicLoadStep(Node nodeFrom, Node nodeTo, string content) {
+ exists(ExprNodes::MethodCallCfgNode access, AST::MethodCall accessExpr |
+ accessExpr = access.getExpr() and
+ nodeTo.asExpr() = access and nodeFrom.asExpr() = access.getReceiver() and
+ accessExpr.getNumberOfArguments() = 0 and content = accessExpr.getMethodName()
+ )
+}
+
+/**
+ * A utility class with the same values as `boolean` that does not require type joining.
+ */
+class Boolean extends boolean {
+ Boolean() { this = [true, false] }
+}
diff --git a/repo-tests/codeql-ruby/ql/lib/qlpack.yml b/repo-tests/codeql-ruby/ql/lib/qlpack.yml
new file mode 100644
index 00000000000..91f40532fc9
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/qlpack.yml
@@ -0,0 +1,6 @@
+name: codeql/ruby-all
+version: 0.0.2
+extractor: ruby
+dbscheme: ruby.dbscheme
+upgrades: upgrades
+library: true
diff --git a/repo-tests/codeql-ruby/ql/lib/ruby.dbscheme b/repo-tests/codeql-ruby/ql/lib/ruby.dbscheme
new file mode 100644
index 00000000000..f36dd8a35ce
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/ruby.dbscheme
@@ -0,0 +1,1318 @@
+// CodeQL database schema for Ruby
+// Automatically generated from the tree-sitter grammar; do not edit
+
+@location = @location_default
+
+locations_default(
+ unique int id: @location_default,
+ int file: @file ref,
+ int start_line: int ref,
+ int start_column: int ref,
+ int end_line: int ref,
+ int end_column: int ref
+);
+
+files(
+ unique int id: @file,
+ string name: string ref
+);
+
+folders(
+ unique int id: @folder,
+ string name: string ref
+);
+
+@container = @file | @folder
+
+containerparent(
+ int parent: @container ref,
+ unique int child: @container ref
+);
+
+sourceLocationPrefix(
+ string prefix: string ref
+);
+
+diagnostics(
+ unique int id: @diagnostic,
+ int severity: int ref,
+ string error_tag: string ref,
+ string error_message: string ref,
+ string full_error_message: string ref,
+ int location: @location_default ref
+);
+
+case @diagnostic.severity of
+ 10 = @diagnostic_debug
+| 20 = @diagnostic_info
+| 30 = @diagnostic_warning
+| 40 = @diagnostic_error
+;
+
+
+@ruby_underscore_arg = @ruby_assignment | @ruby_binary | @ruby_conditional | @ruby_operator_assignment | @ruby_range | @ruby_unary | @ruby_underscore_primary
+
+@ruby_underscore_lhs = @ruby_call | @ruby_element_reference | @ruby_scope_resolution | @ruby_token_false | @ruby_token_nil | @ruby_token_true | @ruby_underscore_variable
+
+@ruby_underscore_method_name = @ruby_delimited_symbol | @ruby_setter | @ruby_token_class_variable | @ruby_token_constant | @ruby_token_global_variable | @ruby_token_identifier | @ruby_token_instance_variable | @ruby_token_operator | @ruby_token_simple_symbol
+
+@ruby_underscore_primary = @ruby_array | @ruby_begin | @ruby_break | @ruby_case__ | @ruby_chained_string | @ruby_class | @ruby_delimited_symbol | @ruby_for | @ruby_hash | @ruby_if | @ruby_lambda | @ruby_method | @ruby_module | @ruby_next | @ruby_parenthesized_statements | @ruby_rational | @ruby_redo | @ruby_regex | @ruby_retry | @ruby_return | @ruby_singleton_class | @ruby_singleton_method | @ruby_string__ | @ruby_string_array | @ruby_subshell | @ruby_symbol_array | @ruby_token_character | @ruby_token_complex | @ruby_token_float | @ruby_token_heredoc_beginning | @ruby_token_integer | @ruby_token_simple_symbol | @ruby_unary | @ruby_underscore_lhs | @ruby_unless | @ruby_until | @ruby_while | @ruby_yield
+
+@ruby_underscore_statement = @ruby_alias | @ruby_assignment | @ruby_begin_block | @ruby_binary | @ruby_break | @ruby_call | @ruby_end_block | @ruby_if_modifier | @ruby_next | @ruby_operator_assignment | @ruby_rescue_modifier | @ruby_return | @ruby_unary | @ruby_undef | @ruby_underscore_arg | @ruby_unless_modifier | @ruby_until_modifier | @ruby_while_modifier | @ruby_yield
+
+@ruby_underscore_variable = @ruby_token_class_variable | @ruby_token_constant | @ruby_token_global_variable | @ruby_token_identifier | @ruby_token_instance_variable | @ruby_token_self | @ruby_token_super
+
+ruby_alias_def(
+ unique int id: @ruby_alias,
+ int alias: @ruby_underscore_method_name ref,
+ int name: @ruby_underscore_method_name ref,
+ int loc: @location ref
+);
+
+@ruby_argument_list_child_type = @ruby_block_argument | @ruby_break | @ruby_call | @ruby_hash_splat_argument | @ruby_next | @ruby_pair | @ruby_return | @ruby_splat_argument | @ruby_token_forward_argument | @ruby_underscore_arg | @ruby_yield
+
+#keyset[ruby_argument_list, index]
+ruby_argument_list_child(
+ int ruby_argument_list: @ruby_argument_list ref,
+ int index: int ref,
+ unique int child: @ruby_argument_list_child_type ref
+);
+
+ruby_argument_list_def(
+ unique int id: @ruby_argument_list,
+ int loc: @location ref
+);
+
+@ruby_array_child_type = @ruby_block_argument | @ruby_break | @ruby_call | @ruby_hash_splat_argument | @ruby_next | @ruby_pair | @ruby_return | @ruby_splat_argument | @ruby_token_forward_argument | @ruby_underscore_arg | @ruby_yield
+
+#keyset[ruby_array, index]
+ruby_array_child(
+ int ruby_array: @ruby_array ref,
+ int index: int ref,
+ unique int child: @ruby_array_child_type ref
+);
+
+ruby_array_def(
+ unique int id: @ruby_array,
+ int loc: @location ref
+);
+
+@ruby_assignment_left_type = @ruby_left_assignment_list | @ruby_underscore_lhs
+
+@ruby_assignment_right_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_right_assignment_list | @ruby_splat_argument | @ruby_underscore_arg | @ruby_yield
+
+ruby_assignment_def(
+ unique int id: @ruby_assignment,
+ int left: @ruby_assignment_left_type ref,
+ int right: @ruby_assignment_right_type ref,
+ int loc: @location ref
+);
+
+@ruby_bare_string_child_type = @ruby_interpolation | @ruby_token_escape_sequence | @ruby_token_string_content
+
+#keyset[ruby_bare_string, index]
+ruby_bare_string_child(
+ int ruby_bare_string: @ruby_bare_string ref,
+ int index: int ref,
+ unique int child: @ruby_bare_string_child_type ref
+);
+
+ruby_bare_string_def(
+ unique int id: @ruby_bare_string,
+ int loc: @location ref
+);
+
+@ruby_bare_symbol_child_type = @ruby_interpolation | @ruby_token_escape_sequence | @ruby_token_string_content
+
+#keyset[ruby_bare_symbol, index]
+ruby_bare_symbol_child(
+ int ruby_bare_symbol: @ruby_bare_symbol ref,
+ int index: int ref,
+ unique int child: @ruby_bare_symbol_child_type ref
+);
+
+ruby_bare_symbol_def(
+ unique int id: @ruby_bare_symbol,
+ int loc: @location ref
+);
+
+@ruby_begin_child_type = @ruby_else | @ruby_ensure | @ruby_rescue | @ruby_token_empty_statement | @ruby_underscore_statement
+
+#keyset[ruby_begin, index]
+ruby_begin_child(
+ int ruby_begin: @ruby_begin ref,
+ int index: int ref,
+ unique int child: @ruby_begin_child_type ref
+);
+
+ruby_begin_def(
+ unique int id: @ruby_begin,
+ int loc: @location ref
+);
+
+@ruby_begin_block_child_type = @ruby_token_empty_statement | @ruby_underscore_statement
+
+#keyset[ruby_begin_block, index]
+ruby_begin_block_child(
+ int ruby_begin_block: @ruby_begin_block ref,
+ int index: int ref,
+ unique int child: @ruby_begin_block_child_type ref
+);
+
+ruby_begin_block_def(
+ unique int id: @ruby_begin_block,
+ int loc: @location ref
+);
+
+@ruby_binary_left_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_underscore_arg | @ruby_yield
+
+case @ruby_binary.operator of
+ 0 = @ruby_binary_bangequal
+| 1 = @ruby_binary_bangtilde
+| 2 = @ruby_binary_percent
+| 3 = @ruby_binary_ampersand
+| 4 = @ruby_binary_ampersandampersand
+| 5 = @ruby_binary_star
+| 6 = @ruby_binary_starstar
+| 7 = @ruby_binary_plus
+| 8 = @ruby_binary_minus
+| 9 = @ruby_binary_slash
+| 10 = @ruby_binary_langle
+| 11 = @ruby_binary_langlelangle
+| 12 = @ruby_binary_langleequal
+| 13 = @ruby_binary_langleequalrangle
+| 14 = @ruby_binary_equalequal
+| 15 = @ruby_binary_equalequalequal
+| 16 = @ruby_binary_equaltilde
+| 17 = @ruby_binary_rangle
+| 18 = @ruby_binary_rangleequal
+| 19 = @ruby_binary_ranglerangle
+| 20 = @ruby_binary_caret
+| 21 = @ruby_binary_and
+| 22 = @ruby_binary_or
+| 23 = @ruby_binary_pipe
+| 24 = @ruby_binary_pipepipe
+;
+
+
+@ruby_binary_right_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_underscore_arg | @ruby_yield
+
+ruby_binary_def(
+ unique int id: @ruby_binary,
+ int left: @ruby_binary_left_type ref,
+ int operator: int ref,
+ int right: @ruby_binary_right_type ref,
+ int loc: @location ref
+);
+
+ruby_block_parameters(
+ unique int ruby_block: @ruby_block ref,
+ unique int parameters: @ruby_block_parameters ref
+);
+
+@ruby_block_child_type = @ruby_token_empty_statement | @ruby_underscore_statement
+
+#keyset[ruby_block, index]
+ruby_block_child(
+ int ruby_block: @ruby_block ref,
+ int index: int ref,
+ unique int child: @ruby_block_child_type ref
+);
+
+ruby_block_def(
+ unique int id: @ruby_block,
+ int loc: @location ref
+);
+
+ruby_block_argument_def(
+ unique int id: @ruby_block_argument,
+ int child: @ruby_underscore_arg ref,
+ int loc: @location ref
+);
+
+ruby_block_parameter_def(
+ unique int id: @ruby_block_parameter,
+ int name: @ruby_token_identifier ref,
+ int loc: @location ref
+);
+
+@ruby_block_parameters_child_type = @ruby_block_parameter | @ruby_destructured_parameter | @ruby_hash_splat_parameter | @ruby_keyword_parameter | @ruby_optional_parameter | @ruby_splat_parameter | @ruby_token_forward_parameter | @ruby_token_identifier
+
+#keyset[ruby_block_parameters, index]
+ruby_block_parameters_child(
+ int ruby_block_parameters: @ruby_block_parameters ref,
+ int index: int ref,
+ unique int child: @ruby_block_parameters_child_type ref
+);
+
+ruby_block_parameters_def(
+ unique int id: @ruby_block_parameters,
+ int loc: @location ref
+);
+
+ruby_break_child(
+ unique int ruby_break: @ruby_break ref,
+ unique int child: @ruby_argument_list ref
+);
+
+ruby_break_def(
+ unique int id: @ruby_break,
+ int loc: @location ref
+);
+
+ruby_call_arguments(
+ unique int ruby_call: @ruby_call ref,
+ unique int arguments: @ruby_argument_list ref
+);
+
+@ruby_call_block_type = @ruby_block | @ruby_do_block
+
+ruby_call_block(
+ unique int ruby_call: @ruby_call ref,
+ unique int block: @ruby_call_block_type ref
+);
+
+@ruby_call_method_type = @ruby_argument_list | @ruby_scope_resolution | @ruby_token_operator | @ruby_underscore_variable
+
+@ruby_call_receiver_type = @ruby_call | @ruby_underscore_primary
+
+ruby_call_receiver(
+ unique int ruby_call: @ruby_call ref,
+ unique int receiver: @ruby_call_receiver_type ref
+);
+
+ruby_call_def(
+ unique int id: @ruby_call,
+ int method: @ruby_call_method_type ref,
+ int loc: @location ref
+);
+
+ruby_case_value(
+ unique int ruby_case__: @ruby_case__ ref,
+ unique int value: @ruby_underscore_statement ref
+);
+
+@ruby_case_child_type = @ruby_else | @ruby_when
+
+#keyset[ruby_case__, index]
+ruby_case_child(
+ int ruby_case__: @ruby_case__ ref,
+ int index: int ref,
+ unique int child: @ruby_case_child_type ref
+);
+
+ruby_case_def(
+ unique int id: @ruby_case__,
+ int loc: @location ref
+);
+
+#keyset[ruby_chained_string, index]
+ruby_chained_string_child(
+ int ruby_chained_string: @ruby_chained_string ref,
+ int index: int ref,
+ unique int child: @ruby_string__ ref
+);
+
+ruby_chained_string_def(
+ unique int id: @ruby_chained_string,
+ int loc: @location ref
+);
+
+@ruby_class_name_type = @ruby_scope_resolution | @ruby_token_constant
+
+ruby_class_superclass(
+ unique int ruby_class: @ruby_class ref,
+ unique int superclass: @ruby_superclass ref
+);
+
+@ruby_class_child_type = @ruby_else | @ruby_ensure | @ruby_rescue | @ruby_token_empty_statement | @ruby_underscore_statement
+
+#keyset[ruby_class, index]
+ruby_class_child(
+ int ruby_class: @ruby_class ref,
+ int index: int ref,
+ unique int child: @ruby_class_child_type ref
+);
+
+ruby_class_def(
+ unique int id: @ruby_class,
+ int name: @ruby_class_name_type ref,
+ int loc: @location ref
+);
+
+ruby_conditional_def(
+ unique int id: @ruby_conditional,
+ int alternative: @ruby_underscore_arg ref,
+ int condition: @ruby_underscore_arg ref,
+ int consequence: @ruby_underscore_arg ref,
+ int loc: @location ref
+);
+
+@ruby_delimited_symbol_child_type = @ruby_interpolation | @ruby_token_escape_sequence | @ruby_token_string_content
+
+#keyset[ruby_delimited_symbol, index]
+ruby_delimited_symbol_child(
+ int ruby_delimited_symbol: @ruby_delimited_symbol ref,
+ int index: int ref,
+ unique int child: @ruby_delimited_symbol_child_type ref
+);
+
+ruby_delimited_symbol_def(
+ unique int id: @ruby_delimited_symbol,
+ int loc: @location ref
+);
+
+@ruby_destructured_left_assignment_child_type = @ruby_destructured_left_assignment | @ruby_rest_assignment | @ruby_underscore_lhs
+
+#keyset[ruby_destructured_left_assignment, index]
+ruby_destructured_left_assignment_child(
+ int ruby_destructured_left_assignment: @ruby_destructured_left_assignment ref,
+ int index: int ref,
+ unique int child: @ruby_destructured_left_assignment_child_type ref
+);
+
+ruby_destructured_left_assignment_def(
+ unique int id: @ruby_destructured_left_assignment,
+ int loc: @location ref
+);
+
+@ruby_destructured_parameter_child_type = @ruby_block_parameter | @ruby_destructured_parameter | @ruby_hash_splat_parameter | @ruby_keyword_parameter | @ruby_optional_parameter | @ruby_splat_parameter | @ruby_token_forward_parameter | @ruby_token_identifier
+
+#keyset[ruby_destructured_parameter, index]
+ruby_destructured_parameter_child(
+ int ruby_destructured_parameter: @ruby_destructured_parameter ref,
+ int index: int ref,
+ unique int child: @ruby_destructured_parameter_child_type ref
+);
+
+ruby_destructured_parameter_def(
+ unique int id: @ruby_destructured_parameter,
+ int loc: @location ref
+);
+
+@ruby_do_child_type = @ruby_token_empty_statement | @ruby_underscore_statement
+
+#keyset[ruby_do, index]
+ruby_do_child(
+ int ruby_do: @ruby_do ref,
+ int index: int ref,
+ unique int child: @ruby_do_child_type ref
+);
+
+ruby_do_def(
+ unique int id: @ruby_do,
+ int loc: @location ref
+);
+
+ruby_do_block_parameters(
+ unique int ruby_do_block: @ruby_do_block ref,
+ unique int parameters: @ruby_block_parameters ref
+);
+
+@ruby_do_block_child_type = @ruby_else | @ruby_ensure | @ruby_rescue | @ruby_token_empty_statement | @ruby_underscore_statement
+
+#keyset[ruby_do_block, index]
+ruby_do_block_child(
+ int ruby_do_block: @ruby_do_block ref,
+ int index: int ref,
+ unique int child: @ruby_do_block_child_type ref
+);
+
+ruby_do_block_def(
+ unique int id: @ruby_do_block,
+ int loc: @location ref
+);
+
+@ruby_element_reference_child_type = @ruby_block_argument | @ruby_break | @ruby_call | @ruby_hash_splat_argument | @ruby_next | @ruby_pair | @ruby_return | @ruby_splat_argument | @ruby_token_forward_argument | @ruby_underscore_arg | @ruby_yield
+
+#keyset[ruby_element_reference, index]
+ruby_element_reference_child(
+ int ruby_element_reference: @ruby_element_reference ref,
+ int index: int ref,
+ unique int child: @ruby_element_reference_child_type ref
+);
+
+ruby_element_reference_def(
+ unique int id: @ruby_element_reference,
+ int object: @ruby_underscore_primary ref,
+ int loc: @location ref
+);
+
+@ruby_else_child_type = @ruby_token_empty_statement | @ruby_underscore_statement
+
+#keyset[ruby_else, index]
+ruby_else_child(
+ int ruby_else: @ruby_else ref,
+ int index: int ref,
+ unique int child: @ruby_else_child_type ref
+);
+
+ruby_else_def(
+ unique int id: @ruby_else,
+ int loc: @location ref
+);
+
+@ruby_elsif_alternative_type = @ruby_else | @ruby_elsif
+
+ruby_elsif_alternative(
+ unique int ruby_elsif: @ruby_elsif ref,
+ unique int alternative: @ruby_elsif_alternative_type ref
+);
+
+ruby_elsif_consequence(
+ unique int ruby_elsif: @ruby_elsif ref,
+ unique int consequence: @ruby_then ref
+);
+
+ruby_elsif_def(
+ unique int id: @ruby_elsif,
+ int condition: @ruby_underscore_statement ref,
+ int loc: @location ref
+);
+
+@ruby_end_block_child_type = @ruby_token_empty_statement | @ruby_underscore_statement
+
+#keyset[ruby_end_block, index]
+ruby_end_block_child(
+ int ruby_end_block: @ruby_end_block ref,
+ int index: int ref,
+ unique int child: @ruby_end_block_child_type ref
+);
+
+ruby_end_block_def(
+ unique int id: @ruby_end_block,
+ int loc: @location ref
+);
+
+@ruby_ensure_child_type = @ruby_token_empty_statement | @ruby_underscore_statement
+
+#keyset[ruby_ensure, index]
+ruby_ensure_child(
+ int ruby_ensure: @ruby_ensure ref,
+ int index: int ref,
+ unique int child: @ruby_ensure_child_type ref
+);
+
+ruby_ensure_def(
+ unique int id: @ruby_ensure,
+ int loc: @location ref
+);
+
+ruby_exception_variable_def(
+ unique int id: @ruby_exception_variable,
+ int child: @ruby_underscore_lhs ref,
+ int loc: @location ref
+);
+
+@ruby_exceptions_child_type = @ruby_splat_argument | @ruby_underscore_arg
+
+#keyset[ruby_exceptions, index]
+ruby_exceptions_child(
+ int ruby_exceptions: @ruby_exceptions ref,
+ int index: int ref,
+ unique int child: @ruby_exceptions_child_type ref
+);
+
+ruby_exceptions_def(
+ unique int id: @ruby_exceptions,
+ int loc: @location ref
+);
+
+@ruby_for_pattern_type = @ruby_left_assignment_list | @ruby_underscore_lhs
+
+ruby_for_def(
+ unique int id: @ruby_for,
+ int body: @ruby_do ref,
+ int pattern: @ruby_for_pattern_type ref,
+ int value: @ruby_in ref,
+ int loc: @location ref
+);
+
+@ruby_hash_child_type = @ruby_hash_splat_argument | @ruby_pair
+
+#keyset[ruby_hash, index]
+ruby_hash_child(
+ int ruby_hash: @ruby_hash ref,
+ int index: int ref,
+ unique int child: @ruby_hash_child_type ref
+);
+
+ruby_hash_def(
+ unique int id: @ruby_hash,
+ int loc: @location ref
+);
+
+ruby_hash_splat_argument_def(
+ unique int id: @ruby_hash_splat_argument,
+ int child: @ruby_underscore_arg ref,
+ int loc: @location ref
+);
+
+ruby_hash_splat_parameter_name(
+ unique int ruby_hash_splat_parameter: @ruby_hash_splat_parameter ref,
+ unique int name: @ruby_token_identifier ref
+);
+
+ruby_hash_splat_parameter_def(
+ unique int id: @ruby_hash_splat_parameter,
+ int loc: @location ref
+);
+
+@ruby_heredoc_body_child_type = @ruby_interpolation | @ruby_token_escape_sequence | @ruby_token_heredoc_content | @ruby_token_heredoc_end
+
+#keyset[ruby_heredoc_body, index]
+ruby_heredoc_body_child(
+ int ruby_heredoc_body: @ruby_heredoc_body ref,
+ int index: int ref,
+ unique int child: @ruby_heredoc_body_child_type ref
+);
+
+ruby_heredoc_body_def(
+ unique int id: @ruby_heredoc_body,
+ int loc: @location ref
+);
+
+@ruby_if_alternative_type = @ruby_else | @ruby_elsif
+
+ruby_if_alternative(
+ unique int ruby_if: @ruby_if ref,
+ unique int alternative: @ruby_if_alternative_type ref
+);
+
+ruby_if_consequence(
+ unique int ruby_if: @ruby_if ref,
+ unique int consequence: @ruby_then ref
+);
+
+ruby_if_def(
+ unique int id: @ruby_if,
+ int condition: @ruby_underscore_statement ref,
+ int loc: @location ref
+);
+
+@ruby_if_modifier_condition_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_underscore_arg | @ruby_yield
+
+ruby_if_modifier_def(
+ unique int id: @ruby_if_modifier,
+ int body: @ruby_underscore_statement ref,
+ int condition: @ruby_if_modifier_condition_type ref,
+ int loc: @location ref
+);
+
+ruby_in_def(
+ unique int id: @ruby_in,
+ int child: @ruby_underscore_arg ref,
+ int loc: @location ref
+);
+
+@ruby_interpolation_child_type = @ruby_token_empty_statement | @ruby_underscore_statement
+
+#keyset[ruby_interpolation, index]
+ruby_interpolation_child(
+ int ruby_interpolation: @ruby_interpolation ref,
+ int index: int ref,
+ unique int child: @ruby_interpolation_child_type ref
+);
+
+ruby_interpolation_def(
+ unique int id: @ruby_interpolation,
+ int loc: @location ref
+);
+
+ruby_keyword_parameter_value(
+ unique int ruby_keyword_parameter: @ruby_keyword_parameter ref,
+ unique int value: @ruby_underscore_arg ref
+);
+
+ruby_keyword_parameter_def(
+ unique int id: @ruby_keyword_parameter,
+ int name: @ruby_token_identifier ref,
+ int loc: @location ref
+);
+
+@ruby_lambda_body_type = @ruby_block | @ruby_do_block
+
+ruby_lambda_parameters(
+ unique int ruby_lambda: @ruby_lambda ref,
+ unique int parameters: @ruby_lambda_parameters ref
+);
+
+ruby_lambda_def(
+ unique int id: @ruby_lambda,
+ int body: @ruby_lambda_body_type ref,
+ int loc: @location ref
+);
+
+@ruby_lambda_parameters_child_type = @ruby_block_parameter | @ruby_destructured_parameter | @ruby_hash_splat_parameter | @ruby_keyword_parameter | @ruby_optional_parameter | @ruby_splat_parameter | @ruby_token_forward_parameter | @ruby_token_identifier
+
+#keyset[ruby_lambda_parameters, index]
+ruby_lambda_parameters_child(
+ int ruby_lambda_parameters: @ruby_lambda_parameters ref,
+ int index: int ref,
+ unique int child: @ruby_lambda_parameters_child_type ref
+);
+
+ruby_lambda_parameters_def(
+ unique int id: @ruby_lambda_parameters,
+ int loc: @location ref
+);
+
+@ruby_left_assignment_list_child_type = @ruby_destructured_left_assignment | @ruby_rest_assignment | @ruby_underscore_lhs
+
+#keyset[ruby_left_assignment_list, index]
+ruby_left_assignment_list_child(
+ int ruby_left_assignment_list: @ruby_left_assignment_list ref,
+ int index: int ref,
+ unique int child: @ruby_left_assignment_list_child_type ref
+);
+
+ruby_left_assignment_list_def(
+ unique int id: @ruby_left_assignment_list,
+ int loc: @location ref
+);
+
+ruby_method_parameters(
+ unique int ruby_method: @ruby_method ref,
+ unique int parameters: @ruby_method_parameters ref
+);
+
+@ruby_method_child_type = @ruby_else | @ruby_ensure | @ruby_rescue | @ruby_token_empty_statement | @ruby_underscore_statement
+
+#keyset[ruby_method, index]
+ruby_method_child(
+ int ruby_method: @ruby_method ref,
+ int index: int ref,
+ unique int child: @ruby_method_child_type ref
+);
+
+ruby_method_def(
+ unique int id: @ruby_method,
+ int name: @ruby_underscore_method_name ref,
+ int loc: @location ref
+);
+
+@ruby_method_parameters_child_type = @ruby_block_parameter | @ruby_destructured_parameter | @ruby_hash_splat_parameter | @ruby_keyword_parameter | @ruby_optional_parameter | @ruby_splat_parameter | @ruby_token_forward_parameter | @ruby_token_identifier
+
+#keyset[ruby_method_parameters, index]
+ruby_method_parameters_child(
+ int ruby_method_parameters: @ruby_method_parameters ref,
+ int index: int ref,
+ unique int child: @ruby_method_parameters_child_type ref
+);
+
+ruby_method_parameters_def(
+ unique int id: @ruby_method_parameters,
+ int loc: @location ref
+);
+
+@ruby_module_name_type = @ruby_scope_resolution | @ruby_token_constant
+
+@ruby_module_child_type = @ruby_else | @ruby_ensure | @ruby_rescue | @ruby_token_empty_statement | @ruby_underscore_statement
+
+#keyset[ruby_module, index]
+ruby_module_child(
+ int ruby_module: @ruby_module ref,
+ int index: int ref,
+ unique int child: @ruby_module_child_type ref
+);
+
+ruby_module_def(
+ unique int id: @ruby_module,
+ int name: @ruby_module_name_type ref,
+ int loc: @location ref
+);
+
+ruby_next_child(
+ unique int ruby_next: @ruby_next ref,
+ unique int child: @ruby_argument_list ref
+);
+
+ruby_next_def(
+ unique int id: @ruby_next,
+ int loc: @location ref
+);
+
+case @ruby_operator_assignment.operator of
+ 0 = @ruby_operator_assignment_percentequal
+| 1 = @ruby_operator_assignment_ampersandampersandequal
+| 2 = @ruby_operator_assignment_ampersandequal
+| 3 = @ruby_operator_assignment_starstarequal
+| 4 = @ruby_operator_assignment_starequal
+| 5 = @ruby_operator_assignment_plusequal
+| 6 = @ruby_operator_assignment_minusequal
+| 7 = @ruby_operator_assignment_slashequal
+| 8 = @ruby_operator_assignment_langlelangleequal
+| 9 = @ruby_operator_assignment_ranglerangleequal
+| 10 = @ruby_operator_assignment_caretequal
+| 11 = @ruby_operator_assignment_pipeequal
+| 12 = @ruby_operator_assignment_pipepipeequal
+;
+
+
+@ruby_operator_assignment_right_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_underscore_arg | @ruby_yield
+
+ruby_operator_assignment_def(
+ unique int id: @ruby_operator_assignment,
+ int left: @ruby_underscore_lhs ref,
+ int operator: int ref,
+ int right: @ruby_operator_assignment_right_type ref,
+ int loc: @location ref
+);
+
+ruby_optional_parameter_def(
+ unique int id: @ruby_optional_parameter,
+ int name: @ruby_token_identifier ref,
+ int value: @ruby_underscore_arg ref,
+ int loc: @location ref
+);
+
+@ruby_pair_key_type = @ruby_string__ | @ruby_token_hash_key_symbol | @ruby_underscore_arg
+
+ruby_pair_def(
+ unique int id: @ruby_pair,
+ int key__: @ruby_pair_key_type ref,
+ int value: @ruby_underscore_arg ref,
+ int loc: @location ref
+);
+
+@ruby_parenthesized_statements_child_type = @ruby_token_empty_statement | @ruby_underscore_statement
+
+#keyset[ruby_parenthesized_statements, index]
+ruby_parenthesized_statements_child(
+ int ruby_parenthesized_statements: @ruby_parenthesized_statements ref,
+ int index: int ref,
+ unique int child: @ruby_parenthesized_statements_child_type ref
+);
+
+ruby_parenthesized_statements_def(
+ unique int id: @ruby_parenthesized_statements,
+ int loc: @location ref
+);
+
+@ruby_pattern_child_type = @ruby_splat_argument | @ruby_underscore_arg
+
+ruby_pattern_def(
+ unique int id: @ruby_pattern,
+ int child: @ruby_pattern_child_type ref,
+ int loc: @location ref
+);
+
+@ruby_program_child_type = @ruby_token_empty_statement | @ruby_token_uninterpreted | @ruby_underscore_statement
+
+#keyset[ruby_program, index]
+ruby_program_child(
+ int ruby_program: @ruby_program ref,
+ int index: int ref,
+ unique int child: @ruby_program_child_type ref
+);
+
+ruby_program_def(
+ unique int id: @ruby_program,
+ int loc: @location ref
+);
+
+ruby_range_begin(
+ unique int ruby_range: @ruby_range ref,
+ unique int begin: @ruby_underscore_arg ref
+);
+
+ruby_range_end(
+ unique int ruby_range: @ruby_range ref,
+ unique int end: @ruby_underscore_arg ref
+);
+
+case @ruby_range.operator of
+ 0 = @ruby_range_dotdot
+| 1 = @ruby_range_dotdotdot
+;
+
+
+ruby_range_def(
+ unique int id: @ruby_range,
+ int operator: int ref,
+ int loc: @location ref
+);
+
+@ruby_rational_child_type = @ruby_token_float | @ruby_token_integer
+
+ruby_rational_def(
+ unique int id: @ruby_rational,
+ int child: @ruby_rational_child_type ref,
+ int loc: @location ref
+);
+
+ruby_redo_child(
+ unique int ruby_redo: @ruby_redo ref,
+ unique int child: @ruby_argument_list ref
+);
+
+ruby_redo_def(
+ unique int id: @ruby_redo,
+ int loc: @location ref
+);
+
+@ruby_regex_child_type = @ruby_interpolation | @ruby_token_escape_sequence | @ruby_token_string_content
+
+#keyset[ruby_regex, index]
+ruby_regex_child(
+ int ruby_regex: @ruby_regex ref,
+ int index: int ref,
+ unique int child: @ruby_regex_child_type ref
+);
+
+ruby_regex_def(
+ unique int id: @ruby_regex,
+ int loc: @location ref
+);
+
+ruby_rescue_body(
+ unique int ruby_rescue: @ruby_rescue ref,
+ unique int body: @ruby_then ref
+);
+
+ruby_rescue_exceptions(
+ unique int ruby_rescue: @ruby_rescue ref,
+ unique int exceptions: @ruby_exceptions ref
+);
+
+ruby_rescue_variable(
+ unique int ruby_rescue: @ruby_rescue ref,
+ unique int variable: @ruby_exception_variable ref
+);
+
+ruby_rescue_def(
+ unique int id: @ruby_rescue,
+ int loc: @location ref
+);
+
+@ruby_rescue_modifier_handler_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_underscore_arg | @ruby_yield
+
+ruby_rescue_modifier_def(
+ unique int id: @ruby_rescue_modifier,
+ int body: @ruby_underscore_statement ref,
+ int handler: @ruby_rescue_modifier_handler_type ref,
+ int loc: @location ref
+);
+
+ruby_rest_assignment_child(
+ unique int ruby_rest_assignment: @ruby_rest_assignment ref,
+ unique int child: @ruby_underscore_lhs ref
+);
+
+ruby_rest_assignment_def(
+ unique int id: @ruby_rest_assignment,
+ int loc: @location ref
+);
+
+ruby_retry_child(
+ unique int ruby_retry: @ruby_retry ref,
+ unique int child: @ruby_argument_list ref
+);
+
+ruby_retry_def(
+ unique int id: @ruby_retry,
+ int loc: @location ref
+);
+
+ruby_return_child(
+ unique int ruby_return: @ruby_return ref,
+ unique int child: @ruby_argument_list ref
+);
+
+ruby_return_def(
+ unique int id: @ruby_return,
+ int loc: @location ref
+);
+
+@ruby_right_assignment_list_child_type = @ruby_splat_argument | @ruby_underscore_arg
+
+#keyset[ruby_right_assignment_list, index]
+ruby_right_assignment_list_child(
+ int ruby_right_assignment_list: @ruby_right_assignment_list ref,
+ int index: int ref,
+ unique int child: @ruby_right_assignment_list_child_type ref
+);
+
+ruby_right_assignment_list_def(
+ unique int id: @ruby_right_assignment_list,
+ int loc: @location ref
+);
+
+@ruby_scope_resolution_name_type = @ruby_token_constant | @ruby_token_identifier
+
+ruby_scope_resolution_scope(
+ unique int ruby_scope_resolution: @ruby_scope_resolution ref,
+ unique int scope: @ruby_underscore_primary ref
+);
+
+ruby_scope_resolution_def(
+ unique int id: @ruby_scope_resolution,
+ int name: @ruby_scope_resolution_name_type ref,
+ int loc: @location ref
+);
+
+ruby_setter_def(
+ unique int id: @ruby_setter,
+ int name: @ruby_token_identifier ref,
+ int loc: @location ref
+);
+
+@ruby_singleton_class_child_type = @ruby_else | @ruby_ensure | @ruby_rescue | @ruby_token_empty_statement | @ruby_underscore_statement
+
+#keyset[ruby_singleton_class, index]
+ruby_singleton_class_child(
+ int ruby_singleton_class: @ruby_singleton_class ref,
+ int index: int ref,
+ unique int child: @ruby_singleton_class_child_type ref
+);
+
+ruby_singleton_class_def(
+ unique int id: @ruby_singleton_class,
+ int value: @ruby_underscore_arg ref,
+ int loc: @location ref
+);
+
+@ruby_singleton_method_object_type = @ruby_underscore_arg | @ruby_underscore_variable
+
+ruby_singleton_method_parameters(
+ unique int ruby_singleton_method: @ruby_singleton_method ref,
+ unique int parameters: @ruby_method_parameters ref
+);
+
+@ruby_singleton_method_child_type = @ruby_else | @ruby_ensure | @ruby_rescue | @ruby_token_empty_statement | @ruby_underscore_statement
+
+#keyset[ruby_singleton_method, index]
+ruby_singleton_method_child(
+ int ruby_singleton_method: @ruby_singleton_method ref,
+ int index: int ref,
+ unique int child: @ruby_singleton_method_child_type ref
+);
+
+ruby_singleton_method_def(
+ unique int id: @ruby_singleton_method,
+ int name: @ruby_underscore_method_name ref,
+ int object: @ruby_singleton_method_object_type ref,
+ int loc: @location ref
+);
+
+ruby_splat_argument_def(
+ unique int id: @ruby_splat_argument,
+ int child: @ruby_underscore_arg ref,
+ int loc: @location ref
+);
+
+ruby_splat_parameter_name(
+ unique int ruby_splat_parameter: @ruby_splat_parameter ref,
+ unique int name: @ruby_token_identifier ref
+);
+
+ruby_splat_parameter_def(
+ unique int id: @ruby_splat_parameter,
+ int loc: @location ref
+);
+
+@ruby_string_child_type = @ruby_interpolation | @ruby_token_escape_sequence | @ruby_token_string_content
+
+#keyset[ruby_string__, index]
+ruby_string_child(
+ int ruby_string__: @ruby_string__ ref,
+ int index: int ref,
+ unique int child: @ruby_string_child_type ref
+);
+
+ruby_string_def(
+ unique int id: @ruby_string__,
+ int loc: @location ref
+);
+
+#keyset[ruby_string_array, index]
+ruby_string_array_child(
+ int ruby_string_array: @ruby_string_array ref,
+ int index: int ref,
+ unique int child: @ruby_bare_string ref
+);
+
+ruby_string_array_def(
+ unique int id: @ruby_string_array,
+ int loc: @location ref
+);
+
+@ruby_subshell_child_type = @ruby_interpolation | @ruby_token_escape_sequence | @ruby_token_string_content
+
+#keyset[ruby_subshell, index]
+ruby_subshell_child(
+ int ruby_subshell: @ruby_subshell ref,
+ int index: int ref,
+ unique int child: @ruby_subshell_child_type ref
+);
+
+ruby_subshell_def(
+ unique int id: @ruby_subshell,
+ int loc: @location ref
+);
+
+@ruby_superclass_child_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_underscore_arg | @ruby_yield
+
+ruby_superclass_def(
+ unique int id: @ruby_superclass,
+ int child: @ruby_superclass_child_type ref,
+ int loc: @location ref
+);
+
+#keyset[ruby_symbol_array, index]
+ruby_symbol_array_child(
+ int ruby_symbol_array: @ruby_symbol_array ref,
+ int index: int ref,
+ unique int child: @ruby_bare_symbol ref
+);
+
+ruby_symbol_array_def(
+ unique int id: @ruby_symbol_array,
+ int loc: @location ref
+);
+
+@ruby_then_child_type = @ruby_token_empty_statement | @ruby_underscore_statement
+
+#keyset[ruby_then, index]
+ruby_then_child(
+ int ruby_then: @ruby_then ref,
+ int index: int ref,
+ unique int child: @ruby_then_child_type ref
+);
+
+ruby_then_def(
+ unique int id: @ruby_then,
+ int loc: @location ref
+);
+
+@ruby_unary_operand_type = @ruby_break | @ruby_call | @ruby_next | @ruby_parenthesized_statements | @ruby_return | @ruby_token_float | @ruby_token_integer | @ruby_underscore_arg | @ruby_yield
+
+case @ruby_unary.operator of
+ 0 = @ruby_unary_bang
+| 1 = @ruby_unary_plus
+| 2 = @ruby_unary_minus
+| 3 = @ruby_unary_definedquestion
+| 4 = @ruby_unary_not
+| 5 = @ruby_unary_tilde
+;
+
+
+ruby_unary_def(
+ unique int id: @ruby_unary,
+ int operand: @ruby_unary_operand_type ref,
+ int operator: int ref,
+ int loc: @location ref
+);
+
+#keyset[ruby_undef, index]
+ruby_undef_child(
+ int ruby_undef: @ruby_undef ref,
+ int index: int ref,
+ unique int child: @ruby_underscore_method_name ref
+);
+
+ruby_undef_def(
+ unique int id: @ruby_undef,
+ int loc: @location ref
+);
+
+@ruby_unless_alternative_type = @ruby_else | @ruby_elsif
+
+ruby_unless_alternative(
+ unique int ruby_unless: @ruby_unless ref,
+ unique int alternative: @ruby_unless_alternative_type ref
+);
+
+ruby_unless_consequence(
+ unique int ruby_unless: @ruby_unless ref,
+ unique int consequence: @ruby_then ref
+);
+
+ruby_unless_def(
+ unique int id: @ruby_unless,
+ int condition: @ruby_underscore_statement ref,
+ int loc: @location ref
+);
+
+@ruby_unless_modifier_condition_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_underscore_arg | @ruby_yield
+
+ruby_unless_modifier_def(
+ unique int id: @ruby_unless_modifier,
+ int body: @ruby_underscore_statement ref,
+ int condition: @ruby_unless_modifier_condition_type ref,
+ int loc: @location ref
+);
+
+ruby_until_def(
+ unique int id: @ruby_until,
+ int body: @ruby_do ref,
+ int condition: @ruby_underscore_statement ref,
+ int loc: @location ref
+);
+
+@ruby_until_modifier_condition_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_underscore_arg | @ruby_yield
+
+ruby_until_modifier_def(
+ unique int id: @ruby_until_modifier,
+ int body: @ruby_underscore_statement ref,
+ int condition: @ruby_until_modifier_condition_type ref,
+ int loc: @location ref
+);
+
+ruby_when_body(
+ unique int ruby_when: @ruby_when ref,
+ unique int body: @ruby_then ref
+);
+
+#keyset[ruby_when, index]
+ruby_when_pattern(
+ int ruby_when: @ruby_when ref,
+ int index: int ref,
+ unique int pattern: @ruby_pattern ref
+);
+
+ruby_when_def(
+ unique int id: @ruby_when,
+ int loc: @location ref
+);
+
+ruby_while_def(
+ unique int id: @ruby_while,
+ int body: @ruby_do ref,
+ int condition: @ruby_underscore_statement ref,
+ int loc: @location ref
+);
+
+@ruby_while_modifier_condition_type = @ruby_break | @ruby_call | @ruby_next | @ruby_return | @ruby_underscore_arg | @ruby_yield
+
+ruby_while_modifier_def(
+ unique int id: @ruby_while_modifier,
+ int body: @ruby_underscore_statement ref,
+ int condition: @ruby_while_modifier_condition_type ref,
+ int loc: @location ref
+);
+
+ruby_yield_child(
+ unique int ruby_yield: @ruby_yield ref,
+ unique int child: @ruby_argument_list ref
+);
+
+ruby_yield_def(
+ unique int id: @ruby_yield,
+ int loc: @location ref
+);
+
+ruby_tokeninfo(
+ unique int id: @ruby_token,
+ int kind: int ref, // token-kind code; mapped to a @ruby_token subtype by the `case` declaration below
+ string value: string ref,
+ int loc: @location ref
+);
+
+case @ruby_token.kind of // 28 token kinds, numbered 0-27
+ 0 = @ruby_reserved_word
+| 1 = @ruby_token_character
+| 2 = @ruby_token_class_variable
+| 3 = @ruby_token_comment
+| 4 = @ruby_token_complex
+| 5 = @ruby_token_constant
+| 6 = @ruby_token_empty_statement
+| 7 = @ruby_token_escape_sequence
+| 8 = @ruby_token_false
+| 9 = @ruby_token_float
+| 10 = @ruby_token_forward_argument
+| 11 = @ruby_token_forward_parameter
+| 12 = @ruby_token_global_variable
+| 13 = @ruby_token_hash_key_symbol
+| 14 = @ruby_token_heredoc_beginning
+| 15 = @ruby_token_heredoc_content
+| 16 = @ruby_token_heredoc_end
+| 17 = @ruby_token_identifier
+| 18 = @ruby_token_instance_variable
+| 19 = @ruby_token_integer
+| 20 = @ruby_token_nil
+| 21 = @ruby_token_operator
+| 22 = @ruby_token_self
+| 23 = @ruby_token_simple_symbol
+| 24 = @ruby_token_string_content
+| 25 = @ruby_token_super
+| 26 = @ruby_token_true
+| 27 = @ruby_token_uninterpreted
+;
+
+
+@ruby_ast_node = @ruby_alias | @ruby_argument_list | @ruby_array | @ruby_assignment | @ruby_bare_string | @ruby_bare_symbol | @ruby_begin | @ruby_begin_block | @ruby_binary | @ruby_block | @ruby_block_argument | @ruby_block_parameter | @ruby_block_parameters | @ruby_break | @ruby_call | @ruby_case__ | @ruby_chained_string | @ruby_class | @ruby_conditional | @ruby_delimited_symbol | @ruby_destructured_left_assignment | @ruby_destructured_parameter | @ruby_do | @ruby_do_block | @ruby_element_reference | @ruby_else | @ruby_elsif | @ruby_end_block | @ruby_ensure | @ruby_exception_variable | @ruby_exceptions | @ruby_for | @ruby_hash | @ruby_hash_splat_argument | @ruby_hash_splat_parameter | @ruby_heredoc_body | @ruby_if | @ruby_if_modifier | @ruby_in | @ruby_interpolation | @ruby_keyword_parameter | @ruby_lambda | @ruby_lambda_parameters | @ruby_left_assignment_list | @ruby_method | @ruby_method_parameters | @ruby_module | @ruby_next | @ruby_operator_assignment | @ruby_optional_parameter | @ruby_pair | @ruby_parenthesized_statements | @ruby_pattern | @ruby_program | @ruby_range | @ruby_rational | @ruby_redo | @ruby_regex | @ruby_rescue | @ruby_rescue_modifier | @ruby_rest_assignment | @ruby_retry | @ruby_return | @ruby_right_assignment_list | @ruby_scope_resolution | @ruby_setter | @ruby_singleton_class | @ruby_singleton_method | @ruby_splat_argument | @ruby_splat_parameter | @ruby_string__ | @ruby_string_array | @ruby_subshell | @ruby_superclass | @ruby_symbol_array | @ruby_then | @ruby_token | @ruby_unary | @ruby_undef | @ruby_unless | @ruby_unless_modifier | @ruby_until | @ruby_until_modifier | @ruby_when | @ruby_while | @ruby_while_modifier | @ruby_yield
+
+@ruby_ast_node_parent = @file | @ruby_ast_node
+
+#keyset[parent, parent_index]
+ruby_ast_node_parent(
+ int child: @ruby_ast_node ref,
+ int parent: @ruby_ast_node_parent ref, // either a @file or another @ruby_ast_node
+ int parent_index: int ref // (parent, parent_index) is the declared keyset: at most one row per slot under a parent
+);
+
+erb_comment_directive_def(
+ unique int id: @erb_comment_directive,
+ int child: @erb_token_comment ref,
+ int loc: @location ref
+);
+
+erb_directive_def(
+ unique int id: @erb_directive,
+ int child: @erb_token_code ref,
+ int loc: @location ref
+);
+
+erb_graphql_directive_def(
+ unique int id: @erb_graphql_directive,
+ int child: @erb_token_code ref,
+ int loc: @location ref
+);
+
+erb_output_directive_def(
+ unique int id: @erb_output_directive,
+ int child: @erb_token_code ref,
+ int loc: @location ref
+);
+
+@erb_template_child_type = @erb_comment_directive | @erb_directive | @erb_graphql_directive | @erb_output_directive | @erb_token_content
+
+#keyset[erb_template, index]
+erb_template_child(
+ int erb_template: @erb_template ref,
+ int index: int ref,
+ unique int child: @erb_template_child_type ref
+);
+
+erb_template_def(
+ unique int id: @erb_template,
+ int loc: @location ref
+);
+
+erb_tokeninfo(
+ unique int id: @erb_token,
+ int kind: int ref, // token-kind code; mapped to an @erb_token subtype by the `case` declaration below
+ string value: string ref,
+ int loc: @location ref
+);
+
+case @erb_token.kind of // 4 token kinds, numbered 0-3
+ 0 = @erb_reserved_word
+| 1 = @erb_token_code
+| 2 = @erb_token_comment
+| 3 = @erb_token_content
+;
+
+
+@erb_ast_node = @erb_comment_directive | @erb_directive | @erb_graphql_directive | @erb_output_directive | @erb_template | @erb_token
+
+@erb_ast_node_parent = @erb_ast_node | @file
+
+#keyset[parent, parent_index]
+erb_ast_node_parent(
+ int child: @erb_ast_node ref,
+ int parent: @erb_ast_node_parent ref, // either a @file or another @erb_ast_node
+ int parent_index: int ref // (parent, parent_index) is the declared keyset: at most one row per slot under a parent
+);
+
diff --git a/repo-tests/codeql-ruby/ql/lib/ruby.qll b/repo-tests/codeql-ruby/ql/lib/ruby.qll
new file mode 100644
index 00000000000..18468c9f8cf
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/ruby.qll
@@ -0,0 +1 @@
+import codeql.ruby.AST
diff --git a/repo-tests/codeql-ruby/ql/lib/tutorial.qll b/repo-tests/codeql-ruby/ql/lib/tutorial.qll
new file mode 100644
index 00000000000..8cb1797a532
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/lib/tutorial.qll
@@ -0,0 +1,1207 @@
+/**
+ * This library is used in the QL detective tutorials.
+ *
+ * Note: Data is usually stored in a separate database and the QL libraries only contain predicates,
+ * but for this tutorial both the data and the predicates are stored in the library.
+ */
+class Person extends string {
+ Person() {
+ this = "Ronil" or
+ this = "Dina" or
+ this = "Ravi" or
+ this = "Bruce" or
+ this = "Jo" or
+ this = "Aida" or
+ this = "Esme" or
+ this = "Charlie" or
+ this = "Fred" or
+ this = "Meera" or
+ this = "Maya" or
+ this = "Chad" or
+ this = "Tiana" or
+ this = "Laura" or
+ this = "George" or
+ this = "Will" or
+ this = "Mary" or
+ this = "Almira" or
+ this = "Susannah" or
+ this = "Rhoda" or
+ this = "Cynthia" or
+ this = "Eunice" or
+ this = "Olive" or
+ this = "Virginia" or
+ this = "Angeline" or
+ this = "Helen" or
+ this = "Cornelia" or
+ this = "Harriet" or
+ this = "Mahala" or
+ this = "Abby" or
+ this = "Margaret" or
+ this = "Deb" or
+ this = "Minerva" or
+ this = "Severus" or
+ this = "Lavina" or
+ this = "Adeline" or
+ this = "Cath" or
+ this = "Elisa" or
+ this = "Lucretia" or
+ this = "Anne" or
+ this = "Eleanor" or
+ this = "Joanna" or
+ this = "Adam" or
+ this = "Agnes" or
+ this = "Rosanna" or
+ this = "Clara" or
+ this = "Melissa" or
+ this = "Amy" or
+ this = "Isabel" or
+ this = "Jemima" or
+ this = "Cordelia" or
+ this = "Melinda" or
+ this = "Delila" or
+ this = "Jeremiah" or
+ this = "Elijah" or
+ this = "Hester" or
+ this = "Walter" or
+ this = "Oliver" or
+ this = "Hugh" or
+ this = "Aaron" or
+ this = "Reuben" or
+ this = "Eli" or
+ this = "Amos" or
+ this = "Augustus" or
+ this = "Theodore" or
+ this = "Ira" or
+ this = "Timothy" or
+ this = "Cyrus" or
+ this = "Horace" or
+ this = "Simon" or
+ this = "Asa" or
+ this = "Frank" or
+ this = "Nelson" or
+ this = "Leonard" or
+ this = "Harrison" or
+ this = "Anthony" or
+ this = "Louis" or
+ this = "Milton" or
+ this = "Noah" or
+ this = "Cornelius" or
+ this = "Abdul" or
+ this = "Warren" or
+ this = "Harvey" or
+ this = "Dennis" or
+ this = "Wesley" or
+ this = "Sylvester" or
+ this = "Gilbert" or
+ this = "Sullivan" or
+ this = "Edmund" or
+ this = "Wilson" or
+ this = "Perry" or
+ this = "Matthew" or
+ this = "Simba" or
+ this = "Nala" or
+ this = "Rafiki" or
+ this = "Shenzi" or
+ this = "Ernest" or
+ this = "Gertrude" or
+ this = "Oscar" or
+ this = "Lilian" or
+ this = "Raymond" or
+ this = "Elgar" or
+ this = "Elmer" or
+ this = "Herbert" or
+ this = "Maude" or
+ this = "Mae" or
+ this = "Otto" or
+ this = "Edwin" or
+ this = "Ophelia" or
+ this = "Parsley" or
+ this = "Sage" or
+ this = "Rosemary" or
+ this = "Thyme" or
+ this = "Garfunkel" or
+ this = "King Basil" or
+ this = "Stephen"
+ }
+
+ /** Gets a hair color of the person. If the person is bald, there is no result; a person may have more than one hair color. */
+ string getHairColor() {
+ this = "Ronil" and result = "black"
+ or
+ this = "Dina" and result = "black"
+ or
+ this = "Ravi" and result = "black"
+ or
+ this = "Bruce" and result = "brown"
+ or
+ this = "Jo" and result = "red"
+ or
+ this = "Aida" and result = "blond"
+ or
+ this = "Esme" and result = "blond"
+ or
+ this = "Fred" and result = "gray"
+ or
+ this = "Meera" and result = "brown"
+ or
+ this = "Maya" and result = "brown"
+ or
+ this = "Chad" and result = "brown"
+ or
+ this = "Tiana" and result = "black"
+ or
+ this = "Laura" and result = "blond"
+ or
+ this = "George" and result = "blond"
+ or
+ this = "Will" and result = "blond"
+ or
+ this = "Mary" and result = "blond"
+ or
+ this = "Almira" and result = "black"
+ or
+ this = "Susannah" and result = "blond"
+ or
+ this = "Rhoda" and result = "blond"
+ or
+ this = "Cynthia" and result = "gray"
+ or
+ this = "Eunice" and result = "white"
+ or
+ this = "Olive" and result = "brown"
+ or
+ this = "Virginia" and result = "brown"
+ or
+ this = "Angeline" and result = "red"
+ or
+ this = "Helen" and result = "white"
+ or
+ this = "Cornelia" and result = "gray"
+ or
+ this = "Harriet" and result = "white"
+ or
+ this = "Mahala" and result = "black"
+ or
+ this = "Abby" and result = "red"
+ or
+ this = "Margaret" and result = "brown"
+ or
+ this = "Deb" and result = "brown"
+ or
+ this = "Minerva" and result = "brown"
+ or
+ this = "Severus" and result = "black"
+ or
+ this = "Lavina" and result = "brown"
+ or
+ this = "Adeline" and result = "brown"
+ or
+ this = "Cath" and result = "brown"
+ or
+ this = "Elisa" and result = "brown"
+ or
+ this = "Lucretia" and result = "gray"
+ or
+ this = "Anne" and result = "black"
+ or
+ this = "Eleanor" and result = "brown"
+ or
+ this = "Joanna" and result = "brown"
+ or
+ this = "Adam" and result = "black"
+ or
+ this = "Agnes" and result = "black"
+ or
+ this = "Rosanna" and result = "gray"
+ or
+ this = "Clara" and result = "blond"
+ or
+ this = "Melissa" and result = "brown"
+ or
+ this = "Amy" and result = "brown"
+ or
+ this = "Isabel" and result = "black"
+ or
+ this = "Jemima" and result = "red"
+ or
+ this = "Cordelia" and result = "red"
+ or
+ this = "Melinda" and result = "gray"
+ or
+ this = "Delila" and result = "white"
+ or
+ this = "Jeremiah" and result = "gray"
+ or
+ this = "Hester" and result = "black"
+ or
+ this = "Walter" and result = "black"
+ or
+ this = "Aaron" and result = "gray"
+ or
+ this = "Reuben" and result = "gray"
+ or
+ this = "Eli" and result = "gray"
+ or
+ this = "Amos" and result = "white"
+ or
+ this = "Augustus" and result = "white"
+ or
+ this = "Theodore" and result = "white"
+ or
+ this = "Timothy" and result = "brown"
+ or
+ this = "Cyrus" and result = "brown"
+ or
+ this = "Horace" and result = "brown"
+ or
+ this = "Simon" and result = "brown"
+ or
+ this = "Asa" and result = "brown"
+ or
+ this = "Frank" and result = "brown"
+ or
+ this = "Nelson" and result = "black"
+ or
+ this = "Leonard" and result = "black"
+ or
+ this = "Harrison" and result = "black"
+ or
+ this = "Anthony" and result = "black"
+ or
+ this = "Louis" and result = "black"
+ or
+ this = "Milton" and result = "blond"
+ or
+ this = "Noah" and result = "blond"
+ or
+ this = "Cornelius" and result = "red"
+ or
+ this = "Abdul" and result = "brown"
+ or
+ this = "Warren" and result = "red"
+ or
+ this = "Harvey" and result = "blond"
+ or
+ this = "Dennis" and result = "blond"
+ or
+ this = "Wesley" and result = "brown"
+ or
+ this = "Sylvester" and result = "brown"
+ or
+ this = "Gilbert" and result = "brown"
+ or
+ this = "Sullivan" and result = "brown"
+ or
+ this = "Edmund" and result = "brown"
+ or
+ this = "Wilson" and result = "blond"
+ or
+ this = "Perry" and result = "black"
+ or
+ this = "Simba" and result = "brown"
+ or
+ this = "Nala" and result = "brown"
+ or
+ this = "Rafiki" and result = "red"
+ or
+ this = "Shenzi" and result = "gray"
+ or
+ this = "Ernest" and result = "blond"
+ or
+ this = "Gertrude" and result = "brown"
+ or
+ this = "Oscar" and result = "blond"
+ or
+ this = "Lilian" and result = "brown"
+ or
+ this = "Raymond" and result = "brown"
+ or
+ this = "Elgar" and result = "brown"
+ or
+ this = "Elmer" and result = "brown"
+ or
+ this = "Herbert" and result = "brown"
+ or
+ this = "Maude" and result = "brown"
+ or
+ this = "Mae" and result = "brown"
+ or
+ this = "Otto" and result = "black"
+ or
+ this = "Edwin" and result = "black"
+ or
+ this = "Ophelia" and result = "brown"
+ or
+ this = "Parsley" and result = "brown"
+ or
+ this = "Sage" and result = "brown"
+ or
+ this = "Rosemary" and result = "brown"
+ or
+ this = "Thyme" and result = "brown"
+ or
+ this = "Garfunkel" and result = "brown"
+ or
+ this = "King Basil" and result = "brown"
+ or
+ this = "Stephen" and result = "black"
+ or
+ this = "Stephen" and result = "gray"
+ }
+
+ /** Gets the age of the person (in years). If the person is deceased, there is no result. */
+ int getAge() {
+ this = "Ronil" and result = 21
+ or
+ this = "Dina" and result = 53
+ or
+ this = "Ravi" and result = 16
+ or
+ this = "Bruce" and result = 35
+ or
+ this = "Jo" and result = 47
+ or
+ this = "Aida" and result = 26
+ or
+ this = "Esme" and result = 25
+ or
+ this = "Charlie" and result = 31
+ or
+ this = "Fred" and result = 68
+ or
+ this = "Meera" and result = 62
+ or
+ this = "Maya" and result = 29
+ or
+ this = "Chad" and result = 49
+ or
+ this = "Tiana" and result = 18
+ or
+ this = "Laura" and result = 2
+ or
+ this = "George" and result = 3
+ or
+ this = "Will" and result = 41
+ or
+ this = "Mary" and result = 51
+ or
+ this = "Almira" and result = 1
+ or
+ this = "Susannah" and result = 97
+ or
+ this = "Rhoda" and result = 39
+ or
+ this = "Cynthia" and result = 89
+ or
+ this = "Eunice" and result = 83
+ or
+ this = "Olive" and result = 25
+ or
+ this = "Virginia" and result = 52
+ or
+ this = "Angeline" and result = 22
+ or
+ this = "Helen" and result = 79
+ or
+ this = "Cornelia" and result = 59
+ or
+ this = "Harriet" and result = 57
+ or
+ this = "Mahala" and result = 61
+ or
+ this = "Abby" and result = 24
+ or
+ this = "Margaret" and result = 59
+ or
+ this = "Deb" and result = 31
+ or
+ this = "Minerva" and result = 72
+ or
+ this = "Severus" and result = 61
+ or
+ this = "Lavina" and result = 33
+ or
+ this = "Adeline" and result = 17
+ or
+ this = "Cath" and result = 22
+ or
+ this = "Elisa" and result = 9
+ or
+ this = "Lucretia" and result = 56
+ or
+ this = "Anne" and result = 11
+ or
+ this = "Eleanor" and result = 80
+ or
+ this = "Joanna" and result = 43
+ or
+ this = "Adam" and result = 37
+ or
+ this = "Agnes" and result = 47
+ or
+ this = "Rosanna" and result = 61
+ or
+ this = "Clara" and result = 31
+ or
+ this = "Melissa" and result = 37
+ or
+ this = "Amy" and result = 12
+ or
+ this = "Isabel" and result = 6
+ or
+ this = "Jemima" and result = 16
+ or
+ this = "Cordelia" and result = 21
+ or
+ this = "Melinda" and result = 55
+ or
+ this = "Delila" and result = 66
+ or
+ this = "Jeremiah" and result = 54
+ or
+ this = "Elijah" and result = 42
+ or
+ this = "Hester" and result = 68
+ or
+ this = "Walter" and result = 66
+ or
+ this = "Oliver" and result = 33
+ or
+ this = "Hugh" and result = 51
+ or
+ this = "Aaron" and result = 49
+ or
+ this = "Reuben" and result = 58
+ or
+ this = "Eli" and result = 70
+ or
+ this = "Amos" and result = 65
+ or
+ this = "Augustus" and result = 56
+ or
+ this = "Theodore" and result = 69
+ or
+ this = "Ira" and result = 1
+ or
+ this = "Timothy" and result = 54
+ or
+ this = "Cyrus" and result = 78
+ or
+ this = "Horace" and result = 34
+ or
+ this = "Simon" and result = 23
+ or
+ this = "Asa" and result = 28
+ or
+ this = "Frank" and result = 59
+ or
+ this = "Nelson" and result = 38
+ or
+ this = "Leonard" and result = 58
+ or
+ this = "Harrison" and result = 7
+ or
+ this = "Anthony" and result = 2
+ or
+ this = "Louis" and result = 34
+ or
+ this = "Milton" and result = 36
+ or
+ this = "Noah" and result = 48
+ or
+ this = "Cornelius" and result = 41
+ or
+ this = "Abdul" and result = 67
+ or
+ this = "Warren" and result = 47
+ or
+ this = "Harvey" and result = 31
+ or
+ this = "Dennis" and result = 39
+ or
+ this = "Wesley" and result = 13
+ or
+ this = "Sylvester" and result = 19
+ or
+ this = "Gilbert" and result = 16
+ or
+ this = "Sullivan" and result = 17
+ or
+ this = "Edmund" and result = 29
+ or
+ this = "Wilson" and result = 27
+ or
+ this = "Perry" and result = 31
+ or
+ this = "Matthew" and result = 55
+ or
+ this = "Simba" and result = 8
+ or
+ this = "Nala" and result = 7
+ or
+ this = "Rafiki" and result = 76
+ or
+ this = "Shenzi" and result = 67
+ }
+
+ /** Gets the height of the person (in cm). If the person is deceased, there is no result. */
+ float getHeight() {
+ this = "Ronil" and result = 183.0
+ or
+ this = "Dina" and result = 155.1
+ or
+ this = "Ravi" and result = 175.2
+ or
+ this = "Bruce" and result = 191.3
+ or
+ this = "Jo" and result = 163.4
+ or
+ this = "Aida" and result = 182.6
+ or
+ this = "Esme" and result = 176.9
+ or
+ this = "Charlie" and result = 189.7
+ or
+ this = "Fred" and result = 179.4
+ or
+ this = "Meera" and result = 160.1
+ or
+ this = "Maya" and result = 153.0
+ or
+ this = "Chad" and result = 168.5
+ or
+ this = "Tiana" and result = 149.7
+ or
+ this = "Laura" and result = 87.5
+ or
+ this = "George" and result = 96.4
+ or
+ this = "Will" and result = 167.1
+ or
+ this = "Mary" and result = 159.8
+ or
+ this = "Almira" and result = 62.1
+ or
+ this = "Susannah" and result = 145.8
+ or
+ this = "Rhoda" and result = 180.1
+ or
+ this = "Cynthia" and result = 161.8
+ or
+ this = "Eunice" and result = 153.2
+ or
+ this = "Olive" and result = 179.9
+ or
+ this = "Virginia" and result = 165.1
+ or
+ this = "Angeline" and result = 172.3
+ or
+ this = "Helen" and result = 163.1
+ or
+ this = "Cornelia" and result = 160.8
+ or
+ this = "Harriet" and result = 163.2
+ or
+ this = "Mahala" and result = 157.7
+ or
+ this = "Abby" and result = 174.5
+ or
+ this = "Margaret" and result = 165.6
+ or
+ this = "Deb" and result = 171.6
+ or
+ this = "Minerva" and result = 168.7
+ or
+ this = "Severus" and result = 188.8
+ or
+ this = "Lavina" and result = 155.1
+ or
+ this = "Adeline" and result = 165.5
+ or
+ this = "Cath" and result = 147.8
+ or
+ this = "Elisa" and result = 129.4
+ or
+ this = "Lucretia" and result = 153.6
+ or
+ this = "Anne" and result = 140.4
+ or
+ this = "Eleanor" and result = 151.1
+ or
+ this = "Joanna" and result = 167.2
+ or
+ this = "Adam" and result = 155.5
+ or
+ this = "Agnes" and result = 156.8
+ or
+ this = "Rosanna" and result = 162.4
+ or
+ this = "Clara" and result = 158.6
+ or
+ this = "Melissa" and result = 182.3
+ or
+ this = "Amy" and result = 147.1
+ or
+ this = "Isabel" and result = 121.4
+ or
+ this = "Jemima" and result = 149.8
+ or
+ this = "Cordelia" and result = 151.7
+ or
+ this = "Melinda" and result = 154.4
+ or
+ this = "Delila" and result = 163.4
+ or
+ this = "Jeremiah" and result = 167.5
+ or
+ this = "Elijah" and result = 184.5
+ or
+ this = "Hester" and result = 152.7
+ or
+ this = "Walter" and result = 159.6
+ or
+ this = "Oliver" and result = 192.4
+ or
+ this = "Hugh" and result = 173.1
+ or
+ this = "Aaron" and result = 176.6
+ or
+ this = "Reuben" and result = 169.9
+ or
+ this = "Eli" and result = 180.4
+ or
+ this = "Amos" and result = 167.4
+ or
+ this = "Augustus" and result = 156.5
+ or
+ this = "Theodore" and result = 176.6
+ or
+ this = "Ira" and result = 54.1
+ or
+ this = "Timothy" and result = 172.2
+ or
+ this = "Cyrus" and result = 157.9
+ or
+ this = "Horace" and result = 169.3
+ or
+ this = "Simon" and result = 157.1
+ or
+ this = "Asa" and result = 149.4
+ or
+ this = "Frank" and result = 167.2
+ or
+ this = "Nelson" and result = 173.0
+ or
+ this = "Leonard" and result = 172.0
+ or
+ this = "Harrison" and result = 126.0
+ or
+ this = "Anthony" and result = 98.4
+ or
+ this = "Louis" and result = 186.8
+ or
+ this = "Milton" and result = 157.8
+ or
+ this = "Noah" and result = 190.5
+ or
+ this = "Cornelius" and result = 183.1
+ or
+ this = "Abdul" and result = 182.0
+ or
+ this = "Warren" and result = 175.0
+ or
+ this = "Harvey" and result = 169.3
+ or
+ this = "Dennis" and result = 160.4
+ or
+ this = "Wesley" and result = 139.8
+ or
+ this = "Sylvester" and result = 188.2
+ or
+ this = "Gilbert" and result = 177.6
+ or
+ this = "Sullivan" and result = 168.3
+ or
+ this = "Edmund" and result = 159.2
+ or
+ this = "Wilson" and result = 167.6
+ or
+ this = "Perry" and result = 189.1
+ or
+ this = "Matthew" and result = 167.2
+ or
+ this = "Simba" and result = 140.1
+ or
+ this = "Nala" and result = 138.0
+ or
+ this = "Rafiki" and result = 139.3
+ or
+ this = "Shenzi" and result = 171.1
+ }
+
+ /** Gets the location of the person's home ("north", "south", "east", or "west"). If the person is deceased, there is no result. */
+ string getLocation() {
+ this = "Ronil" and result = "north"
+ or
+ this = "Dina" and result = "north"
+ or
+ this = "Ravi" and result = "north"
+ or
+ this = "Bruce" and result = "south"
+ or
+ this = "Jo" and result = "west"
+ or
+ this = "Aida" and result = "east"
+ or
+ this = "Esme" and result = "east"
+ or
+ this = "Charlie" and result = "south"
+ or
+ this = "Fred" and result = "west"
+ or
+ this = "Meera" and result = "south"
+ or
+ this = "Maya" and result = "south"
+ or
+ this = "Chad" and result = "south"
+ or
+ this = "Tiana" and result = "west"
+ or
+ this = "Laura" and result = "south"
+ or
+ this = "George" and result = "south"
+ or
+ this = "Will" and result = "south"
+ or
+ this = "Mary" and result = "south"
+ or
+ this = "Almira" and result = "south"
+ or
+ this = "Susannah" and result = "north"
+ or
+ this = "Rhoda" and result = "north"
+ or
+ this = "Cynthia" and result = "north"
+ or
+ this = "Eunice" and result = "north"
+ or
+ this = "Olive" and result = "west"
+ or
+ this = "Virginia" and result = "west"
+ or
+ this = "Angeline" and result = "west"
+ or
+ this = "Helen" and result = "west"
+ or
+ this = "Cornelia" and result = "east"
+ or
+ this = "Harriet" and result = "east"
+ or
+ this = "Mahala" and result = "east"
+ or
+ this = "Abby" and result = "east"
+ or
+ this = "Margaret" and result = "east"
+ or
+ this = "Deb" and result = "east"
+ or
+ this = "Minerva" and result = "south"
+ or
+ this = "Severus" and result = "north"
+ or
+ this = "Lavina" and result = "east"
+ or
+ this = "Adeline" and result = "west"
+ or
+ this = "Cath" and result = "east"
+ or
+ this = "Elisa" and result = "east"
+ or
+ this = "Lucretia" and result = "north"
+ or
+ this = "Anne" and result = "north"
+ or
+ this = "Eleanor" and result = "south"
+ or
+ this = "Joanna" and result = "south"
+ or
+ this = "Adam" and result = "east"
+ or
+ this = "Agnes" and result = "east"
+ or
+ this = "Rosanna" and result = "east"
+ or
+ this = "Clara" and result = "east"
+ or
+ this = "Melissa" and result = "west"
+ or
+ this = "Amy" and result = "west"
+ or
+ this = "Isabel" and result = "west"
+ or
+ this = "Jemima" and result = "west"
+ or
+ this = "Cordelia" and result = "west"
+ or
+ this = "Melinda" and result = "west"
+ or
+ this = "Delila" and result = "south"
+ or
+ this = "Jeremiah" and result = "north"
+ or
+ this = "Elijah" and result = "north"
+ or
+ this = "Hester" and result = "east"
+ or
+ this = "Walter" and result = "east"
+ or
+ this = "Oliver" and result = "east"
+ or
+ this = "Hugh" and result = "south"
+ or
+ this = "Aaron" and result = "south"
+ or
+ this = "Reuben" and result = "west"
+ or
+ this = "Eli" and result = "west"
+ or
+ this = "Amos" and result = "east"
+ or
+ this = "Augustus" and result = "south"
+ or
+ this = "Theodore" and result = "west"
+ or
+ this = "Ira" and result = "south"
+ or
+ this = "Timothy" and result = "north"
+ or
+ this = "Cyrus" and result = "north"
+ or
+ this = "Horace" and result = "east"
+ or
+ this = "Simon" and result = "east"
+ or
+ this = "Asa" and result = "east"
+ or
+ this = "Frank" and result = "west"
+ or
+ this = "Nelson" and result = "west"
+ or
+ this = "Leonard" and result = "west"
+ or
+ this = "Harrison" and result = "north"
+ or
+ this = "Anthony" and result = "north"
+ or
+ this = "Louis" and result = "north"
+ or
+ this = "Milton" and result = "south"
+ or
+ this = "Noah" and result = "south"
+ or
+ this = "Cornelius" and result = "east"
+ or
+ this = "Abdul" and result = "east"
+ or
+ this = "Warren" and result = "west"
+ or
+ this = "Harvey" and result = "west"
+ or
+ this = "Dennis" and result = "west"
+ or
+ this = "Wesley" and result = "west"
+ or
+ this = "Sylvester" and result = "south"
+ or
+ this = "Gilbert" and result = "east"
+ or
+ this = "Sullivan" and result = "east"
+ or
+ this = "Edmund" and result = "north"
+ or
+ this = "Wilson" and result = "north"
+ or
+ this = "Perry" and result = "west"
+ or
+ this = "Matthew" and result = "east"
+ or
+ this = "Simba" and result = "south"
+ or
+ this = "Nala" and result = "south"
+ or
+ this = "Rafiki" and result = "north"
+ or
+ this = "Shenzi" and result = "west"
+ }
+
+ /** Holds if the person is deceased. */
+ predicate isDeceased() {
+ this = "Ernest" or
+ this = "Gertrude" or
+ this = "Oscar" or
+ this = "Lilian" or
+ this = "Edwin" or
+ this = "Raymond" or
+ this = "Elgar" or
+ this = "Elmer" or
+ this = "Herbert" or
+ this = "Maude" or
+ this = "Mae" or
+ this = "Otto" or
+ this = "Ophelia" or
+ this = "Parsley" or
+ this = "Sage" or
+ this = "Rosemary" or
+ this = "Thyme" or
+ this = "Garfunkel" or
+ this = "King Basil"
+ }
+
+ /** Gets a parent of the person (alive or deceased). */
+ Person getAParent() {
+ this = "Stephen" and result = "Edmund"
+ or
+ this = "Edmund" and result = "Augustus"
+ or
+ this = "Augustus" and result = "Stephen"
+ or
+ this = "Abby" and result = "Cornelia"
+ or
+ this = "Abby" and result = "Amos"
+ or
+ this = "Abdul" and result = "Susannah"
+ or
+ this = "Adam" and result = "Amos"
+ or
+ this = "Adeline" and result = "Melinda"
+ or
+ this = "Adeline" and result = "Frank"
+ or
+ this = "Agnes" and result = "Abdul"
+ or
+ this = "Aida" and result = "Agnes"
+ or
+ this = "Almira" and result = "Sylvester"
+ or
+ this = "Amos" and result = "Eunice"
+ or
+ this = "Amy" and result = "Noah"
+ or
+ this = "Amy" and result = "Chad"
+ or
+ this = "Angeline" and result = "Reuben"
+ or
+ this = "Angeline" and result = "Lucretia"
+ or
+ this = "Anne" and result = "Rhoda"
+ or
+ this = "Anne" and result = "Louis"
+ or
+ this = "Anthony" and result = "Lavina"
+ or
+ this = "Anthony" and result = "Asa"
+ or
+ this = "Asa" and result = "Cornelia"
+ or
+ this = "Cath" and result = "Harriet"
+ or
+ this = "Charlie" and result = "Matthew"
+ or
+ this = "Clara" and result = "Ernest"
+ or
+ this = "Cornelia" and result = "Cynthia"
+ or
+ this = "Cornelius" and result = "Eli"
+ or
+ this = "Deb" and result = "Margaret"
+ or
+ this = "Dennis" and result = "Fred"
+ or
+ this = "Eli" and result = "Susannah"
+ or
+ this = "Elijah" and result = "Delila"
+ or
+ this = "Elisa" and result = "Deb"
+ or
+ this = "Elisa" and result = "Horace"
+ or
+ this = "Esme" and result = "Margaret"
+ or
+ this = "Frank" and result = "Eleanor"
+ or
+ this = "Frank" and result = "Cyrus"
+ or
+ this = "George" and result = "Maya"
+ or
+ this = "George" and result = "Wilson"
+ or
+ this = "Gilbert" and result = "Cornelius"
+ or
+ this = "Harriet" and result = "Cynthia"
+ or
+ this = "Harrison" and result = "Louis"
+ or
+ this = "Harvey" and result = "Fred"
+ or
+ this = "Helen" and result = "Susannah"
+ or
+ this = "Hester" and result = "Edwin"
+ or
+ this = "Hugh" and result = "Cyrus"
+ or
+ this = "Hugh" and result = "Helen"
+ or
+ this = "Ira" and result = "Maya"
+ or
+ this = "Ira" and result = "Wilson"
+ or
+ this = "Isabel" and result = "Perry"
+ or
+ this = "Isabel" and result = "Harvey"
+ or
+ this = "Jemima" and result = "Melinda"
+ or
+ this = "Jemima" and result = "Frank"
+ or
+ this = "Ernest" and result = "Lilian"
+ or
+ this = "Ernest" and result = "Oscar"
+ or
+ this = "Gertrude" and result = "Ophelia"
+ or
+ this = "Gertrude" and result = "Raymond"
+ or
+ this = "Lilian" and result = "Elgar"
+ or
+ this = "Lilian" and result = "Mae"
+ or
+ this = "Raymond" and result = "Elgar"
+ or
+ this = "Raymond" and result = "Mae"
+ or
+ this = "Elmer" and result = "Ophelia"
+ or
+ this = "Elmer" and result = "Raymond"
+ or
+ this = "Herbert" and result = "Ophelia"
+ or
+ this = "Herbert" and result = "Raymond"
+ or
+ this = "Maude" and result = "Ophelia"
+ or
+ this = "Maude" and result = "Raymond"
+ or
+ this = "Otto" and result = "Elgar"
+ or
+ this = "Otto" and result = "Mae"
+ or
+ this = "Edwin" and result = "Otto"
+ or
+ this = "Parsley" and result = "Simon"
+ or
+ this = "Parsley" and result = "Garfunkel"
+ or
+ this = "Sage" and result = "Simon"
+ or
+ this = "Sage" and result = "Garfunkel"
+ or
+ this = "Rosemary" and result = "Simon"
+ or
+ this = "Rosemary" and result = "Garfunkel"
+ or
+ this = "Thyme" and result = "Simon"
+ or
+ this = "Thyme" and result = "Garfunkel"
+ or
+ this = "King Basil" and result = "Ophelia"
+ or
+ this = "King Basil" and result = "Raymond"
+ or
+ this = "Jo" and result = "Theodore"
+ or
+ this = "Joanna" and result = "Shenzi"
+ or
+ this = "Laura" and result = "Maya"
+ or
+ this = "Laura" and result = "Wilson"
+ or
+ this = "Lavina" and result = "Mahala"
+ or
+ this = "Lavina" and result = "Walter"
+ or
+ this = "Leonard" and result = "Cyrus"
+ or
+ this = "Leonard" and result = "Helen"
+ or
+ this = "Lucretia" and result = "Eleanor"
+ or
+ this = "Lucretia" and result = "Cyrus"
+ or
+ this = "Mahala" and result = "Eunice"
+ or
+ this = "Margaret" and result = "Cynthia"
+ or
+ this = "Matthew" and result = "Cyrus"
+ or
+ this = "Matthew" and result = "Helen"
+ or
+ this = "Maya" and result = "Meera"
+ or
+ this = "Melinda" and result = "Rafiki"
+ or
+ this = "Melissa" and result = "Mahala"
+ or
+ this = "Melissa" and result = "Walter"
+ or
+ this = "Nala" and result = "Bruce"
+ or
+ this = "Nelson" and result = "Mahala"
+ or
+ this = "Nelson" and result = "Walter"
+ or
+ this = "Noah" and result = "Eli"
+ or
+ this = "Olive" and result = "Reuben"
+ or
+ this = "Olive" and result = "Lucretia"
+ or
+ this = "Oliver" and result = "Matthew"
+ or
+ this = "Perry" and result = "Leonard"
+ or
+ this = "Ravi" and result = "Dina"
+ or
+ this = "Simba" and result = "Will"
+ or
+ this = "Simon" and result = "Margaret"
+ or
+ this = "Sullivan" and result = "Cornelius"
+ or
+ this = "Sylvester" and result = "Timothy"
+ or
+ this = "Theodore" and result = "Susannah"
+ or
+ this = "Tiana" and result = "Jo"
+ or
+ this = "Virginia" and result = "Helen"
+ or
+ this = "Warren" and result = "Shenzi"
+ or
+ this = "Wesley" and result = "Warren"
+ or
+ this = "Wesley" and result = "Jo"
+ or
+ this = "Will" and result = "Eli"
+ }
+
+ /** Holds if this person may enter `region`. As defined here, every person is allowed in each of the four regions. */
+ predicate isAllowedIn(string region) {
+ "north" = region or
+ "south" = region or
+ "east" = region or
+ "west" = region
+ }
+}
+
+/** Gets a parent of `p` (alive or deceased), exactly as given by `p.getAParent()`. */
+Person parentOf(Person p) { p.getAParent() = result }
diff --git a/repo-tests/codeql-ruby/ql/src/AlertSuppression.ql b/repo-tests/codeql-ruby/ql/src/AlertSuppression.ql
new file mode 100644
index 00000000000..b10c4ecbb45
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/AlertSuppression.ql
@@ -0,0 +1,82 @@
+/**
+ * @name Alert suppression
+ * @description Generates information about alert suppressions.
+ * @kind alert-suppression
+ * @id rb/alert-suppression
+ */
+
+import ruby
+import codeql.ruby.ast.internal.TreeSitter
+
+/**
+ * An alert suppression comment.
+ */
+class SuppressionComment extends Ruby::Comment {
+ string annotation;
+
+ SuppressionComment() {
+ // suppression comments must be single-line
+ this.getLocation().getStartLine() = this.getLocation().getEndLine() and
+ exists(string text | text = commentText(this) |
+ // match `lgtm[...]` anywhere in the comment
+ annotation = text.regexpFind("(?i)\\blgtm\\s*\\[[^\\]]*\\]", _, _)
+ or
+ // match `lgtm` at the start of the comment and after semicolon
+ annotation = text.regexpFind("(?i)(?<=^|;)\\s*lgtm(?!\\B|\\s*\\[)", _, _).trim()
+ )
+ }
+
+ /**
+ * Gets the text of this suppression comment.
+ */
+ string getText() { result = commentText(this) }
+
+ /** Gets the suppression annotation in this comment. */
+ string getAnnotation() { result = annotation }
+
+ /**
+ * Holds if this comment applies to the range from column `startcolumn` of line `startline`
+ * to column `endcolumn` of line `endline` in file `filepath`.
+ */
+ predicate covers(string filepath, int startline, int startcolumn, int endline, int endcolumn) {
+ this.getLocation().hasLocationInfo(filepath, startline, _, endline, endcolumn) and
+ startcolumn = 1
+ }
+
+ /** Gets the scope of this suppression. */
+ SuppressionScope getScope() { this = result.getSuppressionComment() }
+}
+
+private string commentText(Ruby::Comment comment) { result = comment.getValue().suffix(1) }
+
+/**
+ * The scope of an alert suppression comment.
+ */
+class SuppressionScope extends @ruby_token_comment {
+ SuppressionScope() { this instanceof SuppressionComment }
+
+ /** Gets a suppression comment with this scope. */
+ SuppressionComment getSuppressionComment() { result = this }
+
+ /**
+ * Holds if this element is at the specified location.
+ * The location spans column `startcolumn` of line `startline` to
+ * column `endcolumn` of line `endline` in file `filepath`.
+ * For more information, see
+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+ */
+ predicate hasLocationInfo(
+ string filepath, int startline, int startcolumn, int endline, int endcolumn
+ ) {
+ this.(SuppressionComment).covers(filepath, startline, startcolumn, endline, endcolumn)
+ }
+
+ /** Gets a textual representation of this element. */
+ string toString() { result = "suppression range" }
+}
+
+from SuppressionComment c
+select c, // suppression comment
+ c.getText(), // text of suppression comment (excluding delimiters)
+ c.getAnnotation(), // text of suppression annotation
+ c.getScope() // scope of suppression
diff --git a/repo-tests/codeql-ruby/ql/src/experimental/performance/UseDetect.ql b/repo-tests/codeql-ruby/ql/src/experimental/performance/UseDetect.ql
new file mode 100644
index 00000000000..f5fcf6df4fb
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/experimental/performance/UseDetect.ql
@@ -0,0 +1,64 @@
+/**
+ * @name Use detect
+ * @description Use 'detect' instead of 'select' followed by 'first' or 'last'.
+ * @kind problem
+ * @problem.severity warning
+ * @id rb/use-detect
+ * @tags performance rubocop
+ * @precision high
+ */
+
+// This is an implementation of the Rubocop rule
+// https://github.com/rubocop/rubocop-performance/blob/master/lib/rubocop/cop/performance/detect.rb
+import ruby
+import codeql.ruby.dataflow.SSA
+
+/** A call that extracts the first or last element of a list: `first`, `last`, `[0]` or `[-1]`. */
+class EndCall extends MethodCall {
+  string detect;
+
+  EndCall() {
+    detect = "detect" and
+    (
+      this.getMethodName() = "first" and this.getNumberOfArguments() = 0
+      or
+      // index form; requiring an element reference keeps out other one-argument calls such as `first(0)`
+      this instanceof ElementReference and this.getNumberOfArguments() = 1 and
+      this.getArgument(0).(IntegerLiteral).getValueText() = "0"
+    )
+    or
+    detect = "reverse_detect" and
+    (
+      this.getMethodName() = "last" and this.getNumberOfArguments() = 0
+      or
+      this instanceof ElementReference and this.getNumberOfArguments() = 1 and
+      this.getArgument(0).(UnaryMinusExpr).getOperand().(IntegerLiteral).getValueText() = "1"
+    )
+  }
+
+  /** Gets the name of the replacement method, `detect` or `reverse_detect`. */
+  string detectCall() { result = detect }
+}
+
+Expr getUniqueRead(Expr e) {
+ exists(AssignExpr ae |
+ e = ae.getRightOperand() and
+ forex(Ssa::WriteDefinition def | def.getWriteAccess() = ae.getLeftOperand() |
+ strictcount(def.getARead()) = 1 and
+ not def = any(Ssa::PhiNode phi).getAnInput() and
+ def.getARead() = result.getAControlFlowNode()
+ )
+ )
+}
+
+/** A call to `select`, `filter` or `find_all` that is given a block. */
+class SelectBlock extends MethodCall {
+  SelectBlock() {
+    exists(this.getBlock()) and this.getMethodName() = ["select", "filter", "find_all"]
+  }
+}
+
+from EndCall call, SelectBlock selectBlock
+where getUniqueRead*(selectBlock) = call.getReceiver()
+select call, "Replace this call and $@ with '" + call.detectCall() + "'.", selectBlock,
+ "'select' call"
diff --git a/repo-tests/codeql-ruby/ql/src/filters/ClassifyFiles.ql b/repo-tests/codeql-ruby/ql/src/filters/ClassifyFiles.ql
new file mode 100644
index 00000000000..d194523e09d
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/filters/ClassifyFiles.ql
@@ -0,0 +1,20 @@
+/**
+ * @name Classify files
+ * @description This query produces a list of all files in a database
+ * that are classified as generated code or test code.
+ *
+ * Used by LGTM.
+ * @kind file-classifier
+ * @id rb/file-classifier
+ */
+
+import ruby
+import codeql.ruby.filters.GeneratedCode
+
+predicate classify(File f, string category) {
+ f instanceof GeneratedCodeFile and category = "generated"
+}
+
+from File f, string category
+where classify(f, category)
+select f, category
diff --git a/repo-tests/codeql-ruby/ql/src/ide-contextual-queries/localDefinitions.ql b/repo-tests/codeql-ruby/ql/src/ide-contextual-queries/localDefinitions.ql
new file mode 100644
index 00000000000..81c5e449bb1
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/ide-contextual-queries/localDefinitions.ql
@@ -0,0 +1,20 @@
+/**
+ * @name Jump-to-definition links
+ * @description Generates use-definition pairs that provide the data
+ * for jump-to-definition in the code viewer.
+ * @kind definitions
+ * @id ruby/ide-jump-to-definition
+ * @tags ide-contextual-queries/local-definitions
+ */
+
+import codeql.IDEContextual
+import codeql.ruby.AST
+
+external string selectedSourceFile();
+
+from AstNode e, Variable def, string kind
+where
+ e = def.getAnAccess() and
+ kind = "local variable" and
+ e.getLocation().getFile() = getFileBySourceArchiveName(selectedSourceFile())
+select e, def, kind
diff --git a/repo-tests/codeql-ruby/ql/src/ide-contextual-queries/localReferences.ql b/repo-tests/codeql-ruby/ql/src/ide-contextual-queries/localReferences.ql
new file mode 100644
index 00000000000..713b363e60f
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/ide-contextual-queries/localReferences.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Find-references links
+ * @description Generates use-definition pairs that provide the data
+ * for find-references in the code viewer.
+ * @kind definitions
+ * @id ruby/ide-find-references
+ * @tags ide-contextual-queries/local-references
+ */
+
+import codeql.IDEContextual
+import codeql.ruby.AST
+import codeql.ruby.ast.Variable
+
+external string selectedSourceFile();
+
+from AstNode e, Variable def, string kind
+where
+ e = def.getAnAccess() and
+ kind = "local variable" and
+ def.getLocation().getFile() = getFileBySourceArchiveName(selectedSourceFile())
+select e, def, kind
diff --git a/repo-tests/codeql-ruby/ql/src/ide-contextual-queries/printAst.ql b/repo-tests/codeql-ruby/ql/src/ide-contextual-queries/printAst.ql
new file mode 100644
index 00000000000..cd5b9a4a3b2
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/ide-contextual-queries/printAst.ql
@@ -0,0 +1,27 @@
+/**
+ * @name Print AST
+ * @description Produces a representation of a file's Abstract Syntax Tree.
+ * This query is used by the VS Code extension.
+ * @id ruby/print-ast
+ * @kind graph
+ * @tags ide-contextual-queries/print-ast
+ */
+
+private import codeql.IDEContextual
+private import codeql.ruby.AST
+private import codeql.ruby.printAst
+
+/**
+ * The source file to generate an AST from.
+ */
+external string selectedSourceFile();
+
+/**
+ * Overrides the configuration to print only nodes in the selected source file.
+ */
+class Cfg extends PrintAstConfiguration {
+ override predicate shouldPrintNode(AstNode n) {
+ super.shouldPrintNode(n) and
+ n.getLocation().getFile() = getFileBySourceArchiveName(selectedSourceFile())
+ }
+}
diff --git a/repo-tests/codeql-ruby/ql/src/qlpack.yml b/repo-tests/codeql-ruby/ql/src/qlpack.yml
new file mode 100644
index 00000000000..1c346968c43
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/qlpack.yml
@@ -0,0 +1,7 @@
+name: codeql/ruby-queries
+version: 0.0.2
+suites: codeql-suites
+defaultSuiteFile: codeql-suites/ruby-code-scanning.qls
+dependencies:
+ codeql/ruby-all: "*"
+ codeql/suite-helpers: "*"
diff --git a/repo-tests/codeql-ruby/ql/src/queries/analysis/Definitions.ql b/repo-tests/codeql-ruby/ql/src/queries/analysis/Definitions.ql
new file mode 100644
index 00000000000..aff97bbc345
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/analysis/Definitions.ql
@@ -0,0 +1,81 @@
+/**
+ * @name Definitions
+ * @description Jump to definition helper query.
+ * @kind definitions
+ * @id rb/jump-to-definition
+ */
+
+/*
+ * TODO:
+ * - should `Foo.new` point to `Foo#initialize`?
+ */
+
+import ruby
+import codeql.ruby.ast.internal.Module
+import codeql.ruby.dataflow.SSA
+
+from DefLoc loc, Expr src, Expr target, string kind
+where
+ ConstantDefLoc(src, target) = loc and kind = "constant"
+ or
+ MethodLoc(src, target) = loc and kind = "method"
+ or
+ LocalVariableLoc(src, target) = loc and kind = "variable"
+ or
+ InstanceVariableLoc(src, target) = loc and kind = "instance variable"
+ or
+ ClassVariableLoc(src, target) = loc and kind = "class variable"
+select src, target, kind
+
+/**
+ * Definition location info for different identifiers.
+ * Each branch holds two values that are subclasses of `Expr`.
+ * The first is the "source" - some usage of an identifier.
+ * The second is the "target" - the definition of that identifier.
+ */
+newtype DefLoc =
+ /** A constant, module or class: a read paired with the write picked by `definitionOf`. */
+ ConstantDefLoc(ConstantReadAccess read, ConstantWriteAccess write) { write = definitionOf(read) } or
+ /** A method call paired with one of its targets. */
+ MethodLoc(MethodCall call, Method meth) { meth = call.getATarget() } or
+ /** A local variable: a non-synthesized read of an SSA write definition, paired with its write access. */
+ LocalVariableLoc(VariableReadAccess read, VariableWriteAccess write) {
+ exists(Ssa::WriteDefinition w |
+ write = w.getWriteAccess() and
+ read = w.getARead().getExpr() and
+ not read.isSynthesized()
+ )
+ } or
+ /** An instance variable: a read paired with a write to the same variable inside an `initialize` method. */
+ InstanceVariableLoc(InstanceVariableReadAccess read, InstanceVariableWriteAccess write) {
+ /*
+ * We consider instance variables to be "defined" in the initialize method of their enclosing class.
+ * If that method doesn't exist, we won't provide any jump-to-def information for the instance variable.
+ */
+
+ exists(Method m |
+ m.getAChild+() = write and
+ m.getName() = "initialize" and
+ write.getVariable() = read.getVariable()
+ )
+ } or
+ /** A class variable: a read paired with a write to the same variable that is not nested in any method. */
+ ClassVariableLoc(ClassVariableReadAccess read, ClassVariableWriteAccess write) {
+ read.getVariable() = write.getVariable() and
+ not exists(MethodBase m | m.getAChild+() = write)
+ }
+
+/**
+ * Gets the constant write that defines the given constant.
+ * Modules often don't have a unique definition, as they are opened multiple times in different
+ * files. In these cases we arbitrarily pick the definition with the lexicographically least
+ * location.
+ */
+ConstantWriteAccess definitionOf(ConstantReadAccess r) {
+ result =
+ min(ConstantWriteAccess w |
+ w.getQualifiedName() = resolveConstant(r)
+ |
+ w order by w.getLocation().toString()
+ )
+}
diff --git a/repo-tests/codeql-ruby/ql/src/queries/diagnostics/ExtractionErrors.ql b/repo-tests/codeql-ruby/ql/src/queries/diagnostics/ExtractionErrors.ql
new file mode 100644
index 00000000000..5c55d984337
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/diagnostics/ExtractionErrors.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Extraction errors
+ * @description List all extraction errors for files in the source code directory.
+ * @kind diagnostic
+ * @id rb/diagnostics/extraction-errors
+ */
+
+import ruby
+import codeql.ruby.Diagnostics
+
+/** Gets the SARIF severity to associate with an extraction error; this is always `2`. */
+int getSeverity() { result = 2 }
+
+from ExtractionError error, File f
+where
+ f = error.getLocation().getFile() and
+ exists(f.getRelativePath())
+select error, "Extraction failed in " + f + " with error " + error.getMessage(), getSeverity()
diff --git a/repo-tests/codeql-ruby/ql/src/queries/diagnostics/SuccessfullyExtractedFiles.ql b/repo-tests/codeql-ruby/ql/src/queries/diagnostics/SuccessfullyExtractedFiles.ql
new file mode 100644
index 00000000000..74f95763d8a
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/diagnostics/SuccessfullyExtractedFiles.ql
@@ -0,0 +1,16 @@
+/**
+ * @name Successfully extracted files
+ * @description Lists all files in the source code directory that were extracted
+ * without encountering an error in the file.
+ * @kind diagnostic
+ * @id rb/diagnostics/successfully-extracted-files
+ */
+
+import ruby
+import codeql.ruby.Diagnostics
+
+from File f
+where
+ not exists(ExtractionError e | e.getLocation().getFile() = f) and
+ exists(f.getRelativePath())
+select f, ""
diff --git a/repo-tests/codeql-ruby/ql/src/queries/metrics/FLines.ql b/repo-tests/codeql-ruby/ql/src/queries/metrics/FLines.ql
new file mode 100644
index 00000000000..97c319fbf73
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/metrics/FLines.ql
@@ -0,0 +1,13 @@
+/**
+ * @name Number of lines
+ * @kind metric
+ * @description The number of lines in each file.
+ * @metricType file
+ * @id rb/lines-per-file
+ */
+
+import ruby
+
+from RubyFile f, int n
+where n = f.getNumberOfLines()
+select f, n order by n desc
diff --git a/repo-tests/codeql-ruby/ql/src/queries/metrics/FLinesOfCode.ql b/repo-tests/codeql-ruby/ql/src/queries/metrics/FLinesOfCode.ql
new file mode 100644
index 00000000000..0c1d15960cc
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/metrics/FLinesOfCode.ql
@@ -0,0 +1,14 @@
+/**
+ * @name Lines of code in files
+ * @kind metric
+ * @description Measures the number of lines of code in each file, ignoring lines that
+ * contain only comments or whitespace.
+ * @metricType file
+ * @id rb/lines-of-code-in-files
+ */
+
+import ruby
+
+from RubyFile f, int n
+where n = f.getNumberOfLinesOfCode()
+select f, n order by n desc
diff --git a/repo-tests/codeql-ruby/ql/src/queries/metrics/FLinesOfComments.ql b/repo-tests/codeql-ruby/ql/src/queries/metrics/FLinesOfComments.ql
new file mode 100644
index 00000000000..8af882f13d1
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/metrics/FLinesOfComments.ql
@@ -0,0 +1,13 @@
+/**
+ * @name Lines of comments in files
+ * @kind metric
+ * @description Measures the number of lines of comments in each file.
+ * @metricType file
+ * @id rb/lines-of-comments-in-files
+ */
+
+import ruby
+
+from RubyFile f, int n
+where n = f.getNumberOfLinesOfComments()
+select f, n order by n desc
diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-078/CommandInjection.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-078/CommandInjection.ql
new file mode 100644
index 00000000000..4c2dda966b9
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-078/CommandInjection.ql
@@ -0,0 +1,25 @@
+/**
+ * @name Uncontrolled command line
+ * @description Using externally controlled strings in a command line may allow a malicious
+ * user to change the meaning of the command.
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 9.8
+ * @precision high
+ * @id rb/command-line-injection
+ * @tags correctness
+ * security
+ * external/cwe/cwe-078
+ * external/cwe/cwe-088
+ */
+
+import ruby
+import codeql.ruby.security.CommandInjectionQuery
+import DataFlow::PathGraph
+
+from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, Source sourceNode
+where
+ config.hasFlowPath(source, sink) and
+ sourceNode = source.getNode()
+select sink.getNode(), source, sink, "This command depends on $@.", sourceNode,
+ sourceNode.getSourceType()
diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-078/KernelOpen.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-078/KernelOpen.ql
new file mode 100644
index 00000000000..5bb02183915
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-078/KernelOpen.ql
@@ -0,0 +1,76 @@
+/**
+ * @name Use of `Kernel.open` or `IO.read`
+ * @description Using `Kernel.open` or `IO.read` may allow a malicious
+ * user to execute arbitrary system commands.
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 9.8
+ * @precision high
+ * @id rb/kernel-open
+ * @tags correctness
+ * security
+ * external/cwe/cwe-078
+ * external/cwe/cwe-088
+ * external/cwe/cwe-073
+ */
+
+import ruby
+import codeql.ruby.ApiGraphs
+import codeql.ruby.frameworks.StandardLibrary
+import codeql.ruby.TaintTracking
+import codeql.ruby.dataflow.BarrierGuards
+import codeql.ruby.dataflow.RemoteFlowSources
+import DataFlow::PathGraph
+
+/**
+ * Method calls that have a suggested replacement.
+ */
+abstract class Replacement extends DataFlow::CallNode {
+ abstract string getFrom();
+
+ abstract string getTo();
+}
+
+class KernelOpenCall extends KernelMethodCall, Replacement {
+ KernelOpenCall() { this.getMethodName() = "open" }
+
+ override string getFrom() { result = "Kernel.open" }
+
+ override string getTo() { result = "File.open" }
+}
+
+class IOReadCall extends DataFlow::CallNode, Replacement {
+ IOReadCall() { this = API::getTopLevelMember("IO").getAMethodCall("read") }
+
+ override string getFrom() { result = "IO.read" }
+
+ override string getTo() { result = "File.read" }
+}
+
+class Configuration extends TaintTracking::Configuration {
+ Configuration() { this = "KernelOpen" }
+
+ override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
+
+ override predicate isSink(DataFlow::Node sink) {
+ exists(KernelOpenCall c | c.getArgument(0) = sink)
+ or
+ exists(IOReadCall c | c.getArgument(0) = sink)
+ }
+
+ override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
+ guard instanceof StringConstCompare or
+ guard instanceof StringConstArrayInclusionCall
+ }
+}
+
+from
+  Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
+  Replacement call
+where
+  config.hasFlowPath(source, sink) and
+  // tie the reported call to the sink: the sink is the call's first argument
+  call.asExpr().getExpr().(MethodCall).getArgument(0) = sink.getNode().asExpr().getExpr()
+select sink.getNode(), source, sink,
+  "This call to " + call.getFrom() +
+    " depends on a user-provided value. Replace it with " + call.getTo() + "."
diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-079/ReflectedXSS.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-079/ReflectedXSS.ql
new file mode 100644
index 00000000000..d3f95f69fea
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-079/ReflectedXSS.ql
@@ -0,0 +1,24 @@
+/**
+ * @name Reflected server-side cross-site scripting
+ * @description Writing user input directly to a web page
+ * allows for a cross-site scripting vulnerability.
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 6.1
+ * @sub-severity high
+ * @precision high
+ * @id rb/reflected-xss
+ * @tags security
+ * external/cwe/cwe-079
+ * external/cwe/cwe-116
+ */
+
+import ruby
+import codeql.ruby.security.ReflectedXSSQuery
+import codeql.ruby.DataFlow
+import DataFlow::PathGraph
+
+from ReflectedXSS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.",
+ source.getNode(), "a user-provided value"
diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-079/StoredXSS.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-079/StoredXSS.ql
new file mode 100644
index 00000000000..e473d5c31e9
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-079/StoredXSS.ql
@@ -0,0 +1,23 @@
+/**
+ * @name Stored cross-site scripting
+ * @description Using uncontrolled stored values in HTML allows for
+ * a stored cross-site scripting vulnerability.
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 6.1
+ * @precision high
+ * @id rb/stored-xss
+ * @tags security
+ * external/cwe/cwe-079
+ * external/cwe/cwe-116
+ */
+
+import ruby
+import codeql.ruby.security.StoredXSSQuery
+import codeql.ruby.DataFlow
+import DataFlow::PathGraph
+
+from StoredXSS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.",
+  source.getNode(), "stored value"
diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-089/SqlInjection.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-089/SqlInjection.ql
new file mode 100644
index 00000000000..de795e34e71
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-089/SqlInjection.ql
@@ -0,0 +1,39 @@
+/**
+ * @name SQL query built from user-controlled sources
+ * @description Building a SQL query from user-controlled sources is vulnerable to insertion of
+ * malicious SQL code by the user.
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 8.8
+ * @precision high
+ * @id rb/sql-injection
+ * @tags security
+ * external/cwe/cwe-089
+ * external/owasp/owasp-a1
+ */
+
+import ruby
+import codeql.ruby.Concepts
+import codeql.ruby.DataFlow
+import codeql.ruby.dataflow.BarrierGuards
+import codeql.ruby.dataflow.RemoteFlowSources
+import codeql.ruby.TaintTracking
+import DataFlow::PathGraph
+
+class SQLInjectionConfiguration extends TaintTracking::Configuration {
+ SQLInjectionConfiguration() { this = "SQLInjectionConfiguration" }
+
+ override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
+
+ override predicate isSink(DataFlow::Node sink) { sink instanceof SqlExecution }
+
+ override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
+ guard instanceof StringConstCompare or
+ guard instanceof StringConstArrayInclusionCall
+ }
+}
+
+from SQLInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "This SQL query depends on $@.", source.getNode(),
+ "a user-provided value"
diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-094/CodeInjection.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-094/CodeInjection.ql
new file mode 100644
index 00000000000..60e8e32c2f6
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-094/CodeInjection.ql
@@ -0,0 +1,27 @@
+/**
+ * @name Code injection
+ * @description Interpreting unsanitized user input as code allows a malicious user to perform arbitrary
+ * code execution.
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 9.3
+ * @sub-severity high
+ * @precision high
+ * @id rb/code-injection
+ * @tags security
+ * external/owasp/owasp-a1
+ * external/cwe/cwe-094
+ * external/cwe/cwe-095
+ * external/cwe/cwe-116
+ */
+
+import ruby
+import codeql.ruby.security.CodeInjectionQuery
+import DataFlow::PathGraph
+
+from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, Source sourceNode
+where
+ config.hasFlowPath(source, sink) and
+ sourceNode = source.getNode()
+select sink.getNode(), source, sink, "This code execution depends on $@.", sourceNode,
+ "a user-provided value"
diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-1333/PolynomialReDoS.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-1333/PolynomialReDoS.ql
new file mode 100644
index 00000000000..9ee914c3bf0
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-1333/PolynomialReDoS.ql
@@ -0,0 +1,31 @@
+/**
+ * @name Polynomial regular expression used on uncontrolled data
+ * @description A regular expression that can require polynomial time
+ * to match may be vulnerable to denial-of-service attacks.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @precision high
+ * @id rb/polynomial-redos
+ * @tags security
+ * external/cwe/cwe-1333
+ * external/cwe/cwe-730
+ * external/cwe/cwe-400
+ */
+
+import DataFlow::PathGraph
+import codeql.ruby.DataFlow
+import codeql.ruby.regexp.PolynomialReDoSQuery
+import codeql.ruby.regexp.SuperlinearBackTracking
+
+from
+ PolynomialReDoS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
+ PolynomialReDoS::Sink sinkNode, PolynomialBackTrackingTerm regexp
+where
+ config.hasFlowPath(source, sink) and
+ sinkNode = sink.getNode() and
+ regexp = sinkNode.getRegExp()
+select sinkNode.getHighlight(), source, sink,
+ "This $@ that depends on $@ may run slow on strings " + regexp.getPrefixMessage() +
+ "with many repetitions of '" + regexp.getPumpString() + "'.", regexp, "regular expression",
+ source.getNode(), "a user-provided value"
diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-1333/ReDoS.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-1333/ReDoS.ql
new file mode 100644
index 00000000000..234772240e3
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-1333/ReDoS.ql
@@ -0,0 +1,25 @@
+/**
+ * @name Inefficient regular expression
+ * @description A regular expression that requires exponential time to match certain inputs
+ * can be a performance bottleneck, and may be vulnerable to denial-of-service
+ * attacks.
+ * @kind problem
+ * @problem.severity error
+ * @security-severity 7.5
+ * @precision high
+ * @id rb/redos
+ * @tags security
+ * external/cwe/cwe-1333
+ * external/cwe/cwe-730
+ * external/cwe/cwe-400
+ */
+
+import codeql.ruby.regexp.ExponentialBackTracking
+import codeql.ruby.regexp.ReDoSUtil
+import codeql.ruby.regexp.RegExpTreeView
+
+from RegExpTerm t, string pump, State s, string prefixMsg
+where hasReDoSResult(t, pump, s, prefixMsg)
+select t,
+ "This part of the regular expression may cause exponential backtracking on strings " + prefixMsg +
+ "containing many repetitions of '" + pump + "'."
diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-295/RequestWithoutValidation.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-295/RequestWithoutValidation.ql
new file mode 100644
index 00000000000..e9b236897bc
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-295/RequestWithoutValidation.ql
@@ -0,0 +1,20 @@
+/**
+ * @name Request without certificate validation
+ * @description Making a request without certificate validation can allow
+ * man-in-the-middle attacks.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 7.5
+ * @precision medium
+ * @id rb/request-without-cert-validation
+ * @tags security
+ * external/cwe/cwe-295
+ */
+
+import ruby
+import codeql.ruby.Concepts
+import codeql.ruby.DataFlow
+
+from HTTP::Client::Request request, DataFlow::Node disablingNode
+where request.disablesCertificateValidation(disablingNode)
+select request, "This request may run with $@.", disablingNode, "certificate validation disabled"
diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-502/UnsafeDeserialization.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-502/UnsafeDeserialization.ql
new file mode 100644
index 00000000000..0df3b7c8d67
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-502/UnsafeDeserialization.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Deserialization of user-controlled data
+ * @description Deserializing user-controlled data may allow attackers to
+ * execute arbitrary code.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 9.8
+ * @precision high
+ * @id rb/unsafe-deserialization
+ * @tags security
+ * external/cwe/cwe-502
+ */
+
+import ruby
+import DataFlow::PathGraph
+import codeql.ruby.DataFlow
+import codeql.ruby.security.UnsafeDeserializationQuery
+
+from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
+where cfg.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "Unsafe deserialization of $@.", source.getNode(), "user input"
diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-601/UrlRedirect.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-601/UrlRedirect.ql
new file mode 100644
index 00000000000..aeaa4c29dc5
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-601/UrlRedirect.ql
@@ -0,0 +1,22 @@
+/**
+ * @name URL redirection from remote source
+ * @description URL redirection based on unvalidated user input
+ * may cause redirection to malicious web sites.
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 6.1
+ * @sub-severity low
+ * @id rb/url-redirection
+ * @tags security
+ * external/cwe/cwe-601
+ * @precision high
+ */
+
+import ruby
+import codeql.ruby.security.UrlRedirectQuery
+import codeql.ruby.DataFlow::DataFlow::PathGraph
+
+from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "Untrusted URL redirection due to $@.", source.getNode(),
+ "a user-provided value"
diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-611/Xxe.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-611/Xxe.ql
new file mode 100644
index 00000000000..c7eae21333e
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-611/Xxe.ql
@@ -0,0 +1,43 @@
+/**
+ * @name XML external entity expansion
+ * @description Parsing user input as an XML document with external
+ * entity expansion is vulnerable to XXE attacks.
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 9.1
+ * @precision high
+ * @id rb/xxe
+ * @tags security
+ * external/cwe/cwe-611
+ * external/cwe/cwe-776
+ * external/cwe/cwe-827
+ */
+
+import ruby
+import codeql.ruby.dataflow.RemoteFlowSources
+import codeql.ruby.TaintTracking
+import codeql.ruby.Concepts
+import codeql.ruby.DataFlow
+import DataFlow::PathGraph
+
+/**
+ * An XML parser input for which the parser call's `externalEntitiesEnabled()` holds.
+ */
+class UnsafeXxeSink extends DataFlow::ExprNode {
+  UnsafeXxeSink() {
+    this = any(XmlParserCall parser | parser.externalEntitiesEnabled()).getInput()
+  }
+}
+
+class XxeConfig extends TaintTracking::Configuration {
+ XxeConfig() { this = "XXE.ql::XxeConfig" }
+
+ override predicate isSource(DataFlow::Node src) { src instanceof RemoteFlowSource }
+
+ override predicate isSink(DataFlow::Node sink) { sink instanceof UnsafeXxeSink }
+}
+
+from DataFlow::PathNode source, DataFlow::PathNode sink, XxeConfig conf
+where conf.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "Unsafe parsing of XML file from $@.", source.getNode(),
+ "user input"
diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-732/WeakFilePermissions.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-732/WeakFilePermissions.ql
new file mode 100644
index 00000000000..793eafe04bd
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-732/WeakFilePermissions.ql
@@ -0,0 +1,64 @@
+/**
+ * @name Overly permissive file permissions
+ * @description Allowing files to be readable or writable by users other than the owner may allow sensitive information to be accessed.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 7.8
+ * @id rb/overly-permissive-file
+ * @tags external/cwe/cwe-732
+ * security
+ * @precision low
+ */
+
+import ruby
+import codeql.ruby.Concepts
+import codeql.ruby.DataFlow
+import DataFlow::PathGraph
+import codeql.ruby.ApiGraphs
+
+bindingset[p]
+int world_permission(int p) { result = p.bitAnd(7) } // mask 7 (octal 007) keeps the three lowest bits of `p`
+
+// 070 oct = 56 dec: pick the group bits, then shift them down so `access` can test masks 2 and 4
+bindingset[p]
+int group_permission(int p) { result = p.bitAnd(56).bitShiftRight(3) }
+
+bindingset[p]
+string access(int p) {
+  result = "readable" and not p.bitAnd(4) = 0
+  or
+  result = "writable" and not p.bitAnd(2) = 0 // mask 4 tests the read bit, mask 2 the write bit
+}
+
+/** An expression specifying a file permission that allows group/others read or write access. */
+class PermissivePermissionsExpr extends Expr {
+  // TODO: non-literal expressions?
+  PermissivePermissionsExpr() {
+    // symbolic mode string adding/setting read or write permissions for all/group/other
+    this.(StringLiteral).getValueText().regexpMatch(".*[ago][^-=+]*[+=][xXst]*[rw].*")
+    or
+    // integer mode for which the world or the group bits yield some access level
+    exists(int mode | mode = this.(IntegerLiteral).getValue() |
+      exists(access(world_permission(mode))) or exists(access(group_permission(mode)))
+    )
+  }
+}
+
+/** Data flow from permissive permission expressions to file-system permission modifications. */
+class PermissivePermissionsConfig extends DataFlow::Configuration {
+  PermissivePermissionsConfig() { this = "PermissivePermissionsConfig" }
+
+  override predicate isSource(DataFlow::Node source) {
+    source.asExpr().getExpr() instanceof PermissivePermissionsExpr
+  }
+  override predicate isSink(DataFlow::Node sink) {
+    sink = any(FileSystemPermissionModification change).getAPermissionNode()
+  }
+}
+
+from
+  PermissivePermissionsConfig config, FileSystemPermissionModification change,
+  DataFlow::PathNode source, DataFlow::PathNode sink
+where sink.getNode() = change.getAPermissionNode() and config.hasFlowPath(source, sink)
+select source.getNode(), source, sink, "Overly permissive mask in $@ sets file to $@.", change,
+  change.toString(), source.getNode(), source.getNode().toString()
diff --git a/repo-tests/codeql-ruby/ql/src/queries/security/cwe-798/HardcodedCredentials.ql b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-798/HardcodedCredentials.ql
new file mode 100644
index 00000000000..c887793031d
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/security/cwe-798/HardcodedCredentials.ql
@@ -0,0 +1,155 @@
+/**
+ * @name Hard-coded credentials
+ * @description Credentials are hard coded in the source code of the application.
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 9.8
+ * @precision high
+ * @id rb/hardcoded-credentials
+ * @tags security
+ * external/cwe/cwe-259
+ * external/cwe/cwe-321
+ * external/cwe/cwe-798
+ */
+
+import ruby
+import codeql.ruby.DataFlow
+import DataFlow::PathGraph
+import codeql.ruby.TaintTracking
+import codeql.ruby.controlflow.CfgNodes
+
+/** Holds if `char` occurs in `str` exactly once, or fewer than `fraction` x length times. */
+bindingset[char, fraction]
+predicate fewer_characters_than(StringLiteral str, string char, float fraction) {
+  exists(string value, int occurrences |
+    value = str.getValueText() and
+    occurrences = count(int pos | value.charAt(pos) = char)
+  |
+    /* A single occurrence is always tolerated */
+    occurrences = 1 or occurrences < value.length() * fraction
+  )
+}
+
+predicate possible_reflective_name(string name) {
+ // TODO: implement this? Until then this never holds, so `not possible_reflective_name(...)` excludes nothing.
+ none()
+}
+
+int char_count(StringLiteral str) { result = count(string c | c = str.getValueText().charAt(_)) } // number of distinct characters
+
+predicate capitalized_word(StringLiteral str) { str.getValueText().regexpMatch("[A-Z][a-z]+") } // one uppercase letter, then only lowercase letters
+
+predicate format_string(StringLiteral str) { str.getValueText().matches("%{%}%") } // text containing "{" and a later "}"
+
+/** Holds if `e` is a string or integer literal that looks like it could be a credential. */
+predicate maybeCredential(Expr e) {
+  /* An integer literal without an `int` value, i.e. an integer with over 32 bits */
+  exists(IntegerLiteral num | num = e |
+    /* Not a set of flags or round number */
+    not num.getValueText().matches("%00%") and
+    not exists(num.getValue())
+  )
+  or
+  /* Or, a string that is not too short and unlikely to be text or an identifier. */
+  exists(StringLiteral lit | lit = e |
+    /* At least 10 characters */
+    lit.getValueText().length() > 9 and
+    /* Not too much whitespace */
+    fewer_characters_than(lit, " ", 0.05) and
+    /* or underscores */
+    fewer_characters_than(lit, "_", 0.2) and
+    /* Not too repetitive */
+    exists(int uniqueChars | uniqueChars = char_count(lit) |
+      uniqueChars > 15 or uniqueChars * 3 > lit.getValueText().length() * 2
+    ) and
+    not possible_reflective_name(lit.getValueText()) and
+    not capitalized_word(lit) and
+    not format_string(lit)
+  )
+}
+
+class HardcodedValueSource extends DataFlow::Node { // a node whose expression may be a credential
+  HardcodedValueSource() { exists(Expr lit | maybeCredential(lit) | lit = this.asExpr().getExpr()) }
+}
+
+/**
+ * Gets a case-insensitive regular expression that matches names of locations (variables,
+ * parameters, keys) whose name indicates that the value being held is a credential.
+ */
+private string getACredentialRegExp() {
+  result = "(?i).*(cert)(?!.*(format|name)).*" or
+  result = "(?i).*(puid|username|userid).*" or
+  result = "(?i).*pass(wd|word|code|phrase)(?!.*question).*"
+}
+
+bindingset[name]
+private predicate maybeCredentialName(string name) {
+  not name.matches("%file") and // names ending in "file" are never treated as credential names
+  name.regexpMatch(getACredentialRegExp())
+}
+
+// Positional parameter
+// (a named parameter of some method whose name looks like a credential name)
+private DataFlow::Node credentialParameter() {
+  exists(NamedParameter param | maybeCredentialName(param.getName()) |
+    param = any(Method method).getParameter(_) and
+    param = result.asParameter()
+  )
+}
+
+// Keyword argument
+// (what `getKeywordArgument` yields on some method call for a credential-like key)
+private Expr credentialKeywordArgument() {
+  exists(string keyword | maybeCredentialName(keyword) |
+    result = any(MethodCall call).getKeywordArgument(keyword)
+  )
+}
+
+// An equality check against a credential value:
+// yields the operand that is compared with a read of a credential-named variable
+private Expr credentialComparison() {
+  exists(EqualityOperation cmp, VariableReadAccess credRead |
+    maybeCredentialName(credRead.getVariable().getName())
+  |
+    // the variable read may sit on either side of the comparison
+    cmp.getLeftOperand() = credRead and cmp.getRightOperand() = result
+    or
+    cmp.getRightOperand() = credRead and cmp.getLeftOperand() = result
+  )
+}
+
+private predicate isCredentialSink(DataFlow::Node node) {
+  // a credential-named parameter, keyword argument, or operand of a credential comparison
+  credentialParameter() = node
+  or
+  exists(Expr e | e = node.asExpr().getExpr() |
+    e = credentialComparison() or e = credentialKeywordArgument())
+}
+
+class CredentialSink extends DataFlow::Node { // exactly the nodes accepted by `isCredentialSink`
+ CredentialSink() { isCredentialSink(this) }
+}
+
+/** Data flow from hard-coded value sources to credential sinks. */
+class HardcodedCredentialsConfiguration extends DataFlow::Configuration {
+  HardcodedCredentialsConfiguration() { this = "HardcodedCredentialsConfiguration" }
+
+  override predicate isSource(DataFlow::Node source) { source = any(HardcodedValueSource src) }
+
+  override predicate isSink(DataFlow::Node sink) { sink = any(CredentialSink snk) }
+
+  override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
+    exists(ExprNodes::BinaryOperationCfgNode concat |
+      // string concatenation: flow goes from either operand to the whole `AddExpr`
+      concat.getExpr() instanceof AddExpr and
+      node2.asExpr() = concat
+    |
+      node1.asExpr() = concat.getLeftOperand() or
+      node1.asExpr() = concat.getRightOperand()
+    )
+  }
+}
+
+from HardcodedCredentialsConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink)
+select source.getNode(), source, sink, "Use of $@.", source.getNode(), "hardcoded credentials"
diff --git a/repo-tests/codeql-ruby/ql/src/queries/summary/LinesOfCode.ql b/repo-tests/codeql-ruby/ql/src/queries/summary/LinesOfCode.ql
new file mode 100644
index 00000000000..f727cf504d9
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/summary/LinesOfCode.ql
@@ -0,0 +1,15 @@
+/**
+ * @id rb/summary/lines-of-code
+ * @name Total lines of Ruby code in the database
+ * @description The total number of lines of Ruby code from the source code
+ * directory, including external libraries and auto-generated files. This is a
+ * useful metric of the size of a database. This query counts the lines of
+ * code, excluding whitespace or comments.
+ * @kind metric
+ * @tags summary
+ * lines-of-code
+ */
+
+import ruby
+
+select sum(RubyFile f | exists(f.getRelativePath()) | f.getNumberOfLinesOfCode()) // only files that have a relative path are summed
diff --git a/repo-tests/codeql-ruby/ql/src/queries/summary/LinesOfUserCode.ql b/repo-tests/codeql-ruby/ql/src/queries/summary/LinesOfUserCode.ql
new file mode 100644
index 00000000000..19f4f46fb8d
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/summary/LinesOfUserCode.ql
@@ -0,0 +1,19 @@
+/**
+ * @id rb/summary/lines-of-user-code
+ * @name Total lines of user written Ruby code in the database
+ * @description The total number of lines of Ruby code from the source code
+ * directory, excluding external library and auto-generated files. This
+ * query counts the lines of code, excluding whitespace or comments.
+ * @kind metric
+ * @tags summary
+ */
+
+import ruby
+
+select sum(RubyFile file |
+    // leave out anything whose absolute path contains a `vendor` directory
+    not file.getAbsolutePath().matches("%/vendor/%") and
+    exists(file.getRelativePath()) and file.fromSource()
+  |
+    file.getNumberOfLinesOfCode()
+  )
diff --git a/repo-tests/codeql-ruby/ql/src/queries/summary/NumberOfFilesExtractedWithErrors.ql b/repo-tests/codeql-ruby/ql/src/queries/summary/NumberOfFilesExtractedWithErrors.ql
new file mode 100644
index 00000000000..1a68d2c57e6
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/summary/NumberOfFilesExtractedWithErrors.ql
@@ -0,0 +1,15 @@
+/**
+ * @id rb/summary/number-of-files-extracted-with-errors
+ * @name Total number of files that were extracted with errors
+ * @description The total number of Ruby code files that we extracted, but where
+ * at least one extraction error occurred in the process.
+ * @kind metric
+ * @tags summary
+ */
+
+import ruby
+import codeql.ruby.Diagnostics
+
+select count(File file |
+    exists(file.getRelativePath()) and file = any(ExtractionError err).getLocation().getFile()
+  )
diff --git a/repo-tests/codeql-ruby/ql/src/queries/summary/NumberOfSuccessfullyExtractedFiles.ql b/repo-tests/codeql-ruby/ql/src/queries/summary/NumberOfSuccessfullyExtractedFiles.ql
new file mode 100644
index 00000000000..356989935e1
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/summary/NumberOfSuccessfullyExtractedFiles.ql
@@ -0,0 +1,15 @@
+/**
+ * @id rb/summary/number-of-successfully-extracted-files
+ * @name Total number of files that were extracted without error
+ * @description The total number of Ruby code files that we extracted without
+ * encountering any extraction errors.
+ * @kind metric
+ * @tags summary
+ */
+
+import ruby
+import codeql.ruby.Diagnostics
+
+select count(File file |
+    exists(file.getRelativePath()) and not file = any(ExtractionError err).getLocation().getFile()
+  )
diff --git a/repo-tests/codeql-ruby/ql/src/queries/variables/DeadStoreOfLocal.ql b/repo-tests/codeql-ruby/ql/src/queries/variables/DeadStoreOfLocal.ql
new file mode 100644
index 00000000000..5ce06a0c182
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/variables/DeadStoreOfLocal.ql
@@ -0,0 +1,28 @@
+/**
+ * @name Useless assignment to local variable
+ * @description An assignment to a local variable that is not used later on, or whose value is always
+ * overwritten, has no effect.
+ * @kind problem
+ * @problem.severity warning
+ * @id rb/useless-assignment-to-local
+ * @tags maintainability
+ * external/cwe/cwe-563
+ * @precision low
+ */
+
+import ruby
+import codeql.ruby.dataflow.SSA
+
+class RelevantLocalVariableWriteAccess extends LocalVariableWriteAccess {
+  RelevantLocalVariableWriteAccess() {
+    not exists(Parameter param | param.getAVariable().getDefiningAccess() = this) and // skip parameter definitions
+    not "_" = this.getVariable().getName().charAt(0) // skip `_`-prefixed variables
+  }
+}
+
+from LocalVariable v, RelevantLocalVariableWriteAccess write
+where
+  not write = any(Ssa::WriteDefinition def).getWriteAccess() and // no SSA write definition has this access
+  exists(write.getAControlFlowNode()) and
+  write.getVariable() = v
+select write, "This assignment to $@ is useless, since its value is never read.", v, v.getName()
diff --git a/repo-tests/codeql-ruby/ql/src/queries/variables/UninitializedLocal.ql b/repo-tests/codeql-ruby/ql/src/queries/variables/UninitializedLocal.ql
new file mode 100644
index 00000000000..ef134eddd70
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/variables/UninitializedLocal.ql
@@ -0,0 +1,32 @@
+/**
+ * @name Potentially uninitialized local variable
+ * @description Using a local variable before it is initialized gives the variable a default
+ * 'nil' value.
+ * @kind problem
+ * @problem.severity error
+ * @id rb/uninitialized-local-variable
+ * @tags reliability
+ * correctness
+ * @precision low
+ */
+
+import ruby
+import codeql.ruby.dataflow.SSA
+
+/** A read of a local variable that is not the receiver of a `nil?` method call. */
+class RelevantLocalVariableReadAccess extends LocalVariableReadAccess {
+  RelevantLocalVariableReadAccess() {
+    not exists(MethodCall nilCheck | nilCheck.getMethodName() = "nil?" |
+      this = nilCheck.getReceiver()
+    )
+  }
+}
+
+from LocalVariable v, RelevantLocalVariableReadAccess read
+where
+  // some SSA definition that is read here has an uninitialized ultimate definition
+  exists(Ssa::Definition ssaDef | ssaDef.getARead().getExpr() = read |
+    ssaDef.getAnUltimateDefinition() instanceof Ssa::UninitializedDefinition
+  ) and
+  read.getVariable() = v
+select read, "Local variable $@ may be used before it is initialized.", v, v.getName()
diff --git a/repo-tests/codeql-ruby/ql/src/queries/variables/UnusedParameter.ql b/repo-tests/codeql-ruby/ql/src/queries/variables/UnusedParameter.ql
new file mode 100644
index 00000000000..1aa1a6bc462
--- /dev/null
+++ b/repo-tests/codeql-ruby/ql/src/queries/variables/UnusedParameter.ql
@@ -0,0 +1,27 @@
+/**
+ * @name Unused parameter
+ * @description A parameter that is not used later on, or whose value is always overwritten,
+ * can be removed.
+ * @kind problem
+ * @problem.severity warning
+ * @id rb/unused-parameter
+ * @tags maintainability
+ * external/cwe/cwe-563
+ * @precision low
+ */
+
+import ruby
+import codeql.ruby.dataflow.SSA
+
+/** A variable of some parameter whose name does not begin with an underscore. */
+class RelevantParameterVariable extends LocalVariable {
+  RelevantParameterVariable() {
+    // `_`-prefixed names are left out
+    not "_" = this.getName().charAt(0) and
+    this = any(Parameter param).getAVariable()
+  }
+}
+
+from RelevantParameterVariable param
+where not param.getDefiningAccess() = any(Ssa::WriteDefinition def).getWriteAccess()
+select param, "Unused parameter."