Add codeql-ruby sources (236643fc43)

This commit is contained in:
Esben Sparre Andreasen
2021-10-13 21:53:37 +02:00
parent d9f38826f7
commit 9826e7df1d
151 changed files with 42145 additions and 0 deletions

View File

@@ -0,0 +1 @@
236643fc43b8ae09e15dfa13e86bfdb61a106668

View File

@@ -0,0 +1,25 @@
import codeql.ruby.AST
import codeql.ruby.ast.internal.Synthesis
/**
 * Holds if `node` has no parent, is not a `Toplevel`, and lies in a file
 * whose extension is something other than `erb`. `cls` is the
 * comma-separated list of primary QL classes of `node`.
 */
query predicate missingParent(AstNode node, string cls) {
  cls = node.getPrimaryQlClasses() and
  not node instanceof Toplevel and
  node.getFile().getExtension() != "erb" and
  not exists(node.getParent())
}
/**
 * Gets a parent of `child` for which the imported `desugarLevel` predicate
 * yields `desugarLevel`.
 */
pragma[noinline]
private AstNode parent(AstNode child, int desugarLevel) {
  child.getParent() = result and
  desugarLevel(result) = desugarLevel
}
/**
 * Holds if `node` has two distinct parents at the same desugaring level.
 * `parent` ranges over every parent of `node`, and `cls` is the
 * comma-separated list of primary QL classes of `parent`.
 */
query predicate multipleParents(AstNode node, AstNode parent, string cls) {
  exists(int level, AstNode first, AstNode second |
    first = parent(node, level) and
    second = parent(node, level)
  |
    first != second
  ) and
  node.getParent() = parent and
  parent.getPrimaryQlClasses() = cls
}

View File

@@ -0,0 +1 @@
import codeql.ruby.controlflow.internal.ControlFlowGraphImplShared::Consistency

View File

@@ -0,0 +1 @@
import codeql.ruby.dataflow.internal.DataFlowImplConsistency::Consistency

View File

@@ -0,0 +1,22 @@
import ruby
import codeql.ruby.dataflow.SSA
import codeql.ruby.controlflow.ControlFlowGraph
/** Holds if `read` is a read of `def` and also of some other SSA definition. */
query predicate nonUniqueDef(CfgNode read, Ssa::Definition def) {
  def.getARead() = read and
  exists(Ssa::Definition otherDef | otherDef != def | otherDef.getARead() = read)
}
/**
 * Holds if some control-flow node of the local variable read `read` is not
 * a read of any SSA definition.
 */
query predicate readWithoutDef(LocalVariableReadAccess read) {
  exists(CfgNode cfgRead | cfgRead = read.getAControlFlowNode() |
    not exists(Ssa::Definition def | def.getARead() = cfgRead)
  )
}
/**
 * Holds if `def` is an SSA definition of the non-captured local variable `v`
 * that has no reads and is not an input to any phi node.
 */
query predicate deadDef(Ssa::Definition def, LocalVariable v) {
  def.getSourceVariable() = v and
  not v.isCaptured() and
  not exists(Ssa::PhiNode phi | phi.getAnInput() = def) and
  not exists(def.getARead())
}

View File

@@ -0,0 +1,6 @@
import codeql.ruby.ast.Variable
/**
 * Holds if `access` refers to `variable` and also to at least one other
 * variable.
 */
query predicate ambiguousVariable(VariableAccess access, Variable variable) {
  variable = access.getVariable() and
  exists(Variable other | other = access.getVariable() | other != variable)
}

View File

@@ -0,0 +1,5 @@
name: codeql/ruby-consistency-queries
version: 0.0.1
dependencies:
codeql/ruby-all: 0.0.1

View File

@@ -0,0 +1,4 @@
name: codeql/ruby-examples
version: 0.0.2
dependencies:
codeql/ruby-all: ^0.0.2

View File

@@ -0,0 +1,18 @@
/**
* @name If statements with empty then branch
* @description Finds 'if' statements where the 'then' branch is
* an empty block statement
* @id ruby/examples/emptythen
* @tags if
* then
* empty
* conditional
* branch
* statement
*/
import ruby
from IfExpr ifExpr
// Selects `if` expressions for which no child of the `then` branch exists.
where not exists(ifExpr.getThen().getAChild())
select ifExpr

View File

@@ -0,0 +1,19 @@
private import codeql.files.FileSystem
/**
 * Gets the file whose absolute path, once encoded the way the VS Code
 * extension encodes source-archive file names, equals `name`. The result
 * therefore coincides with the output of `.getFile()` on locatable entities.
 */
cached
File getFileBySourceArchiveName(string name) {
  // A source-archive name supplied by the VS Code extension differs from the
  // absolute path stored in the database in three ways:
  //   1. colons are replaced by underscores
  //   2. there is a leading slash, even for Windows paths: "C:/foo/bar" ->
  //      "/C_/foo/bar"
  //   3. double slashes in UNC prefixes are replaced with a single slash
  // Points 2 and 3 are covered together by always prepending a slash and
  // only afterwards collapsing double slashes.
  exists(string withoutColons |
    withoutColons = result.getAbsolutePath().replaceAll(":", "_") and
    name = ("/" + withoutColons).replaceAll("//", "/")
  )
}

View File

@@ -0,0 +1,66 @@
/** Provides classes for working with locations. */
import files.FileSystem
/**
 * A source location, described by a file together with a start line, a start
 * column, an end line, and an end column.
 *
 * For more information about locations see [LGTM locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
 */
class Location extends @location {
  /** Gets the file that contains this location. */
  File getFile() { locations_default(this, result, _, _, _, _) }

  /** Gets the line on which this location starts (1-based, inclusive). */
  int getStartLine() { locations_default(this, _, result, _, _, _) }

  /** Gets the column at which this location starts (1-based, inclusive). */
  int getStartColumn() { locations_default(this, _, _, result, _, _) }

  /** Gets the line on which this location ends (1-based, inclusive). */
  int getEndLine() { locations_default(this, _, _, _, result, _) }

  /** Gets the column at which this location ends (1-based, inclusive). */
  int getEndColumn() { locations_default(this, _, _, _, _, result) }

  /** Gets the number of lines this location spans, counting both its first and last line. */
  int getNumLines() { result = 1 + this.getEndLine() - this.getStartLine() }

  /**
   * Gets a textual representation of this location, of the form
   * `path@startline:startcolumn:endline:endcolumn`.
   */
  string toString() {
    exists(string path, int sl, int sc, int el, int ec |
      this.hasLocationInfo(path, sl, sc, el, ec)
    |
      result = path + "@" + sl + ":" + sc + ":" + el + ":" + ec
    )
  }

  /**
   * Holds if this location spans from column `startcolumn` of line
   * `startline` to column `endcolumn` of line `endline` in the file whose
   * absolute path is `filepath`.
   * For more information, see
   * [LGTM locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(File file | filepath = file.getAbsolutePath() |
      locations_default(this, file, startline, startcolumn, endline, endcolumn)
    )
  }

  /**
   * Holds if this location starts strictly before `other`: on an earlier
   * line, or on the same line at an earlier column.
   */
  pragma[inline]
  predicate strictlyBefore(Location other) {
    exists(int line, int otherLine |
      line = this.getStartLine() and otherLine = other.getStartLine()
    |
      line < otherLine
      or
      line = otherLine and this.getStartColumn() < other.getStartColumn()
    )
  }
}
/**
 * The empty location: a location whose file path is the empty string and
 * whose line and column numbers are all zero.
 */
class EmptyLocation extends Location {
  EmptyLocation() {
    exists(int zero | zero = 0 | this.hasLocationInfo("", zero, zero, zero, zero))
  }
}

View File

@@ -0,0 +1,173 @@
/** Provides classes for working with files and folders. */
private import codeql.Locations
/** A file or folder. */
abstract class Container extends @container {
  /** Gets a file or folder whose parent container is this container. */
  Container getAChildContainer() { result.getParentContainer() = this }

  /** Gets a file that is a child of this container. */
  File getAFile() { result = this.getAChildContainer() }

  /** Gets a folder that is a child of this container. */
  Folder getAFolder() { result = this.getAChildContainer() }

  /**
   * Gets the absolute, canonical path of this container, using forward slashes
   * as path separator.
   *
   * The path consists of a _root prefix_ followed by zero or more _path
   * segments_ separated by forward slashes.
   *
   * The root prefix takes one of these forms:
   *
   * 1. A single forward slash `/` (Unix-style)
   * 2. An upper-case drive letter followed by a colon and a forward slash,
   *    such as `C:/` (Windows-style)
   * 3. Two forward slashes, a computer name, and then another forward slash,
   *    such as `//FileServer/` (UNC-style)
   *
   * Path segments are never empty, so an absolute path never contains two
   * contiguous slashes except as part of a UNC-style root prefix. Path
   * segments never contain forward slashes, and no segment is `.` (one dot)
   * or `..` (two dots).
   *
   * An absolute path ends with a forward slash only when it is a bare root
   * prefix, that is, when it has no path segments. A container whose absolute
   * path has no segments is always a `Folder`, not a `File`.
   */
  abstract string getAbsolutePath();

  /**
   * Gets the base name of this container including its extension: the last
   * segment of the absolute path, or the empty string if there are no
   * segments.
   *
   * Examples of absolute paths and their base names (quoted to avoid
   * ambiguity):
   *
   * <table border="1">
   * <tr><th>Absolute path</th><th>Base name</th></tr>
   * <tr><td>"/tmp/tst.go"</td><td>"tst.go"</td></tr>
   * <tr><td>"C:/Program Files (x86)"</td><td>"Program Files (x86)"</td></tr>
   * <tr><td>"/"</td><td>""</td></tr>
   * <tr><td>"C:/"</td><td>""</td></tr>
   * <tr><td>"D:/"</td><td>""</td></tr>
   * <tr><td>"//FileServer/"</td><td>""</td></tr>
   * </table>
   */
  string getBaseName() {
    result = this.getAbsolutePath().regexpCapture(".*/(([^/]*?)(?:\\.([^.]*))?)", 1)
  }

  /**
   * Gets the extension of this container: the part of its base name that
   * follows the last dot character, if any.
   *
   * In particular,
   *
   * - a name without a dot has no extension, so this predicate has no result;
   * - a name ending in a dot has the empty string as its extension;
   * - for a name with several dots, the extension follows the last one.
   *
   * Examples of absolute paths and their extensions (quoted to avoid
   * ambiguity):
   *
   * <table border="1">
   * <tr><th>Absolute path</th><th>Extension</th></tr>
   * <tr><td>"/tmp/tst.go"</td><td>"go"</td></tr>
   * <tr><td>"/tmp/.classpath"</td><td>"classpath"</td></tr>
   * <tr><td>"/bin/bash"</td><td>not defined</td></tr>
   * <tr><td>"/tmp/tst2."</td><td>""</td></tr>
   * <tr><td>"/tmp/x.tar.gz"</td><td>"gz"</td></tr>
   * </table>
   */
  string getExtension() {
    result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(\\.([^.]*))?", 3)
  }

  /** Gets the file in this container whose base name is `baseName`, if any. */
  File getFile(string baseName) {
    result.getBaseName() = baseName and
    result = this.getAFile()
  }

  /** Gets the sub-folder in this container whose base name is `baseName`, if any. */
  Folder getFolder(string baseName) {
    result.getBaseName() = baseName and
    result = this.getAFolder()
  }

  /** Gets the container that is the parent of this file or folder, if any. */
  Container getParentContainer() { containerparent(result, this) }

  /**
   * Gets the path of this file or folder relative to the root folder of the
   * analyzed source location. The root folder itself has the empty string as
   * its relative path.
   *
   * There is no result for a container outside the source root, that is,
   * when the root folder is not a reflexive, transitive parent of this
   * container.
   */
  string getRelativePath() {
    exists(string path, string root |
      sourceLocationPrefix(root) and path = this.getAbsolutePath()
    |
      path = root and result = ""
      or
      path = root.regexpReplaceAll("/$", "") + "/" + result and
      not result.matches("/%")
    )
  }

  /**
   * Gets the stem of this container: the part of its base name before the
   * last dot character if there is one, or the whole base name otherwise.
   *
   * Examples of absolute paths and their stems (quoted to avoid ambiguity):
   *
   * <table border="1">
   * <tr><th>Absolute path</th><th>Stem</th></tr>
   * <tr><td>"/tmp/tst.go"</td><td>"tst"</td></tr>
   * <tr><td>"/tmp/.classpath"</td><td>""</td></tr>
   * <tr><td>"/bin/bash"</td><td>"bash"</td></tr>
   * <tr><td>"/tmp/tst2."</td><td>"tst2"</td></tr>
   * <tr><td>"/tmp/x.tar.gz"</td><td>"x.tar"</td></tr>
   * </table>
   */
  string getStem() {
    result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(?:\\.([^.]*))?", 1)
  }

  /**
   * Gets a URL representing the location of this container.
   *
   * For more information see https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/#providing-urls.
   */
  abstract string getURL();

  /**
   * Gets a textual representation of this container, namely its absolute
   * path.
   */
  string toString() { result = this.getAbsolutePath() }
}
/** A folder. */
class Folder extends Container, @folder {
  /** Gets the absolute path recorded for this folder in the `folders` relation. */
  override string getAbsolutePath() { folders(this, result) }

  /** Gets the URL of this folder: its absolute path prefixed with `folder://`. */
  override string getURL() {
    exists(string path | path = this.getAbsolutePath() | result = "folder://" + path)
  }
}
/** A file. */
class File extends Container, @file {
  /** Gets the absolute path recorded for this file in the `files` relation. */
  override string getAbsolutePath() { files(this, result) }

  /**
   * Gets the URL of this file: its absolute path prefixed with `file://` and
   * followed by the all-zero position suffix `:0:0:0:0`.
   */
  override string getURL() {
    exists(string path | path = this.getAbsolutePath() | result = "file://" + path + ":0:0:0:0")
  }

  /** Holds if this file was extracted from ordinary source code; this holds for every file. */
  predicate fromSource() { any() }
}

View File

@@ -0,0 +1,141 @@
import codeql.Locations
import ast.Call
import ast.Control
import ast.Constant
import ast.Erb
import ast.Expr
import ast.Literal
import ast.Method
import ast.Module
import ast.Parameter
import ast.Operation
import ast.Pattern
import ast.Scope
import ast.Statement
import ast.Variable
private import ast.internal.AST
private import ast.internal.Scope
private import ast.internal.Synthesis
private import ast.internal.TreeSitter
/**
 * A node in the abstract syntax tree. This is the base class of all Ruby
 * program elements.
 */
class AstNode extends TAstNode {
  /**
   * Gets the name of a primary CodeQL class to which this node belongs.
   *
   * This predicate always has a result: when no primary class can be
   * determined the result is `"???"`, and when several primary classes match
   * there are several results.
   */
  string getAPrimaryQlClass() { result = "???" }

  /**
   * Gets a comma-separated list of the names of the primary CodeQL classes to
   * which this element belongs.
   */
  final string getPrimaryQlClasses() {
    result = concat(string cls | cls = this.getAPrimaryQlClass() | cls, ",")
  }

  /** Gets the module that encloses this node, if any. */
  ModuleBase getEnclosingModule() {
    exists(Scope::Range scope | scope = scopeOf(toGeneratedInclSynth(this)) |
      scope.getEnclosingModule() = toGeneratedInclSynth(result)
    )
  }

  /** Gets the method that encloses this node, if any. */
  MethodBase getEnclosingMethod() {
    exists(Scope::Range scope | scope = scopeOf(toGeneratedInclSynth(this)) |
      scope.getEnclosingMethod() = toGeneratedInclSynth(result)
    )
  }

  /**
   * Gets a textual representation of this node. This base implementation has
   * no result.
   */
  cached
  string toString() { none() }

  /** Gets the location of this node. */
  Location getLocation() { result = getLocation(this) }

  /** Gets the file that contains the location of this node. */
  final File getFile() { result = this.getLocation().getFile() }

  /** Gets a child node of this `AstNode`, whatever predicate name it is reachable by. */
  final AstNode getAChild() { result = this.getAChild(_) }

  /** Gets the parent of this `AstNode`, if this node is not a root node. */
  final AstNode getParent() { this = result.getAChild() }

  /**
   * Gets a child of this node that can also be obtained through a predicate
   * named `pred`. In this base class the only such child is the desugared
   * node, with `pred` being `"getDesugared"`.
   */
  cached
  AstNode getAChild(string pred) {
    result = this.getDesugared() and
    pred = "getDesugared"
  }

  /**
   * Holds if this node was synthesized to represent an implicit AST node not
   * present in the source code. In the following example method call, the
   * receiver is an implicit `self` reference, for which there is a synthesized
   * `Self` node.
   *
   * ```rb
   * foo(123)
   * ```
   */
  final predicate isSynthesized() { this = getSynthChild(_, _) }

  /**
   * Gets the desugared version of this AST node, if any.
   *
   * For example, the desugared version of
   *
   * ```rb
   * x += y
   * ```
   *
   * is
   *
   * ```rb
   * x = x + y
   * ```
   *
   * when `x` is a variable. Whenever an AST node can be desugared,
   * the desugared version is the one used in the control-flow graph.
   */
  final AstNode getDesugared() { result = getSynthChild(this, -1) }
}
/** A Ruby source file. */
class RubyFile extends File {
  RubyFile() { ruby_ast_node_parent(_, this, _) }

  /** Gets a token whose location lies in this file. */
  private Ruby::Token getAToken() { this = result.getLocation().getFile() }

  /**
   * Holds if line number `line` of this file is covered by a token, where
   * `comment` is `true` if that token is a comment token and `false`
   * otherwise.
   */
  private predicate line(int line, boolean comment) {
    exists(Ruby::Token tok, Location loc |
      tok = this.getAToken() and
      loc = tok.getLocation()
    |
      line in [loc.getStartLine() .. loc.getEndLine()] and
      if tok instanceof @ruby_token_comment then comment = true else comment = false
    )
  }

  /**
   * Gets the number of lines in this file, computed as the largest end line
   * of any token in the file, or 0 if there is none.
   */
  int getNumberOfLines() { result = max([0, this.getAToken().getLocation().getEndLine()]) }

  /** Gets the number of lines in this file that contain a non-comment token. */
  int getNumberOfLinesOfCode() { result = count(int ln | this.line(ln, false)) }

  /** Gets the number of lines in this file that contain a comment token. */
  int getNumberOfLinesOfComments() { result = count(int ln | this.line(ln, true)) }
}

View File

@@ -0,0 +1,408 @@
/**
* Provides an implementation of _API graphs_, which are an abstract representation of the API
* surface used and/or defined by a code base.
*
* The nodes of the API graph represent definitions and uses of API components. The edges are
* directed and labeled; they specify how the components represented by nodes relate to each other.
*/
private import ruby
import codeql.ruby.DataFlow
import codeql.ruby.typetracking.TypeTracker
import codeql.ruby.ast.internal.Module
private import codeql.ruby.controlflow.CfgNodes
/**
* Provides classes and predicates for working with APIs used in a database.
*/
module API {
/**
 * An abstract representation of a definition or use of an API component such as a Ruby module,
 * or the result of a method call.
 */
class Node extends Impl::TApiNode {
  /**
   * Gets a data-flow node corresponding to a use of the API component represented by this node,
   * including indirect uses found via data flow.
   *
   * For example, `Kernel.format "%s world!", "Hello"` is a use of the return of the `format`
   * function of the `Kernel` module.
   */
  DataFlow::Node getAUse() {
    exists(DataFlow::LocalSourceNode src, DataFlow::LocalSourceNode tracked |
      Impl::use(this, src) and
      tracked = Impl::trackUseNode(src)
    |
      tracked.flowsTo(result)
    )
  }

  /**
   * Gets an immediate use of the API component represented by this node.
   *
   * In contrast to `getAUse()`, only immediate references are included, not indirect uses
   * found via data flow.
   */
  DataFlow::LocalSourceNode getAnImmediateUse() { Impl::use(this, result) }

  /**
   * Gets a call to the method named `method` on the receiver represented by this API component.
   */
  DataFlow::CallNode getAMethodCall(string method) {
    result = this.getReturn(method).getAnImmediateUse()
  }

  /**
   * Gets a node representing member `m` of this API component.
   *
   * For example, a member can be:
   *
   * - A submodule of a module
   * - An attribute of an object
   */
  bindingset[m]
  bindingset[result]
  Node getMember(string m) { result = this.getASuccessor(Label::member(m)) }

  /**
   * Gets a node representing a member of this API component whose name is not known
   * statically.
   */
  Node getUnknownMember() { result = this.getASuccessor(Label::unknownMember()) }

  /**
   * Gets a node representing a member of this API component, whether or not its name is
   * known statically.
   */
  Node getAMember() {
    result = this.getUnknownMember()
    or
    result = this.getASuccessor(Label::member(_))
  }

  /**
   * Gets a node representing an instance of this API component, that is, an object created
   * by calling `new` on the component represented by this node.
   *
   * For example, if this node represents a use of some class `A`, then there might be a node
   * representing instances of `A`, typically corresponding to expressions `A.new` at the
   * source level.
   *
   * This predicate may have multiple results when there are multiple constructor calls invoking this API component.
   * Consider using `getAnInstantiation()` if there is a need to distinguish between individual constructor calls.
   */
  Node getInstance() { result = this.getASuccessor(Label::instance()) }

  /**
   * Gets a node representing the result of calling the method named `method` on the receiver
   * represented by this node.
   */
  Node getReturn(string method) { result = this.getASuccessor(Label::return(method)) }

  /**
   * Gets a `new` call on the API component represented by this node.
   */
  DataFlow::Node getAnInstantiation() { result = this.getInstance().getAnImmediateUse() }

  /**
   * Gets a node representing a subclass of the class represented by this node.
   */
  Node getASubclass() { result = this.getASuccessor(Label::subclass()) }

  /**
   * Gets a string representation of the lexicographically least among all shortest access paths
   * from the root to this node.
   */
  string getPath() { result = min(this.getAPath(Impl::distanceFromRoot(this))) }

  /**
   * Gets a node such that there is an edge in the API graph from this node to that node,
   * labeled with `lbl`.
   */
  Node getASuccessor(string lbl) { Impl::edge(this, lbl, result) }

  /**
   * Gets a node such that there is an edge in the API graph from that node to this node,
   * labeled with `lbl`.
   */
  Node getAPredecessor(string lbl) { result.getASuccessor(lbl) = this }

  /**
   * Gets a node such that there is an edge (with any label) in the API graph from that node
   * to this node.
   */
  Node getAPredecessor() { result = this.getAPredecessor(_) }

  /**
   * Gets a node such that there is an edge (with any label) in the API graph from this node
   * to that node.
   */
  Node getASuccessor() { result = this.getASuccessor(_) }

  /**
   * Gets the data-flow node that gives rise to this node, if any.
   */
  DataFlow::Node getInducingNode() { this = Impl::MkUse(result) }

  /** Gets the location of this node. */
  Location getLocation() {
    result = this.getInducingNode().getLocation()
    or
    // Nodes without an inducing node have no meaningful location, so they get the empty
    // location: the path is the empty string and all other components are zero.
    not exists(this.getInducingNode()) and
    result instanceof EmptyLocation
  }

  /**
   * Gets a textual representation of this element.
   */
  abstract string toString();

  /**
   * Gets a path of the given `length` from the root to this node. Edge labels are joined
   * with `.`, and `length` never exceeds the shortest distance of this node from the root.
   */
  private string getAPath(int length) {
    this instanceof Impl::MkRoot and
    length = 0 and
    result = ""
    or
    length in [1 .. Impl::distanceFromRoot(this)] and
    exists(Node prev, string lbl, string prefix, string sep |
      Impl::edge(prev, lbl, this) and
      lbl != "" and
      prefix = prev.getAPath(length - 1) and
      // no separator before the first label
      (
        length = 1 and sep = ""
        or
        length != 1 and sep = "."
      )
    |
      result = prefix + sep + lbl and
      // avoid producing strings longer than 1MB
      result.length() < 1000 * 1000
    )
  }

  /** Gets the shortest distance from the root to this node in the API graph. */
  int getDepth() { result = Impl::distanceFromRoot(this) }
}
/** The root node of an API graph, rendered as `root`. */
class Root extends Node, Impl::MkRoot {
  override string toString() { "root" = result }
}
/**
 * A node corresponding to the use of an API component. It is rendered as
 * `Use ` followed by its path, or as `Use with no path` when it has none.
 */
class Use extends Node, Impl::MkUse {
  override string toString() {
    result = "Use " + this.getPath()
    or
    not exists(this.getPath()) and
    result = "Use with no path"
  }
}
/** Gets the root node of the API graph. */
Root root() { result instanceof Root }
/**
 * Gets a node corresponding to a top-level member `m` (typically a module).
 *
 * This is equivalent to `root().getMember(m)`.
 *
 * Note: only use this predicate for top-level modules or classes. For nodes corresponding to a
 * nested module or class, use `.getMember` on the parent module/class. For example, for nodes
 * corresponding to the class `Gem::Version`, use
 * `getTopLevelMember("Gem").getMember("Version")`.
 */
Node getTopLevelMember(string m) {
  exists(Root r | r = root() | result = r.getMember(m))
}
/**
* Provides the actual implementation of API graphs, cached for performance.
*
* Ideally, we'd like nodes to correspond to (global) access paths, with edge labels
* corresponding to extending the access path by one element. We also want to be able to map
* nodes to their definitions and uses in the data-flow graph, and this should happen modulo
* (inter-procedural) data flow.
*
* This, however, is not easy to implement, since access paths can have unbounded length
* and we need some way of recognizing cycles to avoid non-termination. Unfortunately, expressing
* a condition like "this node hasn't been involved in constructing any predecessor of
* this node in the API graph" without negative recursion is tricky.
*
* So instead most nodes are directly associated with a data-flow node, representing
* either a use or a definition of an API component. This ensures that we only have a finite
* number of nodes. However, we can now have multiple nodes with the same access
* path, which are essentially indistinguishable for a client of the API.
*
* On the other hand, a single node can have multiple access paths (which is, of
* course, unavoidable). We pick as canonical the alphabetically least access path with
* shortest length.
*/
cached
private module Impl {
cached
newtype TApiNode =
/** The root of the API graph. */
MkRoot() or
/** A use of an API member at the node `nd`. */
MkUse(DataFlow::Node nd) { isUse(nd) }
/**
 * Gets the name that `resolveScopeExpr` resolves `read` to, provided that
 * name contains no `::`.
 */
private string resolveTopLevel(ConstantReadAccess read) {
TResolved(result) = resolveScopeExpr(read) and
not result.matches("%::%")
}
/**
* Holds if `ref` is a use of a node that should have an incoming edge from the root
* node labeled `lbl` in the API graph.
*
* This is the case for a constant read that either resolves to a top-level name, or
* has no scope expression and does not resolve to a top-level name.
*/
cached
predicate useRoot(string lbl, DataFlow::Node ref) {
exists(string name, ExprNodes::ConstantAccessCfgNode access, ConstantReadAccess read |
access = ref.asExpr() and
lbl = Label::member(read.getName()) and
read = access.getExpr()
|
name = resolveTopLevel(read)
or
name = read.getName() and
not exists(resolveTopLevel(read)) and
not exists(read.getScopeExpr())
)
}
/**
* Holds if `ref` is a use of a node that should have an incoming edge from use node
* `base` labeled `lbl` in the API graph.
*/
cached
predicate useUse(DataFlow::LocalSourceNode base, string lbl, DataFlow::Node ref) {
exists(ExprCfgNode node |
// First, we find a predecessor expression `node` of the node `ref` that we want to
// determine: `useExpr` relates `node` to `base` when a type-tracked use of `base`
// flows to `node`, so `node` represents a use of `base`.
//
// Once we have identified the predecessor, we define its relation to the successor `ref` as
// well as the label on the edge from `base` to `ref`. This label describes the nature of
// the relationship between the two.
useExpr(node, base)
|
// Referring to a constant nested under a node that is a use of `base`:
// node = `Rails` part of `Rails::Whatever`
// lbl = `Whatever`
// ref = `Rails::Whatever`
exists(ExprNodes::ConstantAccessCfgNode c, ConstantReadAccess read |
not exists(resolveTopLevel(read)) and
node = c.getScopeExpr() and
lbl = Label::member(read.getName()) and
ref.asExpr() = c and
read = c.getExpr()
)
or
// Calling a method (other than `new`) on a node that is a use of `base`
exists(ExprNodes::MethodCallCfgNode call, string name |
node = call.getReceiver() and
name = call.getExpr().getMethodName() and
lbl = Label::return(name) and
name != "new" and
ref.asExpr() = call
)
or
// Calling the `new` method on a node that is a use of `base`, which creates a new instance
exists(ExprNodes::MethodCallCfgNode call |
node = call.getReceiver() and
lbl = Label::instance() and
call.getExpr().getMethodName() = "new" and
ref.asExpr() = call
)
)
}
/** Holds if `nd` is a use reachable from the root or from another use node. */
pragma[nomagic]
private predicate isUse(DataFlow::Node nd) {
useRoot(_, nd)
or
useUse(_, _, nd)
}
/** Holds if a node type-tracked from `src` flows to the expression `node`. */
pragma[nomagic]
private predicate useExpr(ExprCfgNode node, DataFlow::LocalSourceNode src) {
exists(DataFlow::LocalSourceNode pred |
pred = trackUseNode(src) and
pred.flowsTo(any(DataFlow::ExprNode n | n.getExprNode() = node))
)
}
/**
* Holds if `ref` is a use of node `nd`.
*/
cached
predicate use(TApiNode nd, DataFlow::Node ref) { nd = MkUse(ref) }
/**
* Gets a data-flow node to which `src`, which is a use of an API-graph node, flows.
*
* The flow from `src` to that node may be inter-procedural.
*
* `t` is the type tracker describing the tracking steps taken so far.
*/
private DataFlow::LocalSourceNode trackUseNode(DataFlow::Node src, TypeTracker t) {
// Declaring `src` to be a `LocalSourceNode` currently causes a redundant check in the
// recursive case, so instead we check it explicitly here.
src instanceof DataFlow::LocalSourceNode and
t.start() and
isUse(src) and
result = src
or
exists(TypeTracker t2 | result = trackUseNode(src, t2).track(t2, t))
}
/**
* Gets a data-flow node to which `src`, which is a use of an API-graph node, flows.
*
* The flow from `src` to that node may be inter-procedural.
*/
cached
DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) {
result = trackUseNode(src, TypeTracker::end())
}
/**
* Holds if there is an edge from `pred` to `succ` in the API graph that is labeled with `lbl`.
*/
cached
predicate edge(TApiNode pred, string lbl, TApiNode succ) {
/* Every node that is a use of an API component is itself added to the API graph. */
exists(DataFlow::LocalSourceNode ref | succ = MkUse(ref) |
pred = MkRoot() and
useRoot(lbl, ref)
or
exists(DataFlow::Node nd |
pred = MkUse(nd) and
useUse(nd, lbl, ref)
)
)
}
/**
* Holds if there is an edge from `pred` to `succ` in the API graph, whatever its label.
*/
private predicate edge(TApiNode pred, TApiNode succ) { edge(pred, _, succ) }
/** Gets the shortest distance from the root to `nd` in the API graph. */
cached
int distanceFromRoot(TApiNode nd) = shortestDistances(MkRoot/0, edge/2)(_, nd, result)
}
}
/** Provides the strings used as edge labels in the API graph. */
private module Label {
  /** Gets the `member` edge label for member `m`. */
  bindingset[m]
  bindingset[result]
  string member(string m) { "getMember(\"" + m + "\")" = result }

  /** Gets the `member` edge label for the unknown member. */
  string unknownMember() { "getUnknownMember()" = result }

  /** Gets the `instance` edge label. */
  string instance() { "instance" = result }

  /** Gets the `return` edge label for the method named `m`. */
  bindingset[m]
  bindingset[result]
  string return(string m) { "getReturn(\"" + m + "\")" = result }

  /** Gets the `subclass` edge label. */
  string subclass() { "getASubclass()" = result }
}

View File

@@ -0,0 +1,5 @@
/** Provides classes representing the control flow graph. */
import controlflow.ControlFlowGraph
import controlflow.CfgNodes as CfgNodes
import controlflow.BasicBlocks

View File

@@ -0,0 +1,585 @@
/**
* Provides abstract classes representing generic concepts such as file system
* access or system command execution, for which individual framework libraries
* provide concrete subclasses.
*/
private import codeql.ruby.AST
private import codeql.ruby.CFG
private import codeql.ruby.DataFlow
private import codeql.ruby.Frameworks
private import codeql.ruby.dataflow.RemoteFlowSources
private import codeql.ruby.ApiGraphs
/**
 * A data-flow node that executes SQL statements.
 *
 * Extend this class to refine existing API models. To model new APIs,
 * extend `SqlExecution::Range` instead.
 */
class SqlExecution extends DataFlow::Node instanceof SqlExecution::Range {
  /** Gets the argument that specifies the SQL statements to be executed. */
  DataFlow::Node getSql() { super.getSql() = result }
}
/** Provides a class for modeling new SQL execution APIs. */
module SqlExecution {
/**
* A data-flow node that executes SQL statements.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `SqlExecution` instead.
*/
abstract class Range extends DataFlow::Node {
/**
 * Gets the argument that specifies the SQL statements to be executed.
 * Every concrete subclass must implement this.
 */
abstract DataFlow::Node getSql();
}
}
/**
 * A data-flow node that performs a file system access, including reading and writing data,
 * creating and deleting files and folders, checking and updating permissions, and so on.
 *
 * Extend this class to refine existing API models. To model new APIs,
 * extend `FileSystemAccess::Range` instead.
 */
class FileSystemAccess extends DataFlow::Node instanceof FileSystemAccess::Range {
  /** Gets an argument to this file system access that is interpreted as a path. */
  DataFlow::Node getAPathArgument() { super.getAPathArgument() = result }
}
/** Provides a class for modeling new file system access APIs. */
module FileSystemAccess {
/**
* A data-flow node that performs a file system access, including reading and writing data,
* creating and deleting files and folders, checking and updating permissions, and so on.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `FileSystemAccess` instead.
*/
abstract class Range extends DataFlow::Node {
/**
 * Gets an argument to this file system access that is interpreted as a path.
 * Every concrete subclass must implement this.
 */
abstract DataFlow::Node getAPathArgument();
}
}
/**
 * A data-flow node that reads data from the file system.
 *
 * Extend this class to refine existing API models. To model new APIs,
 * extend `FileSystemReadAccess::Range` instead.
 */
class FileSystemReadAccess extends FileSystemAccess instanceof FileSystemReadAccess::Range {
  /**
   * Gets a node that represents data read by this file system access.
   */
  DataFlow::Node getADataNode() { FileSystemReadAccess::Range.super.getADataNode() = result }
}
/** Provides a class for modeling new file system reads. */
module FileSystemReadAccess {
/**
* A data-flow node that reads data from the file system.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `FileSystemReadAccess` instead.
*/
abstract class Range extends FileSystemAccess::Range {
/**
* Gets a node that represents data read from the file system.
* Every concrete subclass must implement this.
*/
abstract DataFlow::Node getADataNode();
}
}
/**
* A data-flow node that sets the permissions for one or more files.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `FileSystemPermissionModification::Range` instead.
*/
class FileSystemPermissionModification extends DataFlow::Node instanceof FileSystemPermissionModification::Range {
/**
* Gets an argument to this permission modification that is interpreted as a
* set of permissions.
*
* The result is whatever the underlying `Range` reports.
*/
DataFlow::Node getAPermissionNode() { result = super.getAPermissionNode() }
}
/** Provides a class for modeling new file system permission modifications. */
module FileSystemPermissionModification {
/**
* A data-flow node that sets the permissions for one or more files.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `FileSystemPermissionModification` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets an argument to this permission modification that is interpreted as a
* set of permissions.
*/
abstract DataFlow::Node getAPermissionNode();
}
}
/**
* A data-flow node that contains a file name or an array of file names from the local file system.
*
* This class is abstract and declares no members: its extent is exactly the
* union of its concrete subclasses.
*/
abstract class FileNameSource extends DataFlow::Node { }
/**
 * A data-flow node that escapes meta-characters, which could be used to prevent
 * injection attacks.
 *
 * To refine existing API models, extend this class. To model a new escaping API,
 * extend `Escaping::Range` instead.
 */
class Escaping extends DataFlow::Node instanceof Escaping::Range {
  Escaping() {
    // A modelled escape only counts when the model supplies _both_ ends:
    // at least one input and an output.
    exists(DataFlow::Node escapedInput, DataFlow::Node escapedOutput |
      escapedInput = super.getAnInput() and
      escapedOutput = super.getOutput()
    )
  }

  /** Gets an input that will be escaped. */
  DataFlow::Node getAnInput() { result = super.getAnInput() }

  /** Gets the output that contains the escaped data. */
  DataFlow::Node getOutput() { result = super.getOutput() }

  /**
   * Gets the context that this function escapes for, such as `html`, or `url`.
   */
  string getKind() { result = super.getKind() }
}
/** Provides a class for modeling new escaping APIs. */
module Escaping {
/**
* A data-flow node that escapes meta-characters, which could be used to prevent
* injection attacks.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `Escaping` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets an input that will be escaped. */
abstract DataFlow::Node getAnInput();
/** Gets the output that contains the escaped data. */
abstract DataFlow::Node getOutput();
/**
* Gets the context that this function escapes for.
*
* While kinds are represented as strings, this should not be relied upon. Use the
* predicates in the `Escaping` module, such as `getHtmlKind`.
*/
abstract string getKind();
}
/**
* Gets the escape-kind for escaping a string so it can safely be included in HTML.
*
* Compare against this predicate rather than against the string it currently returns.
*/
string getHtmlKind() { result = "html" }
}
/**
* An escape of a string so it can be safely included in
* the body of an HTML element, for example, replacing `{}` in
* `<p>{}</p>`.
*
* This is any `Escaping` whose kind equals `Escaping::getHtmlKind()`.
*/
class HtmlEscaping extends Escaping {
HtmlEscaping() { super.getKind() = Escaping::getHtmlKind() }
}
/** Provides classes for modeling HTTP-related APIs. */
module HTTP {
/** Provides classes for modeling HTTP servers. */
module Server {
/**
* A data-flow node that sets up a route on a server.
*
* All member predicates delegate to the underlying `RouteSetup::Range`.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RouteSetup::Range` instead.
*/
class RouteSetup extends DataFlow::Node instanceof RouteSetup::Range {
/** Gets the URL pattern for this route, if it can be statically determined. */
string getUrlPattern() { result = super.getUrlPattern() }
/**
* Gets a function that will handle incoming requests for this route, if any.
*
* NOTE: This will be modified in the near future to have a `RequestHandler` result, instead of a `Method`.
*/
Method getARequestHandler() { result = super.getARequestHandler() }
/**
* Gets a parameter that will receive parts of the URL when handling incoming
* requests for this route, if any. These automatically become a `RemoteFlowSource`.
*/
Parameter getARoutedParameter() { result = super.getARoutedParameter() }
/** Gets a string that identifies the framework used for this route setup. */
string getFramework() { result = super.getFramework() }
}
/** Provides a class for modeling new HTTP routing APIs. */
module RouteSetup {
  /**
   * A data-flow node that sets up a route on a server.
   *
   * To model a new routing API, extend this class. To refine existing API models,
   * extend `RouteSetup` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets the argument used to set the URL pattern. */
    abstract DataFlow::Node getUrlPatternArg();

    /**
     * Gets the URL pattern for this route, if it can be statically determined.
     *
     * The pattern is the text of a string-like literal that is a local source
     * of the URL pattern argument.
     */
    string getUrlPattern() {
      exists(CfgNodes::ExprNodes::StringlikeLiteralCfgNode patternLiteral |
        DataFlow::exprNode(patternLiteral) = this.getUrlPatternArg().getALocalSource()
      |
        result = patternLiteral.getExpr().getValueText()
      )
    }

    /**
     * Gets a function that will handle incoming requests for this route, if any.
     *
     * NOTE: This will be modified in the near future to have a `RequestHandler` result, instead of a `Method`.
     */
    abstract Method getARequestHandler();

    /**
     * Gets a parameter that will receive parts of the URL when handling incoming
     * requests for this route, if any. These automatically become a `RemoteFlowSource`.
     */
    abstract Parameter getARoutedParameter();

    /** Gets a string that identifies the framework used for this route setup. */
    abstract string getFramework();
  }
}
/**
* A function that will handle incoming HTTP requests.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RequestHandler::Range` instead.
*/
class RequestHandler extends Method instanceof RequestHandler::Range {
/**
* Gets a parameter that could receive parts of the URL when handling incoming
* requests, if any. These automatically become a `RemoteFlowSource`.
*/
Parameter getARoutedParameter() { result = super.getARoutedParameter() }
/** Gets a string that identifies the framework used for this request handler. */
string getFramework() { result = super.getFramework() }
}
/** Provides a class for modeling new HTTP request handlers. */
module RequestHandler {
/**
* A function that will handle incoming HTTP requests.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `RequestHandler` instead.
*
* Only extend this class if you can't provide a `RouteSetup`, since we handle that case automatically.
*/
abstract class Range extends Method {
/**
* Gets a parameter that could receive parts of the URL when handling incoming
* requests, if any. These automatically become a `RemoteFlowSource`.
*/
abstract Parameter getARoutedParameter();
/** Gets a string that identifies the framework used for this request handler. */
abstract string getFramework();
}
}
/**
 * A request handler obtained from a `RouteSetup`: any method that some route
 * setup reports as one of its request handlers.
 */
private class RequestHandlerFromRouteSetup extends RequestHandler::Range {
  RouteSetup rs;

  RequestHandlerFromRouteSetup() { rs.getARequestHandler() = this }

  /** Gets a routed parameter of the route setup that is also a parameter of this method. */
  override Parameter getARoutedParameter() {
    result = this.getAParameter() and
    result = rs.getARoutedParameter()
  }

  /** Gets the framework reported by the route setup that registered this handler. */
  override string getFramework() { result = rs.getFramework() }
}
/**
 * A parameter that will receive parts of the URL when handling an incoming request.
 *
 * Such parameters are remote flow sources; the source type is the handler's
 * framework name followed by `" RoutedParameter"`.
 */
private class RoutedParameter extends RemoteFlowSource::Range, DataFlow::ParameterNode {
  RequestHandler handler;

  RoutedParameter() { handler.getARoutedParameter() = this.getParameter() }

  override string getSourceType() {
    exists(string framework | framework = handler.getFramework() |
      result = framework + " RoutedParameter"
    )
  }
}
/**
* A data-flow node that creates an HTTP response on a server.
*
* Note: we don't require that this response must be sent to a client (a kind of
* "if a tree falls in a forest and nobody hears it" situation).
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `HttpResponse::Range` instead.
*/
class HttpResponse extends DataFlow::Node instanceof HttpResponse::Range {
/** Gets the data-flow node that specifies the body of this HTTP response. */
DataFlow::Node getBody() { result = super.getBody() }
/** Gets the mimetype of this HTTP response, if it can be statically determined. */
string getMimetype() { result = super.getMimetype() }
}
/** Provides a class for modeling new HTTP response APIs. */
module HttpResponse {
  /**
   * A data-flow node that creates an HTTP response on a server.
   *
   * Note: we don't require that this response must be sent to a client (a kind of
   * "if a tree falls in a forest and nobody hears it" situation).
   *
   * To model a new response API, extend this class. To refine existing API models,
   * extend `HttpResponse` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets the data-flow node that specifies the body of this HTTP response. */
    abstract DataFlow::Node getBody();

    /** Gets the data-flow node that specifies the content-type/mimetype of this HTTP response, if any. */
    abstract DataFlow::Node getMimetypeOrContentTypeArg();

    /** Gets the default mimetype that should be used if `getMimetypeOrContentTypeArg` has no results. */
    abstract string getMimetypeDefault();

    /** Gets the mimetype of this HTTP response, if it can be statically determined. */
    string getMimetype() {
      // No content-type argument at all: fall back to the model's default.
      not exists(this.getMimetypeOrContentTypeArg()) and
      result = this.getMimetypeDefault()
      or
      // Otherwise take the string-like literal that is a local source of the
      // argument, keeping only the part before the first `;`.
      exists(CfgNodes::ExprNodes::StringlikeLiteralCfgNode typeLiteral |
        DataFlow::exprNode(typeLiteral) = this.getMimetypeOrContentTypeArg().getALocalSource()
      |
        result = typeLiteral.getExpr().getValueText().splitAt(";", 0)
      )
    }
  }
}
/**
* A data-flow node that creates an HTTP redirect response on a server.
*
* Note: we don't require that this redirect must be sent to a client (a kind of
* "if a tree falls in a forest and nobody hears it" situation).
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `HttpRedirectResponse::Range` instead.
*/
class HttpRedirectResponse extends HttpResponse instanceof HttpRedirectResponse::Range {
/** Gets the data-flow node that specifies the location of this HTTP redirect response. */
DataFlow::Node getRedirectLocation() { result = super.getRedirectLocation() }
}
/** Provides a class for modeling new HTTP redirect response APIs. */
module HttpRedirectResponse {
/**
* A data-flow node that creates an HTTP redirect response on a server.
*
* Note: we don't require that this redirect must be sent to a client (a kind of
* "if a tree falls in a forest and nobody hears it" situation).
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `HttpRedirectResponse` instead.
*/
abstract class Range extends HTTP::Server::HttpResponse::Range {
/** Gets the data-flow node that specifies the location of this HTTP redirect response. */
abstract DataFlow::Node getRedirectLocation();
}
}
}
/** Provides classes for modeling HTTP clients. */
module Client {
/**
* A method call that makes an outgoing HTTP request.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `Request::Range` instead.
*/
class Request extends MethodCall instanceof Request::Range {
/** Gets a node which returns the body of the response. */
DataFlow::Node getResponseBody() { result = super.getResponseBody() }
/** Gets a string that identifies the framework used for this request. */
string getFramework() { result = super.getFramework() }
/**
* Holds if this request is made using a mode that disables SSL/TLS
* certificate validation, where `disablingNode` represents the point at
* which the validation was disabled.
*/
predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
super.disablesCertificateValidation(disablingNode)
}
}
/** Provides a class for modeling new HTTP requests. */
module Request {
/**
* A method call that makes an outgoing HTTP request.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `Request` instead.
*/
abstract class Range extends MethodCall {
/** Gets a node which returns the body of the response. */
abstract DataFlow::Node getResponseBody();
/** Gets a string that identifies the framework used for this request. */
abstract string getFramework();
/**
* Holds if this request is made using a mode that disables SSL/TLS
* certificate validation, where `disablingNode` represents the point at
* which the validation was disabled.
*/
abstract predicate disablesCertificateValidation(DataFlow::Node disablingNode);
}
}
/**
 * The response body from an outgoing HTTP request, considered as a remote flow source.
 *
 * The source type is the framework name reported by the request.
 */
private class RequestResponseBody extends RemoteFlowSource::Range, DataFlow::Node {
  Request request;

  RequestResponseBody() { request.getResponseBody() = this }

  override string getSourceType() {
    exists(string framework | framework = request.getFramework() | result = framework)
  }
}
}
}
/**
* A data-flow node that executes an operating system command,
* for instance by spawning a new process.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SystemCommandExecution::Range` instead.
*/
class SystemCommandExecution extends DataFlow::Node instanceof SystemCommandExecution::Range {
/** Holds if a shell interprets `arg`. */
predicate isShellInterpreted(DataFlow::Node arg) { super.isShellInterpreted(arg) }
/** Gets an argument to this execution that specifies the command or an argument to it. */
DataFlow::Node getAnArgument() { result = super.getAnArgument() }
}
/** Provides a class for modeling new operating system command APIs. */
module SystemCommandExecution {
/**
* A data-flow node that executes an operating system command, for instance by spawning a new
* process.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `SystemCommandExecution` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets an argument to this execution that specifies the command or an argument to it. */
abstract DataFlow::Node getAnArgument();
/**
* Holds if a shell interprets `arg`.
*
* The default implementation never holds; override it in models of APIs
* that pass arguments through a shell.
*/
predicate isShellInterpreted(DataFlow::Node arg) { none() }
}
}
/**
* A data-flow node that dynamically executes Ruby code.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `CodeExecution::Range` instead.
*/
class CodeExecution extends DataFlow::Node instanceof CodeExecution::Range {
/**
* Gets the argument that specifies the code to be executed.
*
* The result is whatever the underlying `CodeExecution::Range` reports.
*/
DataFlow::Node getCode() { result = super.getCode() }
}
/** Provides a class for modeling new dynamic code execution APIs. */
module CodeExecution {
/**
* A data-flow node that dynamically executes Ruby code.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `CodeExecution` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument that specifies the code to be executed.
*
* Abstract: each concrete model identifies its own code argument.
*/
abstract DataFlow::Node getCode();
}
}
/**
 * A data-flow node that parses XML content.
 *
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `XmlParserCall::Range` instead.
 */
class XmlParserCall extends DataFlow::Node instanceof XmlParserCall::Range {
  // Uses the same `instanceof ...::Range` + `super` delegation as every other
  // concept in this file, instead of a separate `range` field.

  /** Gets the argument that specifies the XML content to be parsed. */
  DataFlow::Node getInput() { result = super.getInput() }

  /** Holds if this XML parser call is configured to process external entities. */
  predicate externalEntitiesEnabled() { super.externalEntitiesEnabled() }
}
/** Provides a class for modeling new XML parsing APIs. */
module XmlParserCall {
/**
* A data-flow node that parses XML content.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XmlParserCall` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the XML content to be parsed. */
abstract DataFlow::Node getInput();
/** Holds if this XML parser call is configured to process external entities. */
abstract predicate externalEntitiesEnabled();
}
}
/**
* A data-flow node that may represent a database object in an ORM system.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `OrmInstantiation::Range` instead.
*/
class OrmInstantiation extends DataFlow::Node instanceof OrmInstantiation::Range {
/**
* Holds if a call to `methodName` on this instance may return a field of this ORM object.
*
* `methodName` must be bound by the caller (see the `bindingset` annotation).
*/
bindingset[methodName]
predicate methodCallMayAccessField(string methodName) {
super.methodCallMayAccessField(methodName)
}
}
/** Provides a class for modeling new ORM object instantiation APIs. */
module OrmInstantiation {
/**
* A data-flow node that may represent a database object in an ORM system.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `OrmInstantiation` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Holds if a call to `methodName` on this instance may return a field of this ORM object.
*
* `methodName` must be bound by the caller (see the `bindingset` annotation).
*/
bindingset[methodName]
abstract predicate methodCallMayAccessField(string methodName);
}
}

View File

@@ -0,0 +1,7 @@
/**
* Provides classes for performing local (intra-procedural) and
* global (inter-procedural) data flow analyses.
*/
module DataFlow {
// The import is not `private`, so the names declared in `DataFlowImpl`
// are available to clients as `DataFlow::...`.
import codeql.ruby.dataflow.internal.DataFlowImpl
}

View File

@@ -0,0 +1,7 @@
/**
* Provides classes for performing local (intra-procedural) and
* global (inter-procedural) data flow analyses.
*/
module DataFlow2 {
// The import is not `private`, so the names declared in `DataFlowImpl2`
// are available to clients as `DataFlow2::...`.
import codeql.ruby.dataflow.internal.DataFlowImpl2
}

View File

@@ -0,0 +1,52 @@
private import codeql.Locations
/**
 * A diagnostic emitted during extraction, such as a parse error.
 *
 * Each diagnostic corresponds to one tuple of the `diagnostics` relation,
 * whose columns are exposed through the getters below.
 */
class Diagnostic extends @diagnostic {
  int severity;
  string tag;
  string message;
  string fullMessage;
  Location location;

  Diagnostic() { diagnostics(this, severity, tag, message, fullMessage, location) }

  /** Gets the numerical severity level associated with this diagnostic. */
  int getSeverity() { result = severity }

  /**
   * Gets a string representation of the severity of this diagnostic.
   *
   * Only the levels 10, 20, 30 and 40 have a textual form; there is no result
   * for any other severity.
   */
  string getSeverityText() {
    result = "Debug" and severity = 10
    or
    result = "Info" and severity = 20
    or
    result = "Warning" and severity = 30
    or
    result = "Error" and severity = 40
  }

  /** Gets the error code associated with this diagnostic, e.g. parse_error. */
  string getTag() { result = tag }

  /** Gets the error message text associated with this diagnostic. */
  string getMessage() { result = message }

  /** Gets the full error message text associated with this diagnostic. */
  string getFullMessage() { result = fullMessage }

  /** Gets the source location of this diagnostic. */
  Location getLocation() { result = location }

  /** Gets a textual representation of this diagnostic, namely its message. */
  string toString() { result = this.getMessage() }
}
/**
* A diagnostic relating to a particular error in extracting a file.
*
* Restricted to `@diagnostic_error` entities whose tag is `parse_error`.
*/
class ExtractionError extends Diagnostic, @diagnostic_error {
ExtractionError() { this.getTag() = "parse_error" }
}

View File

@@ -0,0 +1,11 @@
/**
* Helper file that imports all framework modeling.
*/
private import codeql.ruby.frameworks.ActionController
private import codeql.ruby.frameworks.ActiveRecord
private import codeql.ruby.frameworks.ActionView
private import codeql.ruby.frameworks.StandardLibrary
private import codeql.ruby.frameworks.Files
private import codeql.ruby.frameworks.HttpClients
private import codeql.ruby.frameworks.XmlParsing

View File

@@ -0,0 +1,7 @@
/**
* Provides classes for performing local (intra-procedural) and
* global (inter-procedural) taint-tracking analyses.
*/
module TaintTracking {
// The import is not `private`, so the names declared in `TaintTrackingImpl`
// are available to clients as `TaintTracking::...`.
import codeql.ruby.dataflow.internal.tainttracking1.TaintTrackingImpl
}

View File

@@ -0,0 +1,215 @@
private import codeql.ruby.AST
private import internal.AST
private import internal.Call
private import internal.TreeSitter
private import codeql.ruby.dataflow.internal.DataFlowDispatch
private import codeql.ruby.dataflow.internal.DataFlowImplCommon
/**
* A call.
*/
class Call extends Expr instanceof CallImpl {
override string getAPrimaryQlClass() { result = "Call" }
/**
* Gets the `n`th argument of this call. In the following example, the
* result for n=0 is the `IntegerLiteral` 0, while for n=1 the result is a
* `Pair` (whose `getKey` returns the `SymbolLiteral` for `bar`, and
* `getValue` returns the `IntegerLiteral` 1). Keyword arguments like this
* can be accessed more naturally using the
* `getKeywordArgument(string keyword)` predicate.
* ```rb
* foo(0, bar: 1)
* yield 0, bar: 1
* ```
*/
final Expr getArgument(int n) { result = super.getArgumentImpl(n) }
/**
* Gets an argument of this call.
*/
final Expr getAnArgument() { result = this.getArgument(_) }
/**
* Gets the value of the keyword argument whose key is `keyword`, if any. For
* example, the result for `getKeywordArgument("qux")` in the following
* example is the `IntegerLiteral` 123.
* ```rb
* foo :bar, "baz", qux: 123
* ```
*
* Only `Pair` arguments whose key is a `SymbolLiteral` are considered.
*/
final Expr getKeywordArgument(string keyword) {
exists(Pair p |
p = this.getAnArgument() and
p.getKey().(SymbolLiteral).getValueText() = keyword and
result = p.getValue()
)
}
/**
* Gets the number of arguments of this call.
*/
final int getNumberOfArguments() { result = super.getNumberOfArgumentsImpl() }
/**
* Gets a potential target of this call, if any.
*
* Targets are the callables that `viableCallable` or `viableCallableLambda`
* report for a data-flow call whose expression is this call.
*/
final Callable getATarget() {
exists(DataFlowCall c | this = c.asCall().getExpr() |
TCfgScope(result) = [viableCallable(c), viableCallableLambda(c, _)]
)
}
/** Gets a child of this call; arguments are reported under the name `getArgument`. */
override AstNode getAChild(string pred) {
result = Expr.super.getAChild(pred)
or
pred = "getArgument" and result = this.getArgument(_)
}
}
/**
* A method call.
*/
class MethodCall extends Call instanceof MethodCallImpl {
override string getAPrimaryQlClass() { result = "MethodCall" }
/**
* Gets the receiver of this call, if any. For example:
*
* ```rb
* foo.bar
* Baz::qux
* corge()
* ```
*
* The result for the call to `bar` is the `Expr` for `foo`; the result for
* the call to `qux` is the `Expr` for `Baz`; for the call to `corge` there
* is no result.
*/
final Expr getReceiver() { result = super.getReceiverImpl() }
/**
* Gets the name of the method being called. For example, in:
*
* ```rb
* foo.bar x, y
* ```
*
* the result is `"bar"`.
*/
final string getMethodName() { result = super.getMethodNameImpl() }
/**
* Gets the block of this method call, if any.
* ```rb
* foo.each { |x| puts x }
* ```
*/
final Block getBlock() { result = super.getBlockImpl() }
/** Gets a textual representation of the form `call to <method name>`. */
override string toString() { result = "call to " + this.getMethodName() }
/**
* Gets a child of this method call. In addition to the children of `Call`,
* the receiver and the block are reported as `getReceiver` and `getBlock`.
*/
override AstNode getAChild(string pred) {
result = Call.super.getAChild(pred)
or
pred = "getReceiver" and result = this.getReceiver()
or
pred = "getBlock" and result = this.getBlock()
}
}
/**
* A call to a setter method.
* ```rb
* self.foo = 10
* a[0] = 10
* ```
*
* These are the `TMethodCallSynth` nodes whose fourth component is `true`.
* NOTE(review): presumably that component is the "is setter" flag; confirm
* against the definition of `TMethodCallSynth`.
*/
class SetterMethodCall extends MethodCall, TMethodCallSynth {
SetterMethodCall() { this = TMethodCallSynth(_, _, _, true, _) }
final override string getAPrimaryQlClass() { result = "SetterMethodCall" }
}
/**
* An element reference; a call to the `[]` method.
* ```rb
* a[0]
* ```
*/
class ElementReference extends MethodCall instanceof ElementReferenceImpl {
final override string getAPrimaryQlClass() { result = "ElementReference" }
/** Gets the fixed textual representation `...[...]`. */
final override string toString() { result = "...[...]" }
}
/**
* A call to `yield`.
* ```rb
* yield x, y
* ```
*
* A `YieldCall` is a `Call` but not a `MethodCall`, so it has arguments but
* no receiver, method name or block predicates.
*/
class YieldCall extends Call instanceof YieldCallImpl {
final override string getAPrimaryQlClass() { result = "YieldCall" }
/** Gets the fixed textual representation `yield ...`. */
final override string toString() { result = "yield ..." }
}
/**
* A call to `super`.
* ```rb
* class Foo < Bar
* def baz
* super
* end
* end
* ```
*
* The textual representation is inherited from `MethodCall`.
*/
class SuperCall extends MethodCall instanceof SuperCallImpl {
final override string getAPrimaryQlClass() { result = "SuperCall" }
}
/**
* A block argument in a method call.
* ```rb
* foo(&block)
* ```
*/
class BlockArgument extends Expr, TBlockArgument {
// The underlying tree-sitter node.
private Ruby::BlockArgument g;
BlockArgument() { this = TBlockArgument(g) }
final override string getAPrimaryQlClass() { result = "BlockArgument" }
/**
* Gets the underlying expression representing the block. In the following
* example, the result is the `Expr` for `bar`:
* ```rb
* foo(&bar)
* ```
*/
final Expr getValue() { toGenerated(result) = g.getChild() }
/** Gets the fixed textual representation `&...`. */
final override string toString() { result = "&..." }
/** Gets a child of this node; the wrapped expression is reported as `getValue`. */
final override AstNode getAChild(string pred) {
result = super.getAChild(pred)
or
pred = "getValue" and result = this.getValue()
}
}
/**
* A `...` expression that contains forwarded arguments.
* ```rb
* foo(...)
* ```
*/
class ForwardedArguments extends Expr, TForwardArgument {
// The underlying tree-sitter node.
private Ruby::ForwardArgument g;
ForwardedArguments() { this = TForwardArgument(g) }
final override string getAPrimaryQlClass() { result = "ForwardedArguments" }
/** Gets the fixed textual representation `...`. */
final override string toString() { result = "..." }
}

View File

@@ -0,0 +1,210 @@
private import codeql.ruby.AST
private import internal.AST
private import internal.Module
private import internal.Variable
private import internal.TreeSitter
/**
* An access to a constant.
*
* The predicates `getName`, `getScopeExpr` and `hasGlobalScope` have no
* results here; they are overridden by the subclasses for each concrete
* kind of access.
*/
class ConstantAccess extends Expr, TConstantAccess {
/** Gets the name of the constant being accessed. */
string getName() { none() }
/** Holds if the name of the constant being accessed is `name`. */
final predicate hasName(string name) { this.getName() = name }
/**
* Gets the expression used in the access's scope resolution operation, if
* any. In the following example, the result is the `Call` expression for
* `foo()`.
*
* ```rb
* foo()::MESSAGE
* ```
*
* However, there is no result for the following example, since there is no
* scope resolution operation.
*
* ```rb
* MESSAGE
* ```
*/
Expr getScopeExpr() { none() }
/**
* Holds if the access uses the scope resolution operator to refer to the
* global scope, as in this example:
*
* ```rb
* ::MESSAGE
* ```
*/
predicate hasGlobalScope() { none() }
/** Gets the name of the constant as the textual representation. */
override string toString() { result = this.getName() }
/** Gets a child of this access; the scope expression is reported as `getScopeExpr`. */
override AstNode getAChild(string pred) {
result = super.getAChild(pred)
or
pred = "getScopeExpr" and result = this.getScopeExpr()
}
}
/**
* A constant access backed by a single `Ruby::Constant` token.
* The name is the token's value; there is no scope expression.
*/
private class TokenConstantAccess extends ConstantAccess, TTokenConstantAccess {
private Ruby::Constant g;
TokenConstantAccess() { this = TTokenConstantAccess(g) }
final override string getName() { result = g.getValue() }
}
/**
* A constant access backed by a `Ruby::ScopeResolution` node together with
* the `Ruby::Constant` that names the accessed constant.
*/
private class ScopeResolutionConstantAccess extends ConstantAccess, TScopeResolutionConstantAccess {
private Ruby::ScopeResolution g;
private Ruby::Constant constant;
ScopeResolutionConstantAccess() { this = TScopeResolutionConstantAccess(g, constant) }
final override string getName() { result = constant.getValue() }
final override Expr getScopeExpr() { toGenerated(result) = g.getScope() }
// A scope resolution without a scope child is treated as global scope.
final override predicate hasGlobalScope() { not exists(g.getScope()) }
}
/**
 * A synthesized read of a constant, described by a textual `value`.
 *
 * A `value` that starts with `::` denotes an access in the global scope.
 */
private class ConstantReadAccessSynth extends ConstantAccess, TConstantReadAccessSynth {
  private string value;

  ConstantReadAccessSynth() { this = TConstantReadAccessSynth(_, _, value) }

  /** Gets the constant name, i.e. `value` without a leading `::`. */
  final override string getName() {
    // Drop the two leading characters (`::`) of a globally scoped name.
    this.hasGlobalScope() and result = value.suffix(2)
    or
    not this.hasGlobalScope() and result = value
  }

  /** Gets the synthesized child at index 0, if any, as the scope expression. */
  final override Expr getScopeExpr() { synthChild(this, 0, result) }

  final override predicate hasGlobalScope() { value.matches("::%") }
}
/**
* A use (read) of a constant.
*
* For example, the right-hand side of the assignment in:
*
* ```rb
* x = Foo
* ```
*
* Or the superclass `Bar` in this example:
*
* ```rb
* class Foo < Bar
* end
* ```
*/
class ConstantReadAccess extends ConstantAccess {
ConstantReadAccess() {
not this instanceof ConstantWriteAccess
or
// `X` in `X ||= 10` is considered both a read and a write
this = any(AssignOperation a).getLeftOperand()
or
this instanceof TConstantReadAccessSynth
}
/**
* Gets the value being read, if any. For example, in
*
* ```rb
* module M
* CONST = "const"
* end
*
* puts M::CONST
* ```
*
* the value being read at `M::CONST` is `"const"`.
*
* Three cases are covered: an unqualified access looked up through the
* enclosing modules, a globally scoped access looked up in `Object`, and a
* qualified access looked up in the module its scope expression resolves to.
*/
Expr getValue() {
// NOTE(review): the `if ... then ... else any()` below is presumably meant to
// constrain only this first (unqualified) case and not the `or` branches
// that follow; confirm against QL's precedence rules.
not exists(this.getScopeExpr()) and
result = lookupConst(this.getEnclosingModule+().getModule(), this.getName()) and
// For now, we restrict the scope of top-level declarations to their file.
// This may remove some plausible targets, but also removes a lot of
// implausible targets
if result.getEnclosingModule() instanceof Toplevel
then result.getFile() = this.getFile()
else any()
or
this.hasGlobalScope() and
result = lookupConst(TResolved("Object"), this.getName())
or
result = lookupConst(resolveScopeExpr(this.getScopeExpr()), this.getName())
}
/** Gets the value text of the value being read, if any. */
override string getValueText() { result = this.getValue().getValueText() }
final override string getAPrimaryQlClass() { result = "ConstantReadAccess" }
}
/**
 * A definition of a constant.
 *
 * Examples:
 *
 * ```rb
 * Foo = 1            # defines constant Foo as an integer
 * M::Foo = 1         # defines constant Foo as an integer in module M
 *
 * class Bar; end     # defines constant Bar as a class
 * class M::Bar; end  # defines constant Bar as a class in module M
 *
 * module Baz; end    # defines constant Baz as a module
 * module M::Baz; end # defines constant Baz as a module in module M
 * ```
 */
class ConstantWriteAccess extends ConstantAccess {
  ConstantWriteAccess() {
    this instanceof TNamespace or explicitAssignmentNode(toGenerated(this), _)
  }

  override string getAPrimaryQlClass() { result = "ConstantWriteAccess" }

  /**
   * Gets the fully qualified name for this constant, based on the context in
   * which it is defined.
   *
   * For example, given
   * ```rb
   * module Foo
   *   module Bar
   *     class Baz
   *     end
   *   end
   *   CONST_A = "a"
   * end
   * ```
   *
   * the constant `Baz` has the fully qualified name `Foo::Bar::Baz`, and
   * `CONST_A` has the fully qualified name `Foo::CONST_A`.
   */
  string getQualifiedName() {
    if this.getEnclosingModule() instanceof ConstantWriteAccess
    then
      // Recursive case: qualified name of the enclosing module, followed by
      // `::` and this constant's own name.
      result =
        this.getEnclosingModule().(ConstantWriteAccess).getQualifiedName() + "::" + this.getName()
    else
      // Base case: the enclosing module is not itself a constant definition.
      result = this.getName()
  }
}
/**
* A definition of a constant via assignment. For example, the left-hand
* operand in the following example:
*
* ```rb
* MAX_SIZE = 100
* ```
*
* These are the constant write accesses that are also `LhsExpr`s.
*/
class ConstantAssignment extends ConstantWriteAccess, LhsExpr {
override string getAPrimaryQlClass() { result = "ConstantAssignment" }
}

View File

@@ -0,0 +1,611 @@
private import codeql.ruby.AST
private import internal.AST
private import internal.TreeSitter
/**
* A control expression that can be any of the following:
* - `case`
* - `if`/`unless` (including expression-modifier variants)
* - ternary-if (`?:`)
* - `while`/`until` (including expression-modifier variants)
* - `for`
*
* This class declares no members of its own.
*/
class ControlExpr extends Expr, TControlExpr { }
/**
* A conditional expression: `if`/`unless` (including expression-modifier
* variants), and ternary-if (`?:`) expressions.
*
* `getCondition` and `getBranch` have no results here; subclasses override them.
*/
class ConditionalExpr extends ControlExpr, TConditionalExpr {
/**
* Gets the condition expression. For example, the result is `foo` in the
* following:
* ```rb
* if foo
* bar = 1
* end
* ```
*/
Expr getCondition() { none() }
/**
* Gets the branch of this conditional expression that is taken when the
* condition evaluates to `cond`, if any.
*/
Stmt getBranch(boolean cond) { none() }
/**
* Gets a child of this expression; the condition and the branches are
* reported as `getCondition` and `getBranch` respectively.
*/
override AstNode getAChild(string pred) {
result = super.getAChild(pred)
or
pred = "getCondition" and result = this.getCondition()
or
pred = "getBranch" and result = this.getBranch(_)
}
}
/**
* An `if` or `elsif` expression.
* ```rb
* if x
* a += 1
* elsif y
* a += 2
* end
* ```
*/
class IfExpr extends ConditionalExpr, TIfExpr {
final override string getAPrimaryQlClass() { result = "IfExpr" }
/** Holds if this is an `elsif` expression. */
predicate isElsif() { none() }
/** Gets the 'then' branch of this `if`/`elsif` expression. */
Stmt getThen() { none() }
/**
* Gets the `elsif`/`else` branch of this `if`/`elsif` expression, if any. In
* the following example, the result is a `StmtSequence` containing `b`.
* ```rb
* if foo
* a
* else
* b
* end
* ```
* But there is no result for the following:
* ```rb
* if foo
* a
* end
* ```
* There can be at most one result, since `elsif` branches nest. In the
* following example, `ifExpr.getElse()` returns the nested `elsif`, which is
* itself an `IfExpr` for which `isElsif()` holds, and the `else` branch is
* nested inside that. To get the `StmtSequence` for the `else` branch, i.e.
* the one containing `c`, use `getElse().(IfExpr).getElse()`.
* ```rb
* if foo
* a
* elsif bar
* b
* else
* c
* end
* ```
*/
Stmt getElse() { none() }
/** Gets the 'then' branch for `cond = true` and the `elsif`/`else` branch for `cond = false`. */
final override Stmt getBranch(boolean cond) {
cond = true and result = this.getThen()
or
cond = false and result = this.getElse()
}
/** Gets a child of this expression; the branches are also reported as `getThen` and `getElse`. */
override AstNode getAChild(string pred) {
result = super.getAChild(pred)
or
pred = "getThen" and result = this.getThen()
or
pred = "getElse" and result = this.getElse()
}
}
/** An `IfExpr` backed by a `Ruby::If` tree-sitter node. */
private class If extends IfExpr, TIf {
  private Ruby::If g;

  If() { this = TIf(g) }

  final override string toString() { result = "if ..." }

  final override Expr getCondition() { g.getCondition() = toGenerated(result) }

  final override Stmt getThen() { g.getConsequence() = toGenerated(result) }

  final override Stmt getElse() { g.getAlternative() = toGenerated(result) }
}
/** An `IfExpr` backed by a `Ruby::Elsif` tree-sitter node; `isElsif()` holds for it. */
private class Elsif extends IfExpr, TElsif {
  private Ruby::Elsif g;

  Elsif() { this = TElsif(g) }

  final override string toString() { result = "elsif ..." }

  final override predicate isElsif() { any() }

  final override Expr getCondition() { g.getCondition() = toGenerated(result) }

  final override Stmt getThen() { g.getConsequence() = toGenerated(result) }

  final override Stmt getElse() { g.getAlternative() = toGenerated(result) }
}
/**
 * An `unless` expression.
 * ```rb
 * unless x == 0
 *   y /= x
 * end
 * ```
 */
class UnlessExpr extends ConditionalExpr, TUnlessExpr {
  private Ruby::Unless g;

  UnlessExpr() { this = TUnlessExpr(g) }

  final override string getAPrimaryQlClass() { result = "UnlessExpr" }

  final override string toString() { result = "unless ..." }

  final override Expr getCondition() { g.getCondition() = toGenerated(result) }

  /**
   * Gets the 'then' branch of this `unless` expression. In the following
   * example, the result is the `StmtSequence` containing `foo`.
   * ```rb
   * unless a == b then
   *   foo
   * else
   *   bar
   * end
   * ```
   */
  final Stmt getThen() { g.getConsequence() = toGenerated(result) }

  /**
   * Gets the 'else' branch of this `unless` expression. In the following
   * example, the result is the `StmtSequence` containing `bar`.
   * ```rb
   * unless a == b then
   *   foo
   * else
   *   bar
   * end
   * ```
   */
  final Stmt getElse() { g.getAlternative() = toGenerated(result) }

  /**
   * Gets the branch taken when the condition evaluates to `cond`: the 'else'
   * branch for `true` and the 'then' branch for `false`.
   */
  final override Expr getBranch(boolean cond) {
    cond = true and result = this.getElse()
    or
    cond = false and result = this.getThen()
  }

  override AstNode getAChild(string pred) {
    pred = "getElse" and result = this.getElse()
    or
    pred = "getThen" and result = this.getThen()
    or
    result = ConditionalExpr.super.getAChild(pred)
  }
}
/**
 * An expression modified using `if`.
 * ```rb
 * foo if bar
 * ```
 */
class IfModifierExpr extends ConditionalExpr, TIfModifierExpr {
  private Ruby::IfModifier g;

  IfModifierExpr() { this = TIfModifierExpr(g) }

  final override string getAPrimaryQlClass() { result = "IfModifierExpr" }

  final override string toString() { result = "... if ..." }

  final override Expr getCondition() { g.getCondition() = toGenerated(result) }

  /**
   * Gets the statement that is conditionally evaluated. In the following
   * example, the result is the `Expr` for `foo`.
   * ```rb
   * foo if bar
   * ```
   */
  final Stmt getBody() { g.getBody() = toGenerated(result) }

  /** Gets the body when `cond` is `true`; there is no branch for `false`. */
  final override Stmt getBranch(boolean cond) { result = this.getBody() and cond = true }

  override AstNode getAChild(string pred) {
    pred = "getBody" and result = this.getBody()
    or
    result = ConditionalExpr.super.getAChild(pred)
  }
}
/**
 * An expression modified using `unless`.
 * ```rb
 * y /= x unless x == 0
 * ```
 */
class UnlessModifierExpr extends ConditionalExpr, TUnlessModifierExpr {
  private Ruby::UnlessModifier g;

  UnlessModifierExpr() { this = TUnlessModifierExpr(g) }

  final override string getAPrimaryQlClass() { result = "UnlessModifierExpr" }

  final override string toString() { result = "... unless ..." }

  final override Expr getCondition() { g.getCondition() = toGenerated(result) }

  /**
   * Gets the statement that is conditionally evaluated. In the following
   * example, the result is the `Expr` for `foo`.
   * ```rb
   * foo unless bar
   * ```
   */
  final Stmt getBody() { g.getBody() = toGenerated(result) }

  /** Gets the body when `cond` is `false`; there is no branch for `true`. */
  final override Stmt getBranch(boolean cond) { result = this.getBody() and cond = false }

  override AstNode getAChild(string pred) {
    pred = "getBody" and result = this.getBody()
    or
    result = ConditionalExpr.super.getAChild(pred)
  }
}
/**
 * A conditional expression using the ternary (`?:`) operator.
 * ```rb
 * (a > b) ? a : b
 * ```
 */
class TernaryIfExpr extends ConditionalExpr, TTernaryIfExpr {
  private Ruby::Conditional g;

  TernaryIfExpr() { this = TTernaryIfExpr(g) }

  final override string getAPrimaryQlClass() { result = "TernaryIfExpr" }

  final override string toString() { result = "... ? ... : ..." }

  final override Expr getCondition() { g.getCondition() = toGenerated(result) }

  /** Gets the 'then' branch of this ternary if expression. */
  final Stmt getThen() { g.getConsequence() = toGenerated(result) }

  /** Gets the 'else' branch of this ternary if expression. */
  final Stmt getElse() { g.getAlternative() = toGenerated(result) }

  /** Gets the 'then' branch when `cond` is `true`, and the 'else' branch when it is `false`. */
  final override Stmt getBranch(boolean cond) {
    cond = false and result = this.getElse()
    or
    cond = true and result = this.getThen()
  }

  override AstNode getAChild(string pred) {
    pred = "getElse" and result = this.getElse()
    or
    pred = "getThen" and result = this.getThen()
    or
    result = ConditionalExpr.super.getAChild(pred)
  }
}
/**
 * A `case` expression.
 * ```rb
 * case foo
 * when 0
 *   puts 'zero'
 * when 1
 *   puts 'one'
 * end
 * ```
 */
class CaseExpr extends ControlExpr, TCaseExpr {
  private Ruby::Case g;

  CaseExpr() { this = TCaseExpr(g) }

  final override string getAPrimaryQlClass() { result = "CaseExpr" }

  final override string toString() { result = "case ..." }

  /**
   * Gets the expression being compared, if any. For example, `foo` in the following example.
   * ```rb
   * case foo
   * when 0
   *   puts 'zero'
   * when 1
   *   puts 'one'
   * end
   * ```
   * There is no result for the following example:
   * ```rb
   * case
   * when a then 0
   * when b then 1
   * else 2
   * end
   * ```
   */
  final Expr getValue() { g.getValue() = toGenerated(result) }

  /**
   * Gets the `n`th branch of this case expression, either a `WhenExpr` or a
   * `StmtSequence`.
   */
  final Expr getBranch(int n) { g.getChild(n) = toGenerated(result) }

  /**
   * Gets a branch of this case expression, either a `WhenExpr` or a
   * `StmtSequence`.
   */
  final Expr getABranch() { result = this.getBranch(_) }

  /** Gets a `when` branch of this case expression. */
  final WhenExpr getAWhenBranch() { result = this.getABranch() }

  /** Gets the `else` branch of this case expression, if any. */
  final StmtSequence getElseBranch() { result = this.getABranch() }

  /**
   * Gets the number of branches of this case expression.
   */
  final int getNumberOfBranches() { result = count(this.getBranch(_)) }

  override AstNode getAChild(string pred) {
    pred = "getBranch" and result = this.getBranch(_)
    or
    pred = "getValue" and result = this.getValue()
    or
    result = super.getAChild(pred)
  }
}
/**
 * A `when` branch of a `case` expression.
 * ```rb
 * case
 * when a > b then x
 * end
 * ```
 */
class WhenExpr extends Expr, TWhenExpr {
  private Ruby::When g;

  WhenExpr() { this = TWhenExpr(g) }

  final override string getAPrimaryQlClass() { result = "WhenExpr" }

  final override string toString() { result = "when ..." }

  /** Gets the body of this case-when expression. */
  final Stmt getBody() { g.getBody() = toGenerated(result) }

  /**
   * Gets the `n`th pattern (or condition) in this case-when expression. In the
   * following example, the 0th pattern is `x`, the 1st pattern is `y`, and the
   * 2nd pattern is `z`.
   * ```rb
   * case foo
   * when x, y, z
   *   puts 'x/y/z'
   * end
   * ```
   */
  final Expr getPattern(int n) { g.getPattern(n).getChild() = toGenerated(result) }

  /**
   * Gets a pattern (or condition) in this case-when expression.
   */
  final Expr getAPattern() { result = this.getPattern(_) }

  /**
   * Gets the number of patterns in this case-when expression.
   */
  final int getNumberOfPatterns() { result = count(this.getPattern(_)) }

  override AstNode getAChild(string pred) {
    pred = "getPattern" and result = this.getPattern(_)
    or
    pred = "getBody" and result = this.getBody()
    or
    result = super.getAChild(pred)
  }
}
/**
 * A loop. That is, a `for` loop, a `while` or `until` loop, or one of the
 * expression-modifier variants of `while`/`until`.
 */
class Loop extends ControlExpr, TLoop {
  /**
   * Gets the body of this loop.
   *
   * The definition here has no result; the concrete loop classes override it.
   */
  Stmt getBody() { none() }

  override AstNode getAChild(string pred) {
    pred = "getBody" and result = this.getBody()
    or
    result = super.getAChild(pred)
  }
}
/**
 * A loop using a condition expression. That is, a `while` or `until` loop, or
 * one of their expression-modifier variants.
 */
class ConditionalLoop extends Loop, TConditionalLoop {
  /**
   * Gets the condition expression of this loop.
   *
   * The definition here has no result; the concrete loop classes override it.
   */
  Expr getCondition() { none() }

  /**
   * Holds if the loop body is entered when the condition is `condValue`.
   *
   * The definition here never holds; the concrete loop classes override it.
   */
  predicate entersLoopWhenConditionIs(boolean condValue) { none() }

  override AstNode getAChild(string pred) {
    pred = "getCondition" and result = this.getCondition()
    or
    result = Loop.super.getAChild(pred)
  }
}
/**
 * A `while` loop.
 * ```rb
 * while a < b
 *   p a
 *   a += 2
 * end
 * ```
 */
class WhileExpr extends ConditionalLoop, TWhileExpr {
  private Ruby::While g;

  WhileExpr() { this = TWhileExpr(g) }

  final override string getAPrimaryQlClass() { result = "WhileExpr" }

  final override string toString() { result = "while ..." }

  /** Gets the condition expression of this `while` loop. */
  final override Expr getCondition() { g.getCondition() = toGenerated(result) }

  /** Gets the body of this `while` loop. */
  final override Stmt getBody() { g.getBody() = toGenerated(result) }

  /**
   * Holds if the loop body is entered when the condition is `condValue`. For
   * `while` loops, this holds when `condValue` is true.
   */
  final override predicate entersLoopWhenConditionIs(boolean condValue) { condValue = true }
}
/**
 * An `until` loop.
 * ```rb
 * until a >= b
 *   p a
 *   a += 1
 * end
 * ```
 */
class UntilExpr extends ConditionalLoop, TUntilExpr {
  private Ruby::Until g;

  UntilExpr() { this = TUntilExpr(g) }

  final override string getAPrimaryQlClass() { result = "UntilExpr" }

  final override string toString() { result = "until ..." }

  /** Gets the condition expression of this `until` loop. */
  final override Expr getCondition() { g.getCondition() = toGenerated(result) }

  /** Gets the body of this `until` loop. */
  final override Stmt getBody() { g.getBody() = toGenerated(result) }

  /**
   * Holds if the loop body is entered when the condition is `condValue`. For
   * `until` loops, this holds when `condValue` is false.
   */
  final override predicate entersLoopWhenConditionIs(boolean condValue) { condValue = false }
}
/**
 * An expression looped using the `while` modifier.
 * ```rb
 * foo while bar
 * ```
 */
class WhileModifierExpr extends ConditionalLoop, TWhileModifierExpr {
  private Ruby::WhileModifier g;

  WhileModifierExpr() { this = TWhileModifierExpr(g) }

  final override string getAPrimaryQlClass() { result = "WhileModifierExpr" }

  final override string toString() { result = "... while ..." }

  /** Gets the condition expression of this `while`-modifier loop. */
  final override Expr getCondition() { g.getCondition() = toGenerated(result) }

  /** Gets the statement that is repeated by this `while`-modifier loop. */
  final override Stmt getBody() { g.getBody() = toGenerated(result) }

  /**
   * Holds if the loop body is entered when the condition is `condValue`. For
   * `while`-modifier loops, this holds when `condValue` is true.
   */
  final override predicate entersLoopWhenConditionIs(boolean condValue) { condValue = true }
}
/**
 * An expression looped using the `until` modifier.
 * ```rb
 * foo until bar
 * ```
 */
class UntilModifierExpr extends ConditionalLoop, TUntilModifierExpr {
  private Ruby::UntilModifier g;

  UntilModifierExpr() { this = TUntilModifierExpr(g) }

  final override string getAPrimaryQlClass() { result = "UntilModifierExpr" }

  final override string toString() { result = "... until ..." }

  /** Gets the condition expression of this `until`-modifier loop. */
  final override Expr getCondition() { g.getCondition() = toGenerated(result) }

  /** Gets the statement that is repeated by this `until`-modifier loop. */
  final override Stmt getBody() { g.getBody() = toGenerated(result) }

  /**
   * Holds if the loop body is entered when the condition is `condValue`. For
   * `until`-modifier loops, this holds when `condValue` is false.
   */
  final override predicate entersLoopWhenConditionIs(boolean condValue) { condValue = false }
}
/**
 * A `for` loop.
 * ```rb
 * for val in 1..n
 *   sum += val
 * end
 * ```
 */
class ForExpr extends Loop, TForExpr {
  private Ruby::For g;

  ForExpr() { this = TForExpr(g) }

  final override string getAPrimaryQlClass() { result = "ForExpr" }

  final override string toString() { result = "for ... in ..." }

  /** Gets the body of this `for` loop. */
  final override Stmt getBody() { g.getBody() = toGenerated(result) }

  /** Gets the pattern representing the iteration argument. */
  final Pattern getPattern() { g.getPattern() = toGenerated(result) }

  /**
   * Gets the value being iterated over. In the following example, the result
   * is the expression `1..10`:
   * ```rb
   * for n in 1..10 do
   *   puts n
   * end
   * ```
   */
  final Expr getValue() { g.getValue().getChild() = toGenerated(result) }

  override AstNode getAChild(string pred) {
    pred = "getValue" and result = this.getValue()
    or
    pred = "getPattern" and result = this.getPattern()
    or
    result = Loop.super.getAChild(pred)
  }
}

View File

@@ -0,0 +1,313 @@
private import codeql.Locations
private import codeql.ruby.AST
private import internal.Erb
private import internal.TreeSitter
/**
 * A node in the ERB abstract syntax tree. This class is the base class for all
 * ERB elements.
 */
class ErbAstNode extends TAstNode {
  /**
   * Gets a textual representation of this node.
   *
   * The definition here has no result; subclasses provide the text.
   */
  cached
  string toString() { none() }

  /** Gets the location of this node. */
  Location getLocation() { getLocation(this) = result }

  /**
   * Gets the name of a primary CodeQL class to which this node belongs.
   *
   * This predicate always has a result. If no primary class can be
   * determined, the result is `"???"`. If multiple primary classes match,
   * this predicate can have multiple results.
   */
  string getAPrimaryQlClass() { result = "???" }
}
/**
 * An ERB template. This can contain multiple directives to be executed when
 * the template is compiled.
 */
class ErbTemplate extends TTemplate, ErbAstNode {
  private Erb::Template g;

  ErbTemplate() { this = TTemplate(g) }

  final override string getAPrimaryQlClass() { result = "ErbTemplate" }

  override string toString() { result = "erb template" }

  /** Gets a node that is a direct child of this template. */
  ErbAstNode getAChildNode() { g.getChild(_) = toGenerated(result) }
}
/**
 * Gets `val` shortened for display: values of at most 32 characters are
 * returned unchanged; longer values are cut to their first 29 characters
 * followed by `...`.
 */
bindingset[val]
private string displayToken(string val) {
  if val.length() <= 32 then result = val else result = val.prefix(29) + "..."
}
/**
 * An ERB token. This could be embedded code, a comment, or arbitrary text.
 */
class ErbToken extends TTokenNode, ErbAstNode {
  override string getAPrimaryQlClass() { result = "ErbToken" }

  /** Gets the string value of this token. */
  string getValue() {
    exists(Erb::Token generated | this = fromGenerated(generated) |
      result = generated.getValue()
    )
  }

  /** Gets the token value, shortened by `displayToken` when it is long. */
  override string toString() { result = displayToken(this.getValue()) }
}
/**
* An ERB token appearing within a comment directive.
*/
class ErbComment extends ErbToken {
private Erb::Comment g;
ErbComment() { this = TComment(g) }
/** Gets the string value of the underlying `Erb::Comment` node. */
override string getValue() { result = g.getValue() }
/** Gets the primary QL class name of this node: `"ErbComment"`. */
final override string getAPrimaryQlClass() { result = "ErbComment" }
}
/**
* An ERB token appearing within a code directive. This will typically be
* interpreted as Ruby code or a GraphQL query, depending on context.
*/
class ErbCode extends ErbToken {
private Erb::Code g;
ErbCode() { this = TCode(g) }
/** Gets the string value of the underlying `Erb::Code` node. */
override string getValue() { result = g.getValue() }
/** Gets the primary QL class name of this node: `"ErbCode"`. */
final override string getAPrimaryQlClass() { result = "ErbCode" }
}
/**
 * Holds if the position (`line`, `col`) lies within `loc`, with both the start
 * and the end position of `loc` counted as inside.
 */
bindingset[line, col]
private predicate locationIncludesPosition(Location loc, int line, int col) {
  // single-line location: the column must lie between start and end column
  loc.getStartLine() = loc.getEndLine() and
  line = loc.getStartLine() and
  col >= loc.getStartColumn() and
  col <= loc.getEndColumn()
  or
  // multi-line location, position on its first line: at or after the start column
  not loc.getStartLine() = loc.getEndLine() and
  line = loc.getStartLine() and
  col >= loc.getStartColumn()
  or
  // multi-line location, position on its last line: at or before the end column
  not loc.getStartLine() = loc.getEndLine() and
  line = loc.getEndLine() and
  col <= loc.getEndColumn()
  or
  // position on a line strictly between the first and last line: any column
  line < loc.getEndLine() and
  line > loc.getStartLine()
}
/** A file containing an ERB directive. */
private class ErbDirectiveFile extends File {
  pragma[nomagic]
  ErbDirectiveFile() { exists(ErbDirective dir | this = dir.getLocation().getFile()) }

  /**
   * Gets a statement in this file whose location starts at line `startLine`
   * and column `startColumn`.
   */
  pragma[nomagic]
  Stmt getAStmt(int startLine, int startColumn) {
    exists(Location stmtLoc |
      stmtLoc = result.getLocation() and
      this = stmtLoc.getFile() and
      startLine = stmtLoc.getStartLine() and
      startColumn = stmtLoc.getStartColumn()
    )
  }
}
/**
 * A directive in an ERB template.
 */
class ErbDirective extends TDirectiveNode, ErbAstNode {
  override string getAPrimaryQlClass() { result = "ErbDirective" }

  override string toString() { result = "erb directive" }

  /** Holds if this directive spans line `line` in the file `file`. */
  pragma[nomagic]
  private predicate spans(ErbDirectiveFile file, int line) {
    exists(Location dirLoc |
      dirLoc = this.getLocation() and
      file = dirLoc.getFile() and
      line in [dirLoc.getStartLine() .. dirLoc.getEndLine()]
    )
  }

  /**
   * Holds if the start position of statement `s` lies within the location of
   * this directive, and `s` is not a `Toplevel`.
   */
  private predicate containsStmtStart(Stmt s) {
    exists(ErbDirectiveFile file, int startLine, int startColumn |
      this.spans(file, startLine) and
      s = file.getAStmt(startLine, startColumn) and
      locationIncludesPosition(this.getLocation(), startLine, startColumn)
    ) and
    // `Toplevel` statements are not contained within individual directives,
    // though their start location may appear within a directive location
    not s instanceof Toplevel
  }

  /**
   * Gets a statement that starts in this directive and that is not a child of
   * any other statement starting in this directive.
   */
  Stmt getAChildStmt() {
    not this.containsStmtStart(result.getParent()) and
    this.containsStmtStart(result)
  }

  /**
   * Gets the last child statement in this directive, that is, the child
   * statement whose location is strictly after that of every other child
   * statement. See `getAChildStmt` for more details.
   */
  Stmt getTerminalStmt() {
    result = this.getAChildStmt() and
    forall(Stmt other | other = this.getAChildStmt() and not other = result |
      other.getLocation().strictlyBefore(result.getLocation())
    )
  }

  /** Gets the child token of this directive. */
  ErbToken getToken() {
    exists(Erb::Directive generated | this = fromGenerated(generated) |
      generated.getChild() = toGenerated(result)
    )
  }
}
/**
 * A comment directive in an ERB template.
 * ```erb
 * <%#= 2 + 2 %>
 * <%# for x in xs do %>
 * ```
 */
class ErbCommentDirective extends ErbDirective {
  private Erb::CommentDirective g;

  ErbCommentDirective() { this = TCommentDirective(g) }

  final override string getAPrimaryQlClass() { result = "ErbCommentDirective" }

  /** Gets the comment token of this directive. */
  override ErbComment getToken() { g.getChild() = toGenerated(result) }

  /** Gets the token text wrapped in `<%#` and `%>`. */
  final override string toString() { result = "<%#" + this.getToken().toString() + "%>" }
}
/**
 * A GraphQL directive in an ERB template.
 * ```erb
 * <%graphql
 *   fragment Foo on Bar {
 *     some {
 *       queryText
 *       moreProperties
 *     }
 *   }
 * %>
 * ```
 */
class ErbGraphqlDirective extends ErbDirective {
  private Erb::GraphqlDirective g;

  ErbGraphqlDirective() { this = TGraphqlDirective(g) }

  final override string getAPrimaryQlClass() { result = "ErbGraphqlDirective" }

  /** Gets the code token of this directive. */
  override ErbCode getToken() { g.getChild() = toGenerated(result) }

  /** Gets the token text wrapped in `<%graphql` and `%>`. */
  final override string toString() { result = "<%graphql" + this.getToken().toString() + "%>" }
}
/**
 * An output directive in an ERB template.
 * ```erb
 * <%= 2 + 2 %>
 * ```
 */
class ErbOutputDirective extends ErbDirective {
  private Erb::OutputDirective g;

  ErbOutputDirective() { this = TOutputDirective(g) }

  final override string getAPrimaryQlClass() { result = "ErbOutputDirective" }

  /** Gets the code token of this directive. */
  override ErbCode getToken() { g.getChild() = toGenerated(result) }

  /** Gets the token text wrapped in `<%=` and `%>`. */
  final override string toString() { result = "<%=" + this.getToken().toString() + "%>" }
}
/**
 * An execution directive in an ERB template.
 * This code will be executed as Ruby, but not rendered.
 * ```erb
 * <% books = author.books
 *    for book in books do %>
 * ```
 */
class ErbExecutionDirective extends ErbDirective {
  private Erb::Directive g;

  ErbExecutionDirective() { this = TDirective(g) }

  final override string getAPrimaryQlClass() { result = "ErbExecutionDirective" }

  /** Gets the token text wrapped in `<%` and `%>`. */
  final override string toString() { result = "<%" + this.getToken().toString() + "%>" }
}
/**
 * A `File` containing an Embedded Ruby template.
 * This is typically a file containing snippets of Ruby code that can be
 * evaluated to create a compiled version of the file.
 */
class ErbFile extends File {
  private ErbTemplate template;

  ErbFile() { template.getLocation().getFile() = this }

  /**
   * Gets the erb template contained within this file.
   */
  ErbTemplate getTemplate() { result = template }

  /**
   * Holds if the file represents a partial to be rendered in the context of
   * another template, which is the case when its stem starts with `_`.
   */
  predicate isPartial() { "_" = this.getStem().charAt(0) }

  /**
   * Gets the base template name associated with this ERB file.
   * For instance, a file named `foo.html.erb` has a template name of `foo`.
   * A partial template file named `_item.html.erb` has a template name of `item`.
   *
   * The definition here has no result; it is overridden by the subclasses
   * for partial and non-partial files.
   */
  string getTemplateName() { none() }
}
/** An `ErbFile` for which `isPartial()` holds. */
private class PartialErbFile extends ErbFile {
PartialErbFile() { this.isPartial() }
// Take the part of the stem before the first `.`, then drop the leading underscore
override string getTemplateName() { result = this.getStem().splitAt(".", 0).suffix(1) }
}
/** An `ErbFile` for which `isPartial()` does not hold. */
private class FullErbFile extends ErbFile {
FullErbFile() { not this.isPartial() }
// The template name is the part of the stem before the first `.`
override string getTemplateName() { result = this.getStem().splitAt(".", 0) }
}

View File

@@ -0,0 +1,456 @@
private import codeql.ruby.AST
private import codeql.ruby.CFG
private import internal.AST
private import internal.TreeSitter
/**
 * An expression.
 *
 * This is the root QL class for all expressions.
 */
class Expr extends Stmt, TExpr {
  /**
   * Gets the textual (constant) value of this expression, if any.
   *
   * There is a result only if this expression has at least one control-flow
   * node and every such node reports that same value text.
   */
  string getValueText() {
    forex(CfgNodes::ExprCfgNode cfgNode | cfgNode = this.getAControlFlowNode() |
      result = cfgNode.getValueText()
    )
  }
}
/**
* A reference to the current object. For example:
* - `self == other`
* - `self.method_name`
* - `def self.method_name ... end`
*
* This also includes implicit references to the current object in method
* calls. For example, the method call `foo(123)` has an implicit `self`
* receiver, and is equivalent to the explicit `self.foo(123)`.
*/
class Self extends Expr, TSelf {
/** Gets the primary QL class name of this node: `"Self"`. */
final override string getAPrimaryQlClass() { result = "Self" }
/** Gets the textual representation of this node: `"self"`. */
final override string toString() { result = "self" }
}
/**
 * A sequence of expressions in the right-hand side of an assignment or
 * a `return`, `break` or `next` statement.
 * ```rb
 * x = 1, *items, 3, *more
 * return 1, 2
 * next *list
 * break **map
 * return 1, 2, *items, k: 5, **map
 * ```
 */
class ArgumentList extends Expr, TArgumentList {
  private Ruby::AstNode g;

  ArgumentList() { this = TArgumentList(g) }

  final override string getAPrimaryQlClass() { result = "ArgumentList" }

  final override string toString() { result = "..., ..." }

  /**
   * Gets the `i`th element in this argument list. The underlying node is
   * either a `Ruby::ArgumentList` or a `Ruby::RightAssignmentList`.
   */
  Expr getElement(int i) {
    toGenerated(result) = g.(Ruby::ArgumentList).getChild(i)
    or
    toGenerated(result) = g.(Ruby::RightAssignmentList).getChild(i)
  }

  final override AstNode getAChild(string pred) {
    pred = "getElement" and result = this.getElement(_)
    or
    result = super.getAChild(pred)
  }
}
/** A sequence of expressions. */
class StmtSequence extends Expr, TStmtSequence {
  override string getAPrimaryQlClass() { result = "StmtSequence" }

  /**
   * Gets the `n`th statement in this sequence.
   *
   * The definition here has no result; subclasses override it.
   */
  Stmt getStmt(int n) { none() }

  /** Gets a statement in this sequence. */
  final Stmt getAStmt() { result = this.getStmt(_) }

  /** Gets the number of statements in this sequence. */
  final int getNumberOfStatements() { result = count(this.getAStmt()) }

  /** Gets the last statement in this sequence, if any. */
  final Stmt getLastStmt() { result = this.getStmt(this.getNumberOfStatements() - 1) }

  /** Holds if this sequence has no statements. */
  final predicate isEmpty() { 0 = this.getNumberOfStatements() }

  override AstNode getAChild(string pred) {
    pred = "getStmt" and result = this.getStmt(_)
    or
    result = super.getAChild(pred)
  }
}
/** A `StmtSequence` whose statements are provided by `synthChild`. */
private class StmtSequenceSynth extends StmtSequence, TStmtSequenceSynth {
/** Gets the `n`th statement, as given by `synthChild(this, n, result)`. */
final override Stmt getStmt(int n) { synthChild(this, n, result) }
final override string toString() { result = "..." }
}
/** A `StmtSequence` backed by a `Ruby::Then` tree-sitter node. */
private class Then extends StmtSequence, TThen {
private Ruby::Then g;
Then() { this = TThen(g) }
/** Gets the statement corresponding to the `n`th child of the underlying node. */
override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) }
final override string toString() { result = "then ..." }
}
/** A `StmtSequence` backed by a `Ruby::Else` tree-sitter node. */
private class Else extends StmtSequence, TElse {
private Ruby::Else g;
Else() { this = TElse(g) }
/** Gets the statement corresponding to the `n`th child of the underlying node. */
override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) }
final override string toString() { result = "else ..." }
}
/** A `StmtSequence` backed by a `Ruby::Do` tree-sitter node. */
private class Do extends StmtSequence, TDo {
private Ruby::Do g;
Do() { this = TDo(g) }
/** Gets the statement corresponding to the `n`th child of the underlying node. */
override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) }
final override string toString() { result = "do ..." }
}
/** A `StmtSequence` backed by a `Ruby::Ensure` tree-sitter node. */
private class Ensure extends StmtSequence, TEnsure {
private Ruby::Ensure g;
Ensure() { this = TEnsure(g) }
/** Gets the statement corresponding to the `n`th child of the underlying node. */
override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) }
final override string toString() { result = "ensure ..." }
}
/**
 * A sequence of statements representing the body of a method, class, module,
 * or do-block. That is, any body that may also include rescue/ensure/else
 * statements.
 */
class BodyStmt extends StmtSequence, TBodyStmt {
  // Not defined by dispatch, as it should not be exposed
  /** Gets the `i`th child of the tree-sitter node underlying this body. */
  private Ruby::AstNode getChild(int i) {
    exists(Ruby::Method g | this = TMethod(g) | result = g.getChild(i))
    or
    exists(Ruby::SingletonMethod g | this = TSingletonMethod(g) | result = g.getChild(i))
    or
    exists(Ruby::Lambda g | this = TLambda(g) |
      result = g.getBody().(Ruby::DoBlock).getChild(i) or
      result = g.getBody().(Ruby::Block).getChild(i)
    )
    or
    exists(Ruby::DoBlock g | this = TDoBlock(g) | result = g.getChild(i))
    or
    // children of a program, except `Ruby::BeginBlock` nodes
    exists(Ruby::Program g | this = TToplevel(g) |
      result = g.getChild(i) and
      not result instanceof Ruby::BeginBlock
    )
    or
    exists(Ruby::Class g | this = TClassDeclaration(g) | result = g.getChild(i))
    or
    exists(Ruby::SingletonClass g | this = TSingletonClass(g) | result = g.getChild(i))
    or
    exists(Ruby::Module g | this = TModuleDeclaration(g) | result = g.getChild(i))
    or
    exists(Ruby::Begin g | this = TBeginExpr(g) | result = g.getChild(i))
  }

  /**
   * Gets the `n`th statement of this body. The `else`, `rescue` and `ensure`
   * children are not counted; the remaining children are ranked by their
   * child index.
   */
  final override Stmt getStmt(int n) {
    result =
      rank[n + 1](AstNode child, int idx |
        toGenerated(child) = this.getChild(idx) and
        not child instanceof Ensure and
        not child instanceof RescueClause and
        not child instanceof Else
      |
        child order by idx
      )
  }

  /** Gets the `n`th rescue clause in this block. */
  final RescueClause getRescue(int n) {
    result =
      rank[n + 1](RescueClause clause, int idx |
        toGenerated(clause) = this.getChild(idx)
      |
        clause order by idx
      )
  }

  /** Gets a rescue clause in this block. */
  final RescueClause getARescue() { result = this.getRescue(_) }

  /** Gets the `else` clause in this block, if any. */
  final StmtSequence getElse() {
    result = unique(Else clause | toGenerated(clause) = this.getChild(_))
  }

  /** Gets the `ensure` clause in this block, if any. */
  final StmtSequence getEnsure() {
    result = unique(Ensure clause | toGenerated(clause) = this.getChild(_))
  }

  /** Holds if this block has an `ensure` clause. */
  final predicate hasEnsure() { exists(this.getEnsure()) }

  override AstNode getAChild(string pred) {
    pred = "getEnsure" and result = this.getEnsure()
    or
    pred = "getElse" and result = this.getElse()
    or
    pred = "getRescue" and result = this.getRescue(_)
    or
    result = StmtSequence.super.getAChild(pred)
  }
}
/**
 * A parenthesized expression sequence, typically containing a single expression:
 * ```rb
 * (x + 1)
 * ```
 * However, they can also contain multiple expressions (the value of the parenthesized
 * expression is the last expression):
 * ```rb
 * (foo; bar)
 * ```
 * or even an empty sequence (value is `nil`):
 * ```rb
 * ()
 * ```
 */
class ParenthesizedExpr extends StmtSequence, TParenthesizedExpr {
  private Ruby::ParenthesizedStatements g;

  ParenthesizedExpr() { this = TParenthesizedExpr(g) }

  final override string getAPrimaryQlClass() { result = "ParenthesizedExpr" }

  final override string toString() { result = "( ... )" }

  /** Gets the `n`th statement inside the parentheses. */
  final override Stmt getStmt(int n) { g.getChild(n) = toGenerated(result) }
}
/**
 * A pair expression. For example, in a hash:
 * ```rb
 * { foo: bar }
 * ```
 * Or a keyword argument:
 * ```rb
 * baz(qux: 1)
 * ```
 */
class Pair extends Expr, TPair {
  private Ruby::Pair g;

  Pair() { this = TPair(g) }

  final override string getAPrimaryQlClass() { result = "Pair" }

  final override string toString() { result = "Pair" }

  /**
   * Gets the key expression of this pair. For example, the `SymbolLiteral`
   * representing the keyword `foo` in the following example:
   * ```rb
   * bar(foo: 123)
   * ```
   * Or the `StringLiteral` for `'foo'` in the following hash pair:
   * ```rb
   * { 'foo' => 123 }
   * ```
   */
  final Expr getKey() { g.getKey() = toGenerated(result) }

  /**
   * Gets the value expression of this pair. For example, the integer literal
   * `123` in the following hash pair:
   * ```rb
   * { 'foo' => 123 }
   * ```
   */
  final Expr getValue() { g.getValue() = toGenerated(result) }

  override AstNode getAChild(string pred) {
    pred = "getValue" and result = this.getValue()
    or
    pred = "getKey" and result = this.getKey()
    or
    result = super.getAChild(pred)
  }
}
/**
 * A rescue clause. For example:
 * ```rb
 * begin
 *   write_file
 * rescue StandardError => msg
 *   puts msg
 * end
 * ```
 */
class RescueClause extends Expr, TRescueClause {
  private Ruby::Rescue g;

  RescueClause() { this = TRescueClause(g) }

  final override string getAPrimaryQlClass() { result = "RescueClause" }

  final override string toString() { result = "rescue ..." }

  /**
   * Gets the `n`th exception to match, if any. For example `FirstError` or `SecondError` in:
   * ```rb
   * begin
   *   do_something
   * rescue FirstError, SecondError => e
   *   handle_error(e)
   * end
   * ```
   */
  final Expr getException(int n) { g.getExceptions().getChild(n) = toGenerated(result) }

  /**
   * Gets an exception to match, if any. For example `FirstError` or `SecondError` in:
   * ```rb
   * begin
   *   do_something
   * rescue FirstError, SecondError => e
   *   handle_error(e)
   * end
   * ```
   */
  final Expr getAnException() { result = this.getException(_) }

  /**
   * Gets the variable to which to assign the matched exception, if any.
   * For example `err` in:
   * ```rb
   * begin
   *   do_something
   * rescue StandardError => err
   *   handle_error(err)
   * end
   * ```
   */
  final LhsExpr getVariableExpr() { g.getVariable().getChild() = toGenerated(result) }

  /**
   * Gets the exception handler body.
   */
  final StmtSequence getBody() { g.getBody() = toGenerated(result) }

  override AstNode getAChild(string pred) {
    pred = "getBody" and result = this.getBody()
    or
    pred = "getVariableExpr" and result = this.getVariableExpr()
    or
    pred = "getException" and result = this.getException(_)
    or
    result = super.getAChild(pred)
  }
}
/**
 * An expression with a `rescue` modifier. For example:
 * ```rb
 * contents = read_file rescue ""
 * ```
 */
class RescueModifierExpr extends Expr, TRescueModifierExpr {
  private Ruby::RescueModifier g;

  RescueModifierExpr() { this = TRescueModifierExpr(g) }

  final override string getAPrimaryQlClass() { result = "RescueModifierExpr" }

  final override string toString() { result = "... rescue ..." }

  /**
   * Gets the body of this `RescueModifierExpr`.
   * ```rb
   * body rescue handler
   * ```
   */
  final Stmt getBody() { g.getBody() = toGenerated(result) }

  /**
   * Gets the exception handler of this `RescueModifierExpr`.
   * ```rb
   * body rescue handler
   * ```
   */
  final Stmt getHandler() { g.getHandler() = toGenerated(result) }

  override AstNode getAChild(string pred) {
    pred = "getHandler" and result = this.getHandler()
    or
    pred = "getBody" and result = this.getBody()
    or
    result = super.getAChild(pred)
  }
}
/**
 * A concatenation of string literals.
 *
 * ```rb
 * "foo" "bar" "baz"
 * ```
 */
class StringConcatenation extends Expr, TStringConcatenation {
  private Ruby::ChainedString g;

  StringConcatenation() { this = TStringConcatenation(g) }

  final override string getAPrimaryQlClass() { result = "StringConcatenation" }

  final override string toString() { result = "\"...\" \"...\"" }

  /** Gets the `n`th string literal in this concatenation. */
  final StringLiteral getString(int n) { g.getChild(n) = toGenerated(result) }

  /** Gets a string literal in this concatenation. */
  final StringLiteral getAString() { result = this.getString(_) }

  /** Gets the number of string literals in this concatenation. */
  final int getNumberOfStrings() { result = count(this.getString(_)) }

  /**
   * Gets the result of concatenating all the string literals, if and only if
   * they do not contain any interpolations.
   *
   * For the following example, the result is `"foobar"`:
   *
   * ```rb
   * "foo" 'bar'
   * ```
   *
   * And for the following example, where one of the string literals includes
   * an interpolation, there is no result:
   *
   * ```rb
   * "foo" "bar#{ n }"
   * ```
   */
  final string getConcatenatedValueText() {
    // every component must have a value text, otherwise there is no result
    forall(StringLiteral part | part = this.getString(_) | exists(part.getValueText())) and
    result =
      concat(string text, int idx |
        text = this.getString(idx).getValueText()
      |
        text order by idx
      )
  }

  override AstNode getAChild(string pred) {
    pred = "getString" and result = this.getString(_)
    or
    result = super.getAChild(pred)
  }
}

View File

@@ -0,0 +1,892 @@
private import codeql.ruby.AST
private import codeql.ruby.regexp.RegExpTreeView as RETV
private import internal.AST
private import internal.Scope
private import internal.TreeSitter
/**
 * A literal expression.
 *
 * Every kind of literal in the QL library derives from this root class.
 */
class Literal extends Expr, TLiteral {
  /**
   * Gets the source text of this literal, provided it is a simple literal.
   *
   * Complex literals (arrays, hashes, strings containing interpolations, and
   * the like) have no result. This base definition yields nothing by itself;
   * subclasses supply the text.
   */
  override string getValueText() { none() }
}
/**
 * A literal denoting a number: an integer, a floating-point value, a
 * rational, or a complex value.
 *
 * ```rb
 * 123
 * 0xff
 * 3.14159
 * 1.0E2
 * 7r
 * 1i
 * ```
 */
class NumericLiteral extends Literal, TNumericLiteral {
}
/**
 * A literal denoting an integer.
 *
 * ```rb
 * 123
 * 0xff
 * ```
 */
class IntegerLiteral extends NumericLiteral, TIntegerLiteral {
  final override string getAPrimaryQlClass() { result = "IntegerLiteral" }

  final override string toString() { result = this.getValueText() }

  /**
   * Gets the numerical value of this integer literal. This base definition
   * has no result; the concrete subclasses override it.
   */
  int getValue() { none() }
}
/** An integer literal backed by a `Ruby::Integer` parse-tree node. */
private class IntegerLiteralReal extends IntegerLiteral, TIntegerLiteralReal {
  private Ruby::Integer g;

  IntegerLiteralReal() { this = TIntegerLiteralReal(g) }

  /** Gets the text of the literal exactly as the underlying node reports it. */
  final override string getValueText() { result = g.getValue() }

  /**
   * Gets the numerical value of this literal.
   *
   * The (case-insensitive) prefix selects the base:
   * - `0b` is binary,
   * - `0o`, or a bare leading `0`, is octal,
   * - `0d`, or no prefix at all, is decimal,
   * - `0x` is hexadecimal.
   *
   * Underscores used as digit separators are ignored.
   */
  final override int getValue() {
    exists(string s, string values, string str, string digits |
      s = this.getValueText().toLowerCase() and
      (
        s.matches("0b%") and
        values = "01" and
        str = s.suffix(2)
        or
        s.matches("0x%") and
        values = "0123456789abcdef" and
        str = s.suffix(2)
        or
        s.matches("0o%") and
        values = "01234567" and
        str = s.suffix(2)
        or
        // Explicit decimal prefix, e.g. `0d123`. Without this branch such a
        // literal would be treated as implicit octal.
        s.matches("0d%") and
        values = "0123456789" and
        str = s.suffix(2)
        or
        // Implicit octal: a leading `0` not followed by a base letter. This
        // also covers the literal `0` itself, which leaves no digits and so
        // sums to 0.
        s.charAt(0) = "0" and
        not s.charAt(1) = ["b", "x", "o", "d"] and
        values = "01234567" and
        str = s.suffix(1)
        or
        s.charAt(0) != "0" and values = "0123456789" and str = s
      ) and
      // Strip digit separators once, up front.
      digits = str.replaceAll("_", "")
    |
      // Positional notation: the index of a digit within `values` is its
      // numeric value, and `values.length()` is the base.
      result =
        sum(int index, string c, int v, int exp |
          c = digits.charAt(index) and
          v = values.indexOf(c.toLowerCase()) and
          exp = digits.length() - index - 1
        |
          v * values.length().pow(exp)
        )
    )
  }
}
/**
 * An integer literal from the `TIntegerLiteralSynth` branch, which carries its
 * numeric value directly instead of source text.
 */
private class IntegerLiteralSynth extends IntegerLiteral, TIntegerLiteralSynth {
  private int value;

  IntegerLiteralSynth() { this = TIntegerLiteralSynth(_, _, value) }

  final override int getValue() { result = value }

  /** Gets the decimal rendering of the stored value. */
  final override string getValueText() { result = value.toString() }
}
/**
 * A literal denoting a floating-point number.
 *
 * ```rb
 * 1.3
 * 2.7e+5
 * ```
 */
class FloatLiteral extends NumericLiteral, TFloatLiteral {
  private Ruby::Float g;

  FloatLiteral() { this = TFloatLiteral(g) }

  final override string getAPrimaryQlClass() { result = "FloatLiteral" }

  final override string toString() { result = this.getValueText() }

  final override string getValueText() { result = g.getValue() }
}
/**
 * A literal denoting a rational number.
 *
 * ```rb
 * 123r
 * ```
 */
class RationalLiteral extends NumericLiteral, TRationalLiteral {
  private Ruby::Rational g;

  RationalLiteral() { this = TRationalLiteral(g) }

  final override string getAPrimaryQlClass() { result = "RationalLiteral" }

  final override string toString() { result = this.getValueText() }

  /** Gets the text of the child token with the `r` suffix appended. */
  final override string getValueText() {
    exists(Ruby::Token number | number = g.getChild() | result = number.getValue() + "r")
  }
}
/**
 * A literal denoting a complex number.
 *
 * ```rb
 * 1i
 * ```
 */
class ComplexLiteral extends NumericLiteral, TComplexLiteral {
  private Ruby::Complex g;

  ComplexLiteral() { this = TComplexLiteral(g) }

  final override string getAPrimaryQlClass() { result = "ComplexLiteral" }

  final override string toString() { result = this.getValueText() }

  final override string getValueText() { result = g.getValue() }
}
/** The literal `nil`. */
class NilLiteral extends Literal, TNilLiteral {
  private Ruby::Nil g;

  NilLiteral() { this = TNilLiteral(g) }

  final override string getAPrimaryQlClass() { result = "NilLiteral" }

  final override string toString() { result = this.getValueText() }

  final override string getValueText() { result = g.getValue() }
}
/**
 * A literal denoting a Boolean value.
 * ```rb
 * true
 * false
 * TRUE
 * FALSE
 * ```
 */
class BooleanLiteral extends Literal, TBooleanLiteral {
  final override string getAPrimaryQlClass() { result = "BooleanLiteral" }

  final override string toString() { result = this.getValueText() }

  /**
   * Holds if this literal is `true` or `TRUE`. Never holds in this base
   * definition; a subclass overrides it.
   */
  predicate isTrue() { none() }

  /**
   * Holds if this literal is `false` or `FALSE`. Never holds in this base
   * definition; a subclass overrides it.
   */
  predicate isFalse() { none() }

  /** Gets the Boolean value denoted by this literal. */
  boolean getValue() {
    result = false and this.isFalse()
    or
    result = true and this.isTrue()
  }
}
/** A Boolean literal backed by a `Ruby::True` node. */
private class TrueLiteral extends BooleanLiteral, TTrueLiteral {
  private Ruby::True g;

  TrueLiteral() { this = TTrueLiteral(g) }

  final override predicate isTrue() { any() }

  final override string getValueText() { result = g.getValue() }
}
/** A Boolean literal backed by a `Ruby::False` node. */
private class FalseLiteral extends BooleanLiteral, TFalseLiteral {
  private Ruby::False g;

  FalseLiteral() { this = TFalseLiteral(g) }

  final override predicate isFalse() { any() }

  final override string getValueText() { result = g.getValue() }
}
/**
 * A component of a string. This is the common base of `StringTextComponent`,
 * `StringEscapeSequenceComponent`, and `StringInterpolationComponent`.
 */
class StringComponent extends AstNode, TStringComponent {
  /**
   * Gets the source text of this component. A
   * `StringInterpolationComponent` has no result. This base definition yields
   * nothing; the text-bearing subclasses override it.
   */
  string getValueText() { none() }
}
/**
 * A plain-text component of a string (or string-like) literal.
 *
 * Each of the string literals below contains a `StringTextComponent` whose
 * `getValueText()` is `"foo"`:
 *
 * ```rb
 * 'foo'
 * "#{ bar() }foo"
 * "foo#{ bar() } baz"
 * ```
 */
class StringTextComponent extends StringComponent, TStringTextComponent {
  private Ruby::Token g;

  StringTextComponent() { this = TStringTextComponent(g) }

  final override string getAPrimaryQlClass() { result = "StringTextComponent" }

  final override string getValueText() { result = g.getValue() }

  final override string toString() { result = g.getValue() }
}
/**
 * A component of a string or string-like literal that is an escape sequence.
 */
class StringEscapeSequenceComponent extends StringComponent, TStringEscapeSequenceComponent {
  private Ruby::EscapeSequence g;

  StringEscapeSequenceComponent() { this = TStringEscapeSequenceComponent(g) }

  final override string getAPrimaryQlClass() { result = "StringEscapeSequenceComponent" }

  final override string getValueText() { result = g.getValue() }

  final override string toString() { result = g.getValue() }
}
/**
 * A component of a string or string-like literal that is an interpolated
 * expression.
 */
class StringInterpolationComponent extends StringComponent, StmtSequence,
  TStringInterpolationComponent {
  private Ruby::Interpolation g;

  StringInterpolationComponent() { this = TStringInterpolationComponent(g) }

  final override string getAPrimaryQlClass() { result = "StringInterpolationComponent" }

  final override string toString() { result = "#{...}" }

  /** An interpolation has no value text. */
  final override string getValueText() { none() }

  /** Gets the `n`th statement inside the interpolation. */
  final override Stmt getStmt(int n) { g.getChild(n) = toGenerated(result) }
}
/**
 * A string-like literal: a string, symbol, regexp, or subshell literal, or a
 * here document.
 */
class StringlikeLiteral extends Literal, TStringlikeLiteral {
  /**
   * Gets the component at position `n` of this literal. Each result is a
   * `StringTextComponent`, a `StringInterpolationComponent`, or a
   * `StringEscapeSequenceComponent`.
   *
   * For the literal below, `n = 0` gives the `StringTextComponent` for
   * `foo_` and `n = 1` gives the `StringInterpolationComponent` for
   * `Time.now`.
   *
   * ```rb
   * "foo_#{ Time.now }"
   * ```
   *
   * This base definition has no result; subclasses override it.
   */
  StringComponent getComponent(int n) { none() }

  /**
   * Gets how many components this literal has.
   *
   * The empty string `""` has 0 components.
   *
   * The string `"foo"` has 1 component, a single `StringTextComponent`.
   *
   * The example below has 3 components: a `StringTextComponent` for the
   * substring `"foo_"`, a `StringEscapeSequenceComponent` for the escaped
   * quote, and a `StringInterpolationComponent` for the interpolation.
   *
   * ```rb
   * "foo\"#{bar}"
   * ```
   */
  final int getNumberOfComponents() {
    result = count(StringComponent c | c = this.getComponent(_))
  }

  /** Gets the text that `toString` puts in front of the summary. */
  private string getStartDelimiter() {
    (this instanceof THashKeySymbolLiteral or this instanceof THereDoc) and
    result = ""
    or
    this instanceof TStringLiteral and
    result = "\""
    or
    this instanceof TComplexSymbolLiteral and
    result = ":\""
    or
    this instanceof TSimpleSymbolLiteral and
    result = ":"
    or
    this instanceof TRegExpLiteral and
    result = "/"
    or
    this instanceof TSubshellLiteral and
    result = "`"
  }

  /** Gets the text that `toString` puts after the summary. */
  private string getEndDelimiter() {
    (
      this instanceof TSimpleSymbolLiteral or
      this instanceof THashKeySymbolLiteral or
      this instanceof THereDoc
    ) and
    result = ""
    or
    (this instanceof TStringLiteral or this instanceof TComplexSymbolLiteral) and
    result = "\""
    or
    this instanceof TRegExpLiteral and
    result = "/"
    or
    this instanceof TSubshellLiteral and
    result = "`"
  }

  /**
   * Gets the value texts of all components joined in order. A literal with no
   * components gives the empty string; a literal with any interpolation has
   * no result.
   */
  override string getValueText() {
    not exists(StringInterpolationComponent interp | interp = this.getComponent(_)) and
    result =
      concat(int i, StringComponent part |
        part = this.getComponent(i)
      |
        part.getValueText() order by i
      )
  }

  /**
   * Gets a short rendering of this literal: the component texts joined in
   * order (with `#{...}` standing in for anything that is not a token),
   * cut down to at most 32 characters, and wrapped in the delimiters that
   * belong to this kind of literal.
   */
  override string toString() {
    exists(string joined, string shortened |
      joined =
        concat(int i, StringComponent part, string text |
          part = this.getComponent(i) and
          (
            if toGenerated(part) instanceof Ruby::Token
            then text = toGenerated(part).(Ruby::Token).getValue()
            else text = "#{...}"
          )
        |
          text order by i
        ) and
      (
        // Keep the summary within 32 characters, ellipsis included.
        if joined.length() > 32
        then shortened = joined.substring(0, 29) + "..."
        else shortened = joined
      ) and
      result = this.getStartDelimiter() + shortened + this.getEndDelimiter()
    )
  }

  /** Gets a child node; `pred` names the predicate that yields it. */
  final override AstNode getAChild(string pred) {
    pred = "getComponent" and exists(int n | result = this.getComponent(n))
    or
    result = super.getAChild(pred)
  }
}
/**
 * A literal denoting a string.
 *
 * ```rb
 * 'hello'
 * "hello, #{name}"
 * ```
 */
class StringLiteral extends StringlikeLiteral, TStringLiteral {
  final override string getAPrimaryQlClass() { "StringLiteral" = result }
}
/** A string literal backed by a `Ruby::String` node. */
private class RegularStringLiteral extends StringLiteral, TRegularStringLiteral {
  private Ruby::String g;

  RegularStringLiteral() { this = TRegularStringLiteral(g) }

  /** Gets the component that corresponds to the `n`th child of the node. */
  final override StringComponent getComponent(int n) { g.getChild(n) = toGenerated(result) }
}
/** A string literal backed by a `Ruby::BareString` node. */
private class BareStringLiteral extends StringLiteral, TBareStringLiteral {
  private Ruby::BareString g;

  BareStringLiteral() { this = TBareStringLiteral(g) }

  /** Gets the component that corresponds to the `n`th child of the node. */
  final override StringComponent getComponent(int n) { g.getChild(n) = toGenerated(result) }
}
/**
 * A literal denoting a regular expression.
 *
 * ```rb
 * /[a-z]+/
 * ```
 */
class RegExpLiteral extends StringlikeLiteral, TRegExpLiteral {
  private Ruby::Regex g;

  RegExpLiteral() { this = TRegExpLiteral(g) }

  final override string getAPrimaryQlClass() { result = "RegExpLiteral" }

  /** Gets the component that corresponds to the `i`th child of the node. */
  final override StringComponent getComponent(int i) { g.getChild(i) = toGenerated(result) }

  /**
   * Gets the flags of this regexp as a single string.
   *
   * ```rb
   * /foo/     # => ""
   * /foo/i    # => "i"
   * /foo/imxo # => "imxo"
   * ```
   */
  final string getFlagString() {
    // Among the tokens whose parent is the regex node, take the one with the
    // highest parent index; for `/foo/i` that is expected to be `/i`. Its
    // first character is the closing delimiter (which can vary), so drop it.
    result =
      max(Ruby::Token tok |
        tok.getParent() = g
      |
        tok.getValue().suffix(1) order by tok.getParentIndex()
      )
  }

  /**
   * Holds if this regexp carries the `i` flag, which requests
   * case-insensitive matching:
   *
   * ```rb
   * /foo/i
   * ```
   */
  final predicate hasCaseInsensitiveFlag() {
    exists(int pos | this.getFlagString().charAt(pos) = "i")
  }

  /**
   * Holds if this regexp carries the `m` flag, which requests multiline
   * mode:
   *
   * ```rb
   * /foo/m
   * ```
   */
  final predicate hasMultilineFlag() { exists(int pos | this.getFlagString().charAt(pos) = "m") }

  /**
   * Holds if this regexp carries the `x` flag, which requests 'free-spacing'
   * mode (also called 'extended' mode), in which whitespace and comments
   * inside the pattern are ignored:
   *
   * ```rb
   * %r{
   *   [a-zA-Z_] # starts with a letter or underscore
   *   \w*       # and then zero or more letters/digits/underscores
   * }x
   * ```
   */
  final predicate hasFreeSpacingFlag() {
    exists(int pos | this.getFlagString().charAt(pos) = "x")
  }

  /** Gets the root node of the parse tree of this regular expression. */
  final RETV::RegExpTerm getParsed() { result = RETV::getParsedRegExp(this) }
}
/**
 * A literal denoting a symbol.
 *
 * ```rb
 * :foo
 * :"foo bar"
 * :"foo bar #{baz}"
 * ```
 */
class SymbolLiteral extends StringlikeLiteral, TSymbolLiteral {
  /**
   * Gets `"SymbolLiteral"`, unless this node is also a `MethodName`, in which
   * case this predicate contributes nothing.
   */
  final override string getAPrimaryQlClass() {
    result = "SymbolLiteral" and
    not this instanceof MethodName
  }
}
/** A symbol literal backed by a `Ruby::SimpleSymbol` node. */
private class SimpleSymbolLiteral extends SymbolLiteral, TSimpleSymbolLiteral {
  private Ruby::SimpleSymbol g;

  SimpleSymbolLiteral() { this = TSimpleSymbolLiteral(g) }

  /** Gets the node text unchanged, i.e. with its leading colon. */
  final override string toString() { result = g.getValue() }

  /**
   * Gets the symbol text without the colon. Tree-sitter hands us the text
   * with the colon in front, so the first character is dropped.
   */
  final override string getValueText() { result = g.getValue().suffix(1) }
}
/** A symbol literal from the `TComplexSymbolLiteral` branch. */
private class ComplexSymbolLiteral extends SymbolLiteral, TComplexSymbolLiteral {
}
/** A complex symbol literal backed by a `Ruby::DelimitedSymbol` node. */
private class DelimitedSymbolLiteral extends ComplexSymbolLiteral, TDelimitedSymbolLiteral {
  private Ruby::DelimitedSymbol g;

  DelimitedSymbolLiteral() { this = TDelimitedSymbolLiteral(g) }

  /** Gets the component that corresponds to the `i`th child of the node. */
  final override StringComponent getComponent(int i) { g.getChild(i) = toGenerated(result) }
}
/** A complex symbol literal backed by a `Ruby::BareSymbol` node. */
private class BareSymbolLiteral extends ComplexSymbolLiteral, TBareSymbolLiteral {
  private Ruby::BareSymbol g;

  BareSymbolLiteral() { this = TBareSymbolLiteral(g) }

  /** Gets the component that corresponds to the `i`th child of the node. */
  final override StringComponent getComponent(int i) { g.getChild(i) = toGenerated(result) }
}
/** A symbol literal backed by a `Ruby::HashKeySymbol` node. */
private class HashKeySymbolLiteral extends SymbolLiteral, THashKeySymbolLiteral {
  private Ruby::HashKeySymbol g;

  HashKeySymbolLiteral() { this = THashKeySymbolLiteral(g) }

  /** Gets the value text with a colon put in front of it. */
  final override string toString() { result = ":" + this.getValueText() }

  final override string getValueText() { result = g.getValue() }
}
/**
 * A literal denoting a subshell command.
 *
 * ```rb
 * `ls -l`
 * %x(/bin/sh foo.sh)
 * ```
 */
class SubshellLiteral extends StringlikeLiteral, TSubshellLiteral {
  private Ruby::Subshell g;

  SubshellLiteral() { this = TSubshellLiteral(g) }

  final override string getAPrimaryQlClass() { result = "SubshellLiteral" }

  /** Gets the component that corresponds to the `i`th child of the node. */
  final override StringComponent getComponent(int i) { g.getChild(i) = toGenerated(result) }
}
/**
 * A literal denoting a single character.
 *
 * ```rb
 * ?a
 * ?\u{61}
 * ```
 */
class CharacterLiteral extends Literal, TCharacterLiteral {
  private Ruby::Character g;

  CharacterLiteral() { this = TCharacterLiteral(g) }

  final override string getAPrimaryQlClass() { result = "CharacterLiteral" }

  final override string toString() { result = g.getValue() }

  final override string getValueText() { result = g.getValue() }
}
/**
 * A "here document", such as:
 * ```rb
 * query = <<SQL
 * SELECT * FROM person
 * WHERE age > 21
 * SQL
 * ```
 */
class HereDoc extends StringlikeLiteral, THereDoc {
  private Ruby::HeredocBeginning g;

  HereDoc() { this = THereDoc(g) }

  final override string getAPrimaryQlClass() { result = "HereDoc" }

  /** Gets the text of the heredoc-beginning node. */
  final override string toString() { result = g.getValue() }

  /**
   * Holds if this here document is run in a subshell, which is the case when
   * its identifier is quoted with backticks:
   * ```rb
   * <<`COMMAND`
   * echo "Hello world!"
   * COMMAND
   * ```
   */
  final predicate isSubShell() { this.getQuoteStyle() = "`" }

  /**
   * Gets the quotation mark (`"`, `'` or `` ` ``) around the here document
   * identifier, if there is one.
   * ```rb
   * <<"IDENTIFIER"
   * <<'IDENTIFIER'
   * <<`IDENTIFIER`
   * ```
   */
  final string getQuoteStyle() {
    // The closing quote, if present, is the last character of the opener.
    exists(string opener | opener = g.getValue() |
      result = opener.charAt(opener.length() - 1)
    ) and
    result = ["\"", "'", "`"]
  }

  /**
   * Gets the indentation modifier (`-` or `~`) of the here document
   * identifier, if there is one.
   * ```rb
   * <<~IDENTIFIER
   * <<-IDENTIFIER
   * <<IDENTIFIER
   * ```
   */
  final string getIndentationModifier() {
    // The modifier, if present, directly follows the two-character `<<`.
    result = g.getValue().charAt(2) and
    result = ["~", "-"]
  }

  /** Gets the component that corresponds to the `n`th child of the here document body. */
  final override StringComponent getComponent(int n) {
    getHereDocBody(g).getChild(n) = toGenerated(result)
  }
}
/**
 * A literal denoting an array.
 *
 * ```rb
 * [123, 'foo', bar()]
 * %w(foo bar)
 * %i(foo bar)
 * ```
 */
class ArrayLiteral extends Literal, TArrayLiteral {
  final override string getAPrimaryQlClass() { result = "ArrayLiteral" }

  /** Gets the element at position `n` of this array literal. */
  final Expr getElement(int n) {
    exists(ArrayLiteralImpl impl | impl = this | result = impl.getElementImpl(n))
  }

  /** Gets any of the elements of this array literal. */
  final Expr getAnElement() { exists(int n | result = this.getElement(n)) }

  /** Gets how many elements this array literal has. */
  final int getNumberOfElements() {
    exists(ArrayLiteralImpl impl | impl = this | result = impl.getNumberOfElementsImpl())
  }

  /** Gets a child node; `pred` names the predicate that yields it. */
  final override AstNode getAChild(string pred) {
    pred = "getElement" and result = this.getAnElement()
    or
    result = super.getAChild(pred)
  }
}
/**
 * The implementation side of `ArrayLiteral`: concrete subclasses provide the
 * two predicates that the final members of `ArrayLiteral` delegate to.
 */
abstract private class ArrayLiteralImpl extends ArrayLiteral {
  /** Gets how many elements this array literal has. */
  abstract int getNumberOfElementsImpl();

  /** Gets the element at position `n` of this array literal. */
  abstract Expr getElementImpl(int n);
}
/** An array literal backed by a `Ruby::Array` node. */
private class RegularArrayLiteral extends ArrayLiteralImpl, TRegularArrayLiteral {
  private Ruby::Array g;

  RegularArrayLiteral() { this = TRegularArrayLiteral(g) }

  final override string toString() { result = "[...]" }

  /** Gets the number of children of the underlying node. */
  final override int getNumberOfElementsImpl() { result = count(g.getChild(_)) }

  final override Expr getElementImpl(int i) { g.getChild(i) = toGenerated(result) }
}
/** An array literal backed by a `Ruby::StringArray` node. */
private class StringArrayLiteral extends ArrayLiteralImpl, TStringArrayLiteral {
  private Ruby::StringArray g;

  StringArrayLiteral() { this = TStringArrayLiteral(g) }

  final override string toString() { result = "%w(...)" }

  /** Gets the number of children of the underlying node. */
  final override int getNumberOfElementsImpl() { result = count(g.getChild(_)) }

  final override Expr getElementImpl(int i) { g.getChild(i) = toGenerated(result) }
}
/** An array literal backed by a `Ruby::SymbolArray` node. */
private class SymbolArrayLiteral extends ArrayLiteralImpl, TSymbolArrayLiteral {
  private Ruby::SymbolArray g;

  SymbolArrayLiteral() { this = TSymbolArrayLiteral(g) }

  final override string toString() { result = "%i(...)" }

  /** Gets the number of children of the underlying node. */
  final override int getNumberOfElementsImpl() { result = count(g.getChild(_)) }

  final override Expr getElementImpl(int i) { g.getChild(i) = toGenerated(result) }
}
/**
 * A literal denoting a hash.
 *
 * ```rb
 * { foo: 123, bar: 456 }
 * ```
 */
class HashLiteral extends Literal, THashLiteral {
  private Ruby::Hash g;

  HashLiteral() { this = THashLiteral(g) }

  final override string getAPrimaryQlClass() { result = "HashLiteral" }

  final override string toString() { result = "{...}" }

  /**
   * Gets the element at position `n` of this hash literal.
   *
   * In the example below, element 0 is a `Pair` and element 1 is a
   * `HashSplatExpr`.
   *
   * ```rb
   * { foo: 123, **bar }
   * ```
   */
  final Expr getElement(int n) { g.getChild(n) = toGenerated(result) }

  /** Gets any of the elements of this hash literal. */
  final Expr getAnElement() { exists(int n | result = this.getElement(n)) }

  /** Gets an element of this hash literal that is a key-value `Pair`. */
  final Pair getAKeyValuePair() { this.getAnElement() = result }

  /** Gets how many elements this hash literal has. */
  final int getNumberOfElements() { result = count(Expr e | e = this.getAnElement()) }

  /** Gets a child node; `pred` names the predicate that yields it. */
  final override AstNode getAChild(string pred) {
    pred = "getElement" and result = this.getAnElement()
    or
    result = super.getAChild(pred)
  }
}
/**
 * A literal denoting a range.
 *
 * ```rb
 * (1..10)
 * (1024...2048)
 * ```
 */
class RangeLiteral extends Literal, TRangeLiteral {
  final override string getAPrimaryQlClass() { result = "RangeLiteral" }

  /**
   * Gets the expression at which this range begins, if any. This base
   * definition has no result; subclasses override it.
   */
  Expr getBegin() { none() }

  /**
   * Gets the expression at which this range ends, if any. This base
   * definition has no result; subclasses override it.
   */
  Expr getEnd() { none() }

  /**
   * Holds if the end value belongs to the range, i.e. the `..` operator is
   * used. Never holds in this base definition; subclasses override it.
   */
  predicate isInclusive() { none() }

  /**
   * Holds if the end value does not belong to the range, i.e. the `...`
   * operator is used. Never holds in this base definition; subclasses
   * override it.
   */
  predicate isExclusive() { none() }

  /** Gets `_ .. _` for an inclusive range and `_ ... _` for an exclusive one. */
  final override string toString() {
    exists(string dots |
      result = "_ " + dots + " _"
    |
      this.isExclusive() and dots = "..."
      or
      this.isInclusive() and dots = ".."
    )
  }

  /** Gets a child node; `pred` names the predicate that yields it. */
  final override AstNode getAChild(string pred) {
    pred = "getEnd" and result = this.getEnd()
    or
    pred = "getBegin" and result = this.getBegin()
    or
    result = super.getAChild(pred)
  }
}
/** A range literal backed by a `Ruby::Range` node. */
private class RangeLiteralReal extends RangeLiteral, TRangeLiteralReal {
  private Ruby::Range g;

  RangeLiteralReal() { this = TRangeLiteralReal(g) }

  final override predicate isExclusive() { g instanceof @ruby_range_dotdotdot }

  final override predicate isInclusive() { g instanceof @ruby_range_dotdot }

  final override Expr getEnd() { g.getEnd() = toGenerated(result) }

  final override Expr getBegin() { g.getBegin() = toGenerated(result) }
}
/**
 * A range literal from the `TRangeLiteralSynth` branch. Its bounds are the
 * `TIntegerLiteralSynth` nodes whose first two arguments are this range and
 * the index 0 (begin) or 1 (end).
 */
private class RangeLiteralSynth extends RangeLiteral, TRangeLiteralSynth {
  private boolean inclusive;

  RangeLiteralSynth() { this = TRangeLiteralSynth(_, _, inclusive) }

  final override predicate isExclusive() { inclusive = false }

  final override predicate isInclusive() { inclusive = true }

  final override Expr getEnd() { result = TIntegerLiteralSynth(this, 1, _) }

  final override Expr getBegin() { result = TIntegerLiteralSynth(this, 0, _) }
}
/**
 * A literal that names a method, such as:
 * ```rb
 * method_name      # a normal name
 * +                # an operator
 * :method_name     # a symbol
 * :"eval_#{name}"  # a complex symbol
 * ```
 */
class MethodName extends Literal {
  // Membership is decided by `MethodName::range` on the underlying node.
  MethodName() { MethodName::range(toGenerated(this)) }

  final override string getAPrimaryQlClass() { "MethodName" = result }
}
/** A method name backed by a `MethodName::Token` node. */
private class TokenMethodName extends MethodName, TTokenMethodName {
  private MethodName::Token g;

  TokenMethodName() { this = TTokenMethodName(g) }

  final override string toString() { result = this.getValueText() }

  /**
   * Gets the name text: the token text when the node is a `Ruby::Token`, or
   * the setter's name followed by `=` when it is a `Ruby::Setter`.
   */
  final override string getValueText() {
    exists(Ruby::Setter setter | setter = g | result = setter.getName().getValue() + "=")
    or
    exists(Ruby::Token tok | tok = g | result = tok.getValue())
  }
}

View File

@@ -0,0 +1,228 @@
private import codeql.ruby.AST
private import codeql.ruby.controlflow.ControlFlowGraph
private import internal.AST
private import internal.TreeSitter
/** A callable. */
class Callable extends Expr, Scope, TCallable {
  /**
   * Gets the parameter at position `n` of this callable. This base
   * definition has no result; subclasses override it.
   */
  Parameter getParameter(int n) { none() }

  /** Gets any of the parameters of this callable. */
  final Parameter getAParameter() { exists(int n | result = this.getParameter(n)) }

  /** Gets how many parameters this callable has. */
  final int getNumberOfParameters() { result = count(Parameter p | p = this.getAParameter()) }

  /** Gets a child node; `pred` names the predicate that yields it. */
  override AstNode getAChild(string pred) {
    pred = "getParameter" and result = this.getAParameter()
    or
    result = super.getAChild(pred)
  }
}
/** A method. */
class MethodBase extends Callable, BodyStmt, Scope, TMethodBase {
  /**
   * Gets the name of this method. This base definition has no result;
   * subclasses override it.
   */
  string getName() { none() }

  /** Holds if this method is called `name`. */
  final predicate hasName(string name) { name = this.getName() }

  /** Gets a child node, as reported by either `BodyStmt` or `Callable`. */
  override AstNode getAChild(string pred) {
    result = BodyStmt.super.getAChild(pred)
    or
    result = Callable.super.getAChild(pred)
  }
}
/** A method call whose method name is `private`. */
private class Private extends MethodCall {
  Private() { "private" = this.getMethodName() }
}
/** A normal method. */
class Method extends MethodBase, TMethod {
  private Ruby::Method g;

  Method() { this = TMethod(g) }

  final override string getAPrimaryQlClass() { result = "Method" }

  final override string toString() { result = this.getName() }

  /**
   * Gets the name of this method. For a setter this is the setter's name
   * followed by `=`.
   */
  final override string getName() {
    exists(Ruby::Setter setter | setter = g.getName() |
      result = setter.getName().getValue() + "="
    )
    or
    exists(Ruby::Token tok | tok = g.getName() | result = tok.getValue())
  }

  /**
   * Holds if this method is a setter, like `name=` below:
   * ```rb
   * class Person
   *   def name=(n)
   *     @name = n
   *   end
   * end
   * ```
   */
  final predicate isSetter() { g.getName() instanceof Ruby::Setter }

  /**
   * Holds if this method is private. In the class below, every method whose
   * name starts with `private` is private:
   *
   * ```rb
   * class C
   *   private def private1
   *   end
   *
   *   def public
   *   end
   *
   *   def private2
   *   end
   *   private :private2
   *
   *   private
   *
   *   def private3
   *   end
   *
   *   def private4
   *   end
   * end
   * ```
   */
  predicate isPrivate() {
    // Top-level methods are private members of the Object class
    this.getEnclosingModule() instanceof Toplevel
    or
    // `private def m ... end`: the definition is the first argument.
    exists(Private call | this = call.getArgument(0))
    or
    // `private :m`: a sibling call in the same class names this method.
    exists(ClassDeclaration cls, Private call, SymbolLiteral sym |
      this = cls.getAStmt() and
      call = cls.getAStmt() and
      sym = call.getArgument(0) and
      sym.getValueText() = this.getName()
    )
    or
    // Bare `private`: this method comes later in the same class body.
    exists(ClassDeclaration cls, int callIdx, int defIdx |
      this = cls.getStmt(defIdx) and
      defIdx > callIdx and
      cls.getStmt(callIdx).(Private).getNumberOfArguments() = 0
    )
  }

  /** Gets the parameter at position `n` of this method. */
  final override Parameter getParameter(int n) {
    g.getParameters().getChild(n) = toGenerated(result)
  }
}
/** A singleton method. */
class SingletonMethod extends MethodBase, TSingletonMethod {
  private Ruby::SingletonMethod g;

  SingletonMethod() { this = TSingletonMethod(g) }

  final override string getAPrimaryQlClass() { result = "SingletonMethod" }

  final override string toString() { result = this.getName() }

  /** Gets the object on which this singleton method is defined. */
  final Expr getObject() { g.getObject() = toGenerated(result) }

  /**
   * Gets the name of this method. For a setter this is the setter's name
   * followed by `=`.
   */
  final override string getName() {
    exists(Ruby::Setter setter | setter = g.getName() |
      result = setter.getName().getValue() + "="
    )
    or
    exists(Ruby::Token tok | tok = g.getName() | result = tok.getValue())
  }

  /** Gets the parameter at position `n` of this method. */
  final override Parameter getParameter(int n) {
    g.getParameters().getChild(n) = toGenerated(result)
  }

  /** Gets a child node; `pred` names the predicate that yields it. */
  final override AstNode getAChild(string pred) {
    pred = "getObject" and result = this.getObject()
    or
    result = super.getAChild(pred)
  }
}
/**
 * A lambda, i.e. an anonymous method, such as:
 * ```rb
 * -> (x) { x + 1 }
 * ```
 */
class Lambda extends Callable, BodyStmt, TLambda {
  private Ruby::Lambda g;

  Lambda() { this = TLambda(g) }

  final override string getAPrimaryQlClass() { result = "Lambda" }

  final override string toString() { result = "-> { ... }" }

  /** Gets the parameter at position `n` of this lambda. */
  final override Parameter getParameter(int n) {
    g.getParameters().getChild(n) = toGenerated(result)
  }

  /** Gets a child node, as reported by either `BodyStmt` or `Callable`. */
  final override AstNode getAChild(string pred) {
    result = BodyStmt.super.getAChild(pred)
    or
    result = Callable.super.getAChild(pred)
  }
}
/** A block. */
class Block extends Callable, StmtSequence, Scope, TBlock {
  /** Gets a child node, as reported by either `StmtSequence` or `Callable`. */
  override AstNode getAChild(string pred) {
    result = StmtSequence.super.getAChild(pred)
    or
    result = Callable.super.getAChild(pred)
  }
}
/** A block delimited by `do` and `end`. */
class DoBlock extends Block, BodyStmt, TDoBlock {
  private Ruby::DoBlock g;

  DoBlock() { this = TDoBlock(g) }

  final override string getAPrimaryQlClass() { result = "DoBlock" }

  final override string toString() { result = "do ... end" }

  /** Gets the parameter at position `n` of this block. */
  final override Parameter getParameter(int n) {
    g.getParameters().getChild(n) = toGenerated(result)
  }

  /** Gets a child node, as reported by either `BodyStmt` or `Block`. */
  final override AstNode getAChild(string pred) {
    result = BodyStmt.super.getAChild(pred)
    or
    result = Block.super.getAChild(pred)
  }
}
/**
 * A block written with curly braces, as in:
 * ```rb
 * names.each { |name| puts name }
 * ```
 */
class BraceBlock extends Block, TBraceBlock {
  private Ruby::Block g;

  BraceBlock() { this = TBraceBlock(g) }

  final override string getAPrimaryQlClass() { result = "BraceBlock" }

  final override string toString() { result = "{ ... }" }

  /** Gets the statement at position `i` of this block. */
  final override Stmt getStmt(int i) { g.getChild(i) = toGenerated(result) }

  /** Gets the parameter at position `n` of this block. */
  final override Parameter getParameter(int n) {
    g.getParameters().getChild(n) = toGenerated(result)
  }
}

View File

@@ -0,0 +1,365 @@
private import codeql.ruby.AST
private import codeql.ruby.ast.Constant
private import internal.AST
private import internal.Module
private import internal.TreeSitter
/**
 * A run-time `module` or `class` value, as represented by the library.
 */
class Module extends TModule {
  /** Gets a textual representation of this module. */
  string toString() {
    exists(Namespace n | this = TUnresolved(n) | result = "...::" + n.toString())
    or
    this = TResolved(result)
  }

  /** Gets one of the declarations of this module, if there are any. */
  ModuleBase getADeclaration() { this = result.getModule() }

  /** Gets the super class of this module, if there is one. */
  Module getSuperClass() { result = getSuperClass(this) }

  /** Gets a module that this module `prepend`s. */
  Module getAPrependedModule() { result = getAPrependedModule(this) }

  /** Gets a module that this module `include`s. */
  Module getAnIncludedModule() { result = getAnIncludedModule(this) }

  /** Holds if this module is a class, i.e. one of its declarations is a `ClassDeclaration`. */
  pragma[noinline]
  predicate isClass() { this.getADeclaration() instanceof ClassDeclaration }

  /**
   * Gets the location of this module.
   *
   * An unresolved module is located at its namespace. A resolved module takes
   * the location of one of its namespace declarations, chosen by preferring,
   * in this order: a file that has a relative path, a larger number of
   * statements, and then the smallest absolute path, start line and start
   * column.
   */
  Location getLocation() {
    result =
      min(Namespace n, string qName, Location loc, int weight |
        this = TResolved(qName) and
        qName = namespaceDeclaration(n) and
        loc = n.getLocation() and
        (
          exists(loc.getFile().getRelativePath()) and weight = 0
          or
          not exists(loc.getFile().getRelativePath()) and weight = 1
        )
      |
        loc
        order by
          weight, count(n.getAStmt()) desc, loc.getFile().getAbsolutePath(), loc.getStartLine(),
          loc.getStartColumn()
      )
    or
    exists(Namespace n | this = TUnresolved(n) | result = n.getLocation())
  }
}
/**
 * The common base of classes, singleton classes, and modules.
 */
class ModuleBase extends BodyStmt, Scope, TModuleBase {
  /**
   * Gets the representation of the run-time value of this module or class.
   * This base definition has no result; subclasses override it.
   */
  Module getModule() { none() }

  /** Gets a method that is defined in this module/class. */
  MethodBase getAMethod() { this.getAStmt() = result }

  /** Gets the method called `name` in this module/class, if there is one. */
  MethodBase getMethod(string name) { result.getName() = name and result = this.getAMethod() }

  /** Gets a class that is defined in this module/class. */
  ClassDeclaration getAClass() { this.getAStmt() = result }

  /** Gets the class called `name` in this module/class, if there is one. */
  ClassDeclaration getClass(string name) {
    result.getName() = name and result = this.getAClass()
  }

  /** Gets a module that is defined in this module/class. */
  ModuleDeclaration getAModule() { this.getAStmt() = result }

  /** Gets the module called `name` in this module/class, if there is one. */
  ModuleDeclaration getModule(string name) {
    result.getName() = name and result = this.getAModule()
  }

  /**
   * Gets the value assigned to the constant called `name`, if there is one.
   *
   * In the example below, the value of `CONST` is `"const"`:
   * ```rb
   * module M
   *   CONST = "const"
   * end
   * ```
   */
  Expr getConstant(string name) {
    exists(AssignExpr assign, ConstantWriteAccess target |
      assign = this.getAStmt() and
      target = assign.getLeftOperand() and
      // Only constants written without a scope expression qualify.
      not exists(target.getScopeExpr()) and
      target.getName() = name
    |
      result = assign.getRightOperand()
    )
  }
}
/**
 * A Ruby source file.
 *
 * ```rb
 * def main
 *   puts "hello world!"
 * end
 * main
 * ```
 */
class Toplevel extends ModuleBase, TToplevel {
  private Ruby::Program g;

  Toplevel() { this = TToplevel(g) }

  final override string getAPrimaryQlClass() { result = "Toplevel" }

  /** Gets the base name of the file that contains this program. */
  final override string toString() { result = g.getLocation().getFile().getBaseName() }

  /** Gets the module value of a top level, which is the resolved module `Object`. */
  final override Module getModule() { result = TResolved("Object") }

  /**
   * Gets the `n`th `BEGIN` block, counting from 0 in child order.
   */
  final BeginBlock getBeginBlock(int n) {
    toGenerated(result) =
      rank[n + 1](int pos, Ruby::BeginBlock blk | blk = g.getChild(pos) | blk order by pos)
  }

  /**
   * Gets any of the `BEGIN` blocks.
   */
  final BeginBlock getABeginBlock() { exists(int n | result = this.getBeginBlock(n)) }

  /** Gets a child node; `pred` names the predicate that yields it. */
  final override AstNode getAChild(string pred) {
    pred = "getBeginBlock" and result = this.getABeginBlock()
    or
    result = super.getAChild(pred)
  }
}
/**
 * A definition of a class or a module.
 *
 * ```rb
 * class Foo
 *   def bar
 *   end
 * end
 * module Bar
 *   class Baz
 *   end
 * end
 * ```
 */
class Namespace extends ModuleBase, ConstantWriteAccess, TNamespace {
  override string getAPrimaryQlClass() { result = "Namespace" }

  final override string toString() { result = ConstantWriteAccess.super.toString() }

  /**
   * Gets the name of the module/class. For the example below the result is
   * `"Foo"`.
   * ```rb
   * class Foo
   * end
   * ```
   *
   * N.B. when the name is written with the scope resolution operator, as in
   * the example below, the result is the name being resolved, i.e. `"Bar"`.
   * Use `getScopeExpr` to obtain the expression for `Foo`.
   * ```rb
   * module Foo::Bar
   * end
   * ```
   *
   * This base definition has no result; subclasses override it.
   */
  override string getName() { none() }

  /**
   * Gets the scope expression of the scope resolution operation in the
   * module/class name, if there is one.
   *
   * For the example below, the result is the `Expr` for `Foo`.
   *
   * ```rb
   * module Foo::Bar
   * end
   * ```
   *
   * The example below has no result, because its name involves no scope
   * resolution operation.
   *
   * ```rb
   * module Baz
   * end
   * ```
   *
   * This base definition has no result; subclasses override it.
   */
  override Expr getScopeExpr() { none() }

  /**
   * Holds if the module/class name uses the scope resolution operator to
   * refer to the global scope, as in:
   *
   * ```rb
   * class ::Foo
   * end
   * ```
   *
   * Never holds in this base definition; subclasses override it.
   */
  override predicate hasGlobalScope() { none() }

  /**
   * Gets the module value declared here: the resolved module for the name
   * that `namespaceDeclaration` gives this namespace, or the unresolved
   * module built from this namespace.
   */
  final override Module getModule() {
    result = TUnresolved(this)
    or
    exists(string qName | qName = namespaceDeclaration(this) | result = TResolved(qName))
  }

  /** Gets a child node, as reported by either `ConstantWriteAccess` or `ModuleBase`. */
  override AstNode getAChild(string pred) {
    result = ConstantWriteAccess.super.getAChild(pred)
    or
    result = ModuleBase.super.getAChild(pred)
  }
}
/**
 * A class definition.
 *
 * ```rb
 * class Foo
 *   def bar
 *   end
 * end
 * ```
 */
class ClassDeclaration extends Namespace, TClassDeclaration {
  private Ruby::Class g;

  ClassDeclaration() { this = TClassDeclaration(g) }

  final override string getAPrimaryQlClass() { result = "ClassDeclaration" }

  /**
   * Gets the `Expr` used as the superclass in the class definition, if any.
   *
   * In the following example, the result is a `ConstantReadAccess`.
   * ```rb
   * class Foo < Bar
   * end
   * ```
   *
   * In the following example, where the superclass is a call expression, the
   * result is a `Call`.
   * ```rb
   * class C < foo()
   * end
   * ```
   */
  final Expr getSuperclassExpr() { toGenerated(result) = g.getSuperclass().getChild() }

  final override string getName() {
    // `class Foo::Bar`: the name is the token being resolved
    result = g.getName().(Ruby::ScopeResolution).getName().(Ruby::Token).getValue()
    or
    // `class Foo`: the name is a plain token
    result = g.getName().(Ruby::Token).getValue()
  }

  final override Expr getScopeExpr() {
    toGenerated(result) = g.getName().(Ruby::ScopeResolution).getScope()
  }

  final override predicate hasGlobalScope() {
    // a scope resolution without a scope
    exists(Ruby::ScopeResolution resolution | resolution = g.getName() |
      not exists(resolution.getScope())
    )
  }

  final override AstNode getAChild(string pred) {
    pred = "getSuperclassExpr" and result = this.getSuperclassExpr()
    or
    result = super.getAChild(pred)
  }
}
/**
 * A definition of a singleton class on an object.
 *
 * ```rb
 * class << foo
 *   def bar
 *     p 'bar'
 *   end
 * end
 * ```
 */
class SingletonClass extends ModuleBase, TSingletonClass {
  private Ruby::SingletonClass g;

  SingletonClass() { this = TSingletonClass(g) }

  final override string getAPrimaryQlClass() { result = "SingletonClass" }

  /**
   * Gets the expression resulting in the object on which the singleton class
   * is defined. In the following example, the result is the `Expr` for `foo`:
   *
   * ```rb
   * class << foo
   * end
   * ```
   */
  final Expr getValue() { g.getValue() = toGenerated(result) }

  final override AstNode getAChild(string pred) {
    pred = "getValue" and result = this.getValue()
    or
    result = super.getAChild(pred)
  }

  final override string toString() { result = "class << ..." }
}
/**
 * A module definition.
 *
 * ```rb
 * module Foo
 *   class Bar
 *   end
 * end
 * ```
 *
 * N.B. this class represents a single syntactic module definition. In the
 * following example, classes `Bar` and `Baz` are both defined in the module
 * `Foo`, but in two syntactically distinct definitions, meaning that there
 * will be two instances of `ModuleDeclaration` in the database.
 *
 * ```rb
 * module Foo
 *   class Bar; end
 * end
 *
 * module Foo
 *   class Baz; end
 * end
 * ```
 */
class ModuleDeclaration extends Namespace, TModuleDeclaration {
  private Ruby::Module g;

  ModuleDeclaration() { this = TModuleDeclaration(g) }

  final override string getAPrimaryQlClass() { result = "ModuleDeclaration" }

  final override string getName() {
    // `module Foo::Bar`: the name is the token being resolved
    result = g.getName().(Ruby::ScopeResolution).getName().(Ruby::Token).getValue()
    or
    // `module Foo`: the name is a plain token
    result = g.getName().(Ruby::Token).getValue()
  }

  final override Expr getScopeExpr() {
    toGenerated(result) = g.getName().(Ruby::ScopeResolution).getScope()
  }

  final override predicate hasGlobalScope() {
    // a scope resolution without a scope
    exists(Ruby::ScopeResolution resolution | resolution = g.getName() |
      not exists(resolution.getScope())
    )
  }
}

View File

@@ -0,0 +1,620 @@
private import codeql.ruby.AST
private import internal.AST
private import internal.TreeSitter
private import internal.Operation
/**
 * An operation.
 *
 * This is the QL root class for all operations; the operator and operands are
 * supplied by the underlying `OperationImpl`.
 */
class Operation extends Expr instanceof OperationImpl {
  /** Gets an operand of this operation. */
  final Expr getAnOperand() { result = super.getAnOperandImpl() }

  /** Gets the operator of this operation. */
  final string getOperator() { result = super.getOperatorImpl() }

  override AstNode getAChild(string pred) {
    pred = "getAnOperand" and result = this.getAnOperand()
    or
    result = Expr.super.getAChild(pred)
  }
}
/** An operation with a single operand. */
class UnaryOperation extends Operation, MethodCall instanceof UnaryOperationImpl {
  /** Gets the operand of this unary operation. */
  final Expr getOperand() { result = super.getOperandImpl() }

  /** Gets the operator followed by ` ...`. */
  final override string toString() { result = this.getOperator() + " ..." }

  final override AstNode getAChild(string pred) {
    pred = "getOperand" and result = this.getOperand()
    or
    result = MethodCall.super.getAChild(pred)
    or
    result = Operation.super.getAChild(pred)
  }
}
/**
 * A unary logical operation.
 *
 * In this file, `NotExpr` is the subclass of this class.
 */
class UnaryLogicalOperation extends UnaryOperation, TUnaryLogicalOperation { }
/**
 * A logical negation, written with either `!` or `not`.
 * ```rb
 * !x.nil?
 * not params.empty?
 * ```
 */
class NotExpr extends UnaryLogicalOperation, TNotExpr {
  final override string getAPrimaryQlClass() { "NotExpr" = result }
}
/**
 * A unary arithmetic operation.
 *
 * In this file, the subclasses are `UnaryPlusExpr` and `UnaryMinusExpr`.
 */
class UnaryArithmeticOperation extends UnaryOperation, TUnaryArithmeticOperation { }
/**
 * A unary plus expression, for example:
 * ```rb
 * + a
 * ```
 */
class UnaryPlusExpr extends UnaryArithmeticOperation, TUnaryPlusExpr {
  final override string getAPrimaryQlClass() { "UnaryPlusExpr" = result }
}
/**
 * A unary minus expression, for example:
 * ```rb
 * - a
 * ```
 */
class UnaryMinusExpr extends UnaryArithmeticOperation, TUnaryMinusExpr {
  final override string getAPrimaryQlClass() { "UnaryMinusExpr" = result }
}
/**
 * A splat expression, such as `*args` in:
 * ```rb
 * foo(*args)
 * ```
 */
class SplatExpr extends UnaryOperation, TSplatExpr {
  final override string getAPrimaryQlClass() { "SplatExpr" = result }
}
/**
 * A hash-splat (or 'double-splat') expression, such as `**options` in:
 * ```rb
 * foo(**options)
 * ```
 */
class HashSplatExpr extends UnaryOperation, THashSplatExpr {
  // The underlying tree-sitter hash-splat argument node.
  private Ruby::HashSplatArgument g;

  HashSplatExpr() { this = THashSplatExpr(g) }

  final override string getAPrimaryQlClass() { "HashSplatExpr" = result }
}
/**
 * A unary bitwise operation.
 *
 * In this file, `ComplementExpr` is the subclass of this class.
 */
class UnaryBitwiseOperation extends UnaryOperation, TUnaryBitwiseOperation { }
/**
 * A bitwise complement (bitwise NOT) expression, for example:
 * ```rb
 * ~x
 * ```
 */
class ComplementExpr extends UnaryBitwiseOperation, TComplementExpr {
  final override string getAPrimaryQlClass() { "ComplementExpr" = result }
}
/**
 * A use of the special `defined?` operator, for example:
 * ```rb
 * defined? some_method
 * ```
 */
class DefinedExpr extends UnaryOperation, TDefinedExpr {
  final override string getAPrimaryQlClass() { "DefinedExpr" = result }
}
/** An operation with a left and a right operand. */
class BinaryOperation extends Operation, MethodCall instanceof BinaryOperationImpl {
  /** Gets the left operand of this binary operation. */
  final Stmt getLeftOperand() { result = super.getLeftOperandImpl() }

  /** Gets the right operand of this binary operation. */
  final Stmt getRightOperand() { result = super.getRightOperandImpl() }

  /** Gets the operator surrounded by `... ` and ` ...`. */
  final override string toString() { result = "... " + this.getOperator() + " ..." }

  override AstNode getAChild(string pred) {
    pred = "getLeftOperand" and result = this.getLeftOperand()
    or
    pred = "getRightOperand" and result = this.getRightOperand()
    or
    result = MethodCall.super.getAChild(pred)
    or
    result = Operation.super.getAChild(pred)
  }
}
/**
 * A binary arithmetic operation.
 *
 * In this file, the subclasses are `AddExpr`, `SubExpr`, `MulExpr`, `DivExpr`,
 * `ModuloExpr` and `ExponentExpr`.
 */
class BinaryArithmeticOperation extends BinaryOperation, TBinaryArithmeticOperation { }
/**
 * An addition expression, for example:
 * ```rb
 * x + 1
 * ```
 */
class AddExpr extends BinaryArithmeticOperation, TAddExpr {
  final override string getAPrimaryQlClass() { "AddExpr" = result }
}
/**
 * A subtraction expression, for example:
 * ```rb
 * x - 3
 * ```
 */
class SubExpr extends BinaryArithmeticOperation, TSubExpr {
  final override string getAPrimaryQlClass() { "SubExpr" = result }
}
/**
 * A multiplication expression, for example:
 * ```rb
 * x * 10
 * ```
 */
class MulExpr extends BinaryArithmeticOperation, TMulExpr {
  final override string getAPrimaryQlClass() { "MulExpr" = result }
}
/**
 * A division expression, for example:
 * ```rb
 * x / y
 * ```
 */
class DivExpr extends BinaryArithmeticOperation, TDivExpr {
  final override string getAPrimaryQlClass() { "DivExpr" = result }
}
/**
 * A modulo expression, for example:
 * ```rb
 * x % 2
 * ```
 */
class ModuloExpr extends BinaryArithmeticOperation, TModuloExpr {
  final override string getAPrimaryQlClass() { "ModuloExpr" = result }
}
/**
 * An exponentiation expression, for example:
 * ```rb
 * x ** 2
 * ```
 */
class ExponentExpr extends BinaryArithmeticOperation, TExponentExpr {
  final override string getAPrimaryQlClass() { "ExponentExpr" = result }
}
/**
 * A binary logical operation.
 *
 * In this file, the subclasses are `LogicalAndExpr` and `LogicalOrExpr`.
 */
class BinaryLogicalOperation extends BinaryOperation, TBinaryLogicalOperation { }
/**
 * A logical conjunction, written with either `and` or `&&`.
 * ```rb
 * x and y
 * a && b
 * ```
 */
class LogicalAndExpr extends BinaryLogicalOperation, TLogicalAndExpr {
  final override string getAPrimaryQlClass() { "LogicalAndExpr" = result }
}
/**
 * A logical disjunction, written with either `or` or `||`.
 * ```rb
 * x or y
 * a || b
 * ```
 */
class LogicalOrExpr extends BinaryLogicalOperation, TLogicalOrExpr {
  final override string getAPrimaryQlClass() { "LogicalOrExpr" = result }
}
/**
 * A binary bitwise operation.
 *
 * In this file, the subclasses are `LShiftExpr`, `RShiftExpr`,
 * `BitwiseAndExpr`, `BitwiseOrExpr` and `BitwiseXorExpr`.
 */
class BinaryBitwiseOperation extends BinaryOperation, TBinaryBitwiseOperation { }
/**
 * A left-shift expression, for example:
 * ```rb
 * x << n
 * ```
 */
class LShiftExpr extends BinaryBitwiseOperation, TLShiftExpr {
  final override string getAPrimaryQlClass() { "LShiftExpr" = result }
}
/**
 * A right-shift expression, for example:
 * ```rb
 * x >> n
 * ```
 */
class RShiftExpr extends BinaryBitwiseOperation, TRShiftExpr {
  final override string getAPrimaryQlClass() { "RShiftExpr" = result }
}
/**
 * A bitwise AND expression, for example:
 * ```rb
 * x & 0xff
 * ```
 */
class BitwiseAndExpr extends BinaryBitwiseOperation, TBitwiseAndExpr {
  final override string getAPrimaryQlClass() { "BitwiseAndExpr" = result }
}
/**
 * A bitwise OR expression, for example:
 * ```rb
 * x | 0x01
 * ```
 */
class BitwiseOrExpr extends BinaryBitwiseOperation, TBitwiseOrExpr {
  final override string getAPrimaryQlClass() { "BitwiseOrExpr" = result }
}
/**
 * A bitwise XOR (exclusive OR) expression, for example:
 * ```rb
 * x ^ y
 * ```
 */
class BitwiseXorExpr extends BinaryBitwiseOperation, TBitwiseXorExpr {
  final override string getAPrimaryQlClass() { "BitwiseXorExpr" = result }
}
/**
 * A comparison operation. That is, either an equality operation
 * (`EqualityOperation`) or a relational operation (`RelationalOperation`).
 */
class ComparisonOperation extends BinaryOperation, TComparisonOperation { }
/**
 * An equality operation.
 *
 * In this file, the subclasses are `EqExpr`, `NEExpr` and `CaseEqExpr`.
 */
class EqualityOperation extends ComparisonOperation, TEqualityOperation { }
/**
 * An equality test using `==`, for example:
 * ```rb
 * x == y
 * ```
 */
class EqExpr extends EqualityOperation, TEqExpr {
  final override string getAPrimaryQlClass() { "EqExpr" = result }
}
/**
 * An inequality test using `!=`, for example:
 * ```rb
 * x != y
 * ```
 */
class NEExpr extends EqualityOperation, TNEExpr {
  final override string getAPrimaryQlClass() { "NEExpr" = result }
}
/**
 * A case-equality (or 'threequals') expression, for example:
 * ```rb
 * String === "foo"
 * ```
 */
class CaseEqExpr extends EqualityOperation, TCaseEqExpr {
  final override string getAPrimaryQlClass() { "CaseEqExpr" = result }
}
/**
 * A relational operation, that is, one of `<=`, `<`, `>`, or `>=`.
 *
 * The greater/lesser operand predicates have no result here; each concrete
 * subclass (`GTExpr`, `GEExpr`, `LTExpr`, `LEExpr`) maps them onto its left
 * and right operands.
 */
class RelationalOperation extends ComparisonOperation, TRelationalOperation {
  /** Gets the lesser operand. */
  Expr getLesserOperand() { none() }

  /** Gets the greater operand. */
  Expr getGreaterOperand() { none() }

  final override AstNode getAChild(string pred) {
    pred = "getGreaterOperand" and result = this.getGreaterOperand()
    or
    pred = "getLesserOperand" and result = this.getLesserOperand()
    or
    result = super.getAChild(pred)
  }
}
/**
 * A greater-than expression, for example:
 * ```rb
 * x > 0
 * ```
 */
class GTExpr extends RelationalOperation, TGTExpr {
  final override string getAPrimaryQlClass() { "GTExpr" = result }

  /** Gets the lesser operand, which for `>` is the right operand. */
  final override Expr getLesserOperand() { this.getRightOperand() = result }

  /** Gets the greater operand, which for `>` is the left operand. */
  final override Expr getGreaterOperand() { this.getLeftOperand() = result }
}
/**
 * A greater-than-or-equal expression, for example:
 * ```rb
 * x >= 0
 * ```
 */
class GEExpr extends RelationalOperation, TGEExpr {
  final override string getAPrimaryQlClass() { "GEExpr" = result }

  /** Gets the lesser operand, which for `>=` is the right operand. */
  final override Expr getLesserOperand() { this.getRightOperand() = result }

  /** Gets the greater operand, which for `>=` is the left operand. */
  final override Expr getGreaterOperand() { this.getLeftOperand() = result }
}
/**
 * A less-than expression, for example:
 * ```rb
 * x < 10
 * ```
 */
class LTExpr extends RelationalOperation, TLTExpr {
  final override string getAPrimaryQlClass() { "LTExpr" = result }

  /** Gets the lesser operand, which for `<` is the left operand. */
  final override Expr getLesserOperand() { this.getLeftOperand() = result }

  /** Gets the greater operand, which for `<` is the right operand. */
  final override Expr getGreaterOperand() { this.getRightOperand() = result }
}
/**
 * A less-than-or-equal expression, for example:
 * ```rb
 * x <= 10
 * ```
 */
class LEExpr extends RelationalOperation, TLEExpr {
  final override string getAPrimaryQlClass() { "LEExpr" = result }

  /** Gets the lesser operand, which for `<=` is the left operand. */
  final override Expr getLesserOperand() { this.getLeftOperand() = result }

  /** Gets the greater operand, which for `<=` is the right operand. */
  final override Expr getGreaterOperand() { this.getRightOperand() = result }
}
/**
 * A three-way comparison ('spaceship') expression, for example:
 * ```rb
 * a <=> b
 * ```
 */
class SpaceshipExpr extends BinaryOperation, TSpaceshipExpr {
  final override string getAPrimaryQlClass() { "SpaceshipExpr" = result }
}
/**
 * A regular-expression match expression using `=~`, for example:
 * ```rb
 * input =~ /\d/
 * ```
 */
class RegExpMatchExpr extends BinaryOperation, TRegExpMatchExpr {
  final override string getAPrimaryQlClass() { "RegExpMatchExpr" = result }
}
/**
 * A regular-expression non-match expression using `!~`, for example:
 * ```rb
 * input !~ /\d/
 * ```
 */
class NoRegExpMatchExpr extends BinaryOperation, TNoRegExpMatchExpr {
  final override string getAPrimaryQlClass() { "NoRegExpMatchExpr" = result }
}
/**
 * A binary assignment operation, including `=`, `+=`, `&=`, etc.
 *
 * This is the QL base class for all assignments.
 */
class Assignment extends Operation instanceof AssignmentImpl {
  /** Gets the right hand side of this assignment. */
  final Expr getRightOperand() { result = super.getRightOperandImpl() }

  /** Gets the left hand side of this assignment. */
  final Pattern getLeftOperand() { result = super.getLeftOperandImpl() }

  /** Gets the operator surrounded by `... ` and ` ...`. */
  final override string toString() { result = "... " + this.getOperator() + " ..." }

  override AstNode getAChild(string pred) {
    pred = "getLeftOperand" and result = this.getLeftOperand()
    or
    pred = "getRightOperand" and result = this.getRightOperand()
    or
    result = Operation.super.getAChild(pred)
  }
}
/**
 * A plain assignment, i.e. one using the operator `=`.
 * ```rb
 * x = 123
 * ```
 */
class AssignExpr extends Assignment, TAssignExpr {
  final override string getAPrimaryQlClass() { "AssignExpr" = result }
}
/**
 * A binary assignment operation other than `=`.
 *
 * In this file, the direct subclasses are `AssignArithmeticOperation`,
 * `AssignLogicalOperation` and `AssignBitwiseOperation`.
 */
class AssignOperation extends Assignment instanceof AssignOperationImpl { }
/**
 * An arithmetic assignment operation: `+=`, `-=`, `*=`, `/=`, `**=`, and `%=`.
 *
 * In this file, the subclasses are `AssignAddExpr`, `AssignSubExpr`,
 * `AssignMulExpr`, `AssignDivExpr`, `AssignModuloExpr` and `AssignExponentExpr`.
 */
class AssignArithmeticOperation extends AssignOperation, TAssignArithmeticOperation { }
/**
 * An addition assignment using `+=`, for example:
 * ```rb
 * x += 1
 * ```
 */
class AssignAddExpr extends AssignArithmeticOperation, TAssignAddExpr {
  final override string getAPrimaryQlClass() { "AssignAddExpr" = result }
}
/**
 * A subtraction assignment using `-=`, for example:
 * ```rb
 * x -= 3
 * ```
 */
class AssignSubExpr extends AssignArithmeticOperation, TAssignSubExpr {
  final override string getAPrimaryQlClass() { "AssignSubExpr" = result }
}
/**
 * A multiplication assignment using `*=`, for example:
 * ```rb
 * x *= 10
 * ```
 */
class AssignMulExpr extends AssignArithmeticOperation, TAssignMulExpr {
  final override string getAPrimaryQlClass() { "AssignMulExpr" = result }
}
/**
 * A division assignment using `/=`, for example:
 * ```rb
 * x /= y
 * ```
 */
class AssignDivExpr extends AssignArithmeticOperation, TAssignDivExpr {
  final override string getAPrimaryQlClass() { "AssignDivExpr" = result }
}
/**
 * A modulo assignment using `%=`, for example:
 * ```rb
 * x %= 4
 * ```
 */
class AssignModuloExpr extends AssignArithmeticOperation, TAssignModuloExpr {
  final override string getAPrimaryQlClass() { "AssignModuloExpr" = result }
}
/**
 * An exponentiation assignment using `**=`, for example:
 * ```rb
 * x **= 2
 * ```
 */
class AssignExponentExpr extends AssignArithmeticOperation, TAssignExponentExpr {
  final override string getAPrimaryQlClass() { "AssignExponentExpr" = result }
}
/**
 * A logical assignment operation: `&&=` and `||=`.
 *
 * In this file, the subclasses are `AssignLogicalAndExpr` and
 * `AssignLogicalOrExpr`.
 */
class AssignLogicalOperation extends AssignOperation, TAssignLogicalOperation { }
/**
 * A logical AND assignment using `&&=`, for example:
 * ```rb
 * x &&= y.even?
 * ```
 */
class AssignLogicalAndExpr extends AssignLogicalOperation, TAssignLogicalAndExpr {
  final override string getAPrimaryQlClass() { "AssignLogicalAndExpr" = result }
}
/**
 * A logical OR assignment using `||=`, for example:
 * ```rb
 * x ||= y
 * ```
 */
class AssignLogicalOrExpr extends AssignLogicalOperation, TAssignLogicalOrExpr {
  final override string getAPrimaryQlClass() { "AssignLogicalOrExpr" = result }
}
/**
 * A bitwise assignment operation: `<<=`, `>>=`, `&=`, `|=` and `^=`.
 *
 * In this file, the subclasses are `AssignLShiftExpr`, `AssignRShiftExpr`,
 * `AssignBitwiseAndExpr`, `AssignBitwiseOrExpr` and `AssignBitwiseXorExpr`.
 */
class AssignBitwiseOperation extends AssignOperation, TAssignBitwiseOperation { }
/**
 * A left-shift assignment using `<<=`, for example:
 * ```rb
 * x <<= 3
 * ```
 */
class AssignLShiftExpr extends AssignBitwiseOperation, TAssignLShiftExpr {
  final override string getAPrimaryQlClass() { "AssignLShiftExpr" = result }
}
/**
 * A right-shift assignment using `>>=`, for example:
 * ```rb
 * x >>= 3
 * ```
 */
class AssignRShiftExpr extends AssignBitwiseOperation, TAssignRShiftExpr {
  final override string getAPrimaryQlClass() { "AssignRShiftExpr" = result }
}
/**
 * A bitwise AND assignment using `&=`, for example:
 * ```rb
 * x &= 0xff
 * ```
 */
class AssignBitwiseAndExpr extends AssignBitwiseOperation, TAssignBitwiseAndExpr {
  final override string getAPrimaryQlClass() { "AssignBitwiseAndExpr" = result }
}
/**
 * A bitwise OR assignment using `|=`, for example:
 * ```rb
 * x |= 0x01
 * ```
 */
class AssignBitwiseOrExpr extends AssignBitwiseOperation, TAssignBitwiseOrExpr {
  final override string getAPrimaryQlClass() { "AssignBitwiseOrExpr" = result }
}
/**
 * A bitwise XOR (exclusive OR) assignment using `^=`, for example:
 * ```rb
 * x ^= y
 * ```
 */
class AssignBitwiseXorExpr extends AssignBitwiseOperation, TAssignBitwiseXorExpr {
  final override string getAPrimaryQlClass() { "AssignBitwiseXorExpr" = result }
}

View File

@@ -0,0 +1,248 @@
private import codeql.ruby.AST
private import internal.AST
private import internal.Variable
private import internal.Parameter
private import internal.TreeSitter
/** A parameter of a callable. */
class Parameter extends AstNode, TParameter {
  /** Gets the callable that this parameter belongs to. */
  final Callable getCallable() { this = result.getAParameter() }

  /** Gets the zero-based position of this parameter. */
  final int getPosition() { exists(Callable owner | this = owner.getParameter(result)) }

  /**
   * Gets a variable introduced by this parameter.
   *
   * This base definition has no result; subclasses override it.
   */
  LocalVariable getAVariable() { none() }

  /** Gets the variable named `name` introduced by this parameter. */
  final LocalVariable getVariable(string name) {
    name = result.getName() and
    result = this.getAVariable()
  }
}
/**
 * A parameter that is defined by a pattern.
 *
 * This covers both simple parameters and tuple parameters. The variables of
 * such a parameter are those of the pattern.
 */
class PatternParameter extends Parameter, Pattern, TPatternParameter {
  override LocalVariable getAVariable() { Pattern.super.getAVariable() = result }
}
/** A parameter that is defined by a tuple pattern. */
class TuplePatternParameter extends PatternParameter, TuplePattern, TTuplePatternParameter {
  final override string getAPrimaryQlClass() { "TuplePatternParameter" = result }

  /** Gets a variable of the underlying tuple pattern. */
  final override LocalVariable getAVariable() { TuplePattern.super.getAVariable() = result }

  /** Gets a child of the underlying tuple pattern. */
  override AstNode getAChild(string pred) { TuplePattern.super.getAChild(pred) = result }
}
/** A parameter that has a name. */
class NamedParameter extends Parameter, TNamedParameter {
  /**
   * Gets the name of this parameter.
   *
   * This base definition has no result; subclasses override it.
   */
  string getName() { none() }

  /** Holds if the name of this parameter is `name`. */
  final predicate hasName(string name) { name = this.getName() }

  /**
   * Gets the variable introduced by this parameter.
   *
   * This base definition has no result; subclasses override it.
   */
  LocalVariable getVariable() { none() }

  override LocalVariable getAVariable() { this.getVariable() = result }

  /** Gets the access that defines the underlying local variable. */
  final VariableAccess getDefiningAccess() { result = this.getVariable().getDefiningAccess() }

  /** Gets an access to this parameter. */
  final VariableAccess getAnAccess() { result = this.getVariable().getAnAccess() }

  override AstNode getAChild(string pred) {
    pred = "getDefiningAccess" and
    result = this.getDefiningAccess()
    or
    result = super.getAChild(pred)
  }
}
/** A simple (normal) parameter, given by a single identifier. */
class SimpleParameter extends NamedParameter, PatternParameter, VariablePattern, TSimpleParameter {
  private Ruby::Identifier g;

  SimpleParameter() { this = TSimpleParameter(g) }

  final override string getAPrimaryQlClass() { "SimpleParameter" = result }

  /** Gets the text of the identifier. */
  final override string getName() { g.getValue() = result }

  /** Gets the local variable whose third component is the identifier of this parameter. */
  final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g) }

  final override LocalVariable getAVariable() { this.getVariable() = result }

  /** Gets the parameter name. */
  final override string toString() { this.getName() = result }
}
/**
 * A block parameter. For example, `&bar` in the following code:
 * ```rb
 * def foo(&bar)
 *   bar.call if block_given?
 * end
 * ```
 */
class BlockParameter extends NamedParameter, TBlockParameter {
  private Ruby::BlockParameter g;

  BlockParameter() { this = TBlockParameter(g) }

  final override string getAPrimaryQlClass() { "BlockParameter" = result }

  final override string getName() { g.getName().getValue() = result }

  final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g.getName()) }

  /** Gets the parameter name prefixed with `&`. */
  final override string toString() { result = "&" + this.getName() }
}
/**
 * A hash-splat (or double-splat) parameter. For example, `**options` in the
 * following code:
 * ```rb
 * def foo(bar, **options)
 *   ...
 * end
 * ```
 */
class HashSplatParameter extends NamedParameter, THashSplatParameter {
  private Ruby::HashSplatParameter g;

  HashSplatParameter() { this = THashSplatParameter(g) }

  final override string getAPrimaryQlClass() { "HashSplatParameter" = result }

  final override string getName() { g.getName().getValue() = result }

  final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g.getName()) }

  /** Gets the parameter name prefixed with `**`. */
  final override string toString() { result = "**" + this.getName() }
}
/**
 * A keyword parameter, including a default value if the parameter is optional.
 * For example, in the following code, `foo` is an optional keyword parameter
 * with a default value of `0`, and `bar` is a mandatory keyword parameter
 * (it has no default value).
 * ```rb
 * def f(foo: 0, bar:)
 *   foo * 10 + bar
 * end
 * ```
 */
class KeywordParameter extends NamedParameter, TKeywordParameter {
  private Ruby::KeywordParameter g;

  KeywordParameter() { this = TKeywordParameter(g) }

  final override string getAPrimaryQlClass() { "KeywordParameter" = result }

  final override string getName() { g.getName().getValue() = result }

  final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g.getName()) }

  /**
   * Gets the default value, i.e. the value assigned to the parameter when one
   * is not provided by the caller. If the parameter is mandatory and does not
   * have a default value, this predicate has no result.
   */
  final Expr getDefaultValue() { g.getValue() = toGenerated(result) }

  /**
   * Holds if the parameter is optional. That is, there is a default value that
   * is used when the caller omits this parameter.
   */
  final predicate isOptional() { exists(this.getDefaultValue()) }

  /** Gets the parameter name. */
  final override string toString() { this.getName() = result }

  /** Gets the location of the parameter's name. */
  final override Location getLocation() { g.getName().getLocation() = result }

  final override AstNode getAChild(string pred) {
    pred = "getDefaultValue" and result = this.getDefaultValue()
    or
    result = super.getAChild(pred)
  }
}
/**
 * An optional parameter. For example, the parameter `name` in the following
 * code:
 * ```rb
 * def say_hello(name = 'Anon')
 *   puts "hello #{name}"
 * end
 * ```
 */
class OptionalParameter extends NamedParameter, TOptionalParameter {
  private Ruby::OptionalParameter g;

  OptionalParameter() { this = TOptionalParameter(g) }

  final override string getAPrimaryQlClass() { "OptionalParameter" = result }

  final override string getName() { g.getName().getValue() = result }

  final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g.getName()) }

  /**
   * Gets the default value, i.e. the value assigned to the parameter when one
   * is not provided by the caller.
   */
  final Expr getDefaultValue() { g.getValue() = toGenerated(result) }

  /** Gets the parameter name. */
  final override string toString() { this.getName() = result }

  /** Gets the location of the parameter's name. */
  final override Location getLocation() { g.getName().getLocation() = result }

  final override AstNode getAChild(string pred) {
    pred = "getDefaultValue" and result = this.getDefaultValue()
    or
    result = super.getAChild(pred)
  }
}
/**
 * A splat parameter. For example, `*values` in the following code:
 * ```rb
 * def foo(bar, *values)
 *   ...
 * end
 * ```
 */
class SplatParameter extends NamedParameter, TSplatParameter {
  private Ruby::SplatParameter g;

  SplatParameter() { this = TSplatParameter(g) }

  final override string getAPrimaryQlClass() { "SplatParameter" = result }

  final override string getName() { g.getName().getValue() = result }

  final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g.getName()) }

  /** Gets the parameter name prefixed with `*`. */
  final override string toString() { result = "*" + this.getName() }
}
/**
 * The special `...` parameter, which forwards positional/keyword/block arguments:
 * ```rb
 * def foo(...)
 * end
 * ```
 */
class ForwardParameter extends Parameter, TForwardParameter {
  final override string toString() { "..." = result }

  final override string getAPrimaryQlClass() { "ForwardParameter" = result }
}

View File

@@ -0,0 +1,96 @@
private import codeql.ruby.AST
private import codeql.Locations
private import internal.AST
private import internal.Pattern
private import internal.TreeSitter
private import internal.Variable
/**
 * A pattern.
 *
 * An AST node is a pattern if its underlying node is an explicit assignment
 * node, an implicit assignment node or an implicit parameter assignment node,
 * or if it is the synthesized child at index 0 of an `AssignExpr`.
 */
class Pattern extends AstNode {
  Pattern() {
    this = getSynthChild(any(AssignExpr assign), 0)
    or
    implicitParameterAssignmentNode(toGenerated(this), _)
    or
    implicitAssignmentNode(toGenerated(this))
    or
    explicitAssignmentNode(toGenerated(this), _)
  }

  /**
   * Gets a variable used in (or introduced by) this pattern.
   *
   * This base definition has no result; subclasses override it.
   */
  Variable getAVariable() { none() }
}
/**
 * The union of the AST node types that `LhsExpr` is restricted to: variable
 * accesses, token and scope-resolution constant accesses, method calls, and
 * simple parameters.
 */
private class LhsExpr_ =
TVariableAccess or TTokenConstantAccess or TScopeResolutionConstantAccess or TMethodCall or
TSimpleParameter;
/**
 * A "left-hand-side" expression. An `LhsExpr` can occur on the left-hand side of
 * operator assignments (`AssignOperation`), in patterns (`Pattern`) on the left-hand side of
 * an assignment (`AssignExpr`) or for loop (`ForExpr`), and as the exception
 * variable of a `rescue` clause (`RescueClause`).
 *
 * An `LhsExpr` can be a simple variable, a constant, a call, or an element reference:
 * ```rb
 * var = 1
 * var += 1
 * E = 1
 * foo.bar = 1
 * foo[0] = 1
 * rescue E => var
 * ```
 */
class LhsExpr extends Pattern, LhsExpr_, Expr {
  /** Gets the accessed variable, if this expression is a variable access. */
  override Variable getAVariable() { this.(VariableAccess).getVariable() = result }
}
/** The union of the AST node types that `VariablePattern` is restricted to. */
private class TVariablePattern = TVariableAccess or TSimpleParameter;
/**
 * A simple variable pattern, i.e. a pattern that is a variable access or a
 * simple parameter.
 */
class VariablePattern extends Pattern, LhsExpr, TVariablePattern { }
/**
 * A tuple pattern.
 *
 * This includes both tuple patterns in parameters and assignments. Example patterns:
 * ```rb
 * a, self.b = value
 * (a, b), c[3] = value
 * a, b, *rest, c, d = value
 * ```
 */
class TuplePattern extends Pattern, TTuplePattern {
  override string getAPrimaryQlClass() { "TuplePattern" = result }

  private TuplePatternImpl getImpl() { toGenerated(this) = result }

  private Ruby::AstNode getChild(int i) { this.getImpl().getChildNode(i) = result }

  /**
   * Gets the `i`th pattern in this tuple pattern.
   *
   * If the `i`th child is a rest assignment (`*rest`), the result may also be
   * the pattern underneath the `*` marker.
   */
  final Pattern getElement(int i) {
    exists(Ruby::AstNode child | child = this.getChild(i) |
      toGenerated(result) = child
      or
      toGenerated(result) = child.(Ruby::RestAssignment).getChild()
    )
  }

  /** Gets a sub pattern in this tuple pattern. */
  final Pattern getAnElement() { this.getElement(_) = result }

  /**
   * Gets the index of the pattern with the `*` marker on it, if it exists.
   * In the example below the index is `2`.
   * ```rb
   * a, b, *rest, c, d = value
   * ```
   */
  final int getRestIndex() { this.getImpl().getRestIndex() = result }

  /** Gets a variable of any element of this tuple pattern. */
  override Variable getAVariable() { this.getElement(_).getAVariable() = result }

  override string toString() { "(..., ...)" = result }

  // NOTE(review): unlike most `getAChild` overrides in this library, this one
  // does not include `super.getAChild(pred)`; confirm that this is intentional.
  override AstNode getAChild(string pred) { pred = "getElement" and result = this.getElement(_) }
}

View File

@@ -0,0 +1,22 @@
private import codeql.ruby.AST
private import internal.AST
private import internal.Scope
private import internal.TreeSitter
/**
 * A variable scope, i.e. an AST node whose underlying node is a `Scope::Range`.
 */
class Scope extends AstNode, TScopeType {
  private Scope::Range range;

  Scope() { toGenerated(this) = range }

  /** Gets the scope in which this scope is nested, if any. */
  Scope getOuterScope() { range.getOuterScope() = toGenerated(result) }

  /** Gets a variable that is declared in this scope. */
  final Variable getAVariable() { this = result.getDeclaringScope() }

  /** Gets the variable declared in this scope with the given name, if any. */
  final Variable getVariable(string name) {
    name = result.getName() and
    result = this.getAVariable()
  }
}

View File

@@ -0,0 +1,248 @@
private import codeql.ruby.AST
private import codeql.ruby.CFG
private import internal.AST
private import internal.TreeSitter
private import internal.Variable
private import codeql.ruby.controlflow.internal.ControlFlowGraphImpl
/**
 * A statement.
 *
 * This is the root QL class for all statements.
 */
class Stmt extends AstNode, TStmt {
  /** Gets a control-flow node for this statement, if any. */
  CfgNodes::AstCfgNode getAControlFlowNode() { this = result.getNode() }

  /** Gets the control-flow scope of this statement, if any. */
  CfgScope getCfgScope() { result = getCfgScope(this) }

  /** Gets the enclosing callable, if any, i.e. the control-flow scope when it is a `Callable`. */
  Callable getEnclosingCallable() { this.getCfgScope() = result }
}
/**
 * An empty statement, written as a lone `;`.
 */
class EmptyStmt extends Stmt, TEmptyStmt {
  final override string toString() { ";" = result }

  final override string getAPrimaryQlClass() { "EmptyStmt" = result }
}
/**
 * A `begin` statement.
 * ```rb
 * begin
 *   puts "hello world"
 * end
 * ```
 */
class BeginExpr extends BodyStmt, TBeginExpr {
  // The trailing space is part of the rendered string.
  final override string toString() { "begin ... " = result }

  final override string getAPrimaryQlClass() { "BeginExpr" = result }
}
/**
 * A `BEGIN` block, for example:
 * ```rb
 * BEGIN { puts "starting ..." }
 * ```
 */
class BeginBlock extends StmtSequence, TBeginBlock {
  private Ruby::BeginBlock g;

  BeginBlock() { this = TBeginBlock(g) }

  final override string getAPrimaryQlClass() { "BeginBlock" = result }

  /** Gets the statement corresponding to the `n`th child of the underlying node. */
  final override Stmt getStmt(int n) { g.getChild(n) = toGenerated(result) }

  final override string toString() { "BEGIN { ... }" = result }
}
/**
 * An `END` block, for example:
 * ```rb
 * END { puts "shutting down" }
 * ```
 */
class EndBlock extends StmtSequence, TEndBlock {
  private Ruby::EndBlock g;

  EndBlock() { this = TEndBlock(g) }

  final override string getAPrimaryQlClass() { "EndBlock" = result }

  /** Gets the statement corresponding to the `n`th child of the underlying node. */
  final override Stmt getStmt(int n) { g.getChild(n) = toGenerated(result) }

  final override string toString() { "END { ... }" = result }
}
/**
 * An `undef` statement. For example:
 * ```rb
 * undef method_name
 * undef &&, :method_name
 * undef :"method_#{ name }"
 * ```
 */
class UndefStmt extends Stmt, TUndefStmt {
  private Ruby::Undef g;

  UndefStmt() { this = TUndefStmt(g) }

  final override string getAPrimaryQlClass() { "UndefStmt" = result }

  /** Gets the `n`th method name to undefine. */
  final MethodName getMethodName(int n) { g.getChild(n) = toGenerated(result) }

  /** Gets a method name to undefine. */
  final MethodName getAMethodName() { result = this.getMethodName(_) }

  final override string toString() { "undef ..." = result }

  final override AstNode getAChild(string pred) {
    pred = "getMethodName" and result = this.getMethodName(_)
    or
    result = super.getAChild(pred)
  }
}
/**
 * An `alias` statement. For example:
 * ```rb
 * alias alias_name method_name
 * alias foo :method_name
 * alias bar :"method_#{ name }"
 * ```
 */
class AliasStmt extends Stmt, TAliasStmt {
  /** The tree-sitter node that this AST node wraps. */
  private Ruby::Alias generated;

  AliasStmt() { TAliasStmt(generated) = this }

  /** Gets the new method name (the `name` field of the wrapped node). */
  final MethodName getNewName() { generated.getName() = toGenerated(result) }

  /** Gets the original method name (the `alias` field of the wrapped node). */
  final MethodName getOldName() { generated.getAlias() = toGenerated(result) }

  /** Gets the textual representation of this node. */
  final override string toString() { result = "alias ..." }

  /** Gets the name of the primary QL class of this node, `AliasStmt`. */
  final override string getAPrimaryQlClass() { result = "AliasStmt" }

  /**
   * Gets a child of this node, where `pred` names the member predicate that
   * yields it. Adds the new and the old name to the inherited children.
   */
  final override AstNode getAChild(string pred) {
    pred = "getOldName" and result = this.getOldName()
    or
    pred = "getNewName" and result = this.getNewName()
    or
    result = super.getAChild(pred)
  }
}
/**
 * A statement that may return a value: `return`, `break` and `next`.
 *
 * ```rb
 * return
 * return value
 * break
 * break value
 * next
 * next value
 * ```
 */
class ReturningStmt extends Stmt, TReturningStmt {
  /** Gets the tree-sitter argument list of the underlying `return`, `break` or `next` node. */
  private Ruby::ArgumentList getArgumentList() {
    exists(Ruby::Return ret | this = TReturnStmt(ret) and result = ret.getChild())
    or
    exists(Ruby::Break brk | this = TBreakStmt(brk) and result = brk.getChild())
    or
    exists(Ruby::Next nxt | this = TNextStmt(nxt) and result = nxt.getChild())
  }

  /**
   * Gets the returned value, if any.
   *
   * When the argument list has exactly one distinct child, the value is that
   * child; when it has more than one, the value is the node for the argument
   * list as a whole.
   */
  final Expr getValue() {
    exists(Ruby::ArgumentList args, int numArgs |
      args = this.getArgumentList() and
      numArgs = count(args.getChild(_))
    |
      numArgs = 1 and toGenerated(result) = args.getChild(0)
      or
      numArgs > 1 and toGenerated(result) = args
    )
  }

  /**
   * Gets a child of this node, where `pred` names the member predicate that
   * yields it. Adds the returned value to the inherited children.
   */
  final override AstNode getAChild(string pred) {
    pred = "getValue" and result = this.getValue()
    or
    result = super.getAChild(pred)
  }
}
/**
 * A `return` statement, with or without a value:
 * ```rb
 * return
 * return value
 * ```
 */
class ReturnStmt extends ReturningStmt, TReturnStmt {
  /** Gets the textual representation of this node. */
  final override string toString() { result = "return" }

  /** Gets the name of the primary QL class of this node, `ReturnStmt`. */
  final override string getAPrimaryQlClass() { result = "ReturnStmt" }
}
/**
 * A `break` statement, with or without a value:
 * ```rb
 * break
 * break value
 * ```
 */
class BreakStmt extends ReturningStmt, TBreakStmt {
  /** Gets the textual representation of this node. */
  final override string toString() { result = "break" }

  /** Gets the name of the primary QL class of this node, `BreakStmt`. */
  final override string getAPrimaryQlClass() { result = "BreakStmt" }
}
/**
 * A `next` statement, with or without a value:
 * ```rb
 * next
 * next value
 * ```
 */
class NextStmt extends ReturningStmt, TNextStmt {
  /** Gets the textual representation of this node. */
  final override string toString() { result = "next" }

  /** Gets the name of the primary QL class of this node, `NextStmt`. */
  final override string getAPrimaryQlClass() { result = "NextStmt" }
}
/**
 * A `redo` statement:
 * ```rb
 * redo
 * ```
 */
class RedoStmt extends Stmt, TRedoStmt {
  /** Gets the textual representation of this node. */
  final override string toString() { result = "redo" }

  /** Gets the name of the primary QL class of this node, `RedoStmt`. */
  final override string getAPrimaryQlClass() { result = "RedoStmt" }
}
/**
 * A `retry` statement:
 * ```rb
 * retry
 * ```
 */
class RetryStmt extends Stmt, TRetryStmt {
  /** Gets the textual representation of this node. */
  final override string toString() { result = "retry" }

  /** Gets the name of the primary QL class of this node, `RetryStmt`. */
  final override string getAPrimaryQlClass() { result = "RetryStmt" }
}

View File

@@ -0,0 +1,187 @@
/** Provides classes for modeling program variables. */
private import codeql.ruby.AST
private import codeql.Locations
private import internal.AST
private import internal.TreeSitter
private import internal.Variable
/** A variable declared in a scope. */
class Variable instanceof VariableImpl {
  /** Gets the name of this variable. */
  final string getName() { super.getNameImpl() = result }

  /** Holds if this variable is named `name`. */
  final predicate hasName(string name) { name = this.getName() }

  /** Gets a textual representation of this variable, which is its name. */
  final string toString() { result = this.getName() }

  /** Gets the location of this variable. */
  final Location getLocation() { super.getLocationImpl() = result }

  /**
   * Gets the scope this variable is declared in. This only has a result for
   * variables that are a `VariableReal`.
   */
  final Scope getDeclaringScope() {
    this.(VariableReal).getDeclaringScopeImpl() = toGenerated(result)
  }

  /** Gets an access to this variable. */
  VariableAccess getAnAccess() { this = result.getVariable() }
}
/** A local variable. */
class LocalVariable extends Variable, TLocalVariable {
  /** Gets an access to this local variable. */
  override LocalVariableAccess getAnAccess() { this = result.getVariable() }

  /**
   * Gets the access where this local variable is first introduced. This only
   * has a result for variables that are a `LocalVariableReal`.
   */
  VariableAccess getDefiningAccess() { this.(LocalVariableReal).getDefiningAccessImpl() = result }

  /**
   * Holds if this variable is captured, that is, if at least one of its
   * accesses is a captured access. For example in
   *
   * ```rb
   * def m x
   *   x.times do |y|
   *     puts x
   *   end
   *   puts x
   * end
   * ```
   *
   * `x` is a captured variable, whereas `y` is not.
   */
  predicate isCaptured() {
    exists(LocalVariableAccess access |
      access = this.getAnAccess() and
      access.isCapturedAccess()
    )
  }
}
/** A global variable. */
class GlobalVariable extends Variable instanceof GlobalVariableImpl {
  /** Gets an access to this global variable. */
  final override GlobalVariableAccess getAnAccess() { this = result.getVariable() }
}
/** An instance variable. */
class InstanceVariable extends Variable instanceof InstanceVariableImpl {
  /** Gets an access to this instance variable. */
  final override InstanceVariableAccess getAnAccess() { this = result.getVariable() }

  /** Holds if this variable is a class instance variable. */
  final predicate isClassInstanceVariable() { super.isClassInstanceVariable() }
}
/** A class variable. */
class ClassVariable extends Variable instanceof ClassVariableImpl {
  /** Gets an access to this class variable. */
  final override ClassVariableAccess getAnAccess() { this = result.getVariable() }
}
/** An access to a variable. */
class VariableAccess extends Expr instanceof VariableAccessImpl {
  /** Gets the variable that this access refers to. */
  final Variable getVariable() { super.getVariableImpl() = result }

  /**
   * Holds if this access is a write access belonging to the explicit
   * assignment `assignment`. This is the case when this access is the left
   * operand of the `AssignExpr` `assignment`, or when `explicitWriteAccess`
   * relates the underlying tree-sitter nodes. For example, in
   *
   * ```rb
   * a, b = foo
   * ```
   *
   * both `a` and `b` are write accesses belonging to the same assignment.
   */
  predicate isExplicitWrite(AstNode assignment) {
    this = assignment.(AssignExpr).getLeftOperand()
    or
    explicitWriteAccess(toGenerated(this), toGenerated(assignment))
  }

  /**
   * Holds if this access is a write access belonging to an implicit
   * assignment. For example, in
   *
   * ```rb
   * def m elements
   *   for e in elements do
   *     puts e
   *   end
   * end
   * ```
   *
   * the access to `elements` in the parameter list is an implicit assignment,
   * as is the first access to `e`.
   */
  predicate isImplicitWrite() { implicitWriteAccess(toGenerated(this)) }

  /** Gets the textual representation of this access, as given by `VariableAccessImpl`. */
  final override string toString() { VariableAccessImpl.super.toString() = result }
}
/**
 * An access to a variable where the value is updated, either through an
 * implicit write or through an explicit write belonging to some assignment.
 */
class VariableWriteAccess extends VariableAccess {
  VariableWriteAccess() {
    this.isImplicitWrite()
    or
    this.isExplicitWrite(_)
  }
}
/**
 * An access to a variable where the value is read, that is, any variable
 * access that is not a `VariableWriteAccess`.
 */
class VariableReadAccess extends VariableAccess {
  VariableReadAccess() { not this = any(VariableWriteAccess write) }
}
/** An access to a local variable. */
class LocalVariableAccess extends VariableAccess instanceof LocalVariableAccessImpl {
  /**
   * Holds if this access is a captured variable access. For example in
   *
   * ```rb
   * def m x
   *   x.times do |y|
   *     puts x
   *   end
   *   puts x
   * end
   * ```
   *
   * the access to `x` in the first `puts x` is a captured access, while
   * the access to `x` in the second `puts x` is not.
   */
  final predicate isCapturedAccess() { isCapturedAccess(this) }

  /** Gets the name of the primary QL class of this node, `LocalVariableAccess`. */
  final override string getAPrimaryQlClass() { result = "LocalVariableAccess" }
}
/**
* An access to a local variable where the value is updated, that is, a
* `LocalVariableAccess` that is also a `VariableWriteAccess`.
*/
class LocalVariableWriteAccess extends LocalVariableAccess, VariableWriteAccess { }
/**
* An access to a local variable where the value is read, that is, a
* `LocalVariableAccess` that is also a `VariableReadAccess`.
*/
class LocalVariableReadAccess extends LocalVariableAccess, VariableReadAccess { }
/** An access to a global variable. */
class GlobalVariableAccess extends VariableAccess instanceof GlobalVariableAccessImpl {
/** Gets the name of the primary QL class of this node, `GlobalVariableAccess`. */
final override string getAPrimaryQlClass() { result = "GlobalVariableAccess" }
}
/**
* An access to a global variable where the value is updated, that is, a
* `GlobalVariableAccess` that is also a `VariableWriteAccess`.
*/
class GlobalVariableWriteAccess extends GlobalVariableAccess, VariableWriteAccess { }
/**
* An access to a global variable where the value is read, that is, a
* `GlobalVariableAccess` that is also a `VariableReadAccess`.
*/
class GlobalVariableReadAccess extends GlobalVariableAccess, VariableReadAccess { }
/** An access to an instance variable. */
class InstanceVariableAccess extends VariableAccess instanceof InstanceVariableAccessImpl {
/** Gets the name of the primary QL class of this node, `InstanceVariableAccess`. */
final override string getAPrimaryQlClass() { result = "InstanceVariableAccess" }
}
/**
* An access to an instance variable where the value is updated, that is, an
* `InstanceVariableAccess` that is also a `VariableWriteAccess`.
*/
class InstanceVariableWriteAccess extends InstanceVariableAccess, VariableWriteAccess { }
/**
* An access to an instance variable where the value is read, that is, an
* `InstanceVariableAccess` that is also a `VariableReadAccess`.
*/
class InstanceVariableReadAccess extends InstanceVariableAccess, VariableReadAccess { }
/** An access to a class variable. */
class ClassVariableAccess extends VariableAccess instanceof ClassVariableAccessRealImpl {
/** Gets the name of the primary QL class of this node, `ClassVariableAccess`. */
final override string getAPrimaryQlClass() { result = "ClassVariableAccess" }
}
/**
* An access to a class variable where the value is updated, that is, a
* `ClassVariableAccess` that is also a `VariableWriteAccess`.
*/
class ClassVariableWriteAccess extends ClassVariableAccess, VariableWriteAccess { }
/**
* An access to a class variable where the value is read, that is, a
* `ClassVariableAccess` that is also a `VariableReadAccess`.
*/
class ClassVariableReadAccess extends ClassVariableAccess, VariableReadAccess { }

View File

@@ -0,0 +1,704 @@
import codeql.Locations
private import TreeSitter
private import codeql.ruby.ast.internal.Call
private import codeql.ruby.ast.internal.Parameter
private import codeql.ruby.ast.internal.Variable
private import codeql.ruby.AST as AST
private import Synthesis
/** Provides the range predicate and the token type used for method-name nodes. */
module MethodName {
  /**
   * Holds if `g` is in method-name position: it is a child of an `undef`
   * node, or the `name` or `alias` field of an `alias` node.
   */
  predicate range(Ruby::UnderscoreMethodName g) {
    g = any(Ruby::Undef undefNode).getChild(_)
    or
    exists(Ruby::Alias aliasNode | g = aliasNode.getName() or g = aliasNode.getAlias())
  }

  /** The union of the database token types that are listed here as method-name tokens. */
  class Token =
    @ruby_setter or @ruby_token_class_variable or @ruby_token_constant or
        @ruby_token_global_variable or @ruby_token_identifier or @ruby_token_instance_variable or
        @ruby_token_operator;
}
/**
 * Holds if some `Synthesis` instance declares that the `i`th child of
 * `parent` is a synthesized child of kind `kind`.
 */
private predicate mkSynthChild(SynthKind kind, AST::AstNode parent, int i) {
  exists(Synthesis synth | synth.child(parent, i, SynthChild(kind)))
}
cached
private module Cached {
/**
* The type of all AST nodes.
*
* A branch that takes a `Ruby::` node `g` wraps that tree-sitter node,
* restricted by the branch's condition where one is given. A branch named
* `...Synth` is instead keyed by a parent AST node and a child index `i` (plus
* any extra payload), and exists when `mkSynthChild` holds for the matching
* `SynthKind`.
*/
cached
newtype TAstNode =
TAddExprReal(Ruby::Binary g) { g instanceof @ruby_binary_plus } or
TAddExprSynth(AST::AstNode parent, int i) { mkSynthChild(AddExprKind(), parent, i) } or
TAliasStmt(Ruby::Alias g) or
// Only for a child of a `break`, `return`, `next`, assignment or operator
// assignment, and only when `g` is an argument list with more than one child
// or a right-assignment list.
TArgumentList(Ruby::AstNode g) {
(
g.getParent() instanceof Ruby::Break or
g.getParent() instanceof Ruby::Return or
g.getParent() instanceof Ruby::Next or
g.getParent() instanceof Ruby::Assignment or
g.getParent() instanceof Ruby::OperatorAssignment
) and
(
strictcount(g.(Ruby::ArgumentList).getChild(_)) > 1
or
g instanceof Ruby::RightAssignmentList
)
} or
TAssignAddExpr(Ruby::OperatorAssignment g) { g instanceof @ruby_operator_assignment_plusequal } or
TAssignBitwiseAndExpr(Ruby::OperatorAssignment g) {
g instanceof @ruby_operator_assignment_ampersandequal
} or
TAssignBitwiseOrExpr(Ruby::OperatorAssignment g) {
g instanceof @ruby_operator_assignment_pipeequal
} or
TAssignBitwiseXorExpr(Ruby::OperatorAssignment g) {
g instanceof @ruby_operator_assignment_caretequal
} or
TAssignDivExpr(Ruby::OperatorAssignment g) { g instanceof @ruby_operator_assignment_slashequal } or
TAssignExponentExpr(Ruby::OperatorAssignment g) {
g instanceof @ruby_operator_assignment_starstarequal
} or
TAssignExprReal(Ruby::Assignment g) or
TAssignExprSynth(AST::AstNode parent, int i) { mkSynthChild(AssignExprKind(), parent, i) } or
TAssignLShiftExpr(Ruby::OperatorAssignment g) {
g instanceof @ruby_operator_assignment_langlelangleequal
} or
TAssignLogicalAndExpr(Ruby::OperatorAssignment g) {
g instanceof @ruby_operator_assignment_ampersandampersandequal
} or
TAssignLogicalOrExpr(Ruby::OperatorAssignment g) {
g instanceof @ruby_operator_assignment_pipepipeequal
} or
TAssignModuloExpr(Ruby::OperatorAssignment g) {
g instanceof @ruby_operator_assignment_percentequal
} or
TAssignMulExpr(Ruby::OperatorAssignment g) { g instanceof @ruby_operator_assignment_starequal } or
TAssignRShiftExpr(Ruby::OperatorAssignment g) {
g instanceof @ruby_operator_assignment_ranglerangleequal
} or
TAssignSubExpr(Ruby::OperatorAssignment g) { g instanceof @ruby_operator_assignment_minusequal } or
TBareStringLiteral(Ruby::BareString g) or
TBareSymbolLiteral(Ruby::BareSymbol g) or
TBeginBlock(Ruby::BeginBlock g) or
TBeginExpr(Ruby::Begin g) or
TBitwiseAndExprReal(Ruby::Binary g) { g instanceof @ruby_binary_ampersand } or
TBitwiseAndExprSynth(AST::AstNode parent, int i) {
mkSynthChild(BitwiseAndExprKind(), parent, i)
} or
TBitwiseOrExprReal(Ruby::Binary g) { g instanceof @ruby_binary_pipe } or
TBitwiseOrExprSynth(AST::AstNode parent, int i) { mkSynthChild(BitwiseOrExprKind(), parent, i) } or
TBitwiseXorExprReal(Ruby::Binary g) { g instanceof @ruby_binary_caret } or
TBitwiseXorExprSynth(AST::AstNode parent, int i) {
mkSynthChild(BitwiseXorExprKind(), parent, i)
} or
TBlockArgument(Ruby::BlockArgument g) or
TBlockParameter(Ruby::BlockParameter g) or
// A brace block whose parent is a lambda does not get a node of its own here.
TBraceBlock(Ruby::Block g) { not g.getParent() instanceof Ruby::Lambda } or
TBreakStmt(Ruby::Break g) or
TCaseEqExpr(Ruby::Binary g) { g instanceof @ruby_binary_equalequalequal } or
TCaseExpr(Ruby::Case g) or
TCharacterLiteral(Ruby::Character g) or
TClassDeclaration(Ruby::Class g) or
TClassVariableAccessReal(Ruby::ClassVariable g, AST::ClassVariable v) {
ClassVariableAccess::range(g, v)
} or
TClassVariableAccessSynth(AST::AstNode parent, int i, AST::ClassVariable v) {
mkSynthChild(ClassVariableAccessKind(v), parent, i)
} or
TComplementExpr(Ruby::Unary g) { g instanceof @ruby_unary_tilde } or
TComplexLiteral(Ruby::Complex g) or
TConstantReadAccessSynth(AST::AstNode parent, int i, string value) {
mkSynthChild(ConstantReadAccessKind(value), parent, i)
} or
TDefinedExpr(Ruby::Unary g) { g instanceof @ruby_unary_definedquestion } or
TDelimitedSymbolLiteral(Ruby::DelimitedSymbol g) or
// Excluded only when the parent is a left-assignment list that has exactly
// one child index.
TDestructuredLeftAssignment(Ruby::DestructuredLeftAssignment g) {
not strictcount(int i | exists(g.getParent().(Ruby::LeftAssignmentList).getChild(i))) = 1
} or
TDivExprReal(Ruby::Binary g) { g instanceof @ruby_binary_slash } or
TDivExprSynth(AST::AstNode parent, int i) { mkSynthChild(DivExprKind(), parent, i) } or
TDo(Ruby::Do g) or
// A `do` block whose parent is a lambda does not get a node of its own here.
TDoBlock(Ruby::DoBlock g) { not g.getParent() instanceof Ruby::Lambda } or
TElementReference(Ruby::ElementReference g) or
TElse(Ruby::Else g) or
TElsif(Ruby::Elsif g) or
TEmptyStmt(Ruby::EmptyStatement g) or
TEndBlock(Ruby::EndBlock g) or
TEnsure(Ruby::Ensure g) or
TEqExpr(Ruby::Binary g) { g instanceof @ruby_binary_equalequal } or
TExponentExprReal(Ruby::Binary g) { g instanceof @ruby_binary_starstar } or
TExponentExprSynth(AST::AstNode parent, int i) { mkSynthChild(ExponentExprKind(), parent, i) } or
TFalseLiteral(Ruby::False g) or
// A float that is the child of a rational literal is not a separate node.
TFloatLiteral(Ruby::Float g) { not any(Ruby::Rational r).getChild() = g } or
TForExpr(Ruby::For g) or
TForIn(Ruby::In g) or // TODO REMOVE
TForwardParameter(Ruby::ForwardParameter g) or
TForwardArgument(Ruby::ForwardArgument g) or
TGEExpr(Ruby::Binary g) { g instanceof @ruby_binary_rangleequal } or
TGTExpr(Ruby::Binary g) { g instanceof @ruby_binary_rangle } or
TGlobalVariableAccessReal(Ruby::GlobalVariable g, AST::GlobalVariable v) {
GlobalVariableAccess::range(g, v)
} or
TGlobalVariableAccessSynth(AST::AstNode parent, int i, AST::GlobalVariable v) {
mkSynthChild(GlobalVariableAccessKind(v), parent, i)
} or
THashKeySymbolLiteral(Ruby::HashKeySymbol g) or
THashLiteral(Ruby::Hash g) or
THashSplatExpr(Ruby::HashSplatArgument g) or
THashSplatParameter(Ruby::HashSplatParameter g) or
THereDoc(Ruby::HeredocBeginning g) or
TIdentifierMethodCall(Ruby::Identifier g) { isIdentifierMethodCall(g) } or
TIf(Ruby::If g) or
TIfModifierExpr(Ruby::IfModifier g) or
TInstanceVariableAccessReal(Ruby::InstanceVariable g, AST::InstanceVariable v) {
InstanceVariableAccess::range(g, v)
} or
TInstanceVariableAccessSynth(AST::AstNode parent, int i, AST::InstanceVariable v) {
mkSynthChild(InstanceVariableAccessKind(v), parent, i)
} or
// An integer that is the child of a rational literal is not a separate node.
TIntegerLiteralReal(Ruby::Integer g) { not any(Ruby::Rational r).getChild() = g } or
TIntegerLiteralSynth(AST::AstNode parent, int i, int value) {
mkSynthChild(IntegerLiteralKind(value), parent, i)
} or
TKeywordParameter(Ruby::KeywordParameter g) or
TLEExpr(Ruby::Binary g) { g instanceof @ruby_binary_langleequal } or
TLShiftExprReal(Ruby::Binary g) { g instanceof @ruby_binary_langlelangle } or
TLShiftExprSynth(AST::AstNode parent, int i) { mkSynthChild(LShiftExprKind(), parent, i) } or
TLTExpr(Ruby::Binary g) { g instanceof @ruby_binary_langle } or
TLambda(Ruby::Lambda g) or
TLeftAssignmentList(Ruby::LeftAssignmentList g) or
TLocalVariableAccessReal(Ruby::Identifier g, AST::LocalVariable v) {
LocalVariableAccess::range(g, v)
} or
// Both synthesis kinds for local variable accesses map to this one branch.
TLocalVariableAccessSynth(AST::AstNode parent, int i, AST::LocalVariable v) {
mkSynthChild(LocalVariableAccessRealKind(v), parent, i)
or
mkSynthChild(LocalVariableAccessSynthKind(v), parent, i)
} or
// Covers both the `and` and the `&&` binary node types.
TLogicalAndExprReal(Ruby::Binary g) {
g instanceof @ruby_binary_and or g instanceof @ruby_binary_ampersandampersand
} or
TLogicalAndExprSynth(AST::AstNode parent, int i) {
mkSynthChild(LogicalAndExprKind(), parent, i)
} or
// Covers both the `or` and the `||` binary node types.
TLogicalOrExprReal(Ruby::Binary g) {
g instanceof @ruby_binary_or or g instanceof @ruby_binary_pipepipe
} or
TLogicalOrExprSynth(AST::AstNode parent, int i) { mkSynthChild(LogicalOrExprKind(), parent, i) } or
TMethod(Ruby::Method g) or
TMethodCallSynth(AST::AstNode parent, int i, string name, boolean setter, int arity) {
mkSynthChild(MethodCallKind(name, setter, arity), parent, i)
} or
TModuleDeclaration(Ruby::Module g) or
TModuloExprReal(Ruby::Binary g) { g instanceof @ruby_binary_percent } or
TModuloExprSynth(AST::AstNode parent, int i) { mkSynthChild(ModuloExprKind(), parent, i) } or
TMulExprReal(Ruby::Binary g) { g instanceof @ruby_binary_star } or
TMulExprSynth(AST::AstNode parent, int i) { mkSynthChild(MulExprKind(), parent, i) } or
TNEExpr(Ruby::Binary g) { g instanceof @ruby_binary_bangequal } or
TNextStmt(Ruby::Next g) or
TNilLiteral(Ruby::Nil g) or
TNoRegExpMatchExpr(Ruby::Binary g) { g instanceof @ruby_binary_bangtilde } or
TNotExpr(Ruby::Unary g) { g instanceof @ruby_unary_bang or g instanceof @ruby_unary_not } or
TOptionalParameter(Ruby::OptionalParameter g) or
TPair(Ruby::Pair g) or
TParenthesizedExpr(Ruby::ParenthesizedStatements g) or
TRShiftExprReal(Ruby::Binary g) { g instanceof @ruby_binary_ranglerangle } or
TRShiftExprSynth(AST::AstNode parent, int i) { mkSynthChild(RShiftExprKind(), parent, i) } or
TRangeLiteralReal(Ruby::Range g) or
TRangeLiteralSynth(AST::AstNode parent, int i, boolean inclusive) {
mkSynthChild(RangeLiteralKind(inclusive), parent, i)
} or
TRationalLiteral(Ruby::Rational g) or
TRedoStmt(Ruby::Redo g) or
TRegExpLiteral(Ruby::Regex g) or
TRegExpMatchExpr(Ruby::Binary g) { g instanceof @ruby_binary_equaltilde } or
TRegularArrayLiteral(Ruby::Array g) or
TRegularMethodCall(Ruby::Call g) { isRegularMethodCall(g) } or
TRegularStringLiteral(Ruby::String g) or
TRegularSuperCall(Ruby::Call g) { g.getMethod() instanceof Ruby::Super } or
TRescueClause(Ruby::Rescue g) or
TRescueModifierExpr(Ruby::RescueModifier g) or
TRetryStmt(Ruby::Retry g) or
TReturnStmt(Ruby::Return g) or
TScopeResolutionConstantAccess(Ruby::ScopeResolution g, Ruby::Constant constant) {
constant = g.getName() and
(
// A tree-sitter `scope_resolution` node with a `constant` name field is a
// read of that constant in any context where an identifier would be a
// vcall.
vcall(g)
or
explicitAssignmentNode(g, _)
)
} or
TScopeResolutionMethodCall(Ruby::ScopeResolution g, Ruby::Identifier i) {
isScopeResolutionMethodCall(g, i)
} or
TSelfReal(Ruby::Self g) or
TSelfSynth(AST::AstNode parent, int i) { mkSynthChild(SelfKind(), parent, i) } or
TSimpleParameter(Ruby::Identifier g) { g instanceof Parameter::Range } or
TSimpleSymbolLiteral(Ruby::SimpleSymbol g) or
TSingletonClass(Ruby::SingletonClass g) or
TSingletonMethod(Ruby::SingletonMethod g) or
TSpaceshipExpr(Ruby::Binary g) { g instanceof @ruby_binary_langleequalrangle } or
TSplatExprReal(Ruby::SplatArgument g) or
TSplatExprSynth(AST::AstNode parent, int i) { mkSynthChild(SplatExprKind(), parent, i) } or
TSplatParameter(Ruby::SplatParameter g) or
TStmtSequenceSynth(AST::AstNode parent, int i) { mkSynthChild(StmtSequenceKind(), parent, i) } or
TStringArrayLiteral(Ruby::StringArray g) or
TStringConcatenation(Ruby::ChainedString g) or
TStringEscapeSequenceComponent(Ruby::EscapeSequence g) or
TStringInterpolationComponent(Ruby::Interpolation g) or
// Text components come from either string content or heredoc content tokens.
TStringTextComponent(Ruby::Token g) {
g instanceof Ruby::StringContent or g instanceof Ruby::HeredocContent
} or
TSubExprReal(Ruby::Binary g) { g instanceof @ruby_binary_minus } or
TSubExprSynth(AST::AstNode parent, int i) { mkSynthChild(SubExprKind(), parent, i) } or
TSubshellLiteral(Ruby::Subshell g) or
TSymbolArrayLiteral(Ruby::SymbolArray g) or
TTernaryIfExpr(Ruby::Conditional g) or
TThen(Ruby::Then g) or
TTokenConstantAccess(Ruby::Constant g) {
// A tree-sitter `constant` token is a read of that constant in any context
// where an identifier would be a vcall.
vcall(g)
or
explicitAssignmentNode(g, _)
} or
TTokenMethodName(MethodName::Token g) { MethodName::range(g) } or
TTokenSuperCall(Ruby::Super g) { vcall(g) } or
TToplevel(Ruby::Program g) or
TTrueLiteral(Ruby::True g) or
TTuplePatternParameter(Ruby::DestructuredParameter g) or
TUnaryMinusExpr(Ruby::Unary g) { g instanceof @ruby_unary_minus } or
TUnaryPlusExpr(Ruby::Unary g) { g instanceof @ruby_unary_plus } or
TUndefStmt(Ruby::Undef g) or
TUnlessExpr(Ruby::Unless g) or
TUnlessModifierExpr(Ruby::UnlessModifier g) or
TUntilExpr(Ruby::Until g) or
TUntilModifierExpr(Ruby::UntilModifier g) or
TWhenExpr(Ruby::When g) or
TWhileExpr(Ruby::While g) or
TWhileModifierExpr(Ruby::WhileModifier g) or
TYieldCall(Ruby::Yield g)
/**
* Gets the underlying TreeSitter entity for a given AST node. This does not
* include synthesized AST nodes, because they are not the primary AST node
* for any given generated node.
*
* There is one disjunct per `TAstNode` branch that wraps a tree-sitter node.
* For branches that carry extra components (for example the variable of a
* variable access), only the tree-sitter node is projected out. When a
* non-synthesized branch is added to `TAstNode`, a matching disjunct must be
* added here.
*/
cached
Ruby::AstNode toGenerated(AST::AstNode n) {
n = TAddExprReal(result) or
n = TAliasStmt(result) or
n = TArgumentList(result) or
n = TAssignAddExpr(result) or
n = TAssignBitwiseAndExpr(result) or
n = TAssignBitwiseOrExpr(result) or
n = TAssignBitwiseXorExpr(result) or
n = TAssignDivExpr(result) or
n = TAssignExponentExpr(result) or
n = TAssignExprReal(result) or
n = TAssignLShiftExpr(result) or
n = TAssignLogicalAndExpr(result) or
n = TAssignLogicalOrExpr(result) or
n = TAssignModuloExpr(result) or
n = TAssignMulExpr(result) or
n = TAssignRShiftExpr(result) or
n = TAssignSubExpr(result) or
n = TBareStringLiteral(result) or
n = TBareSymbolLiteral(result) or
n = TBeginBlock(result) or
n = TBeginExpr(result) or
n = TBitwiseAndExprReal(result) or
n = TBitwiseOrExprReal(result) or
n = TBitwiseXorExprReal(result) or
n = TBlockArgument(result) or
n = TBlockParameter(result) or
n = TBraceBlock(result) or
n = TBreakStmt(result) or
n = TCaseEqExpr(result) or
n = TCaseExpr(result) or
n = TCharacterLiteral(result) or
n = TClassDeclaration(result) or
n = TClassVariableAccessReal(result, _) or
n = TComplementExpr(result) or
n = TComplexLiteral(result) or
n = TDefinedExpr(result) or
n = TDelimitedSymbolLiteral(result) or
n = TDestructuredLeftAssignment(result) or
n = TDivExprReal(result) or
n = TDo(result) or
n = TDoBlock(result) or
n = TElementReference(result) or
n = TElse(result) or
n = TElsif(result) or
n = TEmptyStmt(result) or
n = TEndBlock(result) or
n = TEnsure(result) or
n = TEqExpr(result) or
n = TExponentExprReal(result) or
n = TFalseLiteral(result) or
n = TFloatLiteral(result) or
n = TForExpr(result) or
n = TForIn(result) or // TODO REMOVE
n = TForwardArgument(result) or
n = TForwardParameter(result) or
n = TGEExpr(result) or
n = TGTExpr(result) or
n = TGlobalVariableAccessReal(result, _) or
n = THashKeySymbolLiteral(result) or
n = THashLiteral(result) or
n = THashSplatExpr(result) or
n = THashSplatParameter(result) or
n = THereDoc(result) or
n = TIdentifierMethodCall(result) or
n = TIf(result) or
n = TIfModifierExpr(result) or
n = TInstanceVariableAccessReal(result, _) or
n = TIntegerLiteralReal(result) or
n = TKeywordParameter(result) or
n = TLEExpr(result) or
n = TLShiftExprReal(result) or
n = TLTExpr(result) or
n = TLambda(result) or
n = TLeftAssignmentList(result) or
n = TLocalVariableAccessReal(result, _) or
n = TLogicalAndExprReal(result) or
n = TLogicalOrExprReal(result) or
n = TMethod(result) or
n = TModuleDeclaration(result) or
n = TModuloExprReal(result) or
n = TMulExprReal(result) or
n = TNEExpr(result) or
n = TNextStmt(result) or
n = TNilLiteral(result) or
n = TNoRegExpMatchExpr(result) or
n = TNotExpr(result) or
n = TOptionalParameter(result) or
n = TPair(result) or
n = TParenthesizedExpr(result) or
n = TRShiftExprReal(result) or
n = TRangeLiteralReal(result) or
n = TRationalLiteral(result) or
n = TRedoStmt(result) or
n = TRegExpLiteral(result) or
n = TRegExpMatchExpr(result) or
n = TRegularArrayLiteral(result) or
n = TRegularMethodCall(result) or
n = TRegularStringLiteral(result) or
n = TRegularSuperCall(result) or
n = TRescueClause(result) or
n = TRescueModifierExpr(result) or
n = TRetryStmt(result) or
n = TReturnStmt(result) or
n = TScopeResolutionConstantAccess(result, _) or
n = TScopeResolutionMethodCall(result, _) or
n = TSelfReal(result) or
n = TSimpleParameter(result) or
n = TSimpleSymbolLiteral(result) or
n = TSingletonClass(result) or
n = TSingletonMethod(result) or
n = TSpaceshipExpr(result) or
n = TSplatExprReal(result) or
n = TSplatParameter(result) or
n = TStringArrayLiteral(result) or
n = TStringConcatenation(result) or
n = TStringEscapeSequenceComponent(result) or
n = TStringInterpolationComponent(result) or
n = TStringTextComponent(result) or
n = TSubExprReal(result) or
n = TSubshellLiteral(result) or
n = TSymbolArrayLiteral(result) or
n = TTernaryIfExpr(result) or
n = TThen(result) or
n = TTokenConstantAccess(result) or
n = TTokenMethodName(result) or
n = TTokenSuperCall(result) or
n = TToplevel(result) or
n = TTrueLiteral(result) or
n = TTuplePatternParameter(result) or
n = TUnaryMinusExpr(result) or
n = TUnaryPlusExpr(result) or
n = TUndefStmt(result) or
n = TUnlessExpr(result) or
n = TUnlessModifierExpr(result) or
n = TUntilExpr(result) or
n = TUntilModifierExpr(result) or
n = TWhenExpr(result) or
n = TWhileExpr(result) or
n = TWhileModifierExpr(result) or
n = TYieldCall(result)
}
/**
 * Gets the synthesized AST node that is the `i`th child of `parent`, if any.
 *
 * There is one disjunct per `...Synth` branch of `TAstNode`; extra payload
 * components (variable, value, name, ...) are left unconstrained.
 */
cached
AST::AstNode getSynthChild(AST::AstNode parent, int i) {
  result = TAddExprSynth(parent, i) or
  result = TAssignExprSynth(parent, i) or
  result = TBitwiseAndExprSynth(parent, i) or
  result = TBitwiseOrExprSynth(parent, i) or
  result = TBitwiseXorExprSynth(parent, i) or
  result = TClassVariableAccessSynth(parent, i, _) or
  result = TConstantReadAccessSynth(parent, i, _) or
  result = TDivExprSynth(parent, i) or
  result = TExponentExprSynth(parent, i) or
  result = TGlobalVariableAccessSynth(parent, i, _) or
  result = TInstanceVariableAccessSynth(parent, i, _) or
  result = TIntegerLiteralSynth(parent, i, _) or
  result = TLShiftExprSynth(parent, i) or
  result = TLocalVariableAccessSynth(parent, i, _) or
  result = TLogicalAndExprSynth(parent, i) or
  result = TLogicalOrExprSynth(parent, i) or
  result = TMethodCallSynth(parent, i, _, _, _) or
  result = TModuloExprSynth(parent, i) or
  result = TMulExprSynth(parent, i) or
  result = TRangeLiteralSynth(parent, i, _) or
  result = TRShiftExprSynth(parent, i) or
  result = TSelfSynth(parent, i) or
  result = TSplatExprSynth(parent, i) or
  result = TStmtSequenceSynth(parent, i) or
  result = TSubExprSynth(parent, i)
}
/**
 * Holds if `child` is the `i`th child of `parent`, where at least one of
 * `parent` and `child` is a synthesized node.
 *
 * `child` is either a real node that some `Synthesis` instance places at
 * index `i` of `parent`, or the synthesized node found at that index.
 */
cached
predicate synthChild(AST::AstNode parent, int i, AST::AstNode child) {
  exists(Synthesis synth | synth.child(parent, i, RealChild(child)))
  or
  getSynthChild(parent, i) = child
}
/**
 * Like `toGenerated`, but also has a result for synthesized AST nodes.
 *
 * A node without a generated node of its own takes the generated node
 * computed (recursively) for any parent it is a `synthChild` of.
 */
cached
Ruby::AstNode toGeneratedInclSynth(AST::AstNode n) {
  if exists(toGenerated(n))
  then result = toGenerated(n)
  else
    exists(AST::AstNode synthParent |
      synthChild(synthParent, _, n) and
      result = toGeneratedInclSynth(synthParent)
    )
}
/**
 * Gets the location of AST node `n`. This is any of:
 *
 * - the location of the tree-sitter node underlying `n`;
 * - the location that `synthLocation` assigns to `n`;
 * - for a synthesized node without a `synthLocation`, the location of its
 *   parent.
 */
cached
Location getLocation(AST::AstNode n) {
  result = toGenerated(n).getLocation()
  or
  synthLocation(n, result)
  or
  n.isSynthesized() and
  not synthLocation(n, _) and
  result = getLocation(n.getParent())
}
}
import Cached
/**
 * Gets an AST node whose underlying tree-sitter node is `n`; this is the
 * inverse of `toGenerated`.
 */
TAstNode fromGenerated(Ruby::AstNode n) { toGenerated(result) = n }
// Type unions over `TAstNode` branches: calls, constant accesses, control
// expressions and `self`.
/** A method call or a `yield` call. */
class TCall = TMethodCall or TYieldCall;
/**
* Any method call. Besides the explicit call forms, this union also contains
* element references, `super` calls, and unary and binary operations.
*/
class TMethodCall =
TMethodCallSynth or TIdentifierMethodCall or TScopeResolutionMethodCall or TRegularMethodCall or
TElementReference or TSuperCall or TUnaryOperation or TBinaryOperation;
/** A `super` call, either a bare `super` token or a regular call whose method is `super`. */
class TSuperCall = TTokenSuperCall or TRegularSuperCall;
/** A constant access. Namespace (class and module) declarations are part of this union. */
class TConstantAccess =
TTokenConstantAccess or TScopeResolutionConstantAccess or TNamespace or TConstantReadAccessSynth;
/** A conditional expression, a `case` expression, or a loop. */
class TControlExpr = TConditionalExpr or TCaseExpr or TLoop;
/** An `if`/`unless` expression in block, modifier or ternary form. */
class TConditionalExpr =
TIfExpr or TUnlessExpr or TIfModifierExpr or TUnlessModifierExpr or TTernaryIfExpr;
/** An `if` or an `elsif`. */
class TIfExpr = TIf or TElsif;
/** A `while` or `until` loop, in block or modifier form. */
class TConditionalLoop = TWhileExpr or TUntilExpr or TWhileModifierExpr or TUntilModifierExpr;
/** A conditional loop or a `for` loop. */
class TLoop = TConditionalLoop or TForExpr;
/** A `self` node, real or synthesized. */
class TSelf = TSelfReal or TSelfSynth;
/** The union of all branches that are treated as expressions. */
class TExpr =
TSelf or TArgumentList or TRescueClause or TRescueModifierExpr or TPair or TStringConcatenation or
TCall or TBlockArgument or TConstantAccess or TControlExpr or TWhenExpr or TLiteral or
TCallable or TVariableAccess or TStmtSequence or TOperation or TSimpleParameter or
TForwardArgument;
/** A splat expression, real or synthesized. */
class TSplatExpr = TSplatExprReal or TSplatExprSynth;
/**
* The union of all branches that are treated as statement sequences. String
* interpolation components and parenthesized expressions are part of this
* union.
*/
class TStmtSequence =
TBeginBlock or TEndBlock or TThen or TElse or TDo or TEnsure or TStringInterpolationComponent or
TBlock or TBodyStmt or TParenthesizedExpr or TStmtSequenceSynth;
/** The union of all branches that are treated as body statements. */
class TBodyStmt = TBeginExpr or TModuleBase or TMethod or TLambda or TDoBlock or TSingletonMethod;
/** Any literal. Method-name tokens (`TTokenMethodName`) are grouped with the literals. */
class TLiteral =
TNumericLiteral or TNilLiteral or TBooleanLiteral or TStringlikeLiteral or TCharacterLiteral or
TArrayLiteral or THashLiteral or TRangeLiteral or TTokenMethodName;
/** An integer, float, rational or complex literal. */
class TNumericLiteral = TIntegerLiteral or TFloatLiteral or TRationalLiteral or TComplexLiteral;
/** An integer literal, real or synthesized. */
class TIntegerLiteral = TIntegerLiteralReal or TIntegerLiteralSynth;
/** A `true` or `false` literal. */
class TBooleanLiteral = TTrueLiteral or TFalseLiteral;
/** A text, escape-sequence or interpolation component of a string-like literal. */
class TStringComponent =
TStringTextComponent or TStringEscapeSequenceComponent or TStringInterpolationComponent;
/** A string, regular expression, symbol, subshell or heredoc literal. */
class TStringlikeLiteral =
TStringLiteral or TRegExpLiteral or TSymbolLiteral or TSubshellLiteral or THereDoc;
/** A regular or bare string literal. */
class TStringLiteral = TRegularStringLiteral or TBareStringLiteral;
/** A simple, complex or hash-key symbol literal. */
class TSymbolLiteral = TSimpleSymbolLiteral or TComplexSymbolLiteral or THashKeySymbolLiteral;
/** A delimited or bare symbol literal. */
class TComplexSymbolLiteral = TDelimitedSymbolLiteral or TBareSymbolLiteral;
/** A regular array, string array or symbol array literal. */
class TArrayLiteral = TRegularArrayLiteral or TStringArrayLiteral or TSymbolArrayLiteral;
/** A method, lambda or block. */
class TCallable = TMethodBase or TLambda or TBlock;
/** A method or a singleton method. */
class TMethodBase = TMethod or TSingletonMethod;
/** A `do` block or a brace block. */
class TBlock = TDoBlock or TBraceBlock;
/** A top-level program, a namespace, or a singleton class. */
class TModuleBase = TToplevel or TNamespace or TSingletonClass;
/** A class or module declaration. */
class TNamespace = TClassDeclaration or TModuleDeclaration;
// Operations are unary operations, binary operations or assignments.
class TOperation = TUnaryOperation or TBinaryOperation or TAssignment;
// Unary operations, including `defined?`-style, splat and hash-splat expressions.
class TUnaryOperation =
TUnaryLogicalOperation or TUnaryArithmeticOperation or TUnaryBitwiseOperation or TDefinedExpr or
TSplatExpr or THashSplatExpr;
class TUnaryLogicalOperation = TNotExpr;
class TUnaryArithmeticOperation = TUnaryPlusExpr or TUnaryMinusExpr;
class TUnaryBitwiseOperation = TComplementExpr;
// Binary operations, grouped into arithmetic, logical, bitwise and comparison families plus three stand-alone kinds.
class TBinaryOperation =
TBinaryArithmeticOperation or TBinaryLogicalOperation or TBinaryBitwiseOperation or
TComparisonOperation or TSpaceshipExpr or TRegExpMatchExpr or TNoRegExpMatchExpr;
class TBinaryArithmeticOperation =
TAddExpr or TSubExpr or TMulExpr or TDivExpr or TModuloExpr or TExponentExpr;
// Each arithmetic operator has a `Real` and a `Synth` branch.
class TAddExpr = TAddExprReal or TAddExprSynth;
class TSubExpr = TSubExprReal or TSubExprSynth;
class TMulExpr = TMulExprReal or TMulExprSynth;
class TDivExpr = TDivExprReal or TDivExprSynth;
class TModuloExpr = TModuloExprReal or TModuloExprSynth;
class TExponentExpr = TExponentExprReal or TExponentExprSynth;
class TBinaryLogicalOperation = TLogicalAndExpr or TLogicalOrExpr;
class TLogicalAndExpr = TLogicalAndExprReal or TLogicalAndExprSynth;
class TLogicalOrExpr = TLogicalOrExprReal or TLogicalOrExprSynth;
class TBinaryBitwiseOperation =
TLShiftExpr or TRShiftExpr or TBitwiseAndExpr or TBitwiseOrExpr or TBitwiseXorExpr;
class TLShiftExpr = TLShiftExprReal or TLShiftExprSynth;
// Range literals are not operations; this alias merely sits among the bitwise aliases.
class TRangeLiteral = TRangeLiteralReal or TRangeLiteralSynth;
class TRShiftExpr = TRShiftExprReal or TRShiftExprSynth;
class TBitwiseAndExpr = TBitwiseAndExprReal or TBitwiseAndExprSynth;
class TBitwiseOrExpr = TBitwiseOrExprReal or TBitwiseOrExprSynth;
class TBitwiseXorExpr = TBitwiseXorExprReal or TBitwiseXorExprSynth;
// Comparisons are split into equality and relational operations.
class TComparisonOperation = TEqualityOperation or TRelationalOperation;
class TEqualityOperation = TEqExpr or TNEExpr or TCaseEqExpr;
class TRelationalOperation = TGTExpr or TGEExpr or TLTExpr or TLEExpr;
// Plain assignment has a `Real` and a `Synth` branch.
class TAssignExpr = TAssignExprReal or TAssignExprSynth;
// An assignment is either a plain assignment or an operator assignment.
class TAssignment = TAssignExpr or TAssignOperation;
// Operator assignments, grouped by the family of the combined operator.
class TAssignOperation =
TAssignArithmeticOperation or TAssignLogicalOperation or TAssignBitwiseOperation;
class TAssignArithmeticOperation =
TAssignAddExpr or TAssignSubExpr or TAssignMulExpr or TAssignDivExpr or TAssignModuloExpr or
TAssignExponentExpr;
class TAssignLogicalOperation = TAssignLogicalAndExpr or TAssignLogicalOrExpr;
class TAssignBitwiseOperation =
TAssignLShiftExpr or TAssignRShiftExpr or TAssignBitwiseAndExpr or TAssignBitwiseOrExpr or
TAssignBitwiseXorExpr;
// Statements; note that every expression (`TExpr`) is also a statement.
class TStmt =
TEmptyStmt or TBodyStmt or TStmtSequence or TUndefStmt or TAliasStmt or TReturningStmt or
TRedoStmt or TRetryStmt or TExpr;
// The `return`, `break` and `next` statement branches.
class TReturningStmt = TReturnStmt or TBreakStmt or TNextStmt;
// All parameter kinds.
class TParameter =
TPatternParameter or TBlockParameter or THashSplatParameter or TKeywordParameter or
TOptionalParameter or TSplatParameter or TForwardParameter;
class TPatternParameter = TSimpleParameter or TTuplePatternParameter;
// Same alternatives as `TParameter`, except that only `TSimpleParameter` is taken from
// `TPatternParameter` (no tuple patterns) and `TForwardParameter` is left out.
class TNamedParameter =
TSimpleParameter or TBlockParameter or THashSplatParameter or TKeywordParameter or
TOptionalParameter or TSplatParameter;
// Tuple patterns can occur as parameters or on the left-hand side of assignments.
class TTuplePattern = TTuplePatternParameter or TDestructuredLeftAssignment or TLeftAssignmentList;
// Variable accesses, one alias per kind of variable.
class TVariableAccess =
TLocalVariableAccess or TGlobalVariableAccess or TInstanceVariableAccess or TClassVariableAccess;
// Each kind of variable access has a `Real` and a `Synth` branch.
class TLocalVariableAccess = TLocalVariableAccessReal or TLocalVariableAccessSynth;
class TGlobalVariableAccess = TGlobalVariableAccessReal or TGlobalVariableAccessSynth;
class TInstanceVariableAccess = TInstanceVariableAccessReal or TInstanceVariableAccessSynth;
class TClassVariableAccess = TClassVariableAccessReal or TClassVariableAccessSynth;

View File

@@ -0,0 +1,186 @@
private import TreeSitter
private import Variable
private import codeql.ruby.AST
private import codeql.ruby.ast.internal.AST
/**
 * Holds if identifier `g` satisfies `vcall` and is not classified as a
 * variable access by `access`.
 */
predicate isIdentifierMethodCall(Ruby::Identifier g) {
  not access(g, _) and
  vcall(g)
}
/** Holds if the method position of call `g` is not occupied by a `Ruby::Super` node. */
predicate isRegularMethodCall(Ruby::Call g) {
  not exists(Ruby::Super superToken | superToken = g.getMethod())
}
/**
 * Holds if `i` is the name of scope resolution `g`, and `g` does not occur
 * in the method position of any `Ruby::Call`.
 */
predicate isScopeResolutionMethodCall(Ruby::ScopeResolution g, Ruby::Identifier i) {
  not exists(Ruby::Call call | g = call.getMethod()) and
  g.getName() = i
}
/**
 * Internal base class for calls. Concrete subclasses supply the argument
 * accessors declared here.
 */
abstract class CallImpl extends Expr, TCall {
  /**
   * Gets the number of arguments of this call.
   *
   * Every subclass has to provide this explicitly. A generic definition
   * such as
   *
   * ```ql
   * result = count(this.getArgumentImpl(_))
   * ```
   *
   * is not possible, since that will result in a non-monotonicity error.
   */
  abstract int getNumberOfArgumentsImpl();

  /** Gets the argument of this call at index `n`. */
  abstract AstNode getArgumentImpl(int n);
}
/**
 * Internal base class for method calls: adds method name, receiver and block
 * accessors to `CallImpl`.
 */
abstract class MethodCallImpl extends CallImpl, TMethodCall {
  /** Gets the name of the called method. */
  abstract string getMethodNameImpl();

  /** Gets the receiver of this call, if any. */
  abstract AstNode getReceiverImpl();

  /** Gets the block attached to this call, if any. */
  abstract Block getBlockImpl();
}
/** A synthesized method call, represented by the `TMethodCallSynth` branch. */
class MethodCallSynth extends MethodCallImpl, TMethodCallSynth {
  /** Gets the receiver: synthesized child number 0. */
  final override AstNode getReceiverImpl() { synthChild(this, 0, result) }

  /**
   * Gets the method name stored in the `TMethodCallSynth` tuple, with `=`
   * appended when the tuple's `setter` flag is `true`.
   */
  final override string getMethodNameImpl() {
    exists(string base, boolean isSetter | this = TMethodCallSynth(_, _, base, isSetter, _) |
      isSetter = false and result = base
      or
      isSetter = true and result = base + "="
    )
  }

  /** Gets argument `n` (for `n >= 0`): synthesized child number `n + 1`. */
  final override AstNode getArgumentImpl(int n) { n >= 0 and synthChild(this, n + 1, result) }

  /** Gets the arity recorded as the last component of the `TMethodCallSynth` tuple. */
  final override int getNumberOfArgumentsImpl() { this = TMethodCallSynth(_, _, _, _, result) }

  /** Synthesized method calls never have a block. */
  final override Block getBlockImpl() { none() }
}
/**
 * A method call that is written as a bare identifier: no arguments, no block,
 * and a synthesized `self` receiver.
 */
class IdentifierMethodCall extends MethodCallImpl, TIdentifierMethodCall {
  private Ruby::Identifier ident;

  IdentifierMethodCall() { this = TIdentifierMethodCall(ident) }

  /** Gets the synthesized `self` node at child index 0 of this call. */
  final override AstNode getReceiverImpl() { result = TSelfSynth(this, 0) }

  /** Gets the text of the underlying identifier. */
  final override string getMethodNameImpl() { ident.getValue() = result }

  final override int getNumberOfArgumentsImpl() { result = 0 }

  final override Expr getArgumentImpl(int n) { none() }

  final override Block getBlockImpl() { none() }
}
/**
 * A method call written as a scope resolution whose name is an identifier.
 * Such calls have no arguments and no block.
 */
class ScopeResolutionMethodCall extends MethodCallImpl, TScopeResolutionMethodCall {
  private Ruby::ScopeResolution scopeRes;
  private Ruby::Identifier nameId;

  ScopeResolutionMethodCall() { this = TScopeResolutionMethodCall(scopeRes, nameId) }

  /** Gets the expression corresponding to the scope part of the scope resolution. */
  final override Expr getReceiverImpl() { scopeRes.getScope() = toGenerated(result) }

  /** Gets the text of the identifier in the name position. */
  final override string getMethodNameImpl() { nameId.getValue() = result }

  final override int getNumberOfArgumentsImpl() { result = 0 }

  final override Expr getArgumentImpl(int n) { none() }

  final override Block getBlockImpl() { none() }
}
/** A method call backed by a `Ruby::Call` node (the `TRegularMethodCall` branch). */
class RegularMethodCall extends MethodCallImpl, TRegularMethodCall {
  private Ruby::Call call;

  RegularMethodCall() { this = TRegularMethodCall(call) }

  /**
   * Gets the method name:
   * - `"call"` when the method position holds an argument list,
   * - the token text when the method position holds a token, or
   * - the text of the name token when the method position holds a scope resolution.
   *
   * Has no result unless `isRegularMethodCall` holds for the underlying node.
   */
  final override string getMethodNameImpl() {
    isRegularMethodCall(call) and
    exists(Ruby::AstNode method | method = call.getMethod() |
      method instanceof Ruby::ArgumentList and result = "call"
      or
      result = method.(Ruby::Token).getValue()
      or
      result = method.(Ruby::ScopeResolution).getName().(Ruby::Token).getValue()
    )
  }

  /**
   * Gets the receiver: the synthesized `self` child (when such a node exists),
   * the explicit receiver, or, absent an explicit receiver, the scope of a
   * scope resolution in the method position.
   */
  final override Expr getReceiverImpl() {
    result = TSelfSynth(this, 0)
    or
    toGenerated(result) = call.getReceiver()
    or
    not exists(call.getReceiver()) and
    toGenerated(result) = call.getMethod().(Ruby::ScopeResolution).getScope()
  }

  /**
   * Gets argument `n`, taken either from the call's argument list or from an
   * argument list sitting in the method position.
   */
  final override Expr getArgumentImpl(int n) {
    toGenerated(result) = call.getMethod().(Ruby::ArgumentList).getChild(n)
    or
    toGenerated(result) = call.getArguments().getChild(n)
  }

  /** Gets the total number of children of both possible argument lists. */
  final override int getNumberOfArgumentsImpl() {
    exists(int listed, int inMethodPosition |
      listed = count(call.getArguments().getChild(_)) and
      inMethodPosition = count(call.getMethod().(Ruby::ArgumentList).getChild(_)) and
      result = listed + inMethodPosition
    )
  }

  /** Gets the block of the underlying call node, if any. */
  final override Block getBlockImpl() { call.getBlock() = toGenerated(result) }
}
/** An element reference, modeled as a call to the method named `[]`. */
class ElementReferenceImpl extends MethodCallImpl, TElementReference {
  private Ruby::ElementReference elemRef;

  ElementReferenceImpl() { this = TElementReference(elemRef) }

  final override string getMethodNameImpl() { result = "[]" }

  /** Gets the expression for the object being indexed. */
  final override Expr getReceiverImpl() { elemRef.getObject() = toGenerated(result) }

  /** Gets the expression for child `n` of the element reference. */
  final override Expr getArgumentImpl(int n) { elemRef.getChild(n) = toGenerated(result) }

  /** Gets the number of children of the element reference. */
  final override int getNumberOfArgumentsImpl() { result = count(elemRef.getChild(_)) }

  final override Block getBlockImpl() { none() }
}
/** Internal base class for `super` calls; adds no members of its own. */
abstract class SuperCallImpl extends MethodCallImpl, TSuperCall { }
/**
 * A `super` call backed by a lone `Ruby::Super` token: no receiver, no
 * arguments and no block.
 */
class TokenSuperCall extends SuperCallImpl, TTokenSuperCall {
  private Ruby::Super superToken;

  TokenSuperCall() { this = TTokenSuperCall(superToken) }

  /** Gets the text of the `super` token. */
  final override string getMethodNameImpl() { superToken.getValue() = result }

  final override Expr getReceiverImpl() { none() }

  final override int getNumberOfArgumentsImpl() { result = 0 }

  final override Expr getArgumentImpl(int n) { none() }

  final override Block getBlockImpl() { none() }
}
/** A `super` call backed by a `Ruby::Call` node whose method position holds a `Ruby::Super`. */
class RegularSuperCall extends SuperCallImpl, TRegularSuperCall {
  private Ruby::Call call;

  RegularSuperCall() { this = TRegularSuperCall(call) }

  /** Gets the text of the `super` token in the method position. */
  final override string getMethodNameImpl() { call.getMethod().(Ruby::Super).getValue() = result }

  final override Expr getReceiverImpl() { none() }

  /** Gets the number of children of the call's argument list. */
  final override int getNumberOfArgumentsImpl() { result = count(call.getArguments().getChild(_)) }

  /** Gets the expression for child `n` of the call's argument list. */
  final override Expr getArgumentImpl(int n) {
    call.getArguments().getChild(n) = toGenerated(result)
  }

  /** Gets the block of the underlying call node, if any. */
  final override Block getBlockImpl() { call.getBlock() = toGenerated(result) }
}
/** A `yield` call backed by a `Ruby::Yield` node. */
class YieldCallImpl extends CallImpl, TYieldCall {
  Ruby::Yield g;

  YieldCallImpl() { this = TYieldCall(g) }

  /** Gets the number of grandchildren reachable through the yield node's child. */
  final override int getNumberOfArgumentsImpl() { result = count(g.getChild().getChild(_)) }

  /** Gets the expression for child `n` of the yield node's child. */
  final override Expr getArgumentImpl(int n) { g.getChild().getChild(n) = toGenerated(result) }
}

View File

@@ -0,0 +1,43 @@
import codeql.Locations
private import TreeSitter
private import codeql.ruby.ast.Erb
cached
private module Cached {
  /** An ERB AST node: one branch per kind of wrapped `Erb` tree-sitter entity. */
  cached
  newtype TAstNode =
    TCommentDirective(Erb::CommentDirective g) or
    TDirective(Erb::Directive g) or
    TGraphqlDirective(Erb::GraphqlDirective g) or
    TOutputDirective(Erb::OutputDirective g) or
    TTemplate(Erb::Template g) or
    TToken(Erb::Token g) or
    TComment(Erb::Comment g) or
    TCode(Erb::Code g)

  /**
   * Gets the tree-sitter entity wrapped by ERB AST node `n`, whichever
   * `TAstNode` branch `n` belongs to.
   */
  cached
  Erb::AstNode toGenerated(ErbAstNode n) {
    TCommentDirective(result) = n
    or
    TDirective(result) = n
    or
    TGraphqlDirective(result) = n
    or
    TOutputDirective(result) = n
    or
    TTemplate(result) = n
    or
    TToken(result) = n
    or
    TComment(result) = n
    or
    TCode(result) = n
  }

  /** Gets the location of `n`, which is the location of its wrapped tree-sitter entity. */
  cached
  Location getLocation(ErbAstNode n) {
    exists(Erb::AstNode generated | generated = toGenerated(n) |
      result = generated.getLocation()
    )
  }
}
import Cached
/** Gets the ERB AST node that wraps tree-sitter entity `n` (the inverse of `toGenerated`). */
TAstNode fromGenerated(Erb::AstNode n) { toGenerated(result) = n }
// Union of the four directive branches of `TAstNode`.
class TDirectiveNode = TCommentDirective or TDirective or TGraphqlDirective or TOutputDirective;
// Union of the token-like branches of `TAstNode`: plain tokens, comments and code.
class TTokenNode = TToken or TComment or TCode;

View File

@@ -0,0 +1,409 @@
private import codeql.Locations
private import codeql.ruby.AST
private import codeql.ruby.ast.Call
private import codeql.ruby.ast.Constant
private import codeql.ruby.ast.Expr
private import codeql.ruby.ast.Module
private import codeql.ruby.ast.Operation
private import codeql.ruby.ast.Scope
/** Gets the name of a built-in module or class that is known to this library. */
private string builtin() {
  result in [
      "Object", "Kernel", "BasicObject", "Class", "Module", "NilClass", "FalseClass", "TrueClass",
      "Numeric", "Integer", "Float", "Rational", "Complex", "Array", "Hash", "Symbol", "Proc"
    ]
}
cached
private module Cached {
  /**
   * A class or module.
   *
   * `TResolved(qName)` exists for every qualified name that is either a
   * built-in or the qualified name computed for some namespace declaration.
   * `TUnresolved(n)` exists for every namespace declaration for which no
   * qualified name could be computed.
   */
  cached
  newtype TModule =
    TResolved(string qName) {
      qName = builtin()
      or
      qName = namespaceDeclaration(_)
    } or
    TUnresolved(Namespace n) { not exists(namespaceDeclaration(n)) }

  /**
   * Gets the qualified name of namespace declaration `n`:
   * - the plain name for top-level declarations,
   * - the enclosing module's qualified name with the name appended, for nested
   *   declarations without a scope expression, or
   * - the resolved scope expression with the name appended.
   */
  cached
  string namespaceDeclaration(Namespace n) {
    isToplevel(n) and result = n.getName()
    or
    not isToplevel(n) and
    not exists(n.getScopeExpr()) and
    result = scopeAppend(namespaceDeclaration(n.getEnclosingModule()), n.getName())
    or
    exists(string container |
      TResolved(container) = resolveScopeExpr(n.getScopeExpr()) and
      result = scopeAppend(container, n.getName())
    )
  }

  /**
   * Gets the superclass of `cls`.
   *
   * Built-in classes use a hard-coded hierarchy. For every other module the
   * result is the resolved superclass expression of any of its class
   * declarations, or `Object` when `cls` has at least one class declaration
   * and none of them has a resolvable superclass expression.
   */
  cached
  Module getSuperClass(Module cls) {
    cls = TResolved("Object") and result = TResolved("BasicObject")
    or
    // `Symbol` and `Proc` are listed by `builtin()` and inherit directly from
    // `Object` in Ruby. They must be listed here: the declaration-based branch
    // below excludes all built-ins, so they would otherwise have no superclass.
    cls =
      TResolved([
          "Module", "Numeric", "Array", "Hash", "FalseClass", "TrueClass", "NilClass", "Symbol",
          "Proc"
        ]) and
    result = TResolved("Object")
    or
    cls = TResolved(["Integer", "Float", "Rational", "Complex"]) and
    result = TResolved("Numeric")
    or
    cls = TResolved("Class") and
    result = TResolved("Module")
    or
    not cls = TResolved(builtin()) and
    (
      exists(ClassDeclaration d |
        d = cls.getADeclaration() and
        result = resolveScopeExpr(d.getSuperclassExpr())
      )
      or
      result = TResolved("Object") and
      forex(ClassDeclaration d | d = cls.getADeclaration() |
        not exists(resolveScopeExpr(d.getSuperclassExpr()))
      )
    )
  }

  /**
   * Gets a module included into `m`: `Kernel` for `Object`, or a resolved
   * argument of an `include` call whose receiver either resolves to `m` or is
   * `self` inside a declaration of `m`.
   */
  cached
  Module getAnIncludedModule(Module m) {
    m = TResolved("Object") and result = TResolved("Kernel")
    or
    exists(IncludeOrPrependCall c |
      c.getMethodName() = "include" and
      (
        m = resolveScopeExpr(c.getReceiver())
        or
        m = enclosingModule(c).getModule() and
        c.getReceiver() instanceof Self
      ) and
      result = resolveScopeExpr(c.getAnArgument())
    )
  }

  /**
   * Gets a module prepended to `m`: a resolved argument of a `prepend` call
   * whose receiver either resolves to `m` or is `self` inside a declaration
   * of `m`.
   */
  cached
  Module getAPrependedModule(Module m) {
    exists(IncludeOrPrependCall c |
      c.getMethodName() = "prepend" and
      (
        m = resolveScopeExpr(c.getReceiver())
        or
        m = enclosingModule(c).getModule() and
        c.getReceiver() instanceof Self
      ) and
      result = resolveScopeExpr(c.getAnArgument())
    )
  }

  /**
   * Resolve class or module read access to a qualified module name.
   *
   * This is `resolveConstant` restricted to names for which a `TResolved`
   * module exists.
   */
  cached
  TResolved resolveScopeExpr(ConstantReadAccess r) {
    exists(string qname | qname = resolveConstant(r) and result = TResolved(qname))
  }

  /**
   * Resolve constant access (class, module or otherwise) to a qualified module name.
   *
   * This predicate picks the best (lowest priority number) result of
   * `resolveScopeExpr/2` that resolves to a constant definition. If the constant
   * definition is a Namespace then it is returned, if it's a constant assignment then
   * the right-hand side of the assignment is resolved as well.
   */
  cached
  string resolveConstant(ConstantReadAccess r) {
    exists(string qname |
      qname =
        min(string qn, int p |
          isDefinedConstant(qn) and
          qn = resolveScopeExpr(r, p) and
          // prevent classes/modules that contain/extend themselves
          not exists(ConstantWriteAccess w | qn = constantDefinition0(w) |
            r = w.getScopeExpr()
            or
            r = w.(ClassDeclaration).getSuperclassExpr()
          )
        |
          qn order by p
        )
    |
      result = qname
      or
      exists(ConstantAssignment a |
        qname = constantDefinition0(a) and
        result = resolveConstant(a.getParent().(Assignment).getRightOperand())
      )
    )
  }

  /** Gets the method named `name` that a lookup starting in module `m` finds. */
  cached
  Method lookupMethod(Module m, string name) { TMethod(result) = lookupMethodOrConst(m, name) }

  /**
   * Gets the expression that a lookup of constant `name` starting in module
   * `m` finds, or the right-hand side of an assignment to a constant named
   * `name` whose scope expression resolves to `m`.
   */
  cached
  Expr lookupConst(Module m, string name) {
    TExpr(result) = lookupMethodOrConst(m, name)
    or
    exists(AssignExpr ae, ConstantWriteAccess w |
      w = ae.getLeftOperand() and
      w.getName() = name and
      m = resolveScopeExpr(w.getScopeExpr()) and
      result = ae.getRightOperand()
    )
  }
}
import Cached
/**
 * Holds if constant access `n` has no scope expression and either has global
 * scope or is enclosed directly by the top-level.
 */
private predicate isToplevel(ConstantAccess n) {
  (
    n.getEnclosingModule() instanceof Toplevel
    or
    n.hasGlobalScope()
  ) and
  not exists(n.getScopeExpr())
}
/**
 * Holds if `qualifiedModuleName` is the name of a built-in, or a qualified
 * name computed by `constantDefinition0` for some constant write access.
 */
private predicate isDefinedConstant(string qualifiedModuleName) {
  qualifiedModuleName = constantDefinition0(_)
  or
  qualifiedModuleName = builtin()
}
/** Gets one more than the largest nesting level for which `enclosing` has a result. */
private int maxDepth() {
  exists(int deepest | deepest = max(int lvl | exists(enclosing(_, lvl))) | result = deepest + 1)
}
/**
 * Gets the module that encloses `m` at distance `level`: `m` itself for
 * level 0, its enclosing module for level 1, and so on.
 */
private ModuleBase enclosing(ModuleBase m, int level) {
  result = enclosing(m.getEnclosingModule(), level - 1)
  or
  level = 0 and result = m
}
/**
 * Gets the namespace enclosing constant read access `c` at nesting distance
 * `priority`, where `name` is the name of `c`.
 */
pragma[noinline]
private Namespace enclosingNameSpaceConstantReadAccess(
  ConstantReadAccess c, int priority, string name
) {
  c.getName() = name and
  result = enclosing(c.getEnclosingModule(), priority)
}
/**
* Resolve constant read access (typically a scope expression) to a qualified name. The
* `priority` value indicates the precedence of the solution with respect to the lookup order.
* A constant name without scope specifier is resolved against its enclosing modules (inner-most first);
* if the constant is not found in any of the enclosing modules, then the constant will be resolved
* with respect to the ancestors (prepends, includes, super classes, and their ancestors) of the
* directly enclosing module.
*
* Lower `priority` values are preferred; `resolveConstant` selects the candidate with
* the minimum priority.
*/
private string resolveScopeExpr(ConstantReadAccess c, int priority) {
// A globally scoped access resolves to its own name, with the best priority.
c.hasGlobalScope() and result = c.getName() and priority = 0
or
// An access with a scope expression: resolve the scope expression first and keep its priority.
exists(string name |
result = qualifiedModuleName(resolveScopeExprConstantReadAccess(c, priority, name), name)
)
or
// An unqualified, non-global access.
not exists(c.getScopeExpr()) and
not c.hasGlobalScope() and
(
exists(string name |
// Look in the enclosing namespaces; `priority` is the nesting distance (0 = inner-most).
exists(Namespace n |
n = enclosingNameSpaceConstantReadAccess(c, priority, name) and
result = qualifiedModuleName(constantDefinition0(n), name)
)
or
// Look in the ancestors of the directly enclosing module; these priorities
// start at `maxDepth()`, which is larger than any nesting distance.
result =
qualifiedModuleName(ancestors(qualifiedModuleNameConstantReadAccess(c, name),
priority - maxDepth()), name)
)
or
// Last resort: the unqualified name of `c` itself (`result` is passed as the name
// argument), unless the enclosing module's qualified name is `BasicObject`.
priority = maxDepth() + 4 and
qualifiedModuleNameConstantReadAccess(c, result) != "BasicObject"
)
}
/**
 * Gets a qualified name that the scope expression of `c` resolves to with the
 * given `priority`, where `name` is the name of `c`.
 */
pragma[nomagic]
private string resolveScopeExprConstantReadAccess(ConstantReadAccess c, int priority, string name) {
  c.getName() = name and
  result = resolveScopeExpr(c.getScopeExpr(), priority)
}
/**
 * Gets `name` qualified by `qualifier`, joined with `::`. The qualifier
 * `Object` is dropped, so that result is just `name`.
 */
bindingset[qualifier, name]
private string scopeAppend(string qualifier, string name) {
  qualifier = "Object" and result = name
  or
  not qualifier = "Object" and result = qualifier + "::" + name
}
/**
 * Gets a qualified name for module `m`: `Object` for the top-level, or a
 * name computed by `constantDefinition0`.
 */
private string qualifiedModuleName(ModuleBase m) {
  result = constantDefinition0(m)
  or
  m instanceof Toplevel and result = "Object"
}
/**
 * Gets a qualified name of the module enclosing constant write access `c`,
 * where `name` is the name of `c`.
 */
pragma[noinline]
private string qualifiedModuleNameConstantWriteAccess(ConstantWriteAccess c, string name) {
  c.getName() = name and
  result = qualifiedModuleName(c.getEnclosingModule())
}
/**
 * Gets a qualified name of the module enclosing constant read access `c`,
 * where `name` is the name of `c`.
 */
pragma[noinline]
private string qualifiedModuleNameConstantReadAccess(ConstantReadAccess c, string name) {
  c.getName() = name and
  result = qualifiedModuleName(c.getEnclosingModule())
}
/**
 * Gets a qualified name for the constant defined by `c`.
 *
 * There may be several results: scope resolutions are over-approximated and
 * lookup order precedence is ignored, because taking the lookup order into
 * account here would lead to non-monotonic recursion.
 */
private string constantDefinition0(ConstantWriteAccess c) {
  // Unqualified definition: qualify with the enclosing module.
  not c.hasGlobalScope() and
  not exists(c.getScopeExpr()) and
  exists(string name, string container |
    container = qualifiedModuleNameConstantWriteAccess(c, name)
  |
    result = scopeAppend(container, name)
  )
  or
  // Definition with a scope expression: qualify with any resolution of it.
  result = scopeAppend(resolveScopeExpr(c.getScopeExpr(), _), c.getName())
  or
  // Globally scoped definition: the name is already fully qualified.
  c.hasGlobalScope() and result = c.getName()
}
/**
 * Gets the qualified name of an ancestor of the class/module named `qname`.
 *
 * The ancestors should be an ordered list of the ancestors of `prepend`ed
 * modules, the module itself, the ancestors of `include`d modules and the
 * ancestors of the super class. The `priority` value only distinguishes the
 * kind of ancestor (0 = via prepend, 1 = the module itself, 2 = via include,
 * 3 = via super class); it does not order the ancestors within a group of
 * the same kind. This is an over-approximation, however, computing the
 * precise order is tricky because it depends on the evaluation/file loading
 * order.
 */
// TODO: the order of super classes can be determined more precisely even without knowing the evaluation
// order, so we should be able to make this more precise.
private string ancestors(string qname, int priority) {
  priority = 0 and result = ancestors(prepends(qname), _)
  or
  priority = 1 and isDefinedConstant(qname) and result = qname
  or
  priority = 2 and result = ancestors(includes(qname), _)
  or
  priority = 3 and result = ancestors(superclass(qname), _)
}
/** A call to a method named `include` or `prepend`. */
private class IncludeOrPrependCall extends MethodCall {
  IncludeOrPrependCall() { this.getMethodName() in ["include", "prepend"] }

  /** Gets a qualified name that an argument of this call resolves to. */
  string getAModule() { result = resolveScopeExpr(this.getAnArgument(), _) }

  /**
   * Gets a qualified name of the module this call applies to: a resolution of
   * the receiver, or, when the receiver is `self` or absent, the qualified name
   * of the enclosing module.
   */
  string getTarget() {
    (
      not exists(this.getReceiver())
      or
      this.getReceiver() instanceof Self
    ) and
    result = qualifiedModuleName(enclosingModule(this))
    or
    result = resolveScopeExpr(this.getReceiver(), _)
  }
}
/**
 * Gets the module enclosing `node`, ignoring results that would have to be
 * reached through a block.
 *
 * This is a variant of `AstNode::getEnclosingModule`. Excluding blocks is a
 * bit wrong because it could lead to false negatives. However, `include`
 * statements in blocks are very rare in normal code. The majority of cases
 * are in calls to methods like `module_eval` and `Rspec.describe` /
 * `Rspec.context`. These methods evaluate the block in the context of some
 * other module/class instead of the enclosing one.
 */
private ModuleBase enclosingModule(AstNode node) {
  exists(AstNode inner | inner = parent*(node) | result = inner.getParent())
}
/** Gets the parent of `n`, provided that parent is neither a module-like node nor a block. */
private AstNode parent(AstNode n) {
  not result instanceof Block and
  not result instanceof ModuleBase and
  result = n.getParent()
}
/** Gets the qualified name of a module passed to a `prepend` call that targets `qname`. */
private string prepends(string qname) {
  exists(IncludeOrPrependCall call |
    call.getTarget() = qname and
    call.getMethodName() = "prepend"
  |
    result = call.getAModule()
  )
}
/**
 * Gets the qualified name of a module included into `qname`: a module passed
 * to an `include` call that targets `qname`, or `Kernel` for `Object`.
 */
private string includes(string qname) {
  exists(IncludeOrPrependCall call |
    call.getTarget() = qname and
    call.getMethodName() = "include"
  |
    result = call.getAModule()
  )
  or
  qname = "Object" and
  result = "Kernel"
}
/** Gets the superclass expression of a class declaration whose qualified name is `qname`. */
private Expr superexpr(string qname) {
  exists(ClassDeclaration decl | constantDefinition0(decl) = qname |
    result = decl.getSuperclassExpr()
  )
}
/**
 * Gets the qualified name of a superclass of `qname`: any resolution of a
 * superclass expression, or `BasicObject` for `Object`.
 */
private string superclass(string qname) {
  result = resolveScopeExpr(superexpr(qname), _)
  or
  qname = "Object" and result = "BasicObject"
}
/**
 * Gets a defined constant whose qualified name consists of `container` and
 * `name`: either `container::name`, or just `name` when `container` is
 * `Object`.
 */
private string qualifiedModuleName(string container, string name) {
  isDefinedConstant(result) and
  (
    container = "Object" and name = result
    or
    // Split at the last `::`: group 1 is the container, group 2 the simple name.
    exists(string pattern | pattern = "(.+)::([^:]+)" |
      container = result.regexpCapture(pattern, 1) and
      name = result.regexpCapture(pattern, 2)
    )
  )
}
/**
 * Gets `m` itself or a module reachable from `m` through any chain of
 * included and prepended modules.
 */
private Module getAncestors(Module m) {
  result = getAncestors(m.getAPrependedModule())
  or
  result = getAncestors(m.getAnIncludedModule())
  or
  result = m
}
/**
 * The result of a combined method/constant lookup: either a method or an
 * expression. It lets the lookup predicates below handle both with one definition.
 */
private newtype TMethodOrExpr =
TMethod(Method m) or
TExpr(Expr e)
/**
 * Gets the method or constant named `name` that is obtained directly from a
 * declaration of module `owner`, without consulting any other module.
 */
private TMethodOrExpr getMethodOrConst(TModule owner, string name) {
  exists(ModuleBase decl | owner = decl.getModule() |
    result = TExpr(decl.getConstant(name))
    or
    result = TMethod(decl.getMethod(name))
  )
}
/** Wrappers around the private `getMethodOrConst`, exposed only so tests can call it. */
module ExposedForTestingOnly {
  /** Gets the method named `name` declared directly in `owner`. */
  Method getMethod(TModule owner, string name) { getMethodOrConst(owner, name) = TMethod(result) }

  /** Gets the constant expression named `name` declared directly in `owner`. */
  Expr getConst(TModule owner, string name) { getMethodOrConst(owner, name) = TExpr(result) }
}
/**
 * Looks up `name` in module `m`, taking prepended and included modules into
 * account. Superclasses are not consulted here; `lookupMethodOrConst` does that.
 *
 * Order of precedence: prepended modules first, then `m` itself, then
 * included modules.
 */
private TMethodOrExpr lookupMethodOrConst0(Module m, string name) {
// A definition found through a prepended module.
result = lookupMethodOrConst0(m.getAPrependedModule(), name)
or
// Only fall through when no prepended module, nor anything it transitively
// includes or prepends, declares `name`.
not exists(getMethodOrConst(getAncestors(m.getAPrependedModule()), name)) and
(
// A definition in `m` itself.
result = getMethodOrConst(m, name)
or
// Otherwise a definition found through an included module.
not exists(getMethodOrConst(m, name)) and
result = lookupMethodOrConst0(m.getAnIncludedModule(), name)
)
}
/** Gets the AST node wrapped by `e`, whether it is a method or an expression. */
private AstNode getNode(TMethodOrExpr e) {
  e = TExpr(result)
  or
  e = TMethod(result)
}
/**
 * Looks up `name` starting in module `m`: first in `m` together with its
 * prepended and included modules, and only if that yields nothing, in the
 * superclass of `m` (recursively).
 */
private TMethodOrExpr lookupMethodOrConst(Module m, string name) {
result = lookupMethodOrConst0(m, name)
or
not exists(lookupMethodOrConst0(m, name)) and
result = lookupMethodOrConst(m.getSuperClass(), name) and
// NOTE(review): `and` binds tighter than `or`, so the file restriction below
// applies to results of the superclass branch only.
// For now, we restrict the scope of top-level declarations to their file.
// This may remove some plausible targets, but also removes a lot of
// implausible targets
if getNode(result).getEnclosingModule() instanceof Toplevel
then getNode(result).getFile() = m.getADeclaration().getFile()
else any()
}

View File

@@ -0,0 +1,198 @@
private import codeql.ruby.AST
private import AST
private import TreeSitter
private import Call
/** Internal base class for operations; subclasses supply the operator and the operands. */
abstract class OperationImpl extends Expr, TOperation {
  /** Gets an operand of this operation. */
  abstract Expr getAnOperandImpl();

  /** Gets the operator of this operation, as a string. */
  abstract string getOperatorImpl();
}
/**
 * Internal base class for unary operations. A unary operation doubles as a
 * method call: the operand is the receiver, the operator is the method name,
 * and there are no arguments and no block.
 */
abstract class UnaryOperationImpl extends OperationImpl, MethodCallImpl, TUnaryOperation {
  /** Gets the single operand of this operation. */
  abstract Expr getOperandImpl();

  final override Expr getAnOperandImpl() { this.getOperandImpl() = result }

  final override AstNode getReceiverImpl() { this.getOperandImpl() = result }

  final override string getMethodNameImpl() { this.getOperatorImpl() = result }

  final override int getNumberOfArgumentsImpl() { result = 0 }

  final override Expr getArgumentImpl(int n) { none() }

  final override Block getBlockImpl() { none() }
}
/** A unary operation backed by a `Ruby::Unary` node. */
class UnaryOperationGenerated extends UnaryOperationImpl {
  private Ruby::Unary unary;

  UnaryOperationGenerated() { toGenerated(this) = unary }

  /** Gets the operator of the underlying node. */
  final override string getOperatorImpl() { unary.getOperator() = result }

  /** Gets the expression for the operand of the underlying node. */
  final override Expr getOperandImpl() { unary.getOperand() = toGenerated(result) }
}
/** A splat expression (`*`) backed by a `Ruby::SplatArgument` node. */
class SplatExprReal extends UnaryOperationImpl, TSplatExprReal {
  private Ruby::SplatArgument splat;

  SplatExprReal() { this = TSplatExprReal(splat) }

  /** Gets the expression for the child of the splat argument. */
  final override Expr getOperandImpl() { splat.getChild() = toGenerated(result) }

  final override string getOperatorImpl() { result = "*" }
}
/** A synthesized splat expression (`*`); its operand is synthesized child number 0. */
class SplatExprSynth extends UnaryOperationImpl, TSplatExprSynth {
  final override Expr getOperandImpl() { synthChild(this, 0, result) }

  final override string getOperatorImpl() { result = "*" }
}
/** A hash splat expression (`**`) backed by a `Ruby::HashSplatArgument` node. */
class HashSplatExprImpl extends UnaryOperationImpl, THashSplatExpr {
  private Ruby::HashSplatArgument hashSplat;

  HashSplatExprImpl() { this = THashSplatExpr(hashSplat) }

  final override string getOperatorImpl() { result = "**" }

  /** Gets the expression for the child of the hash splat argument. */
  final override Expr getOperandImpl() { hashSplat.getChild() = toGenerated(result) }
}
/**
 * Internal base class for binary operations. A binary operation doubles as a
 * method call: the left operand is the receiver, the operator is the method
 * name, the right operand is the only argument, and there is no block.
 */
abstract class BinaryOperationImpl extends OperationImpl, MethodCallImpl, TBinaryOperation {
  /** Gets the left operand of this operation. */
  abstract Stmt getLeftOperandImpl();

  /** Gets the right operand of this operation. */
  abstract Stmt getRightOperandImpl();

  final override Expr getAnOperandImpl() {
    result = [this.getLeftOperandImpl(), this.getRightOperandImpl()]
  }

  final override AstNode getReceiverImpl() { this.getLeftOperandImpl() = result }

  final override string getMethodNameImpl() { this.getOperatorImpl() = result }

  final override int getNumberOfArgumentsImpl() { result = 1 }

  final override Expr getArgumentImpl(int n) { result = this.getRightOperandImpl() and n = 0 }

  final override Block getBlockImpl() { none() }
}
/** A binary operation backed by a `Ruby::Binary` node. */
class BinaryOperationReal extends BinaryOperationImpl {
  private Ruby::Binary binary;

  BinaryOperationReal() { toGenerated(this) = binary }

  /** Gets the statement for the left child of the underlying node. */
  final override Stmt getLeftOperandImpl() { binary.getLeft() = toGenerated(result) }

  /** Gets the statement for the right child of the underlying node. */
  final override Stmt getRightOperandImpl() { binary.getRight() = toGenerated(result) }

  /** Gets the operator of the underlying node. */
  final override string getOperatorImpl() { binary.getOperator() = result }
}
/**
 * Internal base class for synthesized binary operations: the operands are
 * synthesized children 0 (left) and 1 (right).
 */
abstract class BinaryOperationSynth extends BinaryOperationImpl {
  final override Stmt getRightOperandImpl() { synthChild(this, 1, result) }

  final override Stmt getLeftOperandImpl() { synthChild(this, 0, result) }
}
/** A synthesized addition (`+`). */
class AddExprSynth extends BinaryOperationSynth, TAddExprSynth {
  final override string getOperatorImpl() { result = "+" }
}

/** A synthesized subtraction (`-`). */
class SubExprSynth extends BinaryOperationSynth, TSubExprSynth {
  final override string getOperatorImpl() { result = "-" }
}

/** A synthesized multiplication (`*`). */
class MulExprSynth extends BinaryOperationSynth, TMulExprSynth {
  final override string getOperatorImpl() { result = "*" }
}

/** A synthesized division (`/`). */
class DivExprSynth extends BinaryOperationSynth, TDivExprSynth {
  final override string getOperatorImpl() { result = "/" }
}

/** A synthesized modulo operation (`%`). */
class ModuloExprSynth extends BinaryOperationSynth, TModuloExprSynth {
  final override string getOperatorImpl() { result = "%" }
}

/** A synthesized exponentiation (`**`). */
class ExponentExprSynth extends BinaryOperationSynth, TExponentExprSynth {
  final override string getOperatorImpl() { result = "**" }
}
/** A synthesized logical conjunction (`&&`). */
class LogicalAndExprSynth extends BinaryOperationSynth, TLogicalAndExprSynth {
  final override string getOperatorImpl() { result = "&&" }
}

/** A synthesized logical disjunction (`||`). */
class LogicalOrExprSynth extends BinaryOperationSynth, TLogicalOrExprSynth {
  final override string getOperatorImpl() { result = "||" }
}
/** A synthesized left shift (`<<`). */
class LShiftExprSynth extends BinaryOperationSynth, TLShiftExprSynth {
  final override string getOperatorImpl() { result = "<<" }
}

/** A synthesized right shift (`>>`). */
class RShiftExprSynth extends BinaryOperationSynth, TRShiftExprSynth {
  final override string getOperatorImpl() { result = ">>" }
}

/** A synthesized bitwise and (`&`). */
class BitwiseAndSynthExpr extends BinaryOperationSynth, TBitwiseAndExprSynth {
  final override string getOperatorImpl() { result = "&" }
}

/** A synthesized bitwise or (`|`). */
class BitwiseOrSynthExpr extends BinaryOperationSynth, TBitwiseOrExprSynth {
  final override string getOperatorImpl() { result = "|" }
}

/** A synthesized bitwise xor (`^`). */
class BitwiseXorSynthExpr extends BinaryOperationSynth, TBitwiseXorExprSynth {
  final override string getOperatorImpl() { result = "^" }
}
/**
 * Internal base class for assignments: operations with a pattern on the left
 * and an expression on the right.
 */
abstract class AssignmentImpl extends OperationImpl, TAssignment {
  /** Gets the pattern being assigned to. */
  abstract Pattern getLeftOperandImpl();

  /** Gets the expression on the right-hand side. */
  abstract Expr getRightOperandImpl();

  final override Expr getAnOperandImpl() {
    result = this.getRightOperandImpl()
    or
    result = this.getLeftOperandImpl()
  }
}
/** A plain assignment (`=`) backed by a `Ruby::Assignment` node. */
class AssignExprReal extends AssignmentImpl, TAssignExprReal {
  private Ruby::Assignment assignment;

  AssignExprReal() { this = TAssignExprReal(assignment) }

  /** Gets the pattern for the left child of the underlying node. */
  final override Pattern getLeftOperandImpl() { assignment.getLeft() = toGenerated(result) }

  /** Gets the expression for the right child of the underlying node. */
  final override Expr getRightOperandImpl() { assignment.getRight() = toGenerated(result) }

  final override string getOperatorImpl() { result = "=" }
}
/**
 * A synthesized plain assignment (`=`): the operands are synthesized
 * children 0 (left) and 1 (right).
 */
class AssignExprSynth extends AssignmentImpl, TAssignExprSynth {
  final override Pattern getLeftOperandImpl() { synthChild(this, 0, result) }

  final override Expr getRightOperandImpl() { synthChild(this, 1, result) }

  final override string getOperatorImpl() { result = "=" }
}
/** An operator assignment backed by a `Ruby::OperatorAssignment` node. */
class AssignOperationImpl extends AssignmentImpl, TAssignOperation {
  Ruby::OperatorAssignment g;

  AssignOperationImpl() { toGenerated(this) = g }

  /** Gets the pattern for the left child of the underlying node. */
  final override Pattern getLeftOperandImpl() { g.getLeft() = toGenerated(result) }

  /** Gets the expression for the right child of the underlying node. */
  final override Expr getRightOperandImpl() { g.getRight() = toGenerated(result) }

  /** Gets the operator of the underlying node. */
  final override string getOperatorImpl() { g.getOperator() = result }
}

View File

@@ -0,0 +1,19 @@
private import codeql.ruby.AST
private import AST
private import TreeSitter
module Parameter {
  /**
   * A tree-sitter node that is a child of a method, lambda or block parameter
   * list, together with its child index in that list.
   */
  class Range extends Ruby::AstNode {
    private int index;

    Range() {
      exists(Ruby::MethodParameters mp | this = mp.getChild(index))
      or
      exists(Ruby::LambdaParameters lp | this = lp.getChild(index))
      or
      exists(Ruby::BlockParameters bp | this = bp.getChild(index))
    }

    /** Gets the child index of this parameter within its parameter list. */
    int getPosition() { result = index }
  }
}

View File

@@ -0,0 +1,32 @@
private import codeql.ruby.AST
private import AST
private import TreeSitter
/** Internal base class for tree-sitter nodes that act as tuple patterns. */
abstract class TuplePatternImpl extends Ruby::AstNode {
  /** Gets the element of this pattern at index `i`. */
  abstract Ruby::AstNode getChildNode(int i);

  /**
   * Gets the index of the element that is a `Ruby::RestAssignment`, provided
   * there is exactly one such index.
   */
  final int getRestIndex() {
    result =
      unique(int idx | exists(Ruby::RestAssignment rest | rest = this.getChildNode(idx)))
  }
}
/** A destructured parameter viewed as a tuple pattern; its elements are its children. */
class TuplePatternParameterImpl extends TuplePatternImpl, Ruby::DestructuredParameter {
  override Ruby::AstNode getChildNode(int i) { this.getChild(i) = result }
}
/** A destructured left assignment viewed as a tuple pattern; its elements are its children. */
class DestructuredLeftAssignmentImpl extends TuplePatternImpl, Ruby::DestructuredLeftAssignment {
  override Ruby::AstNode getChildNode(int i) { this.getChild(i) = result }
}
/**
 * A left assignment list viewed as a tuple pattern.
 *
 * When the list has exactly one child and that child is a destructured left
 * assignment, the elements are the children of that nested node; otherwise
 * they are the children of the list itself.
 */
class LeftAssignmentListImpl extends TuplePatternImpl, Ruby::LeftAssignmentList {
  override Ruby::AstNode getChildNode(int i) {
    this =
      any(Ruby::LeftAssignmentList list |
        if
          list.getChild(0) instanceof Ruby::DestructuredLeftAssignment and
          strictcount(int k | exists(list.getChild(k))) = 1
        then result = list.getChild(0).(Ruby::DestructuredLeftAssignment).getChild(i)
        else result = list.getChild(i)
      )
  }
}

View File

@@ -0,0 +1,109 @@
private import TreeSitter
private import codeql.ruby.ast.Scope
private import codeql.ruby.ast.internal.AST
private import codeql.ruby.ast.internal.Parameter
// Union of the node types that are treated as scopes: methods, module-like nodes and block-like nodes.
class TScopeType = TMethodBase or TModuleLike or TBlockLike;
// NOTE(review): `TBlock` may already cover `TDoBlock` (an alias elsewhere in this library reads `TBlock = TDoBlock or TBraceBlock`) — confirm whether listing both is intentional.
private class TBlockLike = TDoBlock or TLambda or TBlock or TEndBlock;
// The top-level plus module, class and singleton class declarations.
private class TModuleLike = TToplevel or TModuleDeclaration or TClassDeclaration or TSingletonClass;
module Scope {
  /** The database types that can act as scopes: callables, module-like nodes and `END` blocks. */
  class TypeRange = Callable::TypeRange or ModuleBase::TypeRange or @ruby_end_block;

  /** A tree-sitter node that is a scope. The body of a lambda is excluded. */
  class Range extends Ruby::AstNode, TypeRange {
    Range() { not exists(Ruby::Lambda lambda | this = lambda.getBody()) }

    /** Gets the scope that directly encloses this scope. */
    Range getOuterScope() { result = scopeOf(this) }

    /**
     * Gets the inner-most module-like scope that is this scope or encloses it.
     */
    ModuleBase::Range getEnclosingModule() {
      not this instanceof ModuleBase::Range and
      result = this.getOuterScope().getEnclosingModule()
      or
      result = this
    }

    /**
     * Gets the inner-most method scope that is this scope or encloses it.
     * The search stops at module-like scopes.
     */
    MethodBase::Range getEnclosingMethod() {
      not this instanceof ModuleBase::Range and
      not this instanceof MethodBase::Range and
      result = this.getOuterScope().getEnclosingMethod()
      or
      result = this
    }
  }
}
module MethodBase {
  /** The database types of methods and singleton methods. */
  class TypeRange = @ruby_method or @ruby_singleton_method;

  /** A scope that is a method or a singleton method. */
  class Range extends Scope::Range, TypeRange { }
}
module Callable {
  /** The database types of callables: methods, `do` blocks, lambdas and blocks. */
  class TypeRange = MethodBase::TypeRange or @ruby_do_block or @ruby_lambda or @ruby_block;

  /** A scope that is a callable. */
  class Range extends Scope::Range, TypeRange {
    /** Gets child `i` of this callable's parameter list. */
    Parameter::Range getParameter(int i) {
      result = this.(Ruby::Block).getParameters().getChild(i)
      or
      result = this.(Ruby::Lambda).getParameters().getChild(i)
      or
      result = this.(Ruby::DoBlock).getParameters().getChild(i)
      or
      result = this.(Ruby::SingletonMethod).getParameters().getChild(i)
      or
      result = this.(Ruby::Method).getParameters().getChild(i)
    }
  }
}
module ModuleBase {
  /** The database types of programs, modules, classes and singleton classes. */
  class TypeRange = @ruby_program or @ruby_module or @ruby_class or @ruby_singleton_class;

  /** A scope that is a program, module, class or singleton class. */
  class Range extends Scope::Range, TypeRange { }
}
/**
 * Holds if `b` is the `i`th heredoc body in file `f`, ordered by start line
 * and then start column.
 */
pragma[noinline]
private predicate rankHeredocBody(File f, Ruby::HeredocBody b, int i) {
  b =
    rank[i](Ruby::HeredocBody candidate |
      candidate.getLocation().getFile() = f
    |
      candidate
      order by
        candidate.getLocation().getStartLine(), candidate.getLocation().getStartColumn()
    )
}
/**
 * Gets the heredoc body that belongs to heredoc beginning `g`.
 *
 * Beginnings and bodies are paired by position: the `i`th beginning in a
 * file (by start line, then start column) is matched with the `i`th body in
 * the same file.
 */
Ruby::HeredocBody getHereDocBody(Ruby::HeredocBeginning g) {
  exists(File file, int idx |
    rankHeredocBody(file, result, idx) and
    g =
      rank[idx](Ruby::HeredocBeginning start |
        start.getLocation().getFile() = file
      |
        start order by start.getLocation().getStartLine(), start.getLocation().getStartColumn()
      )
  )
}
/**
 * Gets the node that is treated as the parent of `n` when computing scopes.
 *
 * This is normally the syntactic parent, with two exceptions:
 * - a heredoc body has its heredoc beginning as parent, and
 * - the name of a module, class, method or singleton method, the superclass
 *   of a class, the value of a singleton class and the object of a singleton
 *   method are attributed to the parent of their declaration.
 */
private Ruby::AstNode parentOf(Ruby::AstNode n) {
  exists(Ruby::AstNode syntacticParent | syntacticParent = n.getParent() |
    if
      n =
        [
          syntacticParent.(Ruby::Module).getName(), syntacticParent.(Ruby::Class).getName(),
          syntacticParent.(Ruby::Class).getSuperclass(),
          syntacticParent.(Ruby::SingletonClass).getValue(),
          syntacticParent.(Ruby::Method).getName(),
          syntacticParent.(Ruby::SingletonMethod).getName(),
          syntacticParent.(Ruby::SingletonMethod).getObject()
        ]
    then result = syntacticParent.getParent()
    else result = syntacticParent
  )
  or
  n = getHereDocBody(result)
}
/**
 * Gets the enclosing scope of node `n`: the nearest node on the `parentOf`
 * chain of `n` that is a scope.
 */
cached
Scope::Range scopeOf(Ruby::AstNode n) {
  exists(Ruby::AstNode up | up = parentOf(n) |
    not up instanceof Scope::Range and result = scopeOf(up)
    or
    result = up
  )
}

View File

@@ -0,0 +1,797 @@
/** Provides predicates for synthesizing AST nodes. */
private import AST
private import TreeSitter
private import codeql.ruby.ast.internal.Call
private import codeql.ruby.ast.internal.Variable
private import codeql.ruby.ast.internal.Pattern
private import codeql.ruby.AST
/**
* A synthesized AST node kind.
*
* Branches without parameters describe a node kind that needs no further data.
* Parameterized branches are restricted to the values listed in their body.
*/
newtype SynthKind =
AddExprKind() or
AssignExprKind() or
BitwiseAndExprKind() or
BitwiseOrExprKind() or
BitwiseXorExprKind() or
ClassVariableAccessKind(ClassVariable v) or
DivExprKind() or
ExponentExprKind() or
GlobalVariableAccessKind(GlobalVariable v) or
InstanceVariableAccessKind(InstanceVariable v) or
// Only integer literals in the range -1000 .. 1000 can be synthesized.
IntegerLiteralKind(int i) { i in [-1000 .. 1000] } or
LShiftExprKind() or
LocalVariableAccessRealKind(LocalVariableReal v) or
LocalVariableAccessSynthKind(TLocalVariableSynth v) or
LogicalAndExprKind() or
LogicalOrExprKind() or
// Only exists for the `(name, setter, arity)` triples requested by some `Synthesis::methodCall`.
MethodCallKind(string name, boolean setter, int arity) {
any(Synthesis s).methodCall(name, setter, arity)
} or
ModuloExprKind() or
MulExprKind() or
RangeLiteralKind(boolean inclusive) { inclusive in [false, true] } or
RShiftExprKind() or
SplatExprKind() or
StmtSequenceKind() or
SelfKind() or
SubExprKind() or
// Only exists for the constant names requested by some `Synthesis::constantReadAccess`.
ConstantReadAccessKind(string value) { any(Synthesis s).constantReadAccess(value) }
/**
* An AST child.
*
* Either a new synthesized node or a reference to an existing node.
*/
newtype Child =
// A new node of kind `k` that is to be synthesized.
SynthChild(SynthKind k) or
// A reference to the existing node `n`.
RealChild(AstNode n)
// Singleton type backing `Synthesis`; all subclasses share the single value `MkSynthesis()`.
private newtype TSynthesis = MkSynthesis()
/**
* A class used for synthesizing AST nodes.
*
* Every member predicate defaults to `none()`; subclasses override the ones
* they need, and clients query the union via `any(Synthesis s)`.
*/
class Synthesis extends TSynthesis {
/**
* Holds if a node should be synthesized as the `i`th child of `parent`, or if
* a non-synthesized node should be the `i`th child of synthesized node `parent`.
*
* `i = -1` is used to represent that the synthesized node is a desugared version
* of its parent.
*/
predicate child(AstNode parent, int i, Child child) { none() }
/**
* Holds if synthesized node `n` should have location `l`. Synthesized nodes for
* which this predicate does not hold, inherit their location (recursively) from
* their parent node.
*/
predicate location(AstNode n, Location l) { none() }
/**
* Holds if a local variable, identified by `i`, should be synthesized for AST
* node `n`.
*/
predicate localVariable(AstNode n, int i) { none() }
/**
* Holds if a method call to `name` with arity `arity` is needed. `setter`
* is `true` for setter calls (as in the desugaring of `x.foo = y`) and
* `false` for all other calls.
*/
predicate methodCall(string name, boolean setter, int arity) { none() }
/**
* Holds if a constant read access of `name` is needed.
*/
predicate constantReadAccess(string name) { none() }
/**
* Holds if `n` should be excluded from `ControlFlowTree` in the CFG construction.
*/
predicate excludeFromControlFlowTree(AstNode n) { none() }
/** Has no result: this class deliberately provides no textual representation. */
final string toString() { none() }
}
/** An AST node that is the desugared version of some other AST node. */
private class Desugared extends AstNode {
Desugared() { this = any(AstNode sugar).getDesugared() }
/** Gets this node or any node reachable from it via zero or more `getAChild` steps. */
AstNode getADescendant() { result = this.getAChild*() }
}
/**
 * Gets the desugaring level of `n`. That is, the number of desugaring
 * transformations required before the context in which `n` occurs is
 * fully desugared.
 *
 * Concretely, this is the number of `Desugared` nodes that have `n` as a
 * (reflexive, transitive) child.
 */
int desugarLevel(AstNode n) {
  result = count(Desugared root | root.getADescendant() = n)
}
/**
 * Helper for `Synthesis::child` implementations that need the assignment
 * `v = value`, where `v` is a synthesized local variable.
 *
 * The assignment node is child `assignIndex` of `assignParent`; its own
 * children are an access to `v` (index 0) and the existing node `value`
 * (index 1).
 */
bindingset[v, assignParent, assignIndex, value]
private predicate assign(
  AstNode parent, int i, Child child, TLocalVariableSynth v, AstNode assignParent, int assignIndex,
  AstNode value
) {
  // The assignment expression itself.
  child = SynthChild(AssignExprKind()) and
  i = assignIndex and
  parent = assignParent
  or
  // The two operands of the assignment.
  parent = TAssignExprSynth(assignParent, assignIndex) and
  (
    child = SynthChild(LocalVariableAccessSynthKind(v)) and
    i = 0
    or
    child = RealChild(value) and
    i = 1
  )
}
/**
 * Holds if `n` is a synthesized node for which some `Synthesis` instance
 * explicitly provides location `l`.
 */
predicate synthLocation(AstNode n, Location l) {
  any(Synthesis s).location(n, l) and
  n.isSynthesized()
}
/**
 * Holds if `n` has location `l`, either through an explicitly synthesized
 * location or through the location of its underlying generated node.
 */
private predicate hasLocation(AstNode n, Location l) {
  synthLocation(n, l)
  or
  toGenerated(n).getLocation() = l
}
/** Synthesis of `self` as child 0 of method calls that have no explicit receiver. */
private module ImplicitSelfSynthesis {
  /** Holds if a `self` node should be synthesized as child `i = 0` of identifier method call `mc`. */
  pragma[nomagic]
  private predicate identifierMethodCallSelfSynthesis(AstNode mc, int i, Child child) {
    mc = TIdentifierMethodCall(_) and
    i = 0 and
    child = SynthChild(SelfKind())
  }

  private class IdentifierMethodCallSelfSynthesis extends Synthesis {
    final override predicate child(AstNode parent, int i, Child child) {
      identifierMethodCallSelfSynthesis(parent, i, child)
    }
  }

  /**
   * Holds if a `self` node should be synthesized as child `i = 0` of regular
   * method call `mc`, because the underlying call has neither a receiver nor
   * a scope-resolution scope.
   */
  pragma[nomagic]
  private predicate regularMethodCallSelfSynthesis(TRegularMethodCall mc, int i, Child child) {
    i = 0 and
    child = SynthChild(SelfKind()) and
    exists(Ruby::AstNode call |
      mc = TRegularMethodCall(call) and
      // If there's no explicit receiver (or scope resolution that acts like a
      // receiver), then the receiver is implicitly `self`. N.B. `::Foo()` is
      // not valid Ruby.
      not exists(call.(Ruby::Call).getMethod().(Ruby::ScopeResolution).getScope()) and
      not exists(call.(Ruby::Call).getReceiver())
    )
  }

  private class RegularMethodCallSelfSynthesis extends Synthesis {
    final override predicate child(AstNode parent, int i, Child child) {
      regularMethodCallSelfSynthesis(parent, i, child)
    }
  }
}
private module SetterDesugar {
/** An assignment where the left-hand side is a method call. */
private class SetterAssignExpr extends AssignExpr {
// The method call that forms the left-hand side of this assignment.
private MethodCall mc;
pragma[nomagic]
SetterAssignExpr() { mc = this.getLeftOperand() }
/** Gets the method call on the left-hand side of this assignment. */
MethodCall getMethodCall() { result = mc }
/**
* Gets the synthesized method call kind that has the same method name as the
* left-hand side call, and the given `setter` flag and `arity`.
*/
pragma[nomagic]
MethodCallKind getCallKind(boolean setter, int arity) {
result = MethodCallKind(mc.getMethodName(), setter, arity)
}
/** Gets the receiver of the left-hand side method call. */
pragma[nomagic]
Expr getReceiver() { result = mc.getReceiver() }
/** Gets the `i`th argument of the left-hand side method call. */
pragma[nomagic]
Expr getArgument(int i) { result = mc.getArgument(i) }
/** Gets the number of arguments of the left-hand side method call. */
pragma[nomagic]
int getNumberOfArguments() { result = mc.getNumberOfArguments() }
/** Gets the location of the left-hand side method call. */
pragma[nomagic]
Location getMethodCallLocation() { hasLocation(mc, result) }
}
/**
 * Holds if `child` is the `i`th child of `parent` in the desugaring of a
 * setter assignment such as `x.foo = y`, which becomes the statement sequence
 *
 * ```rb
 * x.foo=(__synth__0 = y);
 * __synth__0;
 * ```
 */
pragma[nomagic]
private predicate setterMethodCallSynthesis(AstNode parent, int i, Child child) {
  exists(SetterAssignExpr sae |
    // The desugared statement sequence is attached to the assignment at index `-1`.
    parent = sae and
    i = -1 and
    child = SynthChild(StmtSequenceKind())
    or
    exists(AstNode seq | seq = TStmtSequenceSynth(sae, -1) |
      // First statement: the setter call. It takes the original arguments
      // plus one extra argument for the assigned value.
      parent = seq and
      i = 0 and
      child = SynthChild(sae.getCallKind(true, sae.getNumberOfArguments() + 1))
      or
      exists(AstNode call | call = TMethodCallSynth(seq, 0, _, _, _) |
        // Child 0 is the original receiver ...
        parent = call and
        i = 0 and
        child = RealChild(sae.getReceiver())
        or
        // ... children 1..n are the original arguments ...
        parent = call and
        child = RealChild(sae.getArgument(i - 1))
        or
        // ... and the last child is `__synth__0 = y`, built with the shared
        // `assign` helper rather than being spelled out inline.
        exists(int valueIndex | valueIndex = sae.getNumberOfArguments() + 1 |
          assign(parent, i, child, TLocalVariableSynth(sae, 0), call, valueIndex,
            sae.getRightOperand())
        )
      )
      or
      // Second statement: a read of `__synth__0`, i.e. the value of the whole expression.
      parent = seq and
      i = 1 and
      child = SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(sae, 0)))
    )
  )
}
/**
* ```rb
* x.foo = y
* ```
*
* desugars to
*
* ```rb
* x.foo=(__synth__0 = y);
* __synth__0;
* ```
*/
private class SetterMethodCallSynthesis extends Synthesis {
final override predicate child(AstNode parent, int i, Child child) {
setterMethodCallSynthesis(parent, i, child)
}
// Every statement of the desugared sequence gets the location of the
// original left-hand side method call.
final override predicate location(AstNode n, Location l) {
exists(SetterAssignExpr sae, StmtSequence seq |
seq = sae.getDesugared() and
l = sae.getMethodCallLocation() and
n = seq.getAStmt()
)
}
// The original left-hand side call is excluded from the control flow tree.
final override predicate excludeFromControlFlowTree(AstNode n) {
n = any(SetterAssignExpr sae).getMethodCall()
}
// One synthesized variable (index 0) per setter assignment, holding the assigned value.
final override predicate localVariable(AstNode n, int i) {
n instanceof SetterAssignExpr and
i = 0
}
// Requests the setter call kind: the original arguments plus the assigned value.
final override predicate methodCall(string name, boolean setter, int arity) {
exists(SetterAssignExpr sae |
name = sae.getMethodCall().getMethodName() and
setter = true and
arity = sae.getNumberOfArguments() + 1
)
}
}
}
private module AssignOperationDesugar {
/**
 * Gets the kind of binary operation that operator assignment `ao` expands to,
 * for example `AddExprKind()` for `+=`.
 */
private SynthKind getKind(AssignOperation ao) {
  // Arithmetic operators
  ao instanceof AssignAddExpr and result = AddExprKind()
  or
  ao instanceof AssignSubExpr and result = SubExprKind()
  or
  ao instanceof AssignMulExpr and result = MulExprKind()
  or
  ao instanceof AssignDivExpr and result = DivExprKind()
  or
  ao instanceof AssignModuloExpr and result = ModuloExprKind()
  or
  ao instanceof AssignExponentExpr and result = ExponentExprKind()
  or
  // Shift operators
  ao instanceof AssignLShiftExpr and result = LShiftExprKind()
  or
  ao instanceof AssignRShiftExpr and result = RShiftExprKind()
  or
  // Bitwise operators
  ao instanceof AssignBitwiseAndExpr and result = BitwiseAndExprKind()
  or
  ao instanceof AssignBitwiseOrExpr and result = BitwiseOrExprKind()
  or
  ao instanceof AssignBitwiseXorExpr and result = BitwiseXorExprKind()
  or
  // Logical operators
  ao instanceof AssignLogicalAndExpr and result = LogicalAndExprKind()
  or
  ao instanceof AssignLogicalOrExpr and result = LogicalOrExprKind()
}
/**
 * Gets the location of the token at parent index 1 of the operator
 * assignment underlying `ao` (presumably the operator token, e.g. `+=`).
 */
private Location getAssignOperationLocation(AssignOperation ao) {
  exists(Ruby::Token operator |
    operator.getParent() = toGenerated(ao).(Ruby::OperatorAssignment) and
    operator.getParentIndex() = 1 and
    result = operator.getLocation()
  )
}
/** An assignment operation where the left-hand side is a variable. */
private class VariableAssignOperation extends AssignOperation {
// The variable accessed on the left-hand side.
private Variable v;
pragma[nomagic]
VariableAssignOperation() { v = this.getLeftOperand().(VariableAccess).getVariable() }
/**
* Gets the synthesized access kind for the left-hand side variable. Which of
* the four kinds exists depends on whether the variable is a (real) local,
* instance, class or global variable.
*/
pragma[nomagic]
SynthKind getVariableAccessKind() {
result in [
LocalVariableAccessRealKind(v).(SynthKind), InstanceVariableAccessKind(v),
ClassVariableAccessKind(v), GlobalVariableAccessKind(v)
]
}
}
/**
* Holds if `child` is the `i`th child of `parent` in the desugaring of an
* operator assignment whose left-hand side is a variable (`x += y` becomes
* `x = x + y`).
*/
pragma[nomagic]
private predicate variableAssignOperationSynthesis(AstNode parent, int i, Child child) {
exists(VariableAssignOperation vao |
// The desugared assignment is attached to the operation at index `-1`.
parent = vao and
i = -1 and
child = SynthChild(AssignExprKind())
or
exists(AstNode assign | assign = TAssignExprSynth(vao, -1) |
// Left operand of the assignment: the original left-hand side.
parent = assign and
i = 0 and
child = RealChild(vao.getLeftOperand())
or
// Right operand of the assignment: the synthesized binary operation.
parent = assign and
i = 1 and
child = SynthChild(getKind(vao))
or
// Operands of the binary operation: a fresh access to the variable, and
// the original right-hand side.
parent = getSynthChild(assign, 1) and
(
i = 0 and
child = SynthChild(vao.getVariableAccessKind())
or
i = 1 and
child = RealChild(vao.getRightOperand())
)
)
)
}
/**
* ```rb
* x += y
* ```
*
* desugars to
*
* ```rb
* x = x + y
* ```
*
* when `x` is a variable.
*/
private class VariableAssignOperationSynthesis extends Synthesis {
final override predicate child(AstNode parent, int i, Child child) {
variableAssignOperationSynthesis(parent, i, child)
}
final override predicate location(AstNode n, Location l) {
exists(VariableAssignOperation vao, BinaryOperation bo |
bo = vao.getDesugared().(AssignExpr).getRightOperand()
|
// The synthesized binary operation is located at the assignment operator.
n = bo and
l = getAssignOperationLocation(vao)
or
// The synthesized variable read shares the location of the original left-hand side.
n = bo.getLeftOperand() and
hasLocation(vao.getLeftOperand(), l)
)
}
}
/** An assignment operation where the left-hand side is a method call. */
private class SetterAssignOperation extends AssignOperation {
// The method call that forms the left-hand side of this operation.
private MethodCall mc;
pragma[nomagic]
SetterAssignOperation() { mc = this.getLeftOperand() }
/** Gets the method call on the left-hand side of this operation. */
MethodCall getMethodCall() { result = mc }
/**
* Gets the synthesized method call kind that has the same method name as the
* left-hand side call, and the given `setter` flag and `arity`.
*/
pragma[nomagic]
MethodCallKind getCallKind(boolean setter, int arity) {
result = MethodCallKind(mc.getMethodName(), setter, arity)
}
/** Gets the receiver of the left-hand side method call. */
pragma[nomagic]
Expr getReceiver() { result = mc.getReceiver() }
/** Gets the `i`th argument of the left-hand side method call. */
pragma[nomagic]
Expr getArgument(int i) { result = mc.getArgument(i) }
/** Gets the number of arguments of the left-hand side method call. */
pragma[nomagic]
int getNumberOfArguments() { result = mc.getNumberOfArguments() }
/** Gets the location of the left-hand side method call. */
pragma[nomagic]
Location getMethodCallLocation() { hasLocation(mc, result) }
}
/**
* Holds if `child` is the `i`th child of `parent` in the desugaring of an
* operator assignment whose left-hand side is a method call, such as
* `foo[bar] += y`.
*
* With `n` the number of arguments of the left-hand side call, the desugared
* statement sequence has these children:
* - `0`: assignment of the receiver to synthesized variable 0;
* - `1 .. n`: assignments of the arguments to synthesized variables `1 .. n`;
* - `n + 1` (`opAssignIndex`): assignment of the binary operation to
*   synthesized variable `n + 1`;
* - `n + 2`: the setter call;
* - `n + 3`: a read of synthesized variable `n + 1`.
*/
pragma[nomagic]
private predicate methodCallAssignOperationSynthesis(AstNode parent, int i, Child child) {
exists(SetterAssignOperation sao |
// The desugared statement sequence is attached to the operation at index `-1`.
parent = sao and
i = -1 and
child = SynthChild(StmtSequenceKind())
or
exists(AstNode seq | seq = TStmtSequenceSynth(sao, -1) |
// `__synth__0 = foo`
assign(parent, i, child, TLocalVariableSynth(sao, 0), seq, 0, sao.getReceiver())
or
// `__synth__1 = bar`
exists(Expr arg, int j | arg = sao.getArgument(j - 1) |
assign(parent, i, child, TLocalVariableSynth(sao, j), seq, j, arg)
)
or
// `__synth__2 = __synth__0.[](__synth__1) + y`
exists(int opAssignIndex | opAssignIndex = sao.getNumberOfArguments() + 1 |
parent = seq and
i = opAssignIndex and
child = SynthChild(AssignExprKind())
or
exists(AstNode assign | assign = TAssignExprSynth(seq, opAssignIndex) |
parent = assign and
i = 0 and
child =
SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(sao, opAssignIndex)))
or
parent = assign and
i = 1 and
child = SynthChild(getKind(sao))
or
// `__synth__0.[](__synth__1) + y`
exists(AstNode op | op = getSynthChild(assign, 1) |
// Left operand: the getter call, with the same arity as the original call.
parent = op and
i = 0 and
child = SynthChild(sao.getCallKind(false, sao.getNumberOfArguments()))
or
// Children of the getter call: reads of the synthesized receiver
// (index 0) and argument (indices 1 .. n) variables.
parent = TMethodCallSynth(op, 0, _, _, _) and
child = SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(sao, i))) and
i in [0 .. sao.getNumberOfArguments()]
or
// Right operand: the original right-hand side.
parent = op and
i = 1 and
child = RealChild(sao.getRightOperand())
)
)
or
// `__synth__0.[]=(__synth__1, __synth__2);`
parent = seq and
i = opAssignIndex + 1 and
child = SynthChild(sao.getCallKind(true, opAssignIndex))
or
exists(AstNode setter | setter = TMethodCallSynth(seq, opAssignIndex + 1, _, _, _) |
// Reads of the synthesized receiver (index 0) and argument (indices 1 .. n) variables.
parent = setter and
child = SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(sao, i))) and
i in [0 .. sao.getNumberOfArguments()]
or
// The computed value, passed as the final argument of the setter.
// NOTE(review): this child uses index `opAssignIndex + 1`, while the
// children above occupy indices `0 .. getNumberOfArguments()`; index
// `opAssignIndex` is therefore left unused on the setter call. The
// `location` predicate of `MethodCallAssignOperationSynthesis` matches
// this layout (`getArgument(opAssignIndex)`). Confirm the gap is intended.
parent = setter and
i = opAssignIndex + 1 and
child =
SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(sao, opAssignIndex)))
)
or
// `__synth__2;` - the value of the whole expression.
parent = seq and
i = opAssignIndex + 2 and
child = SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(sao, opAssignIndex)))
)
)
)
}
/**
* ```rb
* foo[bar] += y
* ```
*
* desugars to
*
* ```rb
* __synth__0 = foo;
* __synth__1 = bar;
* __synth__2 = __synth__0.[](__synth__1) + y;
* __synth__0.[]=(__synth__1, __synth__2);
* __synth__2;
* ```
*/
private class MethodCallAssignOperationSynthesis extends Synthesis {
final override predicate child(AstNode parent, int i, Child child) {
methodCallAssignOperationSynthesis(parent, i, child)
}
final override predicate location(AstNode n, Location l) {
exists(SetterAssignOperation sao, StmtSequence seq | seq = sao.getDesugared() |
// `__synth__0 = foo` is located at the original receiver.
n = seq.getStmt(0) and
hasLocation(sao.getReceiver(), l)
or
// `__synth__<i + 1> = <arg i>` is located at the original argument.
exists(int i |
n = seq.getStmt(i + 1) and
hasLocation(sao.getArgument(i), l)
)
or
exists(AssignExpr ae, int opAssignIndex |
opAssignIndex = sao.getNumberOfArguments() + 1 and
ae = seq.getStmt(opAssignIndex)
|
// The assignment of the operation result is located at the assignment operator.
l = getAssignOperationLocation(sao) and
n = ae
or
exists(BinaryOperation bo | bo = ae.getRightOperand() |
// The getter call is located at the original left-hand side call.
n = bo.getLeftOperand() and
l = sao.getMethodCallLocation()
or
// Its receiver and arguments are located at their original counterparts.
exists(MethodCall mc | mc = bo.getLeftOperand() |
n = mc.getReceiver() and
hasLocation(sao.getReceiver(), l)
or
exists(int i |
n = mc.getArgument(i) and
hasLocation(sao.getArgument(i), l)
)
)
)
or
// The setter call, its receiver and its arguments.
exists(MethodCall mc | mc = seq.getStmt(opAssignIndex + 1) |
n = mc and
l = sao.getMethodCallLocation()
or
n = mc.getReceiver() and
hasLocation(sao.getReceiver(), l)
or
exists(int i | n = mc.getArgument(i) |
hasLocation(sao.getArgument(i), l)
or
// The argument carrying the computed value is located at the assignment operator.
i = opAssignIndex and
l = getAssignOperationLocation(sao)
)
)
or
// The final read of the result variable is located at the assignment operator.
n = seq.getStmt(opAssignIndex + 2) and
l = getAssignOperationLocation(sao)
)
)
}
// Variables 0 .. n + 1: receiver, one per argument, and the operation result.
final override predicate localVariable(AstNode n, int i) {
n = any(SetterAssignOperation sao | i in [0 .. sao.getNumberOfArguments() + 1])
}
// Requests both the getter (same arity) and the setter (one extra argument) call kinds.
final override predicate methodCall(string name, boolean setter, int arity) {
exists(SetterAssignOperation sao | name = sao.getMethodCall().getMethodName() |
setter = false and
arity = sao.getNumberOfArguments()
or
setter = true and
arity = sao.getNumberOfArguments() + 1
)
}
// The original left-hand side call is excluded from the control flow tree.
final override predicate excludeFromControlFlowTree(AstNode n) {
n = any(SetterAssignOperation sao).getMethodCall()
}
}
}
private module CompoundAssignDesugar {
/** An assignment where the left-hand side is a tuple pattern. */
private class TupleAssignExpr extends AssignExpr {
// The tuple pattern that forms the left-hand side of this assignment.
private TuplePattern tp;
pragma[nomagic]
TupleAssignExpr() { tp = this.getLeftOperand() }
/** Gets the tuple pattern on the left-hand side of this assignment. */
TuplePattern getTuplePattern() { result = tp }
/** Gets the `i`th element of the left-hand side tuple pattern. */
pragma[nomagic]
Pattern getElement(int i) { result = tp.getElement(i) }
/** Gets the number of child nodes of the underlying tuple pattern. */
pragma[nomagic]
int getNumberOfElements() {
toGenerated(tp) = any(TuplePatternImpl impl | result = count(impl.getChildNode(_)))
}
/**
* Gets the rest index of the tuple pattern if it has one, and the number of
* elements otherwise.
*/
pragma[nomagic]
int getRestIndexOrNumberOfElements() {
result = tp.getRestIndex()
or
toGenerated(tp) = any(TuplePatternImpl impl | not exists(impl.getRestIndex())) and
result = this.getNumberOfElements()
}
}
/**
* Holds if `child` is the `i`th child of `parent` in the desugaring of a
* tuple assignment such as `x, *y, z = w`.
*
* The desugared statement sequence first assigns the splatted right-hand side
* to synthesized variable 0, and then assigns to each pattern element `j`
* (statement `j + 1`) an element reference `__synth__0[...]` whose index
* depends on the position of `j` relative to the rest element.
*/
pragma[nomagic]
private predicate compoundAssignSynthesis(AstNode parent, int i, Child child) {
exists(TupleAssignExpr tae |
// The desugared statement sequence is attached to the assignment at index `-1`.
parent = tae and
i = -1 and
child = SynthChild(StmtSequenceKind())
or
exists(AstNode seq | seq = TStmtSequenceSynth(tae, -1) |
// `__synth__0 = *w`
parent = seq and
i = 0 and
child = SynthChild(AssignExprKind())
or
exists(AstNode assign | assign = TAssignExprSynth(seq, 0) |
parent = assign and
i = 0 and
child = SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(tae, 0)))
or
parent = assign and
i = 1 and
child = SynthChild(SplatExprKind())
or
parent = TSplatExprSynth(assign, 1) and
i = 0 and
child = RealChild(tae.getRightOperand())
)
or
// One assignment per pattern element `p` at position `j`.
exists(Pattern p, int j, int restIndex |
p = tae.getElement(j) and
restIndex = tae.getRestIndexOrNumberOfElements()
|
parent = seq and
i = j + 1 and
child = SynthChild(AssignExprKind())
or
exists(AstNode assign | assign = TAssignExprSynth(seq, j + 1) |
// Left operand: the original pattern element.
parent = assign and
i = 0 and
child = RealChild(p)
or
// Right operand: `__synth__0.[](<index>)`.
parent = assign and
i = 1 and
child = SynthChild(MethodCallKind("[]", false, 1))
or
parent = TMethodCallSynth(assign, 1, _, _, _) and
i = 0 and
child = SynthChild(LocalVariableAccessSynthKind(TLocalVariableSynth(tae, 0)))
or
// Elements before the rest element are indexed from the front: `[j]`.
j < restIndex and
parent = TMethodCallSynth(assign, 1, _, _, _) and
i = 1 and
child = SynthChild(IntegerLiteralKind(j))
or
// The rest element takes the inclusive range from `j` up to the
// (negative) index just before the first element that follows it.
j = restIndex and
(
parent = TMethodCallSynth(assign, 1, _, _, _) and
i = 1 and
child = SynthChild(RangeLiteralKind(true))
or
exists(AstNode call |
call = TMethodCallSynth(assign, 1, _, _, _) and
parent = TRangeLiteralSynth(call, 1, _)
|
i = 0 and
child = SynthChild(IntegerLiteralKind(j))
or
i = 1 and
child = SynthChild(IntegerLiteralKind(restIndex - tae.getNumberOfElements()))
)
)
or
// Elements after the rest element are indexed from the back (negative index).
j > restIndex and
parent = TMethodCallSynth(assign, 1, _, _, _) and
i = 1 and
child = SynthChild(IntegerLiteralKind(j - tae.getNumberOfElements()))
)
)
)
)
}
/**
* ```rb
* x, *y, z = w
* ```
* desugars to
*
* ```rb
* __synth__0 = *w;
* x = __synth__0[0];
* y = __synth__0[1..-2];
* z = __synth__0[-1];
* ```
*/
private class CompoundAssignSynthesis extends Synthesis {
final override predicate child(AstNode parent, int i, Child child) {
compoundAssignSynthesis(parent, i, child)
}
final override predicate location(AstNode n, Location l) {
exists(TupleAssignExpr tae, StmtSequence seq | seq = tae.getDesugared() |
// `__synth__0 = *w` is located at the original right-hand side.
n = seq.getStmt(0) and
hasLocation(tae.getRightOperand(), l)
or
// The assignment to element `j` is located at that element.
exists(Pattern p, int j |
p = tae.getElement(j) and
n = seq.getStmt(j + 1) and
hasLocation(p, l)
)
)
}
// One synthesized variable (index 0) per tuple assignment, holding the splatted right-hand side.
final override predicate localVariable(AstNode n, int i) {
n instanceof TupleAssignExpr and
i = 0
}
// Requests the one-argument `[]` call used to index into the synthesized variable.
final override predicate methodCall(string name, boolean setter, int arity) {
name = "[]" and
setter = false and
arity = 1
}
// The original tuple pattern is excluded from the control flow tree.
final override predicate excludeFromControlFlowTree(AstNode n) {
n = any(TupleAssignExpr tae).getTuplePattern()
}
}
}
private module ArrayLiteralDesugar {
/**
 * Holds if `child` is the `i`th child of `parent` in the desugaring of an
 * array literal into a call to `::Array.[]`.
 *
 * The arity of the synthesized call is the number of elements of the literal.
 * As for every other synthesized call in this file, the receiver (child 0) is
 * not counted.
 */
pragma[nomagic]
private predicate arrayLiteralSynthesis(AstNode parent, int i, Child child) {
  exists(ArrayLiteral al |
    // The desugared call is attached to the literal at index `-1`.
    parent = al and
    i = -1 and
    child = SynthChild(MethodCallKind("[]", false, al.getNumberOfElements()))
    or
    exists(AstNode mc | mc = TMethodCallSynth(al, -1, _, _, _) |
      // Child 0: the receiver `::Array`.
      parent = mc and
      i = 0 and
      child = SynthChild(ConstantReadAccessKind("::Array"))
      or
      // Children 1..n: the original elements, in order.
      parent = mc and
      child = RealChild(al.getElement(i - 1))
    )
  )
}

/**
 * ```rb
 * [1, 2, 3]
 * ```
 * desugars to
 *
 * ```rb
 * ::Array.[](1, 2, 3)
 * ```
 */
private class ArrayLiteralSynthesis extends Synthesis {
  final override predicate child(AstNode parent, int i, Child child) {
    arrayLiteralSynthesis(parent, i, child)
  }

  // Must request exactly the call kinds constructed in `arrayLiteralSynthesis`.
  final override predicate methodCall(string name, boolean setter, int arity) {
    name = "[]" and
    setter = false and
    arity = any(ArrayLiteral al).getNumberOfElements()
  }

  final override predicate constantReadAccess(string name) { name = "::Array" }
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,604 @@
private import TreeSitter
private import codeql.Locations
private import codeql.ruby.AST
private import codeql.ruby.ast.internal.AST
private import codeql.ruby.ast.internal.Parameter
private import codeql.ruby.ast.internal.Scope
private import codeql.ruby.ast.internal.Synthesis
/**
 * Holds if `n` is in the left-hand-side of an explicit assignment `assignment`.
 *
 * This covers the left operand of an assignment or operator assignment, and,
 * recursively, any child of a destructured left assignment, left assignment
 * list or rest assignment that is itself in such a position.
 */
predicate explicitAssignmentNode(Ruby::AstNode n, Ruby::AstNode assignment) {
  exists(Ruby::AstNode container |
    container = n.getParent() and
    (
      container instanceof Ruby::RestAssignment or
      container instanceof Ruby::LeftAssignmentList or
      container instanceof Ruby::DestructuredLeftAssignment
    ) and
    explicitAssignmentNode(container, assignment)
  )
  or
  n = assignment.(Ruby::OperatorAssignment).getLeft()
  or
  n = assignment.(Ruby::Assignment).getLeft()
}
/**
 * Holds if `n` is inside an implicit assignment, that is, `n` is (a
 * descendant of) the pattern of a `for` loop or the child of an exception
 * variable.
 */
predicate implicitAssignmentNode(Ruby::AstNode n) {
  implicitAssignmentNode(n.getParent())
  or
  n = any(Ruby::For loop).getPattern()
  or
  n = any(Ruby::ExceptionVariable exceptionVar).getChild()
}
/**
 * Holds if `n` is inside a parameter of callable `c`: either `n` is a
 * parameter of `c`, or its parent is a destructured parameter that is itself
 * inside a parameter of `c`.
 */
predicate implicitParameterAssignmentNode(Ruby::AstNode n, Callable::Range c) {
  exists(Ruby::DestructuredParameter destructured |
    destructured = n.getParent() and
    implicitParameterAssignmentNode(destructured, c)
  )
  or
  n = c.getParameter(_)
}
/**
 * Holds if `var` is an instance variable token named `name` whose enclosing
 * module or class is `scope`. `instance` is `true` if `var` has an enclosing
 * method, and `false` otherwise.
 */
private predicate instanceVariableAccess(
  Ruby::InstanceVariable var, string name, Scope::Range scope, boolean instance
) {
  scope = enclosingModuleOrClass(var) and
  name = var.getValue() and
  (
    hasEnclosingMethod(var) and instance = true
    or
    not hasEnclosingMethod(var) and instance = false
  )
}
/**
 * Holds if `var` is a class variable token named `name` whose enclosing
 * module or class is `scope`.
 */
private predicate classVariableAccess(Ruby::ClassVariable var, string name, Scope::Range scope) {
  scope = enclosingModuleOrClass(var) and
  name = var.getValue()
}
/** Holds if the scope of `node` has an enclosing method. */
private predicate hasEnclosingMethod(Ruby::AstNode node) {
  exists(scopeOf(node).getEnclosingMethod())
}
/** Gets the enclosing module of the scope of `node`. */
private ModuleBase::Range enclosingModuleOrClass(Ruby::AstNode node) {
  result = scopeOf(node).getEnclosingModule()
}
/** Holds if identifier `i`, with value `name`, is inside a parameter of `scope`. */
private predicate parameterAssignment(Callable::Range scope, string name, Ruby::Identifier i) {
  name = i.getValue() and
  implicitParameterAssignmentNode(i, scope)
}
/**
 * Holds if `scope` defines `name` in its parameter declaration at `i`.
 *
 * `i` is either the first identifier (by start line, then start column)
 * named `name` inside a parameter of `scope`, or the name of a block, hash
 * splat, keyword, optional or splat parameter of `scope`.
 */
private predicate scopeDefinesParameterVariable(
  Callable::Range scope, string name, Ruby::Identifier i
) {
  // In case of overlapping parameter names (e.g. `_`), only the first
  // parameter will give rise to a variable
  i =
    min(Ruby::Identifier candidate |
      parameterAssignment(scope, name, candidate)
    |
      candidate order by
        candidate.getLocation().getStartLine(), candidate.getLocation().getStartColumn()
    )
  or
  name = i.getValue() and
  exists(Parameter::Range param | param = scope.getParameter(_) |
    i = param.(Ruby::SplatParameter).getName() or
    i = param.(Ruby::OptionalParameter).getName() or
    i = param.(Ruby::KeywordParameter).getName() or
    i = param.(Ruby::HashSplatParameter).getName() or
    i = param.(Ruby::BlockParameter).getName()
  )
}
/**
 * Holds if `name` is assigned in `scope` at `i`, that is, identifier `i` has
 * value `name`, has `scope` as its scope, and occurs in an explicit or
 * implicit assignment position.
 */
private predicate scopeAssigns(Scope::Range scope, string name, Ruby::Identifier i) {
  scope = scopeOf(i) and
  name = i.getValue() and
  (
    implicitAssignmentNode(i)
    or
    explicitAssignmentNode(i, _)
  )
}
cached
private module Cached {
/**
* The variables of the program.
*
* For class, instance and real local variables, the last component is the
* declaring node: the first matching node by start line and then start column.
*/
cached
newtype TVariable =
// One global variable per distinct global variable name.
TGlobalVariable(string name) { name = any(Ruby::GlobalVariable var).getValue() } or
// One class variable per `(scope, name)`, declared at the first such token.
TClassVariable(Scope::Range scope, string name, Ruby::AstNode decl) {
decl =
min(Ruby::ClassVariable other |
classVariableAccess(other, name, scope)
|
other order by other.getLocation().getStartLine(), other.getLocation().getStartColumn()
)
} or
// One instance variable per `(scope, name, instance)`, declared at the first such token.
TInstanceVariable(Scope::Range scope, string name, boolean instance, Ruby::AstNode decl) {
decl =
min(Ruby::InstanceVariable other |
instanceVariableAccess(other, name, scope, instance)
|
other order by other.getLocation().getStartLine(), other.getLocation().getStartColumn()
)
} or
// A local variable is introduced either by a parameter, or by the first
// assignment to `name` in `scope`, provided `name` is neither a parameter
// of `scope` nor inherited from an outer scope.
TLocalVariableReal(Scope::Range scope, string name, Ruby::Identifier i) {
scopeDefinesParameterVariable(scope, name, i)
or
i =
min(Ruby::Identifier other |
scopeAssigns(scope, name, other)
|
other order by other.getLocation().getStartLine(), other.getLocation().getStartColumn()
) and
not scopeDefinesParameterVariable(scope, name, _) and
not inherits(scope, name, _)
} or
// A synthesized local variable, as requested by some `Synthesis::localVariable`.
TLocalVariableSynth(AstNode n, int i) { any(Synthesis s).localVariable(n, i) }
// Db types that can be vcalls: scope resolutions, and constant, identifier
// and `super` tokens.
private class VcallToken =
@ruby_scope_resolution or @ruby_token_constant or @ruby_token_identifier or @ruby_token_super;
/**
* Holds if `i` is an `identifier` node occurring in the context where it
* should be considered a VCALL. VCALL is the term that MRI/Ripper uses
* internally when there's an identifier without arguments or parentheses,
* i.e. it *might* be a method call, but it might also be a variable access,
* depending on the bindings in the current scope.
* ```rb
* foo # in MRI this is a VCALL, and the predicate should hold for this
* bar() # in MRI this would be an FCALL. Tree-sitter gives us a `call` node,
* # and the `method` field will be an `identifier`, but this predicate
* # will not hold for that identifier.
* ```
*
* The implementation is an explicit enumeration of the parent node types and
* fields in which such a token may occur; the disjuncts are listed in
* alphabetical order of the parent type.
*/
cached
predicate vcall(VcallToken i) {
i = any(Ruby::ArgumentList x).getChild(_)
or
i = any(Ruby::Array x).getChild(_)
or
i = any(Ruby::Assignment x).getRight()
or
i = any(Ruby::Begin x).getChild(_)
or
i = any(Ruby::BeginBlock x).getChild(_)
or
i = any(Ruby::Binary x).getLeft()
or
i = any(Ruby::Binary x).getRight()
or
i = any(Ruby::Block x).getChild(_)
or
i = any(Ruby::BlockArgument x).getChild()
or
i = any(Ruby::Call x).getReceiver()
or
i = any(Ruby::Case x).getValue()
or
i = any(Ruby::Class x).getChild(_)
or
i = any(Ruby::Conditional x).getCondition()
or
i = any(Ruby::Conditional x).getConsequence()
or
i = any(Ruby::Conditional x).getAlternative()
or
i = any(Ruby::Do x).getChild(_)
or
i = any(Ruby::DoBlock x).getChild(_)
or
i = any(Ruby::ElementReference x).getChild(_)
or
i = any(Ruby::ElementReference x).getObject()
or
i = any(Ruby::Else x).getChild(_)
or
i = any(Ruby::Elsif x).getCondition()
or
i = any(Ruby::EndBlock x).getChild(_)
or
i = any(Ruby::Ensure x).getChild(_)
or
i = any(Ruby::Exceptions x).getChild(_)
or
i = any(Ruby::HashSplatArgument x).getChild()
or
i = any(Ruby::If x).getCondition()
or
i = any(Ruby::IfModifier x).getCondition()
or
i = any(Ruby::IfModifier x).getBody()
or
i = any(Ruby::In x).getChild()
or
i = any(Ruby::Interpolation x).getChild(_)
or
i = any(Ruby::KeywordParameter x).getValue()
or
i = any(Ruby::Method x).getChild(_)
or
i = any(Ruby::Module x).getChild(_)
or
i = any(Ruby::OperatorAssignment x).getRight()
or
i = any(Ruby::OptionalParameter x).getValue()
or
i = any(Ruby::Pair x).getKey()
or
i = any(Ruby::Pair x).getValue()
or
i = any(Ruby::ParenthesizedStatements x).getChild(_)
or
i = any(Ruby::Pattern x).getChild()
or
i = any(Ruby::Program x).getChild(_)
or
i = any(Ruby::Range x).getBegin()
or
i = any(Ruby::Range x).getEnd()
or
i = any(Ruby::RescueModifier x).getBody()
or
i = any(Ruby::RescueModifier x).getHandler()
or
i = any(Ruby::RightAssignmentList x).getChild(_)
or
i = any(Ruby::ScopeResolution x).getScope()
or
i = any(Ruby::SingletonClass x).getValue()
or
i = any(Ruby::SingletonClass x).getChild(_)
or
i = any(Ruby::SingletonMethod x).getChild(_)
or
i = any(Ruby::SingletonMethod x).getObject()
or
i = any(Ruby::SplatArgument x).getChild()
or
i = any(Ruby::Superclass x).getChild()
or
i = any(Ruby::Then x).getChild(_)
or
i = any(Ruby::Unary x).getOperand()
or
i = any(Ruby::Unless x).getCondition()
or
i = any(Ruby::UnlessModifier x).getCondition()
or
i = any(Ruby::UnlessModifier x).getBody()
or
i = any(Ruby::Until x).getCondition()
or
i = any(Ruby::UntilModifier x).getCondition()
or
i = any(Ruby::UntilModifier x).getBody()
or
i = any(Ruby::While x).getCondition()
or
i = any(Ruby::WhileModifier x).getCondition()
or
i = any(Ruby::WhileModifier x).getBody()
}
/**
* Holds if identifier `access` has the same name as `variable` and can refer
* to it. This is the case when either
* - `access` is in the declaring scope of `variable` and is not located
*   strictly before the location of `variable`, or
* - the scope of `access` inherits the name from the declaring scope of
*   `variable`.
*/
cached
predicate access(Ruby::Identifier access, VariableReal variable) {
exists(string name |
variable.getNameImpl() = name and
name = access.getValue()
|
// Same scope. Note that `and` binds tighter than `or`, so the three
// conjuncts below form the first alternative as a whole.
variable.getDeclaringScopeImpl() = scopeOf(access) and
not access.getLocation().strictlyBefore(variable.getLocationImpl()) and
// In case of overlapping parameter names, later parameters should not
// be considered accesses to the first parameter
if parameterAssignment(_, _, access)
then scopeDefinesParameterVariable(_, _, access)
else any()
or
// Inherited from an outer scope.
exists(Scope::Range declScope |
variable.getDeclaringScopeImpl() = declScope and
inherits(scopeOf(access), name, declScope)
)
)
}
/**
* A token that accesses a variable: an identifier for which `access` holds,
* or a global, instance or class variable token.
*/
private class Access extends Ruby::Token {
Access() {
access(this, _) or
this instanceof Ruby::GlobalVariable or
this instanceof Ruby::InstanceVariable or
this instanceof Ruby::ClassVariable
}
}
/** Holds if variable access `access` is in the left-hand side of explicit assignment `assignment`. */
cached
predicate explicitWriteAccess(Access access, Ruby::AstNode assignment) {
explicitAssignmentNode(access, assignment)
}
/**
 * Holds if variable access `access` is an implicit write: it is the
 * identifier at which some scope defines a parameter variable, or it is
 * inside an implicit assignment.
 */
cached
predicate implicitWriteAccess(Access access) {
  scopeDefinesParameterVariable(_, _, access)
  or
  implicitAssignmentNode(access)
}
/**
 * Holds if `access` is a captured access, that is, the scope in which the
 * access occurs differs from the declaring scope of the accessed variable.
 */
cached
predicate isCapturedAccess(LocalVariableAccess access) {
  scopeOf(toGenerated(access)) != toGenerated(access.getVariable().getDeclaringScope())
}
/** Holds if instance variable token `var` is an access to instance variable `v`. */
cached
predicate instanceVariableAccess(Ruby::InstanceVariable var, InstanceVariable v) {
  exists(Scope::Range declScope, string varName, boolean isInstance |
    instanceVariableAccess(var, varName, declScope, isInstance) and
    v = TInstanceVariable(declScope, varName, isInstance, _)
  )
}
/** Holds if class variable token `var` is an access to class variable `variable`. */
cached
predicate classVariableAccess(Ruby::ClassVariable var, ClassVariable variable) {
  exists(string varName, Scope::Range declScope |
    classVariableAccess(var, varName, declScope) and
    variable = TClassVariable(declScope, varName, _)
  )
}
}
import Cached
/**
* Holds if this scope inherits `name` from an outer scope `outer`.
*
* Only brace blocks and `do` blocks inherit names, and only names that they
* do not define as a parameter themselves.
*/
private predicate inherits(Scope::Range scope, string name, Scope::Range outer) {
(scope instanceof Ruby::Block or scope instanceof Ruby::DoBlock) and
not scopeDefinesParameterVariable(scope, name, _) and
(
// Direct case: the immediately enclosing scope defines `name`, either as a
// parameter or by an assignment located strictly before `scope`.
outer = scope.getOuterScope() and
(
scopeDefinesParameterVariable(outer, name, _)
or
exists(Ruby::Identifier i |
scopeAssigns(outer, name, i) and
i.getLocation().strictlyBefore(scope.getLocation())
)
)
or
// Transitive case: the immediately enclosing scope itself inherits `name` from `outer`.
inherits(scope.getOuterScope(), name, outer)
)
}
/** The implementation base class for all variables, real and synthesized. */
abstract class VariableImpl extends TVariable {
/** Gets the name of this variable. */
abstract string getNameImpl();
/** Gets a textual representation of this variable, namely its name. */
final string toString() { result = this.getNameImpl() }
/** Gets the location of this variable. */
abstract Location getLocationImpl();
}
/** The variables that are not synthesized: global, class, instance and real local variables. */
class TVariableReal = TGlobalVariable or TClassVariable or TInstanceVariable or TLocalVariableReal;
/** The local variables, both real and synthesized. */
class TLocalVariable = TLocalVariableReal or TLocalVariableSynth;
/**
* This class only exists to avoid negative recursion warnings. Ideally,
* we would use `VariableImpl` directly, but that results in incorrect
* negative recursion warnings. Adding new root-defs for the predicates
* below works around this.
*/
abstract class VariableReal extends TVariableReal {
/** Gets the name of this variable. */
abstract string getNameImpl();
/** Gets the location of this variable. */
abstract Location getLocationImpl();
/** Gets the scope in which this variable is declared. */
abstract Scope::Range getDeclaringScopeImpl();
/** Gets a textual representation of this variable, namely its name. */
final string toString() { result = this.getNameImpl() }
}
// Convert extensions of `VariableReal` into extensions of `VariableImpl`,
// by forwarding the name and location predicates to the `VariableReal`
// implementation.
private class VariableRealAdapter extends VariableImpl, TVariableReal instanceof VariableReal {
final override string getNameImpl() { result = VariableReal.super.getNameImpl() }
final override Location getLocationImpl() { result = VariableReal.super.getLocationImpl() }
}
/** A local variable that is declared in the source code, at identifier `i` in scope `scope`. */
class LocalVariableReal extends VariableReal, TLocalVariableReal {
private Scope::Range scope;
private string name;
// The identifier that gives rise to this variable.
private Ruby::Identifier i;
LocalVariableReal() { this = TLocalVariableReal(scope, name, i) }
final override string getNameImpl() { result = name }
// The location of the declaring identifier.
final override Location getLocationImpl() { result = i.getLocation() }
final override Scope::Range getDeclaringScopeImpl() { result = scope }
/** Gets the variable access whose underlying node is the declaring identifier. */
final VariableAccess getDefiningAccessImpl() { toGenerated(result) = i }
}
/** A synthesized local variable, identified by index `i` and associated with AST node `n`. */
class LocalVariableSynth extends VariableImpl, TLocalVariableSynth {
private AstNode n;
private int i;
LocalVariableSynth() { this = TLocalVariableSynth(n, i) }
// The name is `__synth__<i>`, with the suffix `__<level>` appended when the
// desugaring level of `n` is greater than zero.
final override string getNameImpl() {
exists(int level | level = desugarLevel(n) |
if level > 0 then result = "__synth__" + i + "__" + level else result = "__synth__" + i
)
}
// The location of the node the variable is associated with.
final override Location getLocationImpl() { result = n.getLocation() }
}
/** A global variable, identified by its name only. */
class GlobalVariableImpl extends VariableReal, TGlobalVariable {
private string name;
GlobalVariableImpl() { this = TGlobalVariable(name) }
final override string getNameImpl() { result = name }
// Global variables have neither a location nor a declaring scope.
final override Location getLocationImpl() { none() }
final override Scope::Range getDeclaringScopeImpl() { none() }
}
/** An instance variable, identified by scope, name, an `instance` flag and a declaration node. */
class InstanceVariableImpl extends VariableReal, TInstanceVariable {
  private ModuleBase::Range scope;
  private boolean instance;
  private string name;
  private Ruby::AstNode decl;

  InstanceVariableImpl() { TInstanceVariable(scope, name, instance, decl) = this }

  /**
   * Holds if this variable was recorded with `instance = false`.
   * NOTE(review): presumably this means an instance variable of the class
   * object itself; confirm against the definition of `TInstanceVariable`.
   */
  final predicate isClassInstanceVariable() { false = instance }

  final override Scope::Range getDeclaringScopeImpl() { scope = result }

  /** Gets the location of the declaration node. */
  final override Location getLocationImpl() { decl.getLocation() = result }

  final override string getNameImpl() { name = result }
}
/** A class variable, identified by scope, name and a declaration node. */
class ClassVariableImpl extends VariableReal, TClassVariable {
  private ModuleBase::Range scope;
  private string name;
  private Ruby::AstNode decl;

  ClassVariableImpl() { TClassVariable(scope, name, decl) = this }

  final override Scope::Range getDeclaringScopeImpl() { scope = result }

  /** Gets the location of the declaration node. */
  final override Location getLocationImpl() { decl.getLocation() = result }

  final override string getNameImpl() { name = result }
}
/** Abstract base of all variable accesses; each concrete access supplies its variable. */
abstract class VariableAccessImpl extends Expr, TVariableAccess {
/** Gets the variable that this access refers to. */
abstract VariableImpl getVariableImpl();
}
module LocalVariableAccess {
  /**
   * Holds if identifier `id` is an access to local variable `v` and `id` is
   * either an explicit write access, an implicit write access, or a `vcall`.
   */
  predicate range(Ruby::Identifier id, LocalVariable v) {
    (
      vcall(id)
      or
      implicitWriteAccess(id)
      or
      explicitWriteAccess(id, _)
    ) and
    access(id, v)
  }
}
/**
 * Union of the real local-variable access branch with the global, instance
 * and class variable access types.
 */
class TVariableAccessReal =
TLocalVariableAccessReal or TGlobalVariableAccess or TInstanceVariableAccess or
TClassVariableAccess;
/** Abstract base of local variable accesses (`TLocalVariableAccess`). */
abstract class LocalVariableAccessImpl extends VariableAccessImpl, TLocalVariableAccess { }
/** A local variable access backed by an identifier `g` that refers to variable `v`. */
private class LocalVariableAccessReal extends LocalVariableAccessImpl, TLocalVariableAccessReal {
  private Ruby::Identifier g;
  private LocalVariable v;

  LocalVariableAccessReal() { TLocalVariableAccessReal(g, v) = this }

  /** Gets the value of the underlying identifier. */
  final override string toString() { g.getValue() = result }

  final override LocalVariable getVariableImpl() { v = result }
}
/** A local variable access from the `TLocalVariableAccessSynth` branch, referring to `v`. */
private class LocalVariableAccessSynth extends LocalVariableAccessImpl, TLocalVariableAccessSynth {
  private LocalVariable v;

  LocalVariableAccessSynth() { TLocalVariableAccessSynth(_, _, v) = this }

  /** Gets the name of the accessed variable. */
  final override string toString() { v.getName() = result }

  final override LocalVariable getVariableImpl() { v = result }
}
module GlobalVariableAccess {
  /** Holds if the value of node `n` equals the name of global variable `v`. */
  predicate range(Ruby::GlobalVariable n, GlobalVariableImpl v) { v.getNameImpl() = n.getValue() }
}
/** Abstract base of global variable accesses (`TGlobalVariableAccess`). */
abstract class GlobalVariableAccessImpl extends VariableAccessImpl, TGlobalVariableAccess { }
/** A global variable access backed by node `g` that refers to variable `v`. */
private class GlobalVariableAccessReal extends GlobalVariableAccessImpl, TGlobalVariableAccessReal {
  private Ruby::GlobalVariable g;
  private GlobalVariable v;

  GlobalVariableAccessReal() { TGlobalVariableAccessReal(g, v) = this }

  /** Gets the value of the underlying node. */
  final override string toString() { g.getValue() = result }

  final override GlobalVariable getVariableImpl() { v = result }
}
/** A global variable access from the `TGlobalVariableAccessSynth` branch, referring to `v`. */
private class GlobalVariableAccessSynth extends GlobalVariableAccessImpl, TGlobalVariableAccessSynth {
  private GlobalVariable v;

  GlobalVariableAccessSynth() { TGlobalVariableAccessSynth(_, _, v) = this }

  /** Gets the name of the accessed variable. */
  final override string toString() { v.getName() = result }

  final override GlobalVariable getVariableImpl() { v = result }
}
module InstanceVariableAccess {
/** Holds if `instanceVariableAccess(n, v)` holds, i.e. node `n` is related to instance variable `v`. */
predicate range(Ruby::InstanceVariable n, InstanceVariable v) { instanceVariableAccess(n, v) }
}
/** Abstract base of instance variable accesses (`TInstanceVariableAccess`). */
abstract class InstanceVariableAccessImpl extends VariableAccessImpl, TInstanceVariableAccess { }
/** An instance variable access backed by node `g` that refers to variable `v`. */
private class InstanceVariableAccessReal extends InstanceVariableAccessImpl,
  TInstanceVariableAccessReal {
  private Ruby::InstanceVariable g;
  private InstanceVariable v;

  InstanceVariableAccessReal() { TInstanceVariableAccessReal(g, v) = this }

  /** Gets the value of the underlying node. */
  final override string toString() { g.getValue() = result }

  final override InstanceVariable getVariableImpl() { v = result }
}
/** An instance variable access from the `TInstanceVariableAccessSynth` branch, referring to `v`. */
private class InstanceVariableAccessSynth extends InstanceVariableAccessImpl,
  TInstanceVariableAccessSynth {
  private InstanceVariable v;

  InstanceVariableAccessSynth() { TInstanceVariableAccessSynth(_, _, v) = this }

  /** Gets the name of the accessed variable. */
  final override string toString() { v.getName() = result }

  final override InstanceVariable getVariableImpl() { v = result }
}
module ClassVariableAccess {
/** Holds if `classVariableAccess(n, v)` holds, i.e. node `n` is related to class variable `v`. */
predicate range(Ruby::ClassVariable n, ClassVariable v) { classVariableAccess(n, v) }
}
/**
 * Abstract base of class variable accesses (`TClassVariableAccess`).
 * Despite the `Real` in its name, it is extended by both
 * `ClassVariableAccessReal` and `ClassVariableAccessSynth`.
 */
abstract class ClassVariableAccessRealImpl extends VariableAccessImpl, TClassVariableAccess { }
/** A class variable access backed by node `g` that refers to variable `v`. */
private class ClassVariableAccessReal extends ClassVariableAccessRealImpl, TClassVariableAccessReal {
  private Ruby::ClassVariable g;
  private ClassVariable v;

  ClassVariableAccessReal() { TClassVariableAccessReal(g, v) = this }

  /** Gets the value of the underlying node. */
  final override string toString() { g.getValue() = result }

  final override ClassVariable getVariableImpl() { v = result }
}
/** A class variable access from the `TClassVariableAccessSynth` branch, referring to `v`. */
private class ClassVariableAccessSynth extends ClassVariableAccessRealImpl,
  TClassVariableAccessSynth {
  private ClassVariable v;

  ClassVariableAccessSynth() { TClassVariableAccessSynth(_, _, v) = this }

  /** Gets the name of the accessed variable. */
  final override string toString() { v.getName() = result }

  final override ClassVariable getVariableImpl() { v = result }
}

View File

@@ -0,0 +1,414 @@
/** Provides classes representing basic blocks. */
private import codeql.Locations
private import codeql.ruby.AST
private import codeql.ruby.ast.internal.AST
private import codeql.ruby.ast.internal.TreeSitter
private import codeql.ruby.controlflow.ControlFlowGraph
private import internal.ControlFlowGraphImpl
private import CfgNodes
private import SuccessorTypes
/**
 * A basic block: a maximal straight-line sequence of control flow nodes
 * that contains neither branches nor joins.
 */
class BasicBlock extends TBasicBlockStart {
  /**
   * Gets the scope of this basic block, which is the scope of any of its
   * predecessor blocks.
   */
  CfgScope getScope() {
    exists(BasicBlock pred | pred = this.getAPredecessor() | pred.getScope() = result)
  }

  /** Gets an immediate successor of this basic block, if any. */
  BasicBlock getASuccessor() { this.getASuccessor(_) = result }

  /** Gets an immediate successor of this basic block reached via an edge of type `t`, if any. */
  BasicBlock getASuccessor(SuccessorType t) {
    this.getLastNode().getASuccessor(t) = result.getFirstNode()
  }

  /** Gets an immediate predecessor of this basic block, if any. */
  BasicBlock getAPredecessor() { this = result.getASuccessor() }

  /** Gets an immediate predecessor of this basic block reaching it via an edge of type `t`, if any. */
  BasicBlock getAPredecessor(SuccessorType t) { this = result.getASuccessor(t) }

  /** Gets the control flow node at (zero-based) position `pos` in this basic block. */
  CfgNode getNode(int pos) { bbIndex(this.getFirstNode(), result, pos) }

  /** Gets any control flow node of this basic block. */
  CfgNode getANode() { this.getNode(_) = result }

  /** Gets the first control flow node of this basic block. */
  CfgNode getFirstNode() { TBasicBlockStart(result) = this }

  /** Gets the last control flow node of this basic block. */
  CfgNode getLastNode() {
    exists(int lastPos | lastPos = this.length() - 1 | result = this.getNode(lastPos))
  }

  /** Gets the number of control flow nodes in this basic block. */
  int length() { strictcount(this.getANode()) = result }

  /**
   * Holds if this basic block is the immediate dominator of basic block `bb`.
   *
   * Every path that reaches `bb` from an entry basic block goes through this
   * basic block.
   *
   * Example:
   *
   * ```rb
   * def m b
   *   if b
   *     return 0
   *   end
   *   return 1
   * end
   * ```
   *
   * The basic block starting on line 2 immediately dominates the basic block
   * on line 5: every path from the entry point of `m` to `return 1` passes
   * through the `if` block.
   */
  predicate immediatelyDominates(BasicBlock bb) { bbIDominates(this, bb) }

  /**
   * Holds if this basic block strictly dominates basic block `bb`.
   *
   * Every path that reaches `bb` from an entry basic block goes through this
   * basic block, and this basic block is different from `bb`.
   *
   * Example:
   *
   * ```rb
   * def m b
   *   if b
   *     return 0
   *   end
   *   return 1
   * end
   * ```
   *
   * The basic block starting on line 2 strictly dominates the basic block
   * on line 5: every path from the entry point of `m` to `return 1` passes
   * through the `if` block.
   */
  predicate strictlyDominates(BasicBlock bb) { bbIDominates+(this, bb) }

  /**
   * Holds if this basic block dominates basic block `bb`, that is, it either
   * strictly dominates `bb` or is `bb` itself.
   *
   * Every path that reaches `bb` from an entry basic block goes through this
   * basic block.
   *
   * Example:
   *
   * ```rb
   * def m b
   *   if b
   *     return 0
   *   end
   *   return 1
   * end
   * ```
   *
   * The basic block starting on line 2 dominates the basic block on line 5:
   * every path from the entry point of `m` to `return 1` passes through the
   * `if` block.
   */
  predicate dominates(BasicBlock bb) {
    this.strictlyDominates(bb)
    or
    this = bb
  }

  /**
   * Holds if `df` is in the dominance frontier of this basic block: this
   * basic block dominates a predecessor of `df`, but does not strictly
   * dominate `df` itself.
   *
   * Example:
   *
   * ```rb
   * def m x
   *   if x < 0
   *     x = -x
   *     if x > 10
   *       x = x - 1
   *     end
   *   end
   *   puts x
   * end
   * ```
   *
   * The basic block on line 8 is in the dominance frontier of the basic
   * block starting on line 3: that block dominates the basic block on
   * line 4, which is a predecessor of `puts x`, yet it does not dominate
   * the basic block on line 8.
   */
  predicate inDominanceFrontier(BasicBlock df) {
    not this.strictlyDominates(df) and
    this.dominatesPredecessor(df)
  }

  /** Holds if this basic block dominates some predecessor of `df`. */
  private predicate dominatesPredecessor(BasicBlock df) {
    exists(BasicBlock pred | pred = df.getAPredecessor() | this.dominates(pred))
  }

  /**
   * Gets the immediate dominator of this basic block, if any.
   *
   * Every path that reaches this basic block from an entry basic block goes
   * through the result.
   *
   * Example:
   *
   * ```rb
   * def m b
   *   if b
   *     return 0
   *   end
   *   return 1
   * end
   * ```
   *
   * The basic block starting on line 2 is the immediate dominator of the
   * basic block on line 5: every path from the entry point of `m` to
   * `return 1` passes through the `if` block.
   */
  BasicBlock getImmediateDominator() { bbIDominates(result, this) }

  /**
   * Holds if this basic block strictly post-dominates basic block `bb`.
   *
   * Every path from `bb` to a normal-exit basic block goes through this
   * basic block, and this basic block is different from `bb`.
   *
   * Example:
   *
   * ```rb
   * def m b
   *   if b
   *     puts "b"
   *   end
   *   puts "m"
   * end
   * ```
   *
   * The basic block on line 5 strictly post-dominates the basic block on
   * line 3: every path from `puts "b"` to the exit point of `m` passes
   * through `puts "m"`.
   */
  predicate strictlyPostDominates(BasicBlock bb) { bbIPostDominates+(this, bb) }

  /**
   * Holds if this basic block post-dominates basic block `bb`, that is, it
   * either strictly post-dominates `bb` or is `bb` itself.
   *
   * Every path from `bb` to a normal-exit basic block goes through this
   * basic block.
   *
   * Example:
   *
   * ```rb
   * def m b
   *   if b
   *     puts "b"
   *   end
   *   puts "m"
   * end
   * ```
   *
   * The basic block on line 5 post-dominates the basic block on line 3:
   * every path from `puts "b"` to the exit point of `m` passes through
   * `puts "m"`.
   */
  predicate postDominates(BasicBlock bb) {
    this = bb
    or
    this.strictlyPostDominates(bb)
  }

  /** Holds if this basic block can reach itself, i.e. lies on a cycle of the control flow graph. */
  predicate inLoop() { this = this.getASuccessor+() }

  /** Gets a textual representation of this basic block, taken from its first node. */
  string toString() { this.getFirstNode().toString() = result }

  /** Gets the location of this basic block, taken from its first node. */
  Location getLocation() { this.getFirstNode().getLocation() = result }
}
cached
private module Cached {
  /** Internal representation of basic blocks: one value per node that starts a block. */
  cached
  newtype TBasicBlock = TBasicBlockStart(CfgNode cfn) { startsBB(cfn) }

  /**
   * Holds if `cfn` starts a new basic block: it follows a branch node, it is
   * a join node, or it has a successor but no predecessor.
   */
  private predicate startsBB(CfgNode cfn) {
    cfn.getAPredecessor().isBranch()
    or
    cfn.isJoin()
    or
    exists(cfn.getASuccessor()) and not exists(cfn.getAPredecessor())
  }

  /**
   * Holds if `succ` is a control flow successor of `pred` and both belong to
   * the same basic block, i.e. `succ` does not start a new one.
   */
  private predicate intraBBSucc(CfgNode pred, CfgNode succ) {
    not startsBB(succ) and
    pred.getASuccessor() = succ
  }

  /**
   * Holds if `cfn` is the `i`th node of the basic block that starts at
   * `bbStart`.
   *
   * Put differently, `i` is the shortest distance from the block-starting
   * node `bbStart` to `cfn` along the `intraBBSucc` relation.
   */
  cached
  predicate bbIndex(CfgNode bbStart, CfgNode cfn, int i) =
    shortestDistances(startsBB/1, intraBBSucc/2)(bbStart, cfn, i)

  /**
   * Holds if `succ` is an immediate successor block of `pred`, i.e. the first
   * node of `succ` is a control flow successor of the last node of `pred`.
   */
  private predicate succBB(BasicBlock pred, BasicBlock succ) { pred.getASuccessor() = succ }

  /** Holds if `dom` is an immediate dominator of `bb`. */
  cached
  predicate bbIDominates(BasicBlock dom, BasicBlock bb) =
    idominance(entryBB/1, succBB/2)(_, dom, bb)

  /** Holds if `pred` is a basic block predecessor of `succ` (`succBB` with swapped arguments). */
  private predicate predBB(BasicBlock succ, BasicBlock pred) { succBB(pred, succ) }

  /** Holds if `bb` contains an annotated exit node that represents normal exit. */
  private predicate normalExitBB(BasicBlock bb) {
    exists(AnnotatedExitNode exit | exit = bb.getANode() | exit.isNormal())
  }

  /** Holds if `dom` is an immediate post-dominator of `bb`. */
  cached
  predicate bbIPostDominates(BasicBlock dom, BasicBlock bb) =
    idominance(normalExitBB/1, predBB/2)(_, dom, bb)

  /**
   * Gets the `i`th (zero-based) predecessor of join block `jb`. Predecessors
   * are ordered by `JoinBlockPredecessors::getId` and then by
   * `JoinBlockPredecessors::getSplitString`, which is otherwise arbitrary.
   */
  cached
  JoinBlockPredecessor getJoinBlockPredecessor(JoinBlock jb, int i) {
    result =
      rank[i + 1](JoinBlockPredecessor pred |
        jb.getAPredecessor() = pred
      |
        pred order by JoinBlockPredecessors::getId(pred), JoinBlockPredecessors::getSplitString(pred)
      )
  }
}
private import Cached
/** Holds if the first node of basic block `bb` is an `EntryNode`. */
private predicate entryBB(BasicBlock bb) { exists(EntryNode entry | entry = bb.getFirstNode()) }
/** An entry basic block: a basic block whose first node is an entry node. */
class EntryBasicBlock extends BasicBlock {
  EntryBasicBlock() { entryBB(this) }

  /** Gets the scope whose entry node is the first node of this block. */
  override CfgScope getScope() { TEntryNode(result) = this.getFirstNode() }
}
/**
 * An annotated exit basic block: a basic block that contains an annotated
 * exit node.
 */
class AnnotatedExitBasicBlock extends BasicBlock {
  private boolean normal;

  AnnotatedExitBasicBlock() {
    exists(AnnotatedExitNode n | n = this.getANode() |
      n.isNormal() and normal = true
      or
      not n.isNormal() and normal = false
    )
  }

  /** Holds if this block represents a normal exit. */
  final predicate isNormal() { true = normal }
}
/** An exit basic block: a basic block whose last node is an exit node. */
class ExitBasicBlock extends BasicBlock {
  ExitBasicBlock() { exists(ExitNode exit | exit = this.getLastNode()) }
}
/** Provides the sort keys used to order the predecessors of a join block. */
private module JoinBlockPredecessors {
  /** Holds if `x` and `y` are the same node (the identity relation). */
  private predicate id(Ruby::AstNode x, Ruby::AstNode y) { x = y }

  /** Holds if `y` is the integer assigned to the equivalence class of `x` under `id`. */
  private predicate idOf(Ruby::AstNode x, int y) = equivalenceRelation(id/2)(x, y)

  /**
   * Gets the integer id of the node underlying `jbp`: either the AST node of
   * its first control flow node, or the scope of `jbp` when it is an entry
   * basic block.
   */
  int getId(JoinBlockPredecessor jbp) {
    exists(Ruby::AstNode generated | idOf(generated, result) |
      generated = toGeneratedInclSynth(jbp.(EntryBasicBlock).getScope())
      or
      generated = toGeneratedInclSynth(jbp.getFirstNode().(AstCfgNode).getNode())
    )
  }

  /**
   * Gets the splits string of the first node of `jbp`, or the empty string
   * when that node has no splits string.
   */
  string getSplitString(JoinBlockPredecessor jbp) {
    result = "" and
    not exists(jbp.getFirstNode().(AstCfgNode).getSplitsString())
    or
    jbp.getFirstNode().(AstCfgNode).getSplitsString() = result
  }
}
/** A basic block whose first node is a join node, i.e. a block with more than one predecessor. */
class JoinBlock extends BasicBlock {
  JoinBlock() { this.getFirstNode().isJoin() }

  /**
   * Gets the `i`th predecessor of this join block, with respect to some
   * arbitrary but fixed order.
   */
  JoinBlockPredecessor getJoinBlockPredecessor(int i) { getJoinBlockPredecessor(this, i) = result }
}
/** A basic block that has a join block among its immediate successors. */
class JoinBlockPredecessor extends BasicBlock {
  JoinBlockPredecessor() { exists(JoinBlock jb | jb = this.getASuccessor()) }
}
/** A basic block whose last node is a condition, so that control flow splits after it. */
class ConditionBlock extends BasicBlock {
  ConditionBlock() { this.getLastNode().isCondition() }

  /**
   * Holds if basic block `succ` is immediately controlled by this basic block
   * with conditional value `s`: `succ` is an immediate successor of this
   * block along the `s` edge, and every other predecessor of `succ` is
   * dominated by `succ`. Hence `succ` can only be reached from the callable
   * entry point via the `s` edge out of this basic block.
   */
  pragma[nomagic]
  predicate immediatelyControls(BasicBlock succ, BooleanSuccessor s) {
    this.getASuccessor(s) = succ and
    not exists(BasicBlock pred |
      pred = succ.getAPredecessor() and
      pred != this and
      not succ.dominates(pred)
    )
  }

  /**
   * Holds if basic block `controlled` is controlled by this basic block with
   * conditional value `s`: `controlled` is dominated by a block that is
   * immediately controlled with value `s`, so it can only be reached from the
   * callable entry point via the `s` edge out of this basic block.
   */
  predicate controls(BasicBlock controlled, BooleanSuccessor s) {
    exists(BasicBlock succ |
      succ.dominates(controlled) and
      this.immediatelyControls(succ, s)
    )
  }
}

View File

@@ -0,0 +1,484 @@
/** Provides classes representing nodes in a control flow graph. */
private import codeql.ruby.AST
private import codeql.ruby.controlflow.BasicBlocks
private import codeql.ruby.dataflow.SSA
private import ControlFlowGraph
private import internal.ControlFlowGraphImpl
private import internal.Splitting
/** The entry node of a CFG scope. */
class EntryNode extends CfgNode, TEntryNode {
  private CfgScope scope;

  EntryNode() { TEntryNode(scope) = this }

  /** Gets the basic block of this node, which is always an entry basic block. */
  final override EntryBasicBlock getBasicBlock() { CfgNode.super.getBasicBlock() = result }

  /** Gets the location of the scope that this node enters. */
  final override Location getLocation() { scope.getLocation() = result }

  /** Gets the string `enter <scope>`. */
  final override string toString() { "enter " + scope = result }
}
/** An exit node of a CFG scope that records whether the exit is normal or abnormal. */
class AnnotatedExitNode extends CfgNode, TAnnotatedExitNode {
  private CfgScope scope;
  private boolean normal;

  AnnotatedExitNode() { TAnnotatedExitNode(scope, normal) = this }

  /** Holds if this node represents a normal exit. */
  final predicate isNormal() { true = normal }

  /** Gets the basic block of this node, which is always an annotated exit basic block. */
  final override AnnotatedExitBasicBlock getBasicBlock() { CfgNode.super.getBasicBlock() = result }

  /** Gets the location of the scope that this node exits. */
  final override Location getLocation() { scope.getLocation() = result }

  /** Gets the string `exit <scope> (normal)` or `exit <scope> (abnormal)`. */
  final override string toString() {
    exists(string kind | result = "exit " + scope + " (" + kind + ")" |
      normal = false and kind = "abnormal"
      or
      normal = true and kind = "normal"
    )
  }
}
/** The exit node of a CFG scope. */
class ExitNode extends CfgNode, TExitNode {
  private CfgScope scope;

  ExitNode() { TExitNode(scope) = this }

  /** Gets the location of the scope that this node exits. */
  final override Location getLocation() { scope.getLocation() = result }

  /** Gets the string `exit <scope>`. */
  final override string toString() { "exit " + scope = result }
}
/**
 * A control flow node for an AST node.
 *
 * Each AST node maps to zero or more `AstCfgNode`s: zero when the node is in
 * unreachable (dead) code or is not important for control flow, and several
 * when there are different splits for the AST node.
 */
class AstCfgNode extends CfgNode, TElementNode {
  private Splits splits;
  private AstNode n;

  AstCfgNode() { TElementNode(n, splits) = this }

  final override AstNode getNode() { n = result }

  override Location getLocation() { n.getLocation() = result }

  /**
   * Gets the textual representation of the AST node, prefixed with
   * `[<splits>] ` when this node has a non-empty splits string.
   */
  final override string toString() {
    exists(string text | text = n.(AstNode).toString() |
      not exists(this.getSplitsString()) and result = text
      or
      result = "[" + this.getSplitsString() + "] " + text
    )
  }

  /**
   * Gets the textual representation of the splits of this node; there is no
   * result when that representation is the empty string.
   */
  final string getSplitsString() {
    splits.toString() = result and
    not result = ""
  }

  /** Gets a split for this control flow node, if any. */
  final Split getASplit() { splits.getASplit() = result }
}
/** A control-flow node whose AST node is an expression. */
class ExprCfgNode extends AstCfgNode {
  Expr e;

  ExprCfgNode() { this.getNode() = e }

  /** Gets the underlying expression. */
  Expr getExpr() { e = result }

  /**
   * Gets the node assigned by an SSA write definition that has this node as
   * one of its reads.
   */
  private ExprCfgNode getSource() {
    exists(Ssa::WriteDefinition def | this = def.getARead() | def.assigns(result))
  }

  /**
   * Gets the textual (constant) value of this expression, if any. By default
   * this is the value text of the node returned by `getSource`.
   */
  string getValueText() {
    exists(ExprCfgNode source | source = this.getSource() | source.getValueText() = result)
  }
}
/** A control-flow node whose AST node is a `ReturningStmt`. */
class ReturningCfgNode extends AstCfgNode {
  ReturningStmt s;

  ReturningCfgNode() { this.getNode() = s }

  /**
   * Gets the predecessor of this node that wraps the value of the underlying
   * statement, if any.
   */
  ExprCfgNode getReturnedValueNode() {
    result.getNode() = s.getValue() and
    result = this.getAPredecessor()
  }
}
/** A control-flow node whose AST node is a `StringComponent`. */
class StringComponentCfgNode extends AstCfgNode {
  StringComponentCfgNode() { exists(StringComponent c | c = this.getNode()) }
}
/** Gets the desugared form of `n` when one exists, and `n` itself otherwise. */
private Expr desugar(Expr n) {
  if exists(n.getDesugared()) then result = n.getDesugared() else result = n
}
/**
 * A helper class that maps parent-child pairs of AST nodes to parent-child
 * pairs of CFG nodes.
 */
abstract private class ExprChildMapping extends Expr {
  /**
   * Holds if `child` is a (possibly nested) child of this expression for
   * which a matching CFG child should be found.
   */
  abstract predicate relevantChild(Expr child);

  /**
   * Holds if basic block `bb` is reached when walking backwards from the
   * block of `cfn` (a control-flow node of this expression), where the walk
   * never continues past a block that contains a node for `child`.
   */
  pragma[nomagic]
  private predicate reachesBasicBlock(Expr child, CfgNode cfn, BasicBlock bb) {
    // Recursive step: move to a predecessor, unless `mid` already holds `child`.
    exists(BasicBlock mid |
      bb = mid.getAPredecessor() and
      this.reachesBasicBlock(child, cfn, mid) and
      not mid.getANode().getNode() = child
    )
    or
    // Base case: the block that contains `cfn` itself.
    bb.getANode() = cfn and
    cfn = this.getAControlFlowNode() and
    this.relevantChild(child)
  }

  /**
   * Holds if there is a control-flow path from `cfn` to `cfnChild`, where
   * `cfn` is a control-flow node for this expression and `cfnChild` is a
   * control-flow node for `child` (or for its desugared form).
   *
   * The path never escapes the syntactic scope of this expression.
   */
  cached
  predicate hasCfgChild(Expr child, CfgNode cfn, CfgNode cfnChild) {
    cfnChild = desugar(child).getAControlFlowNode() and
    this.reachesBasicBlock(child, cfn, cfnChild.getBasicBlock())
  }
}
/** Provides classes for control-flow nodes that wrap AST expressions. */
module ExprNodes {
private class LiteralChildMapping extends ExprChildMapping, Literal {
override predicate relevantChild(Expr e) { none() }
}
/** A control-flow node that wraps an `ArrayLiteral` AST expression. */
class LiteralCfgNode extends ExprCfgNode {
override LiteralChildMapping e;
override Literal getExpr() { result = super.getExpr() }
override string getValueText() { result = e.getValueText() }
}
private class AssignExprChildMapping extends ExprChildMapping, AssignExpr {
override predicate relevantChild(Expr e) { e = this.getAnOperand() }
}
/** A control-flow node that wraps an `AssignExpr` AST expression. */
class AssignExprCfgNode extends ExprCfgNode {
override AssignExprChildMapping e;
final override AssignExpr getExpr() { result = ExprCfgNode.super.getExpr() }
/** Gets the LHS of this assignment. */
final ExprCfgNode getLhs() { e.hasCfgChild(e.getLeftOperand(), this, result) }
/** Gets the RHS of this assignment. */
final ExprCfgNode getRhs() { e.hasCfgChild(e.getRightOperand(), this, result) }
}
private class OperationExprChildMapping extends ExprChildMapping, Operation {
override predicate relevantChild(Expr e) { e = this.getAnOperand() }
}
/** A control-flow node that wraps an `Operation` AST expression. */
class OperationCfgNode extends ExprCfgNode {
override OperationExprChildMapping e;
override Operation getExpr() { result = super.getExpr() }
/** Gets an operand of this operation. */
final ExprCfgNode getAnOperand() { e.hasCfgChild(e.getAnOperand(), this, result) }
}
/** A control-flow node that wraps a `BinaryOperation` AST expression. */
class BinaryOperationCfgNode extends OperationCfgNode {
private BinaryOperation bo;
BinaryOperationCfgNode() { e = bo }
override BinaryOperation getExpr() { result = super.getExpr() }
/** Gets the left operand of this binary operation. */
final ExprCfgNode getLeftOperand() { e.hasCfgChild(bo.getLeftOperand(), this, result) }
/** Gets the right operand of this binary operation. */
final ExprCfgNode getRightOperand() { e.hasCfgChild(bo.getRightOperand(), this, result) }
final override string getValueText() {
exists(string left, string right, string op |
left = this.getLeftOperand().getValueText() and
right = this.getRightOperand().getValueText() and
op = this.getExpr().getOperator()
|
op = "+" and
(
result = (left.toInt() + right.toInt()).toString()
or
not (exists(left.toInt()) and exists(right.toInt())) and
result = (left.toFloat() + right.toFloat()).toString()
or
not (exists(left.toFloat()) and exists(right.toFloat())) and
result = left + right
)
or
op = "-" and
(
result = (left.toInt() - right.toInt()).toString()
or
not (exists(left.toInt()) and exists(right.toInt())) and
result = (left.toFloat() - right.toFloat()).toString()
)
or
op = "*" and
(
result = (left.toInt() * right.toInt()).toString()
or
not (exists(left.toInt()) and exists(right.toInt())) and
result = (left.toFloat() * right.toFloat()).toString()
)
or
op = "/" and
(
result = (left.toInt() / right.toInt()).toString()
or
not (exists(left.toInt()) and exists(right.toInt())) and
result = (left.toFloat() / right.toFloat()).toString()
)
)
}
}
private class BlockArgumentChildMapping extends ExprChildMapping, BlockArgument {
override predicate relevantChild(Expr e) { e = this.getValue() }
}
/** A control-flow node that wraps a `BlockArgument` AST expression. */
class BlockArgumentCfgNode extends ExprCfgNode {
override BlockArgumentChildMapping e;
final override BlockArgument getExpr() { result = ExprCfgNode.super.getExpr() }
/** Gets the value of this block argument. */
final ExprCfgNode getValue() { e.hasCfgChild(e.getValue(), this, result) }
}
private class CallExprChildMapping extends ExprChildMapping, Call {
override predicate relevantChild(Expr e) {
e = [this.getAnArgument(), this.(MethodCall).getReceiver(), this.(MethodCall).getBlock()]
}
}
/** A control-flow node that wraps a `Call` AST expression. */
class CallCfgNode extends ExprCfgNode {
override CallExprChildMapping e;
override Call getExpr() { result = super.getExpr() }
/** Gets the `n`th argument of this call. */
final ExprCfgNode getArgument(int n) { e.hasCfgChild(e.getArgument(n), this, result) }
/** Gets the the keyword argument whose key is `keyword` of this call. */
final ExprCfgNode getKeywordArgument(string keyword) {
e.hasCfgChild(e.getKeywordArgument(keyword), this, result)
}
/** Gets the number of arguments of this call. */
final int getNumberOfArguments() { result = e.getNumberOfArguments() }
/** Gets the receiver of this call. */
final ExprCfgNode getReceiver() { e.hasCfgChild(e.(MethodCall).getReceiver(), this, result) }
/** Gets the block of this call. */
final ExprCfgNode getBlock() { e.hasCfgChild(e.(MethodCall).getBlock(), this, result) }
}
private class CaseExprChildMapping extends ExprChildMapping, CaseExpr {
override predicate relevantChild(Expr e) { e = this.getValue() or e = this.getBranch(_) }
}
/** A control-flow node that wraps a `MethodCall` AST expression. */
class MethodCallCfgNode extends CallCfgNode {
MethodCallCfgNode() { super.getExpr() instanceof MethodCall }
override MethodCall getExpr() { result = super.getExpr() }
}
/** A control-flow node that wraps a `CaseExpr` AST expression. */
class CaseExprCfgNode extends ExprCfgNode {
override CaseExprChildMapping e;
final override CaseExpr getExpr() { result = ExprCfgNode.super.getExpr() }
/** Gets the expression being compared, if any. */
final ExprCfgNode getValue() { e.hasCfgChild(e.getValue(), this, result) }
/**
* Gets the `n`th branch of this case expression.
*/
final ExprCfgNode getBranch(int n) { e.hasCfgChild(e.getBranch(n), this, result) }
}
private class ConditionalExprChildMapping extends ExprChildMapping, ConditionalExpr {
override predicate relevantChild(Expr e) { e = this.getCondition() or e = this.getBranch(_) }
}
/** A control-flow node that wraps a `ConditionalExpr` AST expression. */
class ConditionalExprCfgNode extends ExprCfgNode {
override ConditionalExprChildMapping e;
final override ConditionalExpr getExpr() { result = ExprCfgNode.super.getExpr() }
/** Gets the condition expression. */
final ExprCfgNode getCondition() { e.hasCfgChild(e.getCondition(), this, result) }
/**
* Gets the branch of this conditional expression that is taken when the condition
* evaluates to cond, if any.
*/
final ExprCfgNode getBranch(boolean cond) { e.hasCfgChild(e.getBranch(cond), this, result) }
}
private class ConstantAccessChildMapping extends ExprChildMapping, ConstantAccess {
override predicate relevantChild(Expr e) { e = this.getScopeExpr() }
}
/** A control-flow node that wraps a `ConditionalExpr` AST expression. */
class ConstantAccessCfgNode extends ExprCfgNode {
override ConstantAccessChildMapping e;
final override ConstantAccess getExpr() { result = super.getExpr() }
/** Gets the scope expression. */
final ExprCfgNode getScopeExpr() { e.hasCfgChild(e.getScopeExpr(), this, result) }
}
private class StmtSequenceChildMapping extends ExprChildMapping, StmtSequence {
override predicate relevantChild(Expr e) { e = this.getLastStmt() }
}
/** A control-flow node that wraps a `StmtSequence` AST expression. */
class StmtSequenceCfgNode extends ExprCfgNode {
override StmtSequenceChildMapping e;
final override StmtSequence getExpr() { result = ExprCfgNode.super.getExpr() }
/** Gets the last statement in this sequence, if any. */
final ExprCfgNode getLastStmt() { e.hasCfgChild(e.getLastStmt(), this, result) }
}
private class ForExprChildMapping extends ExprChildMapping, ForExpr {
override predicate relevantChild(Expr e) { e = this.getValue() }
}
/** A control-flow node that wraps a `ForExpr` AST expression. */
class ForExprCfgNode extends ExprCfgNode {
override ForExprChildMapping e;
final override ForExpr getExpr() { result = ExprCfgNode.super.getExpr() }
/** Gets the value being iterated over. */
final ExprCfgNode getValue() { e.hasCfgChild(e.getValue(), this, result) }
}
/** A control-flow node that wraps a `ParenthesizedExpr` AST expression. */
class ParenthesizedExprCfgNode extends StmtSequenceCfgNode {
ParenthesizedExprCfgNode() { this.getExpr() instanceof ParenthesizedExpr }
}
/** A control-flow node that wraps a `VariableReadAccess` AST expression. */
class VariableReadAccessCfgNode extends ExprCfgNode {
override VariableReadAccess e;
final override VariableReadAccess getExpr() { result = ExprCfgNode.super.getExpr() }
}
/** A control-flow node that wraps a `InstanceVariableWriteAccess` AST expression. */
class InstanceVariableWriteAccessCfgNode extends ExprCfgNode {
override InstanceVariableWriteAccess e;
final override InstanceVariableWriteAccess getExpr() { result = ExprCfgNode.super.getExpr() }
}
/** A control-flow node that wraps a `StringInterpolationComponent` AST expression. */
class StringInterpolationComponentCfgNode extends StmtSequenceCfgNode {
StringInterpolationComponentCfgNode() { this.getNode() instanceof StringInterpolationComponent }
}
private class StringlikeLiteralChildMapping extends ExprChildMapping, StringlikeLiteral {
override predicate relevantChild(Expr e) { e = this.getComponent(_) }
}
/** A control-flow node that wraps a `StringlikeLiteral` AST expression. */
class StringlikeLiteralCfgNode extends ExprCfgNode {
override StringlikeLiteralChildMapping e;
final override StringlikeLiteral getExpr() { result = super.getExpr() }
/** Gets a component of this `StringlikeLiteral` */
StringComponentCfgNode getAComponent() { e.hasCfgChild(e.getComponent(_), this, result) }
}
/** A control-flow node that wraps a `StringLiteral` AST expression. */
class StringLiteralCfgNode extends ExprCfgNode {
override StringLiteral e;
final override StringLiteral getExpr() { result = super.getExpr() }
}
/** A control-flow node that wraps a `RegExpLiteral` AST expression. */
class RegExpLiteralCfgNode extends ExprCfgNode {
override RegExpLiteral e;
final override RegExpLiteral getExpr() { result = super.getExpr() }
}
/** A control-flow node that wraps a `ComparisonOperation` AST expression. */
class ComparisonOperationCfgNode extends BinaryOperationCfgNode {
ComparisonOperationCfgNode() { e instanceof ComparisonOperation }
override ComparisonOperation getExpr() { result = super.getExpr() }
}
/** A control-flow node that wraps a `RelationalOperation` AST expression. */
class RelationalOperationCfgNode extends ComparisonOperationCfgNode {
  RelationalOperationCfgNode() { exists(RelationalOperation op | op = e) }

  /** Gets the wrapped expression, typed as a `RelationalOperation`. */
  final override RelationalOperation getExpr() { super.getExpr() = result }
}
/** A control-flow node that wraps an `ElementReference` AST expression. */
class ElementReferenceCfgNode extends MethodCallCfgNode {
  ElementReferenceCfgNode() { exists(ElementReference ref | ref = e) }

  /** Gets the wrapped expression, typed as an `ElementReference`. */
  final override ElementReference getExpr() { super.getExpr() = result }
}
}

View File

@@ -0,0 +1,341 @@
/** Provides classes representing the control flow graph. */
private import codeql.Locations
private import codeql.ruby.AST
private import codeql.ruby.controlflow.BasicBlocks
private import SuccessorTypes
private import internal.ControlFlowGraphImpl
private import internal.Splitting
private import internal.Completion
/** An AST node with an associated control-flow graph. */
class CfgScope extends Scope instanceof CfgScope::Range_ {
  /**
   * Gets the CFG scope that this scope is nested under, if any.
   *
   * This is the CFG scope (as computed by `getCfgScope`) of this scope's
   * parent AST node.
   */
  final CfgScope getOuterCfgScope() { result = getCfgScope(this.getParent()) }
}
/**
 * A control flow node.
 *
 * A control flow node is a node in the control flow graph (CFG). There is a
 * many-to-one relationship between CFG nodes and AST nodes.
 *
 * Only nodes that can be reached from an entry point are included in the CFG.
 */
class CfgNode extends TNode {
  /** Gets a textual representation of this control flow node. */
  string toString() { none() }

  /** Gets the AST node that this node corresponds to, if any. */
  AstNode getNode() { none() }

  /** Gets the location of this control flow node. */
  Location getLocation() { none() }

  /** Gets the file of this control flow node. */
  final File getFile() { exists(Location loc | loc = this.getLocation() | result = loc.getFile()) }

  /** Holds if this control flow node has conditional (Boolean) successors. */
  final predicate isCondition() { exists(BooleanSuccessor bs | exists(this.getASuccessor(bs))) }

  /** Gets the scope of this node, that is, the scope of its basic block. */
  final CfgScope getScope() {
    exists(BasicBlock bb | bb = this.getBasicBlock() | result = bb.getScope())
  }

  /** Gets the basic block that this control flow node belongs to. */
  BasicBlock getBasicBlock() { this = result.getANode() }

  /** Gets a successor node of a given type, if any. */
  final CfgNode getASuccessor(SuccessorType t) { getASuccessor(this, t) = result }

  /** Gets an immediate successor, if any. */
  final CfgNode getASuccessor() { exists(SuccessorType t | result = this.getASuccessor(t)) }

  /** Gets an immediate predecessor node of a given flow type, if any. */
  final CfgNode getAPredecessor(SuccessorType t) { this = result.getASuccessor(t) }

  /** Gets an immediate predecessor, if any. */
  final CfgNode getAPredecessor() { exists(SuccessorType t | result = this.getAPredecessor(t)) }

  /** Holds if this node has more than one predecessor. */
  final predicate isJoin() { strictcount(CfgNode pred | pred = this.getAPredecessor()) > 1 }

  /** Holds if this node has more than one successor. */
  final predicate isBranch() { strictcount(CfgNode succ | succ = this.getASuccessor()) > 1 }
}
/**
 * The type of a control flow successor.
 *
 * The concrete successor types are declared in the `SuccessorTypes` module.
 */
class SuccessorType extends TSuccessorType {
/**
 * Gets a textual representation of this successor type.
 *
 * This base implementation has no result; the classes in `SuccessorTypes`
 * override it.
 */
string toString() { none() }
}
/** Provides different types of control flow successor types. */
module SuccessorTypes {
  /** A normal control flow successor. */
  class NormalSuccessor extends SuccessorType, TSuccessorSuccessor {
    final override string toString() { result = "successor" }
  }

  /**
   * A conditional control flow successor. Either a Boolean successor (`BooleanSuccessor`),
   * an emptiness successor (`EmptinessSuccessor`), or a matching successor
   * (`MatchingSuccessor`).
   */
  class ConditionalSuccessor extends SuccessorType {
    boolean value;

    ConditionalSuccessor() {
      this = TBooleanSuccessor(value)
      or
      this = TEmptinessSuccessor(value)
      or
      this = TMatchingSuccessor(value)
    }

    /** Gets the Boolean value of this successor. */
    final boolean getValue() { result = value }

    override string toString() { result = this.getValue().toString() }
  }

  /**
   * A Boolean control flow successor.
   *
   * For example, in
   *
   * ```rb
   * if x >= 0
   *   puts "positive"
   * else
   *   puts "negative"
   * end
   * ```
   *
   * `x >= 0` has both a `true` successor and a `false` successor.
   */
  class BooleanSuccessor extends ConditionalSuccessor, TBooleanSuccessor { }

  /**
   * An emptiness control flow successor.
   *
   * For example, this program fragment:
   *
   * ```rb
   * for arg in args do
   *   puts arg
   * end
   * puts "done";
   * ```
   *
   * has a control flow graph containing emptiness successors:
   *
   * ```
   *            args
   *             |
   *            for------<-----
   *            / \            \
   *           /   \           |
   *          /     \          |
   *         /       \         |
   *      empty    non-empty   |
   *        |          \       |
   *   puts "done"      \      |
   *                   arg     |
   *                    |      |
   *                puts arg   |
   *                     \___/
   * ```
   */
  class EmptinessSuccessor extends ConditionalSuccessor, TEmptinessSuccessor {
    override string toString() {
      value = true and result = "empty"
      or
      value = false and result = "non-empty"
    }
  }

  /**
   * A matching control flow successor.
   *
   * For example, this program fragment:
   *
   * ```rb
   * case x
   *   when 1 then puts "one"
   *   else puts "not one"
   * end
   * ```
   *
   * has a control flow graph containing matching successors:
   *
   * ```
   *            x
   *            |
   *            1
   *           / \
   *          /   \
   *         /     \
   *        /       \
   *     match     no-match
   *       |          |
   *  puts "one"  puts "not one"
   * ```
   */
  class MatchingSuccessor extends ConditionalSuccessor, TMatchingSuccessor {
    override string toString() {
      value = true and result = "match"
      or
      value = false and result = "no-match"
    }
  }

  /**
   * A `return` control flow successor.
   *
   * Example:
   *
   * ```rb
   * def sum(x,y)
   *   return x + y
   * end
   * ```
   *
   * The exit node of `sum` is a `return` successor of the `return x + y`
   * statement.
   */
  class ReturnSuccessor extends SuccessorType, TReturnSuccessor {
    final override string toString() { result = "return" }
  }

  /**
   * A `break` control flow successor.
   *
   * Example:
   *
   * ```rb
   * def m
   *   while x >= 0
   *     x -= 1
   *     if num > 100
   *       break
   *     end
   *   end
   *   puts "done"
   * end
   * ```
   *
   * The node `puts "done"` is a `break` successor of the node `break`.
   */
  class BreakSuccessor extends SuccessorType, TBreakSuccessor {
    final override string toString() { result = "break" }
  }

  /**
   * A `next` control flow successor.
   *
   * Example:
   *
   * ```rb
   * def m
   *   while x >= 0
   *     x -= 1
   *     if num > 100
   *       next
   *     end
   *   end
   *   puts "done"
   * end
   * ```
   *
   * The node `x >= 0` is a `next` successor of the node `next`.
   */
  class NextSuccessor extends SuccessorType, TNextSuccessor {
    final override string toString() { result = "next" }
  }

  /**
   * A `redo` control flow successor.
   *
   * Example:
   *
   * ```rb
   * def m
   *   while x >= 0
   *     x -= 1
   *     if num > 100
   *       redo
   *     end
   *   end
   *   puts "done"
   * end
   * ```
   *
   * The node `x -= 1` is a `redo` successor of the node `redo`.
   */
  class RedoSuccessor extends SuccessorType, TRedoSuccessor {
    final override string toString() { result = "redo" }
  }

  /**
   * A `retry` control flow successor.
   *
   * Example:
   *
   * ```rb
   * def m
   *   begin
   *     puts "Retry"
   *     raise
   *   rescue
   *     retry
   *   end
   * end
   * ```
   *
   * The node `puts "Retry"` is a `retry` successor of the node `retry`.
   */
  class RetrySuccessor extends SuccessorType, TRetrySuccessor {
    final override string toString() { result = "retry" }
  }

  /**
   * An exceptional control flow successor.
   *
   * Example:
   *
   * ```rb
   * def m x
   *   if x > 2
   *     raise "x > 2"
   *   end
   *   puts "x <= 2"
   * end
   * ```
   *
   * The exit node of `m` is an exceptional successor of the node
   * `raise "x > 2"`.
   */
  class RaiseSuccessor extends SuccessorType, TRaiseSuccessor {
    final override string toString() { result = "raise" }
  }

  /**
   * An exit control flow successor.
   *
   * Example:
   *
   * ```rb
   * def m x
   *   if x > 2
   *     exit 1
   *   end
   *   puts "x <= 2"
   * end
   * ```
   *
   * The exit node of `m` is an exit successor of the node
   * `exit 1`.
   */
  class ExitSuccessor extends SuccessorType, TExitSuccessor {
    final override string toString() { result = "exit" }
  }
}

View File

@@ -0,0 +1,507 @@
/**
* Provides classes representing control flow completions.
*
* A completion represents how a statement or expression terminates.
*/
private import codeql.ruby.AST
private import codeql.ruby.ast.internal.AST
private import codeql.ruby.controlflow.ControlFlowGraph
private import ControlFlowGraphImpl
private import NonReturning
private import SuccessorTypes
/**
 * The algebraic type underlying `Completion`.
 *
 * A nested completion is either a `break` wrapped in a non-nested normal
 * completion (always at nest level 0), or a normal completion wrapped in
 * one of the outer completions accepted by `nestedEnsureCompletion`.
 */
private newtype TCompletion =
  TSimpleCompletion() or
  TBooleanCompletion(boolean b) { b = [false, true] } or
  TEmptinessCompletion(boolean isEmpty) { isEmpty = [false, true] } or
  TMatchingCompletion(boolean isMatch) { isMatch = [false, true] } or
  TReturnCompletion() or
  TBreakCompletion() or
  TNextCompletion() or
  TRedoCompletion() or
  TRetryCompletion() or
  TRaiseCompletion() or // TODO: Add exception type?
  TExitCompletion() or
  TNestedCompletion(Completion inner, Completion outer, int nestLevel) {
    nestLevel = 0 and
    outer instanceof NonNestedNormalCompletion and
    inner = TBreakCompletion()
    or
    nestedEnsureCompletion(outer, nestLevel) and
    inner instanceof NormalCompletion
  }
/**
 * Holds if `outer` is one of the `return`, `break`, `next`, `redo`, `retry`,
 * `raise`, or `exit` completions, and `nestLevel` is the nest level of some
 * `Trees::BodyStmtTree`.
 */
pragma[noinline]
private predicate nestedEnsureCompletion(Completion outer, int nestLevel) {
  exists(Trees::BodyStmtTree t | nestLevel = t.getNestLevel()) and
  (
    outer = TReturnCompletion() or
    outer = TBreakCompletion() or
    outer = TNextCompletion() or
    outer = TRedoCompletion() or
    outer = TRetryCompletion() or
    outer = TRaiseCompletion() or
    outer = TExitCompletion()
  )
}
/**
 * Holds if completion `c` is dictated by the kind of node `n`: `for-in`
 * nodes have emptiness completions, while `break`, `next`, `redo`, and
 * `return` statements have their respective completions.
 */
pragma[noinline]
private predicate completionIsValidForStmt(AstNode n, Completion c) {
  c instanceof EmptinessCompletion and
  n = TForIn(_)
  or
  c = TBreakCompletion() and
  n instanceof BreakStmt
  or
  c = TNextCompletion() and
  n instanceof NextStmt
  or
  c = TRedoCompletion() and
  n instanceof RedoStmt
  or
  c = TReturnCompletion() and
  n instanceof ReturnStmt
}
/**
 * Holds if `c` happens in an exception-aware context, that is, it may be
 * `rescue`d or `ensure`d. In such cases, we assume that the target of `c`
 * may raise an exception (in addition to evaluating normally).
 *
 * Concretely, `c` must be a (transitive, reflexive) child of a body child
 * of a `BodyStmtTree` that has a `rescue` or an `ensure` clause.
 */
private predicate mayRaise(Call c) {
  exists(Trees::BodyStmtTree bst |
    (exists(bst.getARescue()) or exists(bst.getEnsure())) and
    c = bst.getBodyChild(_, true).getAChild*()
  )
}
/** A completion of a statement or an expression. */
abstract class Completion extends TCompletion {
  /** Holds if this completion is valid for node `n`. */
  predicate isValidFor(AstNode n) {
    // Non-returning calls determine their own completions.
    this = n.(NonReturningCall).getACompletion()
    or
    // Statements whose kind fixes the completion (`break`, `next`, ...).
    completionIsValidForStmt(n, this)
    or
    // Boolean contexts: a constant yields exactly its value, anything else
    // may evaluate to either `true` or `false`.
    mustHaveBooleanCompletion(n) and
    (
      if isBooleanConstant(n, _)
      then exists(boolean value | isBooleanConstant(n, value) and this = TBooleanCompletion(value))
      else this = TBooleanCompletion(_)
    )
    or
    // Matching contexts: may either match or not.
    this = TMatchingCompletion(_) and
    mustHaveMatchingCompletion(n)
    or
    // Bodies of `rescue` modifiers and calls in exception-aware contexts may raise.
    this = TRaiseCompletion() and
    (
      n = any(RescueModifierExpr parent).getBody()
      or
      mayRaise(n)
    )
    or
    // Everything not covered by a non-raise case above completes simply.
    this = TSimpleCompletion() and
    not (
      n instanceof NonReturningCall or
      completionIsValidForStmt(n, _) or
      mustHaveBooleanCompletion(n) or
      mustHaveMatchingCompletion(n)
    )
  }

  /**
   * Holds if this completion will continue a loop when it is the completion
   * of a loop body.
   */
  predicate continuesLoop() {
    this instanceof NextCompletion or
    this instanceof NormalCompletion
  }

  /**
   * Gets the inner completion. This is either the inner completion,
   * when the completion is nested, or the completion itself.
   */
  Completion getInnerCompletion() { this = result }

  /**
   * Gets the outer completion. This is either the outer completion,
   * when the completion is nested, or the completion itself.
   */
  Completion getOuterCompletion() { this = result }

  /** Gets a successor type that matches this completion. */
  abstract SuccessorType getAMatchingSuccessorType();

  /** Gets a textual representation of this completion. */
  abstract string toString();
}
/**
 * Holds if node `n` has the Boolean constant value `value`.
 *
 * Only Boolean literals that must have a Boolean completion are considered.
 */
private predicate isBooleanConstant(AstNode n, boolean value) {
  exists(BooleanLiteral lit | lit = n and mustHaveBooleanCompletion(lit) |
    lit.isTrue() and value = true
    or
    lit.isFalse() and value = false
  )
}
/**
 * Holds if a normal completion of `n` must be a Boolean completion, that is,
 * `n` is in a Boolean context and is not a non-returning call.
 */
private predicate mustHaveBooleanCompletion(AstNode n) {
  not n instanceof NonReturningCall and
  inBooleanContext(n)
}
/**
 * Holds if `n` is used in a Boolean context. That is, the value
 * that `n` evaluates to determines a true/false branch successor.
 */
private predicate inBooleanContext(AstNode n) {
  // The condition of a conditional expression.
  n = any(ConditionalExpr cond).getCondition()
  or
  // A branch of a conditional expression that is itself in a Boolean context.
  exists(ConditionalExpr cond | inBooleanContext(cond) | n = cond.getBranch(_))
  or
  // The condition of a conditional loop.
  exists(ConditionalLoop loop | n = loop.getCondition())
  or
  // The left operand of `&&` always; the right operand only when the
  // whole expression is in a Boolean context.
  exists(LogicalAndExpr conj |
    n = conj.getLeftOperand()
    or
    n = conj.getRightOperand() and
    inBooleanContext(conj)
  )
  or
  // Likewise for `||`.
  exists(LogicalOrExpr disj |
    n = disj.getLeftOperand()
    or
    n = disj.getRightOperand() and
    inBooleanContext(disj)
  )
  or
  // The operand of a negation that is in a Boolean context.
  exists(NotExpr neg | inBooleanContext(neg) and n = neg.getOperand())
  or
  // The last statement of a statement sequence that is in a Boolean context.
  exists(StmtSequence seq | inBooleanContext(seq) and n = seq.getLastStmt())
  or
  // A `when` pattern of a `case` expression without a value.
  exists(CaseExpr c, WhenExpr w |
    w = c.getAWhenBranch() and
    n = w.getPattern(_) and
    not exists(c.getValue())
  )
}
/**
 * Holds if a normal completion of `n` must be a matching completion, that is,
 * `n` is in a matching context and is not a non-returning call.
 */
private predicate mustHaveMatchingCompletion(AstNode n) {
  not n instanceof NonReturningCall and
  inMatchingContext(n)
}
/**
 * Holds if `n` is used in a matching context. That is, whether or
 * not the value of `n` matches, determines the successor.
 */
private predicate inMatchingContext(AstNode n) {
  // An exception pattern of a `rescue` clause.
  exists(RescueClause r | n = r.getException(_))
  or
  // A `when` pattern of a `case` expression that has a value.
  exists(CaseExpr c, WhenExpr w |
    w = c.getAWhenBranch() and
    n = w.getPattern(_) and
    exists(c.getValue())
  )
  or
  // A parameter tree that has a default value.
  exists(Trees::DefaultValueParameterTree param | param = n | param.hasDefaultValue())
}
/**
 * A completion that represents normal evaluation of a statement or an
 * expression.
 *
 * `Completion.continuesLoop()` treats every normal completion as continuing
 * an enclosing loop.
 */
abstract class NormalCompletion extends Completion { }
/**
 * A normal completion that is not a nested completion. This is the only kind
 * of completion allowed as the outer part of a nested `break` completion.
 */
abstract private class NonNestedNormalCompletion extends NormalCompletion { }
/** A simple (normal) completion. */
class SimpleCompletion extends NonNestedNormalCompletion, TSimpleCompletion {
  /** Gets a matching successor type; any `NormalSuccessor` matches. */
  override NormalSuccessor getAMatchingSuccessorType() { result = any(NormalSuccessor s) }

  override string toString() { result = "simple" }
}
/**
 * A completion that represents evaluation of an expression, whose value determines
 * the successor. Either a Boolean completion (`BooleanCompletion`), an emptiness
 * completion (`EmptinessCompletion`), or a matching completion (`MatchingCompletion`).
 */
abstract class ConditionalCompletion extends NonNestedNormalCompletion {
// The Boolean value carried by this completion; bound by the concrete subclasses.
boolean value;
// The characteristic predicate itself does not constrain `value` (hence the
// `bindingset`); each concrete subclass ties `value` to its newtype branch.
bindingset[value]
ConditionalCompletion() { any() }
/** Gets the Boolean value of this conditional completion. */
final boolean getValue() { result = value }
}
/**
 * A completion that represents evaluation of an expression
 * with a Boolean value.
 */
class BooleanCompletion extends ConditionalCompletion, TBooleanCompletion {
  BooleanCompletion() { this = TBooleanCompletion(value) }

  /** Gets the dual Boolean completion, that is, the one with the negated value. */
  BooleanCompletion getDual() {
    exists(boolean negated | negated = value.booleanNot() | result = TBooleanCompletion(negated))
  }

  /** Gets the Boolean successor type carrying the same value as this completion. */
  override BooleanSuccessor getAMatchingSuccessorType() { value = result.getValue() }

  override string toString() { result = value.toString() }
}
/** A Boolean `true` completion. */
class TrueCompletion extends BooleanCompletion {
  TrueCompletion() { value = true }
}
/** A Boolean `false` completion. */
class FalseCompletion extends BooleanCompletion {
  FalseCompletion() { value = false }
}
/**
 * A completion that represents evaluation of an emptiness test, for example
 * a test in a `for in` statement.
 */
class EmptinessCompletion extends ConditionalCompletion, TEmptinessCompletion {
  EmptinessCompletion() { this = TEmptinessCompletion(value) }

  /** Gets the emptiness successor type carrying the same value as this completion. */
  override EmptinessSuccessor getAMatchingSuccessorType() { value = result.getValue() }

  override string toString() {
    value = true and result = "empty"
    or
    value = false and result = "non-empty"
  }
}
/**
 * A completion that represents evaluation of a matching test, for example
 * a test in a `rescue` statement.
 */
class MatchingCompletion extends ConditionalCompletion, TMatchingCompletion {
  MatchingCompletion() { this = TMatchingCompletion(value) }

  /** Gets the matching successor type carrying the same value as this completion. */
  override MatchingSuccessor getAMatchingSuccessorType() { value = result.getValue() }

  override string toString() {
    value = true and result = "match"
    or
    value = false and result = "no-match"
  }
}
/**
 * A completion that represents evaluation of a statement or an
 * expression resulting in a return.
 *
 * This covers both the plain `return` completion and nested completions
 * whose outer completion is a `return`.
 */
class ReturnCompletion extends Completion {
  ReturnCompletion() {
    this = TNestedCompletion(_, TReturnCompletion(), _)
    or
    this = TReturnCompletion()
  }

  override ReturnSuccessor getAMatchingSuccessorType() { result = any(ReturnSuccessor s) }

  override string toString() {
    // `NestedCompletion` defines `toString()` for the other case
    result = "return" and this = TReturnCompletion()
  }
}
/**
 * A completion that represents evaluation of a statement or an
 * expression resulting in a break from a loop.
 *
 * This covers both the plain `break` completion and nested completions
 * whose outer completion is a `break`.
 */
class BreakCompletion extends Completion {
  BreakCompletion() {
    this = TNestedCompletion(_, TBreakCompletion(), _)
    or
    this = TBreakCompletion()
  }

  override BreakSuccessor getAMatchingSuccessorType() { result = any(BreakSuccessor s) }

  override string toString() {
    // `NestedCompletion` defines `toString()` for the other case
    result = "break" and this = TBreakCompletion()
  }
}
/**
 * A completion that represents evaluation of a statement or an
 * expression resulting in a continuation of a loop.
 *
 * This covers both the plain `next` completion and nested completions
 * whose outer completion is a `next`.
 */
class NextCompletion extends Completion {
  NextCompletion() {
    this = TNestedCompletion(_, TNextCompletion(), _)
    or
    this = TNextCompletion()
  }

  override NextSuccessor getAMatchingSuccessorType() { result = any(NextSuccessor s) }

  override string toString() {
    // `NestedCompletion` defines `toString()` for the other case
    result = "next" and this = TNextCompletion()
  }
}
/**
 * A completion that represents evaluation of a statement or an
 * expression resulting in a redo of a loop iteration.
 *
 * This covers both the plain `redo` completion and nested completions
 * whose outer completion is a `redo`.
 */
class RedoCompletion extends Completion {
  RedoCompletion() {
    this = TNestedCompletion(_, TRedoCompletion(), _)
    or
    this = TRedoCompletion()
  }

  override RedoSuccessor getAMatchingSuccessorType() { result = any(RedoSuccessor s) }

  override string toString() {
    // `NestedCompletion` defines `toString()` for the other case
    result = "redo" and this = TRedoCompletion()
  }
}
/**
 * A completion that represents evaluation of a statement or an
 * expression resulting in a retry.
 *
 * This covers both the plain `retry` completion and nested completions
 * whose outer completion is a `retry`.
 */
class RetryCompletion extends Completion {
  RetryCompletion() {
    this = TNestedCompletion(_, TRetryCompletion(), _)
    or
    this = TRetryCompletion()
  }

  override RetrySuccessor getAMatchingSuccessorType() { result = any(RetrySuccessor s) }

  override string toString() {
    // `NestedCompletion` defines `toString()` for the other case
    result = "retry" and this = TRetryCompletion()
  }
}
/**
 * A completion that represents evaluation of a statement or an
 * expression resulting in a thrown exception.
 *
 * This covers both the plain `raise` completion and nested completions
 * whose outer completion is a `raise`.
 */
class RaiseCompletion extends Completion {
  RaiseCompletion() {
    this = TNestedCompletion(_, TRaiseCompletion(), _)
    or
    this = TRaiseCompletion()
  }

  override RaiseSuccessor getAMatchingSuccessorType() { result = any(RaiseSuccessor s) }

  override string toString() {
    // `NestedCompletion` defines `toString()` for the other case
    result = "raise" and this = TRaiseCompletion()
  }
}
/**
 * A completion that represents evaluation of a statement or an
 * expression resulting in an abort/exit.
 *
 * This covers both the plain `exit` completion and nested completions
 * whose outer completion is an `exit`.
 */
class ExitCompletion extends Completion {
  ExitCompletion() {
    this = TNestedCompletion(_, TExitCompletion(), _)
    or
    this = TExitCompletion()
  }

  override ExitSuccessor getAMatchingSuccessorType() { result = any(ExitSuccessor s) }

  override string toString() {
    // `NestedCompletion` defines `toString()` for the other case
    result = "exit" and this = TExitCompletion()
  }
}
/**
 * A nested completion. For example, in
 *
 * ```rb
 * def m
 *   while x >= 0
 *     x -= 1
 *     if num > 100
 *       break
 *     end
 *   end
 *   puts "done"
 * end
 * ```
 *
 * the `while` loop can have a nested completion where the inner completion
 * is a `break` and the outer completion is a simple successor.
 */
abstract class NestedCompletion extends Completion, TNestedCompletion {
// The three components of the underlying `TNestedCompletion` value.
Completion inner;
Completion outer;
int nestLevel;
NestedCompletion() { this = TNestedCompletion(inner, outer, nestLevel) }
/** Gets a completion that is compatible with the inner completion. */
abstract Completion getAnInnerCompatibleCompletion();
/** Gets the level of this nested completion. */
final int getNestLevel() { result = nestLevel }
// Rendered as `<outer> [<inner>] (<nestLevel>)`.
override string toString() { result = outer + " [" + inner + "] (" + nestLevel + ")" }
}
/**
 * A nested completion whose inner completion is a `break` and whose outer
 * completion is a non-nested normal completion.
 */
class NestedBreakCompletion extends NormalCompletion, NestedCompletion {
  NestedBreakCompletion() {
    outer instanceof NonNestedNormalCompletion and
    inner = TBreakCompletion()
  }

  override BreakCompletion getInnerCompletion() { inner = result }

  override NonNestedNormalCompletion getOuterCompletion() { outer = result }

  /**
   * Gets a completion that is compatible with the inner completion: the
   * inner `break` itself when the outer completion is simple, or a nested
   * completion whose inner part is this outer completion and whose outer
   * part is this inner completion.
   */
  override Completion getAnInnerCompatibleCompletion() {
    outer = TSimpleCompletion() and
    result = inner
    or
    result = TNestedCompletion(outer, inner, _)
  }

  /**
   * Gets a matching successor type: a `break` successor when the outer
   * completion is simple, otherwise the successor type matching the outer
   * conditional completion.
   */
  override SuccessorType getAMatchingSuccessorType() {
    result instanceof BreakSuccessor and
    outer instanceof SimpleCompletion
    or
    exists(ConditionalCompletion cond | cond = outer |
      result = cond.getAMatchingSuccessorType()
    )
  }
}
/**
 * A nested completion whose inner completion is normal and whose outer
 * completion and nest level satisfy `nestedEnsureCompletion`.
 */
class NestedEnsureCompletion extends NestedCompletion {
  NestedEnsureCompletion() {
    nestedEnsureCompletion(outer, nestLevel) and
    inner instanceof NormalCompletion
  }

  override NormalCompletion getInnerCompletion() { inner = result }

  override Completion getOuterCompletion() { outer = result }

  /** Gets a completion whose outer completion equals this completion's inner completion. */
  override Completion getAnInnerCompatibleCompletion() {
    this.getInnerCompletion() = result.getOuterCompletion()
  }

  /** Has no result: this kind of completion matches no successor type. */
  override SuccessorType getAMatchingSuccessorType() { none() }
}

View File

@@ -0,0 +1,945 @@
/** Provides language-independent definitions for AST-to-CFG construction. */
private import ControlFlowGraphImplSpecific
/**
 * An element with associated control flow.
 *
 * Subclasses describe the control flow through an element by specifying its
 * first element(s), its last element(s), how abnormal completions of children
 * propagate, and the successor relation between its parts.
 */
abstract class ControlFlowTree extends ControlFlowTreeBase {
/** Holds if `first` is the first element executed within this element. */
pragma[nomagic]
abstract predicate first(ControlFlowElement first);
/**
 * Holds if `last` with completion `c` is a potential last element executed
 * within this element.
 */
pragma[nomagic]
abstract predicate last(ControlFlowElement last, Completion c);
/** Holds if abnormal execution of `child` should propagate upwards. */
abstract predicate propagatesAbnormal(ControlFlowElement child);
/**
 * Holds if `succ` is a control flow successor for `pred`, given that `pred`
 * finishes with completion `c`.
 */
pragma[nomagic]
abstract predicate succ(ControlFlowElement pred, ControlFlowElement succ, Completion c);
}
/**
 * Holds if `first` is the first element executed within control flow
 * element `cft`.
 *
 * This simply delegates to the member predicate `ControlFlowTree.first`.
 */
predicate first(ControlFlowTree cft, ControlFlowElement first) { cft.first(first) }
/**
 * Holds if `last` with completion `c` is a potential last element executed
 * within control flow element `cft`.
 *
 * Besides the elements reported by `cft.last`, this includes last elements of
 * children whose abnormal completions `cft` propagates upwards.
 */
predicate last(ControlFlowTree cft, ControlFlowElement last, Completion c) {
  cft.last(last, c)
  or
  not completionIsNormal(c) and
  exists(ControlFlowElement child | cft.propagatesAbnormal(child) | last(child, last, c))
}
/**
 * Holds if `succ` is a control flow successor for `pred`, given that `pred`
 * finishes with completion `c`.
 *
 * This is the union of the `succ` relations of all control flow trees.
 */
pragma[nomagic]
predicate succ(ControlFlowElement pred, ControlFlowElement succ, Completion c) {
  exists(ControlFlowTree cft | cft.succ(pred, succ, c))
}
/** An element that is executed in pre-order: the element itself is executed first. */
abstract class PreOrderTree extends ControlFlowTree {
  final override predicate first(ControlFlowElement first) { this = first }
}
/**
 * An element that is executed in post-order: the element itself is executed
 * last, with any completion that is valid for it.
 */
abstract class PostOrderTree extends ControlFlowTree {
  override predicate last(ControlFlowElement last, Completion c) {
    completionIsValidFor(c, last) and
    this = last
  }
}
/**
 * An element where the children are evaluated following a standard left-to-right
 * evaluation. The actual evaluation order is determined by the predicate
 * `getChildElement()`.
 */
abstract class StandardTree extends ControlFlowTree {
  /** Gets the `i`th child element, in order of evaluation. */
  abstract ControlFlowElement getChildElement(int i);

  /**
   * Gets the child at zero-based position `i` after ranking all children by
   * their `getChildElement` index, so that gaps in the indices are skipped.
   */
  private ControlFlowElement getChildElementRanked(int i) {
    result =
      rank[i + 1](ControlFlowElement child, int idx |
        child = this.getChildElement(idx)
      |
        child order by idx
      )
  }

  /** Gets the first child node of this element. */
  final ControlFlowElement getFirstChildElement() { result = this.getChildElementRanked(0) }

  /** Gets the last child node of this element. */
  final ControlFlowElement getLastChildElement() {
    exists(int pos | result = this.getChildElementRanked(pos) |
      not exists(this.getChildElementRanked(pos + 1))
    )
  }

  /** Holds if this element has no children. */
  predicate isLeafElement() { not exists(this.getFirstChildElement()) }

  override predicate propagatesAbnormal(ControlFlowElement child) {
    exists(int i | child = this.getChildElement(i))
  }

  /** Holds if control passes from the end of one child to the start of the next child. */
  pragma[nomagic]
  override predicate succ(ControlFlowElement pred, ControlFlowElement succ, Completion c) {
    exists(int i, ControlFlowElement current, ControlFlowElement next |
      current = this.getChildElementRanked(i) and
      next = this.getChildElementRanked(i + 1)
    |
      completionIsNormal(c) and
      last(current, pred, c) and
      first(next, succ)
    )
  }
}
/** A standard element that is executed in pre-order. */
abstract class StandardPreOrderTree extends StandardTree, PreOrderTree {
  override predicate last(ControlFlowElement last, Completion c) {
    // A leaf is its own last element.
    last = this and
    completionIsValidFor(c, this) and
    this.isLeafElement()
    or
    // Otherwise execution ends wherever the last child ends.
    last(this.getLastChildElement(), last, c)
  }

  override predicate succ(ControlFlowElement pred, ControlFlowElement succ, Completion c) {
    // From this element to the start of its first child.
    completionIsSimple(c) and
    pred = this and
    first(this.getFirstChildElement(), succ)
    or
    // Between consecutive children.
    StandardTree.super.succ(pred, succ, c)
  }
}
/** A standard element that is executed in post-order. */
abstract class StandardPostOrderTree extends StandardTree, PostOrderTree {
  override predicate first(ControlFlowElement first) {
    // Without children, this element is itself the first element.
    first = this and
    not exists(this.getFirstChildElement())
    or
    // Otherwise execution starts wherever the first child starts.
    first(this.getFirstChildElement(), first)
  }

  override predicate succ(ControlFlowElement pred, ControlFlowElement succ, Completion c) {
    // From the end of the last child to this element.
    completionIsNormal(c) and
    succ = this and
    last(this.getLastChildElement(), pred, c)
    or
    // Between consecutive children.
    StandardTree.super.succ(pred, succ, c)
  }
}
/**
 * An element that is a leaf in the control flow graph.
 *
 * Being both pre-order and post-order, a leaf is its own first and last
 * element.
 */
abstract class LeafTree extends PreOrderTree, PostOrderTree {
// No child propagates abnormal completions through a leaf.
override predicate propagatesAbnormal(ControlFlowElement child) { none() }
// A leaf contributes no internal successor edges.
override predicate succ(ControlFlowElement pred, ControlFlowElement succ, Completion c) { none() }
}
/**
 * Holds if split kinds `sk1` and `sk2` may overlap. That is, they may apply
 * to at least one common AST node inside `scope`.
 */
private predicate overlapping(CfgScope scope, SplitKind sk1, SplitKind sk2) {
  exists(ControlFlowElement shared | scope = getCfgScope(shared) |
    sk1.appliesTo(shared) and
    sk2.appliesTo(shared)
  )
}
/**
 * A split kind. Each control flow node can have at most one split of a
 * given kind.
 */
abstract class SplitKind extends SplitKindBase {
/** Gets a split of this kind. */
SplitImpl getASplit() { result.getKind() = this }
/** Holds if some split of this kind applies to AST node `n`. */
predicate appliesTo(ControlFlowElement n) { this.getASplit().appliesTo(n) }
/**
 * Gets a unique integer representing this split kind. The integer is used
 * to represent sets of splits as ordered lists.
 */
abstract int getListOrder();
/**
 * Gets the rank of this split kind among all split kinds that overlap with
 * it inside `scope`, ordered by `getListOrder()`.
 */
private int getRank(CfgScope scope) {
this = rank[result](SplitKind sk | overlapping(scope, this, sk) | sk order by sk.getListOrder())
}
/**
 * Holds if this split kind is enabled for AST node `n`. For performance reasons,
 * the number of splits is restricted by the `maxSplits()` predicate.
 */
predicate isEnabled(ControlFlowElement n) {
this.appliesTo(n) and
this.getRank(getCfgScope(n)) <= maxSplits()
}
/**
 * Gets the rank of this split kind among all the split kinds that apply to
 * AST node `n`. The rank is based on the order defined by `getListOrder()`.
 *
 * There is no result unless this split kind is enabled for `n`.
 */
int getListRank(ControlFlowElement n) {
this.isEnabled(n) and
this = rank[result](SplitKind sk | sk.appliesTo(n) | sk order by sk.getListOrder())
}
/** Gets a textual representation of this split kind. */
abstract string toString();
}
/** Provides the interface for implementing an entity to split on. */
abstract class SplitImpl extends Split {
/** Gets the kind of this split. */
abstract SplitKind getKind();
/**
 * Holds if this split is entered when control passes from `pred` to `succ` with
 * completion `c`.
 *
 * Invariant: `hasEntry(pred, succ, c) implies succ(pred, succ, c)`.
 */
abstract predicate hasEntry(ControlFlowElement pred, ControlFlowElement succ, Completion c);
/**
 * Holds if this split is entered when control passes from `scope` to the entry point
 * `first`.
 *
 * Invariant: `hasEntryScope(scope, first) implies scopeFirst(scope, first)`.
 */
abstract predicate hasEntryScope(CfgScope scope, ControlFlowElement first);
/**
 * Holds if this split is left when control passes from `pred` to `succ` with
 * completion `c`.
 *
 * Invariant: `hasExit(pred, succ, c) implies succ(pred, succ, c)`.
 */
abstract predicate hasExit(ControlFlowElement pred, ControlFlowElement succ, Completion c);
/**
 * Holds if this split is left when control passes from `last` out of the enclosing
 * scope `scope` with completion `c`.
 *
 * Invariant: `hasExitScope(scope, last, c) implies scopeLast(scope, last, c)`.
 */
abstract predicate hasExitScope(CfgScope scope, ControlFlowElement last, Completion c);
/**
 * Holds if this split is maintained when control passes from `pred` to `succ` with
 * completion `c`.
 *
 * Invariant: `hasSuccessor(pred, succ, c) implies succ(pred, succ, c)`.
 */
abstract predicate hasSuccessor(ControlFlowElement pred, ControlFlowElement succ, Completion c);
/**
 * Holds if this split applies to control flow element `cfe`.
 *
 * That is, the split is entered at `cfe` (from a predecessor or from a scope
 * entry), or it applies to a predecessor of `cfe` and is maintained along the
 * edge to `cfe`.
 */
final predicate appliesTo(ControlFlowElement cfe) {
this.hasEntry(_, cfe, _)
or
this.hasEntryScope(_, cfe)
or
exists(ControlFlowElement pred | this.appliesTo(pred) | this.hasSuccessor(pred, cfe, _))
}
/** The `succ` relation restricted to predecessors `pred` that this split applies to. */
pragma[noinline]
final predicate appliesSucc(ControlFlowElement pred, ControlFlowElement succ, Completion c) {
this.appliesTo(pred) and
succ(pred, succ, c)
}
}
/**
 * A set of control flow node splits. The set is represented by a list of splits,
 * ordered by ascending rank.
 */
class Splits extends TSplits {
  /** Gets a textual representation of this set of splits. */
  string toString() { result = splitsToString(this) }

  /** Gets a split belonging to this set of splits. */
  SplitImpl getASplit() {
    // The head of the list ...
    this = TSplitsCons(result, _)
    or
    // ... or any split in its tail.
    exists(Splits tail | this = TSplitsCons(_, tail) | result = tail.getASplit())
  }
}
/**
 * Holds if `splits` is the list of splits accumulated up to rank `rnk` when
 * entering scope `pred` at its first element `succ`.
 *
 * Rank 0 corresponds to the empty list; higher ranks are built by
 * `succEntrySplitsCons`, with which this predicate is mutually recursive.
 */
private predicate succEntrySplitsFromRank(
CfgScope pred, ControlFlowElement succ, Splits splits, int rnk
) {
// Base case: the empty list at rank 0.
splits = TSplitsNil() and
scopeFirst(pred, succ) and
rnk = 0
or
// Recursive case: prepend the split with rank `rnk` to the list for the previous rank.
exists(SplitImpl head, Splits tail | succEntrySplitsCons(pred, succ, head, tail, rnk) |
splits = TSplitsCons(head, tail)
)
}
/**
 * Holds if `head` is a split that is entered when control passes from scope
 * `pred` to `succ`, the kind of `head` has list rank `rnk` for `succ`, and
 * `tail` is the list of entry splits accumulated up to rank `rnk - 1`.
 */
private predicate succEntrySplitsCons(
CfgScope pred, ControlFlowElement succ, SplitImpl head, Splits tail, int rnk
) {
succEntrySplitsFromRank(pred, succ, tail, rnk - 1) and
head.hasEntryScope(pred, succ) and
rnk = head.getKind().getListRank(succ)
}
/**
 * Holds if `succ` with splits `succSplits` is the first element that is executed
 * when entering callable `pred`.
 *
 * The successor type `t` is always a simple successor type.
 */
pragma[noinline]
private predicate succEntrySplits(
CfgScope pred, ControlFlowElement succ, Splits succSplits, SuccessorType t
) {
exists(int rnk |
scopeFirst(pred, succ) and
successorTypeIsSimple(t) and
succEntrySplitsFromRank(pred, succ, succSplits, rnk)
|
// No split is entered at the scope entry: use the rank-0 (empty) list.
rnk = 0 and
not any(SplitImpl split).hasEntryScope(pred, succ)
or
// Otherwise use the list built up to the highest rank among the entered splits.
rnk = max(SplitImpl split | split.hasEntryScope(pred, succ) | split.getKind().getListRank(succ))
)
}
/**
 * Holds if `pred` with splits `predSplits` can exit the enclosing callable
 * `succ` with type `t`.
 */
private predicate succExitSplits(
ControlFlowElement pred, Splits predSplits, CfgScope succ, SuccessorType t
) {
exists(Reachability::SameSplitsBlock b, Completion c | pred = b.getAnElement() |
// `pred` must be reachable with `predSplits` ...
b.isReachable(predSplits) and
t = getAMatchingSuccessorType(c) and
// ... be a last element of the scope with completion `c` ...
scopeLast(succ, pred, c) and
// ... and every split in `predSplits` must be left on that exit.
forall(SplitImpl predSplit | predSplit = predSplits.getASplit() |
predSplit.hasExitScope(succ, pred, c)
)
)
}
/**
* Provides a predicate for the successor relation with split information,
* as well as logic used to construct the type `TSplits` representing sets
* of splits. Only sets of splits that can be reached are constructed, hence
* the predicates are mutually recursive.
*
* For the successor relation
*
* ```ql
* succSplits(ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits, Completion c)
* ```
*
* the following invariants are maintained:
*
* 1. `pred` is reachable with split set `predSplits`.
* 2. For all `split` in `predSplits`:
* - If `split.hasSuccessor(pred, succ, c)` then `split` in `succSplits`.
* 3. For all `split` in `predSplits`:
* - If `split.hasExit(pred, succ, c)` and not `split.hasEntry(pred, succ, c)` then
* `split` not in `succSplits`.
* 4. For all `split` with kind not in `predSplits`:
* - If `split.hasEntry(pred, succ, c)` then `split` in `succSplits`.
* 5. For all `split` in `succSplits`:
* - `split.hasSuccessor(pred, succ, c)` and `split` in `predSplits`, or
* - `split.hasEntry(pred, succ, c)`.
*
* The algorithm divides into four cases:
*
* 1. The set of splits for the successor is the same as the set of splits
* for the predecessor:
* a) The successor is in the same `SameSplitsBlock` as the predecessor.
* b) The successor is *not* in the same `SameSplitsBlock` as the predecessor.
* 2. The set of splits for the successor is different from the set of splits
* for the predecessor:
* a) The set of splits for the successor is *maybe* non-empty.
* b) The set of splits for the successor is *always* empty.
*
* Only case 2a may introduce new sets of splits, so only predicates from
* this case are used in the definition of `TSplits`.
*
* The predicates in this module are named after the cases above.
*/
private module SuccSplits {
/**
 * Holds if invariant 1 is satisfied: `pred` is an element of block `b`, which
 * is reachable with splits `predSplits`, and `succ` is a successor of `pred`
 * with completion `c`.
 */
private predicate succInvariant1(
  Reachability::SameSplitsBlock b, ControlFlowElement pred, Splits predSplits,
  ControlFlowElement succ, Completion c
) {
  b.isReachable(predSplits) and
  pred = b.getAnElement() and
  succ(pred, succ, c)
}
/**
 * Base case for case 1b: invariant 1 holds, `succ` is not a non-initial
 * element of `pred`'s block, and no split is entered on the edge from `pred`
 * to `succ` (invariant 4).
 */
private predicate case1b0(
  ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c
) {
  exists(Reachability::SameSplitsBlock block |
    // Invariant 1
    succInvariant1(block, pred, predSplits, succ, c) and
    // If `succ` is in the same block as `pred`, then it is the block's first element
    (not succ = block.getAnElement() or succ = block) and
    // Invariant 4
    not any(SplitImpl split).hasEntry(pred, succ, c)
  )
}
/**
 * Case 1b.
 *
 * Invariants 1 and 4 hold in the base case, and invariants 2, 3, and 5 are
 * maintained for all splits in `predSplits` (= `succSplits`), except
 * possibly for the splits in `except`.
 *
 * The predicate is written using explicit recursion, as opposed to a `forall`,
 * to avoid negative recursion. The recursion starts with `except = predSplits`
 * (nothing checked yet) and removes the head of `except` in each step; the
 * caller asks for `except = TSplitsNil()`, meaning that all splits were checked.
 */
private predicate case1bForall(
ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, Splits except
) {
// Base case: no split has been checked yet
case1b0(pred, predSplits, succ, c) and
except = predSplits
or
// Recursive case: the head of the remaining list is preserved across the edge,
// so it no longer needs to be excepted
exists(SplitImpl split |
case1bForallCons(pred, predSplits, succ, c, split, except) and
split.hasSuccessor(pred, succ, c)
)
}
/**
 * Helper for `case1bForall`: holds if the list of splits still to be checked
 * is `TSplitsCons(exceptHead, exceptTail)`.
 */
pragma[noinline]
private predicate case1bForallCons(
  ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c,
  SplitImpl exceptHead, Splits exceptTail
) {
  exists(Splits remaining |
    remaining = TSplitsCons(exceptHead, exceptTail) and
    case1bForall(pred, predSplits, succ, c, remaining)
  )
}
/**
 * Case 1: holds if `succ` is a successor of `pred` (with splits `predSplits`
 * and completion `c`) for which the set of splits is unchanged.
 */
private predicate case1(
  ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c
) {
  // Case 1b: different block, but every split carries over
  case1bForall(pred, predSplits, succ, c, TSplitsNil())
  or
  // Case 1a: `succ` is a non-initial element of the block containing `pred`
  exists(Reachability::SameSplitsBlock block |
    succInvariant1(block, pred, predSplits, succ, c) and
    succ = block.getAnElement() and
    succ != block
  )
}
/**
 * Gets a split from `predSplits`, provided that invariant 1 holds for
 * `b`, `pred`, `predSplits`, `succ`, and `c`.
 */
pragma[noinline]
private SplitImpl succInvariant1GetASplit(
  Reachability::SameSplitsBlock b, ControlFlowElement pred, Splits predSplits,
  ControlFlowElement succ, Completion c
) {
  result = predSplits.getASplit() and
  succInvariant1(b, pred, predSplits, succ, c)
}
/**
 * Common precondition for case 2: invariant 1 holds, `succ` is not a
 * non-initial element of `pred`'s block, and the edge either exits a split
 * from `predSplits` or enters some split.
 */
private predicate case2aux(
  ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c
) {
  exists(Reachability::SameSplitsBlock block |
    succInvariant1(block, pred, predSplits, succ, c) and
    (not succ = block.getAnElement() or succ = block) and
    (
      // Some split is entered on this edge
      exists(SplitImpl split | split.hasEntry(pred, succ, c))
      or
      // Some split of the predecessor is exited on this edge
      succInvariant1GetASplit(block, pred, predSplits, succ, c).hasExit(pred, succ, c)
    )
  )
}
/**
 * Holds if `succSplits` should not inherit a split of kind `sk` from
 * `predSplits`, except possibly because of a split in `except`.
 *
 * The predicate is written using explicit recursion, as opposed to a `forall`,
 * to avoid negative recursion. The recursion starts with `except = predSplits`
 * and drops the head of the list in each step; callers ask for
 * `except = TSplitsNil()`, meaning that every split in `predSplits` was checked.
 */
private predicate case2aNoneInheritedOfKindForall(
ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, SplitKind sk,
Splits except
) {
// Base case: nothing checked yet; only kinds that apply to `succ` are relevant
case2aux(pred, predSplits, succ, c) and
sk.appliesTo(succ) and
except = predSplits
or
// Recursive case: the head `split` of the remaining list `mid` cannot be
// inherited as a split of kind `sk`
exists(Splits mid, SplitImpl split |
case2aNoneInheritedOfKindForall(pred, predSplits, succ, c, sk, mid) and
mid = TSplitsCons(split, except)
|
// `split` is of a different kind (one that applies to `succ`)
split.getKind() = any(SplitKind sk0 | sk0 != sk and sk0.appliesTo(succ))
or
// `split` is exited on this edge
split.hasExit(pred, succ, c)
)
}
/**
 * Holds if `split`, which is of kind `sk`, is entered on the edge from `pred`
 * to `succ` with completion `c`.
 */
pragma[nomagic]
private predicate entryOfKind(
  ControlFlowElement pred, ControlFlowElement succ, Completion c, SplitImpl split, SplitKind sk
) {
  sk = split.getKind() and
  split.hasEntry(pred, succ, c)
}
/**
 * Holds if `succSplits` should not have a split of kind `sk`: none is
 * inherited from `predSplits`, and none is entered on the edge.
 */
pragma[nomagic]
private predicate case2aNoneOfKind(
  ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, SplitKind sk
) {
  // None newly entered into
  not exists(SplitImpl entered | entryOfKind(pred, succ, c, entered, sk)) and
  // None inherited from predecessor
  case2aNoneInheritedOfKindForall(pred, predSplits, succ, c, sk, TSplitsNil())
}
/**
 * Holds if `succSplits` should not have a split of the kind whose list rank
 * for `succ` is `rnk`.
 */
pragma[nomagic]
private predicate case2aNoneAtRank(
  ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, int rnk
) {
  exists(SplitKind kind |
    rnk = kind.getListRank(succ) and
    case2aNoneOfKind(pred, predSplits, succ, c, kind)
  )
}
/**
 * Gets a split from `predSplits`, provided that the case 2 precondition
 * `case2aux` holds for `pred`, `predSplits`, `succ`, and `c`.
 */
pragma[nomagic]
private SplitImpl case2auxGetAPredecessorSplit(
  ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c
) {
  result = predSplits.getASplit() and
  case2aux(pred, predSplits, succ, c)
}
/**
 * Gets a split of kind `sk` that should be in `succSplits`: either a split
 * inherited from `predSplits`, or a split that is entered on the edge and
 * whose kind is not inherited from `predSplits`.
 */
pragma[nomagic]
private SplitImpl case2aSome(
  ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, SplitKind sk
) {
  sk = result.getKind() and
  (
    // Newly entered into
    exists(SplitKind enteredKind |
      entryOfKind(pred, succ, c, result, enteredKind) and
      case2aNoneInheritedOfKindForall(pred, predSplits, succ, c, enteredKind, TSplitsNil())
    )
    or
    // Inherited from predecessor
    result.hasSuccessor(pred, succ, c) and
    result = case2auxGetAPredecessorSplit(pred, predSplits, succ, c)
  )
}
/**
 * Gets a split that should be in `succSplits`, where the list rank of the
 * split's kind for `succ` is `rnk`.
 */
pragma[nomagic]
SplitImpl case2aSomeAtRank(
  ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, int rnk
) {
  exists(SplitKind kind |
    rnk = kind.getListRank(succ) and
    result = case2aSome(pred, predSplits, succ, c, kind)
  )
}
/**
 * Case 2a.
 *
 * As opposed to the other cases, in this case we need to construct a new set
 * of splits `succSplits`. Since this involves constructing the very IPA type,
 * we cannot recurse directly over the structure of `succSplits`. Instead, we
 * recurse over the ranks of all splits that *might* be in `succSplits`.
 *
 * - Invariant 1 holds in the base case,
 * - invariant 2 holds for all splits with rank at least `rnk`,
 * - invariant 3 holds for all splits in `predSplits`,
 * - invariant 4 holds for all splits in `succSplits` with rank at least `rnk`,
 *   and
 * - invariant 5 holds for all splits in `succSplits` with rank at least `rnk`.
 */
predicate case2aFromRank(
ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits,
Completion c, int rnk
) {
// Base case: start with the empty list, one past the highest rank for `succ`
case2aux(pred, predSplits, succ, c) and
succSplits = TSplitsNil() and
rnk = max(any(SplitKind sk).getListRank(succ)) + 1
or
// Step down one rank without adding a split: no split belongs at `rnk`
case2aFromRank(pred, predSplits, succ, succSplits, c, rnk + 1) and
case2aNoneAtRank(pred, predSplits, succ, c, rnk)
or
// Step down one rank and prepend the split that belongs at `rnk`
exists(Splits mid, SplitImpl split | split = case2aCons(pred, predSplits, succ, mid, c, rnk) |
succSplits = TSplitsCons(split, mid)
)
}
/**
 * Gets a split that should be at rank `rnk` in the successor's list, where
 * `succSplits` is the list already built for ranks `rnk + 1` and above.
 */
pragma[noinline]
private SplitImpl case2aCons(
  ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits,
  Completion c, int rnk
) {
  result = case2aSomeAtRank(pred, predSplits, succ, c, rnk) and
  case2aFromRank(pred, predSplits, succ, succSplits, c, rnk + 1)
}
/**
 * Case 2b.
 *
 * Invariants 1, 4, and 5 hold in the base case, and invariants 2 and 3 are
 * maintained for all splits in `predSplits`, except possibly for the splits
 * in `except`.
 *
 * The predicate is written using explicit recursion, as opposed to a `forall`,
 * to avoid negative recursion. The recursion starts with `except = predSplits`
 * and drops the head of the list in each step; the caller asks for
 * `except = TSplitsNil()`, meaning that every split in `predSplits` was checked.
 */
private predicate case2bForall(
ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, Splits except
) {
// Invariant 1
case2aux(pred, predSplits, succ, c) and
// Invariants 4 and 5
// (no split kind applies to `succ`, so the successor's set of splits is empty)
not any(SplitKind sk).appliesTo(succ) and
except = predSplits
or
exists(SplitImpl split | case2bForallCons(pred, predSplits, succ, c, split, except) |
// Invariants 2 and 3
split.hasExit(pred, succ, c)
)
}
/**
 * Helper for `case2bForall`: holds if the list of splits still to be checked
 * is `TSplitsCons(exceptHead, exceptTail)`.
 */
pragma[noinline]
private predicate case2bForallCons(
  ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c,
  SplitImpl exceptHead, Splits exceptTail
) {
  exists(Splits remaining |
    remaining = TSplitsCons(exceptHead, exceptTail) and
    case2bForall(pred, predSplits, succ, c, remaining)
  )
}
/**
 * Case 2: holds if `succ` with splits `succSplits` is a successor of `pred`
 * with splits `predSplits`, where the set of splits changes across the edge.
 */
private predicate case2(
  ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits,
  Completion c
) {
  // Case 2b: the successor always has the empty set of splits
  succSplits = TSplitsNil() and
  case2bForall(pred, predSplits, succ, c, TSplitsNil())
  or
  // Case 2a: the successor's list has been built all the way down to rank 1
  case2aFromRank(pred, predSplits, succ, succSplits, c, 1)
}
/**
 * Holds if `succ` with splits `succSplits` is a successor of `pred` with
 * splits `predSplits`, where the edge has completion `c`.
 */
predicate succSplits(
  ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits,
  Completion c
) {
  // The set of splits changes across the edge
  case2(pred, predSplits, succ, succSplits, c)
  or
  // The set of splits is unchanged
  succSplits = predSplits and
  case1(pred, predSplits, succ, c)
}
}
import SuccSplits
/** Provides logic for calculating reachable control flow nodes. */
private module Reachability {
/**
 * Holds if `cfe` starts a new block of elements that share the same set of
 * splits. That is, the set of possible splits for `cfe` may differ from the
 * set of possible splits for one of its predecessors.
 */
private predicate startsSplits(ControlFlowElement cfe) {
  // First element of a scope
  scopeFirst(_, cfe)
  or
  // Some split is entered when reaching `cfe`
  any(SplitImpl s).hasEntry(_, cfe, _)
  or
  // Some split is exited when reaching `cfe`
  any(SplitImpl s).hasExit(_, cfe, _)
  or
  // Some split applies to a predecessor, but is not carried over to `cfe`
  exists(ControlFlowElement pred, SplitImpl split, Completion c |
    succ(pred, cfe, c) and
    split.appliesTo(pred) and
    not split.hasSuccessor(pred, cfe, c)
  )
}
/**
 * Holds if `succ` is a successor of `pred` that does not start a new block,
 * that is, the edge stays within a block of elements with the same splits.
 */
private predicate intraSplitsSucc(ControlFlowElement pred, ControlFlowElement succ) {
  not startsSplits(succ) and
  succ(pred, succ, _)
}
/**
 * Holds if `cfe` can be reached from `start` by one or more `intraSplitsSucc`
 * steps. This is the transitive closure of `intraSplitsSucc`, computed with
 * the built-in `fastTC`.
 */
private predicate splitsBlockContains(ControlFlowElement start, ControlFlowElement cfe) =
fastTC(intraSplitsSucc/2)(start, cfe)
/**
 * A block of control flow elements for which the set of splits is guaranteed
 * to stay the same. The block is represented by its first element.
 */
class SameSplitsBlock extends ControlFlowElement {
  SameSplitsBlock() { startsSplits(this) }

  /** Gets a control flow element in this block (including the first element). */
  ControlFlowElement getAnElement() {
    result = this
    or
    splitsBlockContains(this, result)
  }

  /**
   * Gets a block that is entered with splits `succSplits` from an element of
   * this block with splits `predSplits`.
   */
  pragma[noinline]
  private SameSplitsBlock getASuccessor(Splits predSplits, Splits succSplits) {
    succSplits(this.getAnElement(), predSplits, result, succSplits, _)
  }

  /**
   * Holds if the elements of this block are reachable from a callable entry
   * point, with the splits `splits`.
   */
  predicate isReachable(Splits splits) {
    // Base case: this block starts at the first element of a scope
    succEntrySplits(_, this, splits, _)
    or
    // Recursive case: this block follows a reachable block
    exists(SameSplitsBlock pred, Splits predSplits |
      pred.isReachable(predSplits) and
      this = pred.getASuccessor(predSplits, splits)
    )
  }
}
}
cached
private module Cached {
/**
 * If needed, call this predicate from `ControlFlowGraphImplSpecific.qll` in order to
 * force a stage-dependency on the `ControlFlowGraphImplShared.qll` stage, thereby
 * collapsing the two stages.
 *
 * The predicate itself always holds.
 */
cached
predicate forceCachingInSameStage() { any() }
/**
 * The IPA type backing `Splits`: a cons-list of splits. Only lists that are
 * built by case 2a of the successor relation, or for scope entries, are
 * constructed.
 */
cached
newtype TSplits =
TSplitsNil() or
TSplitsCons(SplitImpl head, Splits tail) {
// Lists built for element-to-element edges (case 2a); this mirrors `case2aCons`
exists(
ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Completion c, int rnk
|
case2aFromRank(pred, predSplits, succ, tail, c, rnk + 1) and
head = case2aSomeAtRank(pred, predSplits, succ, c, rnk)
)
or
// Lists built when entering a scope
succEntrySplitsCons(_, _, head, tail, _)
}
/**
 * Gets a textual representation of `splits`: the non-empty string
 * representations of its members, separated by `", "`. The empty list is
 * rendered as the empty string.
 */
cached
string splitsToString(Splits splits) {
  splits = TSplitsNil() and
  result = ""
  or
  exists(SplitImpl first, Splits rest, string firstString, string restString |
    splits = TSplitsCons(first, rest) and
    firstString = first.toString() and
    restString = rest.toString()
  |
    // Nothing to append
    restString = "" and
    result = firstString
    or
    // Head is rendered as empty, so skip it
    restString != "" and
    firstString = "" and
    result = restString
    or
    restString != "" and
    firstString != "" and
    result = firstString + ", " + restString
  )
}
/**
 * Internal representation of control flow nodes in the control flow graph.
 * Only nodes that are reachable are constructed, so the graph is pruned for
 * unreachable nodes.
 */
cached
newtype TNode =
  TEntryNode(CfgScope scope) { succEntrySplits(scope, _, _, _) } or
  TAnnotatedExitNode(CfgScope scope, boolean normal) {
    exists(Reachability::SameSplitsBlock block, SuccessorType t |
      block.isReachable(_) and
      succExitSplits(block.getAnElement(), _, scope, t)
    |
      isAbnormalExitType(t) and normal = false
      or
      not isAbnormalExitType(t) and normal = true
    )
  } or
  TExitNode(CfgScope scope) {
    exists(Reachability::SameSplitsBlock block |
      block.isReachable(_) and
      succExitSplits(block.getAnElement(), _, scope, _)
    )
  } or
  TElementNode(ControlFlowElement cfe, Splits splits) {
    exists(Reachability::SameSplitsBlock block |
      cfe = block.getAnElement() and
      block.isReachable(splits)
    )
  }
/** Gets a successor node of `pred` along an edge of type `t`, if any. */
cached
TNode getASuccessor(TNode pred, SuccessorType t) {
  // Callable entry node -> callable body
  exists(CfgScope scope, ControlFlowElement succElement, Splits succSplits |
    pred = TEntryNode(scope) and
    succEntrySplits(scope, succElement, succSplits, t) and
    result = TElementNode(succElement, succSplits)
  )
  or
  // Callable exit (annotated) -> callable exit
  exists(CfgScope scope |
    successorTypeIsSimple(t) and
    pred = TAnnotatedExitNode(scope, _) and
    result = TExitNode(scope)
  )
  or
  exists(ControlFlowElement predElement, Splits predSplits |
    pred = TElementNode(predElement, predSplits) and
    (
      // Element node -> element node
      exists(ControlFlowElement succElement, Splits succSplits, Completion c |
        succSplits(predElement, predSplits, succElement, succSplits, c) and
        t = getAMatchingSuccessorType(c) and
        result = TElementNode(succElement, succSplits)
      )
      or
      // Element node -> callable exit (annotated)
      exists(CfgScope scope, boolean normal |
        succExitSplits(predElement, predSplits, scope, t) and
        (
          isAbnormalExitType(t) and normal = false
          or
          not isAbnormalExitType(t) and normal = true
        ) and
        result = TAnnotatedExitNode(scope, normal)
      )
    )
  )
}
/** Gets a control flow element that is executed first within `cfe`. */
cached
ControlFlowElement getAControlFlowEntryNode(ControlFlowElement cfe) {
  first(cfe, result)
}
/**
 * Gets a control flow element that is potentially executed last within
 * `cfe`, for any completion.
 */
cached
ControlFlowElement getAControlFlowExitNode(ControlFlowElement cfe) {
  last(cfe, result, _)
}
}
import Cached
/**
 * Import this module into a `.ql` file of `@kind graph` to render a CFG. Only
 * nodes that are instances of `RelevantNode` are included in the graph.
 */
module TestOutput {
  /** A node to include in the output; extend this class to select nodes. */
  abstract class RelevantNode extends Node { }

  /**
   * Holds if node `n` has the `semmle.order` attribute `val`, which is the
   * rank of `n` when relevant nodes are sorted by location.
   */
  query predicate nodes(RelevantNode n, string attr, string val) {
    attr = "semmle.order" and
    exists(int i |
      n =
        rank[i](RelevantNode p, Location l |
          l = p.getLocation()
        |
          p
          order by
            l.getFile().getBaseName(), l.getFile().getAbsolutePath(), l.getStartLine(),
            l.getStartColumn()
        ) and
      val = i.toString()
    )
  }

  /**
   * Holds if there is an edge from `pred` to `succ` with the `semmle.label`
   * attribute `val`. The label is empty for simple successor types.
   */
  query predicate edges(RelevantNode pred, RelevantNode succ, string attr, string val) {
    exists(SuccessorType t |
      attr = "semmle.label" and
      succ = getASuccessor(pred, t)
    |
      successorTypeIsSimple(t) and val = ""
      or
      not successorTypeIsSimple(t) and val = t.toString()
    )
  }
}
/** Provides a set of splitting-related consistency queries. */
module Consistency {
  /** Holds if `s1` and `s2` are distinct values with exactly the same member splits. */
  query predicate nonUniqueSetRepresentation(Splits s1, Splits s2) {
    s1 != s2 and
    forex(Split s | s = s1.getASplit() | s = s2.getASplit()) and
    forex(Split s | s = s2.getASplit() | s = s1.getASplit())
  }

  /** Holds if `split` should have been carried over to `succSplits`, but was not. */
  query predicate breakInvariant2(
    ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits,
    SplitImpl split, Completion c
  ) {
    split = predSplits.getASplit() and
    split.hasSuccessor(pred, succ, c) and
    succSplits(pred, predSplits, succ, succSplits, c) and
    not split = succSplits.getASplit()
  }

  /** Holds if `split` is exited (and not re-entered) on the edge, yet is in `succSplits`. */
  query predicate breakInvariant3(
    ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits,
    SplitImpl split, Completion c
  ) {
    split = predSplits.getASplit() and
    split = succSplits.getASplit() and
    succSplits(pred, predSplits, succ, succSplits, c) and
    split.hasExit(pred, succ, c) and
    not split.hasEntry(pred, succ, c)
  }

  /**
   * Holds if `split` is entered on the edge and no split of the same kind is
   * in `predSplits`, yet `split` is missing from `succSplits`.
   */
  query predicate breakInvariant4(
    ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits,
    SplitImpl split, Completion c
  ) {
    succSplits(pred, predSplits, succ, succSplits, c) and
    split.hasEntry(pred, succ, c) and
    not exists(SplitImpl predSplit |
      predSplit = predSplits.getASplit() and
      predSplit.getKind() = split.getKind()
    ) and
    not split = succSplits.getASplit()
  }

  /** Holds if `split` is in `succSplits` without being inherited or entered. */
  query predicate breakInvariant5(
    ControlFlowElement pred, Splits predSplits, ControlFlowElement succ, Splits succSplits,
    SplitImpl split, Completion c
  ) {
    succSplits(pred, predSplits, succ, succSplits, c) and
    split = succSplits.getASplit() and
    not (
      split.hasEntry(pred, succ, c)
      or
      split.hasSuccessor(pred, succ, c) and split = predSplits.getASplit()
    )
  }

  /**
   * Holds if `node`, which is not an entry node, has more than one successor
   * of type `t`, and `successor` is one of them.
   */
  query predicate multipleSuccessors(Node node, SuccessorType t, Node successor) {
    successor = getASuccessor(node, t) and
    strictcount(getASuccessor(node, t)) > 1 and
    not node instanceof TEntryNode
  }
}

View File

@@ -0,0 +1,74 @@
private import ruby as rb
private import ControlFlowGraphImpl as Impl
private import Completion as Comp
private import codeql.ruby.ast.internal.Synthesis
private import Splitting as Splitting
private import codeql.ruby.CFG as CFG
/**
 * The base class for `ControlFlowTree`: any AST node that is not excluded
 * from the control flow tree by a `Synthesis` instance.
 */
class ControlFlowTreeBase extends rb::AstNode {
  ControlFlowTreeBase() {
    not exists(Synthesis synthesis | synthesis.excludeFromControlFlowTree(this))
  }
}
/** A control flow element; for Ruby this is an alias for `AstNode`. */
class ControlFlowElement = rb::AstNode;
/** A completion; an alias for `Completion` from the `Completion` library. */
class Completion = Comp::Completion;
/**
 * Holds if `c` represents normal evaluation of a statement or an
 * expression.
 */
predicate completionIsNormal(Completion c) {
  c = any(Comp::NormalCompletion normal)
}
/**
 * Holds if `c` represents simple (normal) evaluation of a statement or an
 * expression.
 */
predicate completionIsSimple(Completion c) {
  c = any(Comp::SimpleCompletion simple)
}
/** Holds if completion `c` is valid for element `e`, as decided by `Completion::isValidFor`. */
predicate completionIsValidFor(Completion c, ControlFlowElement e) {
  c.isValidFor(e)
}
/** A CFG scope; an alias for `CfgScope` from the public CFG library. */
class CfgScope = CFG::CfgScope;
// Alias for `getCfgScope` from the Ruby-specific CFG implementation.
predicate getCfgScope = Impl::getCfgScope/1;
/** Holds if `first` is the element executed first when entering `scope`. */
predicate scopeFirst(CfgScope scope, ControlFlowElement first) {
  exists(Impl::CfgScope::Range_ range | range = scope | range.entry(first))
}
/** Holds if `scope` is left when `last` finishes with completion `c`. */
predicate scopeLast(CfgScope scope, ControlFlowElement last, Completion c) {
  exists(Impl::CfgScope::Range_ range | range = scope | range.exit(last, c))
}
/** Gets the maximum number of splits allowed for a given node. */
int maxSplits() {
  result = 5
}
/** The base type for split kinds; an alias for `TSplitKind` from the `Splitting` library. */
class SplitKindBase = Splitting::TSplitKind;
/** A split; an alias for `Split` from the `Splitting` library. */
class Split = Splitting::Split;
/** A control flow successor type; an alias for `SuccessorType` from the public CFG library. */
class SuccessorType = CFG::SuccessorType;
/** Gets a successor type that matches completion `c`. */
SuccessorType getAMatchingSuccessorType(Completion c) {
  result = c.getAMatchingSuccessorType()
}
/**
 * Holds if `t` is a simple successor type, that is, a
 * `NormalSuccessor`.
 */
predicate successorTypeIsSimple(SuccessorType t) {
  t = any(CFG::SuccessorTypes::NormalSuccessor normal)
}
/**
 * Holds if `t` is an abnormal exit type out of a CFG scope, that is, a
 * `RaiseSuccessor` or an `ExitSuccessor`.
 */
predicate isAbnormalExitType(SuccessorType t) {
  t instanceof CFG::SuccessorTypes::ExitSuccessor
  or
  t instanceof CFG::SuccessorTypes::RaiseSuccessor
}
/** A source location; an alias for the Ruby `Location` class. */
class Location = rb::Location;
/** A control flow node; an alias for `CfgNode` from the public CFG library. */
class Node = CFG::CfgNode;

View File

@@ -0,0 +1,22 @@
/** Provides a simple analysis for identifying calls that will not return. */
private import codeql.ruby.AST
private import Completion
/**
 * A call that definitely does not return (conservative analysis).
 *
 * Subclasses identify such calls and provide the completions they can have.
 */
abstract class NonReturningCall extends MethodCall {
/** Gets a valid completion for this non-returning call. */
abstract Completion getACompletion();
}
/**
 * A call to a method named `raise`, which completes with a non-nested raise
 * completion.
 *
 * NOTE(review): calls are matched on method name only, regardless of receiver.
 */
private class RaiseCall extends NonReturningCall {
  RaiseCall() { "raise" = this.getMethodName() }

  override RaiseCompletion getACompletion() {
    not exists(NestedCompletion nested | nested = result)
  }
}
/**
 * A call to a method named `abort` or `exit`, which completes with a
 * non-nested exit completion.
 *
 * NOTE(review): calls are matched on method name only, regardless of receiver.
 */
private class ExitCall extends NonReturningCall {
  ExitCall() { this.getMethodName() = ["abort", "exit"] }

  override ExitCompletion getACompletion() {
    not exists(NestedCompletion nested | nested = result)
  }
}

View File

@@ -0,0 +1,336 @@
/**
* Provides classes and predicates relevant for splitting the control flow graph.
*/
private import codeql.ruby.AST
private import Completion
private import ControlFlowGraphImpl
private import SuccessorTypes
private import codeql.ruby.controlflow.ControlFlowGraph
cached
private module Cached {
/**
 * The kinds of splits: one kind for conditional-completion splits, and one
 * `ensure` kind per nesting level of a `BodyStmtTree`.
 */
cached
newtype TSplitKind =
// `forceCachingInSameStage()` always holds; it is referenced here only to
// tie this stage to the shared CFG implementation stage
TConditionalCompletionSplitKind() { forceCachingInSameStage() } or
TEnsureSplitKind(int nestLevel) { nestLevel = any(Trees::BodyStmtTree t).getNestLevel() }
/**
 * The splits: one per conditional completion, and one per combination of
 * `ensure` split type and nesting level of a `BodyStmtTree`.
 */
cached
newtype TSplit =
TConditionalCompletionSplit(ConditionalCompletion c) or
TEnsureSplit(EnsureSplitting::EnsureSplitType type, int nestLevel) {
nestLevel = any(Trees::BodyStmtTree t).getNestLevel()
}
}
import Cached
/** A split for a control flow node. */
class Split extends TSplit {
/**
 * Gets a textual representation of this split.
 *
 * This base implementation has no result; the subclasses in this file
 * override it.
 */
string toString() { none() }
}
private module ConditionalCompletionSplitting {
/**
 * A split for conditional completions. For example, in
 *
 * ```rb
 * def method x
 *   if x < 2 and x > 0
 *     puts "x is 1"
 *   end
 * end
 * ```
 *
 * we record whether `x < 2` and `x > 0` evaluate to `true` or `false`, and
 * restrict the edges out of `x < 2 and x > 0` accordingly.
 */
class ConditionalCompletionSplit extends Split, TConditionalCompletionSplit {
// The conditional completion recorded by this split.
ConditionalCompletion completion;
ConditionalCompletionSplit() { this = TConditionalCompletionSplit(completion) }
// The split is rendered as its recorded completion.
override string toString() { result = completion.toString() }
}
/** The kind of conditional-completion splits; it has list order 0. */
private class ConditionalCompletionSplitKind extends SplitKind, TConditionalCompletionSplitKind {
override int getListOrder() { result = 0 }
// This kind is enabled for exactly the nodes it applies to.
override predicate isEnabled(AstNode n) { this.appliesTo(n) }
override string toString() { result = "ConditionalCompletion" }
}
/**
 * Gets the list order following the one used by the conditional-completion
 * split kind (which uses list order 0).
 */
int getNextListOrder() { result = 1 }
/** The entry, exit, and successor logic for conditional-completion splits. */
private class ConditionalCompletionSplitImpl extends SplitImpl, ConditionalCompletionSplit {
  override ConditionalCompletionSplitKind getKind() { any() }

  /**
   * Holds if this split is entered on the edge from `pred` to `succ`: `pred`
   * is a last element of a relevant child of `succ` with completion `c`, and
   * the recorded completion is a possible completion of `succ` determined by `c`.
   */
  override predicate hasEntry(AstNode pred, AstNode succ, Completion c) {
    succ(pred, succ, c) and
    last(succ, _, completion) and
    (
      // Negation: the recorded completion is the dual of the operand's completion
      c = completion.(BooleanCompletion).getDual() and
      last(succ.(NotExpr).getOperand(), pred, c)
      or
      // Otherwise the recorded completion is that of the relevant child
      c = completion and
      exists(AstNode child | last(child, pred, c) |
        child = succ.(LogicalAndExpr).getAnOperand()
        or
        child = succ.(LogicalOrExpr).getAnOperand()
        or
        child = succ.(StmtSequence).getLastStmt()
        or
        child = succ.(ConditionalExpr).getBranch(_)
      )
    )
  }

  /** Conditional-completion splits are never entered on scope entry. */
  override predicate hasEntryScope(CfgScope scope, AstNode succ) { none() }

  /**
   * Holds if this split is exited on the edge from `pred` to `succ`. If `c`
   * is a conditional completion, it must be the recorded completion.
   */
  override predicate hasExit(AstNode pred, AstNode succ, Completion c) {
    this.appliesTo(pred) and
    succ(pred, succ, c) and
    (not c instanceof ConditionalCompletion or completion = c)
  }

  /**
   * Holds if this split is exited when `last` exits `scope`. If `c` is a
   * conditional completion, it must be the recorded completion.
   */
  override predicate hasExitScope(CfgScope scope, AstNode last, Completion c) {
    this.appliesTo(last) and
    succExit(scope, last, c) and
    (not c instanceof ConditionalCompletion or completion = c)
  }

  /** Conditional-completion splits are never carried over to a successor. */
  override predicate hasSuccessor(AstNode pred, AstNode succ, Completion c) { none() }
}
}
module EnsureSplitting {
/**
 * The type of a split `ensure` node.
 *
 * The type represents one of the possible ways of entering an `ensure`
 * block. For example, if a block ends with a `return` statement, then
 * the `ensure` block must end with a `return` as well (provided that
 * the `ensure` block executes normally).
 */
class EnsureSplitType extends SuccessorType {
  EnsureSplitType() { not this instanceof ConditionalSuccessor }

  /** Holds if this split type matches entry into an `ensure` block with completion `c`. */
  predicate isSplitForEntryCompletion(Completion c) {
    // Entering the `ensure` block with any normal completion simply means
    // normal execution after the `ensure` block
    c instanceof NormalCompletion and
    this instanceof NormalSuccessor
    or
    not c instanceof NormalCompletion and
    this = c.getAMatchingSuccessorType()
  }
}
/** A node that belongs to an `ensure` block. */
private class EnsureNode extends AstNode {
// The body statement whose `ensure` block has this node as a descendant.
private Trees::BodyStmtTree block;
EnsureNode() { this = block.getAnEnsureDescendant() }
/** Gets the nesting level of the body statement that this node's `ensure` block belongs to. */
int getNestLevel() { result = block.getNestLevel() }
/** Holds if this node is the entry node in the `ensure` block it belongs to. */
predicate isEntryNode() { first(block.getEnsure(), this) }
}
/**
 * A split for nodes belonging to an `ensure` block, which determines how to
 * continue execution after leaving the `ensure` block. For example, in
 *
 * ```rb
 * begin
 *   if x
 *     raise "Exception"
 *   end
 * ensure
 *   puts "Ensure"
 * end
 * ```
 *
 * all control flow nodes in the `ensure` block have two splits: one representing
 * normal execution of the body (when `x` evaluates to `false`), and one representing
 * exceptional execution of the body (when `x` evaluates to `true`).
 */
class EnsureSplit extends Split, TEnsureSplit {
  private EnsureSplitType type;
  private int nestLevel;

  EnsureSplit() { this = TEnsureSplit(type, nestLevel) }

  /**
   * Gets the type of this `ensure` split, that is, how to continue execution after the
   * `ensure` block.
   */
  EnsureSplitType getType() { result = type }

  /** Gets the nesting level. */
  int getNestLevel() { result = nestLevel }

  /**
   * Gets a textual representation of this split: empty for the normal split
   * type, otherwise the split type prefixed by `ensure` (with the nesting
   * level included when it is positive).
   */
  override string toString() {
    type instanceof NormalSuccessor and
    result = ""
    or
    not type instanceof NormalSuccessor and
    (
      nestLevel > 0 and
      result = "ensure(" + nestLevel + "): " + type.toString()
      or
      not nestLevel > 0 and
      result = "ensure: " + type.toString()
    )
  }
}
/**
 * Gets the list order of `ensure` split kind `kind`: its nesting level,
 * offset by the first list order that follows conditional-completion splits.
 */
private int getListOrder(EnsureSplitKind kind) {
  result = kind.getNestLevel() + ConditionalCompletionSplitting::getNextListOrder()
}
/**
 * Gets the list order following all `ensure` split kinds, or the list order
 * following conditional-completion splits when that is higher (for example
 * when there are no `ensure` split kinds).
 */
int getNextListOrder() {
  result =
    max(int order |
      order = getListOrder(_) + 1
      or
      order = ConditionalCompletionSplitting::getNextListOrder()
    )
}
/** The kind of `ensure` splits at a given nesting level. */
private class EnsureSplitKind extends SplitKind, TEnsureSplitKind {
private int nestLevel;
EnsureSplitKind() { this = TEnsureSplitKind(nestLevel) }
/** Gets the nesting level. */
int getNestLevel() { result = nestLevel }
// The list order is computed by the module-level `getListOrder` predicate.
override int getListOrder() { result = getListOrder(this) }
override string toString() { result = "ensure (" + nestLevel + ")" }
}
/**
 * Holds if `succ` is the entry node of an `ensure` block at nesting level
 * `nestLevel`, and `succ` is a successor of `pred` with completion `c`.
 */
pragma[noinline]
private predicate hasEntry0(AstNode pred, EnsureNode succ, int nestLevel, Completion c) {
  succ(pred, succ, c) and
  succ.isEntryNode() and
  nestLevel = succ.getNestLevel()
}
/** The entry, exit, and successor logic for `ensure` splits. */
private class EnsureSplitImpl extends SplitImpl, EnsureSplit {
// The kind is the `ensure` split kind with the same nesting level as this split.
override EnsureSplitKind getKind() { result.getNestLevel() = this.getNestLevel() }
/**
 * Holds if this split is entered on the edge from `pred` to `succ`: `succ` is
 * the entry node of an `ensure` block at this split's nesting level, and this
 * split's type matches the completion `c` with which the block is entered.
 */
override predicate hasEntry(AstNode pred, AstNode succ, Completion c) {
hasEntry0(pred, succ, this.getNestLevel(), c) and
this.getType().isSplitForEntryCompletion(c)
}
/** `ensure` splits are never entered on scope entry. */
override predicate hasEntryScope(CfgScope scope, AstNode first) { none() }
/**
 * Holds if this split applies to `pred`, where `pred` is a valid predecessor.
 */
private predicate appliesToPredecessor(AstNode pred) {
this.appliesTo(pred) and
(succ(pred, _, _) or succExit(_, pred, _))
}
/**
 * Holds if this split applies to predecessor `pred`, and `pred` is a last
 * inner element (`lastInner`) of `block` with completion `c`, where `block`
 * has nesting level `nestLevel`.
 */
pragma[noinline]
private predicate exit0(AstNode pred, Trees::BodyStmtTree block, int nestLevel, Completion c) {
this.appliesToPredecessor(pred) and
nestLevel = block.getNestLevel() and
block.lastInner(pred, c)
}
/**
 * Holds if `pred` may exit this split with completion `c`. The Boolean
 * `inherited` indicates whether `c` is an inherited completion from the
 * body.
 */
private predicate exit(Trees::BodyStmtTree block, AstNode pred, Completion c, boolean inherited) {
exists(EnsureSplitType type |
exit0(pred, block, this.getNestLevel(), c) and
type = this.getType()
|
if last(block.getEnsure(), pred, c)
then
// `ensure` block can itself exit with completion `c`: either `c` must
// match this split, `c` must be an abnormal completion, or this split
// does not require another completion to be recovered
inherited = false and
(
type = c.getAMatchingSuccessorType()
or
not c instanceof NormalCompletion
or
type instanceof NormalSuccessor
)
else (
// `ensure` block can exit with inherited completion `c`, which must
// match this split
inherited = true and
type = c.getAMatchingSuccessorType() and
not type instanceof NormalSuccessor
)
)
or
// If this split is normal, and an outer split can exit based on an inherited
// completion, we need to exit this split as well. For example, in
//
// ```rb
// def m(b1, b2)
//   if b1
//     return
//   end
// ensure
//   begin
//     if b2
//       raise "Exception"
//     end
//   ensure
//     puts "inner ensure"
//   end
// end
// ```
//
// if the outer split for `puts "inner ensure"` is `return` and the inner split
// is "normal" (corresponding to `b1 = true` and `b2 = false`), then the inner
// split must be able to exit with a `return` completion.
this.appliesToPredecessor(pred) and
exists(EnsureSplitImpl outer |
outer.getNestLevel() = this.getNestLevel() - 1 and
outer.exit(_, pred, c, inherited) and
this.getType() instanceof NormalSuccessor and
inherited = true
)
}
/**
 * Holds if this split is exited on the edge from `pred` to `succ` with
 * completion `c`, either directly or via an inner completion that is
 * compatible with the nested break completion `c`.
 */
override predicate hasExit(AstNode pred, AstNode succ, Completion c) {
succ(pred, succ, c) and
(
exit(_, pred, c, _)
or
exit(_, pred, c.(NestedBreakCompletion).getAnInnerCompatibleCompletion(), _)
)
}
/**
 * Holds if this split is exited when `last` exits `scope` with completion
 * `c`, either directly or via an inner completion that is compatible with
 * the nested break completion `c`.
 */
override predicate hasExitScope(CfgScope scope, AstNode last, Completion c) {
succExit(scope, last, c) and
(
exit(_, last, c, _)
or
exit(_, last, c.(NestedBreakCompletion).getAnInnerCompatibleCompletion(), _)
)
}
/**
 * Holds if this split is carried over the edge from `pred` to `succ`, that
 * is, `succ` is a node of an `ensure` block that this split still covers.
 */
override predicate hasSuccessor(AstNode pred, AstNode succ, Completion c) {
this.appliesToPredecessor(pred) and
succ(pred, succ, c) and
succ =
any(EnsureNode en |
if en.isEntryNode()
then
// entering a nested `ensure` block
en.getNestLevel() > this.getNestLevel()
else
// staying in the same (possibly nested) `ensure` block as `pred`
en.getNestLevel() >= this.getNestLevel()
)
}
}
}

View File

@@ -0,0 +1,75 @@
/** Provides commonly used barriers to dataflow. */
private import ruby
private import codeql.ruby.DataFlow
private import codeql.ruby.CFG
/**
 * A validation of a value by comparing it with a constant string value, for
 * example in:
 *
 * ```rb
 * dir = params[:order]
 * dir = "DESC" unless dir == "ASC"
 * User.order("name #{dir}")
 * ```
 *
 * the equality operation guards against `dir` taking arbitrary values when used
 * in the `order` call.
 */
class StringConstCompare extends DataFlow::BarrierGuard,
  CfgNodes::ExprNodes::ComparisonOperationCfgNode {
  // The operand that is compared against the string literal.
  private CfgNode checkedNode;
  // The value of the condition that results in the node being validated.
  private boolean checkedBranch;

  StringConstCompare() {
    exists(CfgNodes::ExprNodes::StringLiteralCfgNode strLitNode |
      // The string literal may be on either side of the comparison
      (
        this.getLeftOperand() = strLitNode and this.getRightOperand() = checkedNode
        or
        this.getLeftOperand() = checkedNode and this.getRightOperand() = strLitNode
      ) and
      (
        // `==` and `===` validate on the true branch
        checkedBranch = true and
        (this.getExpr() instanceof EqExpr or this.getExpr() instanceof CaseEqExpr)
        or
        // `!=` validates on the false branch
        checkedBranch = false and
        this.getExpr() instanceof NEExpr
      )
    )
  }

  override predicate checks(CfgNode expr, boolean branch) {
    branch = checkedBranch and
    expr = checkedNode
  }
}
/**
 * A validation of a value by checking for inclusion in an array of string
 * literal values, for example in:
 *
 * ```rb
 * name = params[:user_name]
 * if %w(alice bob charlie).include? name
 *   User.find_by("username = #{name}")
 * end
 * ```
 *
 * the `include?` call guards against `name` taking arbitrary values when used
 * in the `find_by` call.
 */
class StringConstArrayInclusionCall extends DataFlow::BarrierGuard,
  CfgNodes::ExprNodes::MethodCallCfgNode {
  // The first argument of the `include?` call, that is, the validated value.
  private CfgNode checkedNode;

  StringConstArrayInclusionCall() {
    exists(ArrayLiteral aLit |
      this.getExpr().getReceiver() = aLit and
      this.getExpr().getMethodName() = "include?" and
      this.getArgument(0) = checkedNode and
      // Every element of the array literal is a string literal
      not exists(Expr elem |
        elem = aLit.getAnElement() and
        not elem instanceof StringLiteral
      )
    )
  }

  override predicate checks(CfgNode expr, boolean branch) {
    branch = true and
    expr = checkedNode
  }
}

View File

@@ -0,0 +1,125 @@
/** Provides classes and predicates for defining flow summaries. */
import ruby
import codeql.ruby.DataFlow
private import internal.FlowSummaryImpl as Impl
private import internal.DataFlowDispatch
// import all instances below
// NOTE(review): this module is currently empty; presumably it is the intended
// place for private imports of libraries that define `SummarizedCallable`s — confirm.
private module Summaries { }
/** An alias for `Impl::Public::SummaryComponent`. */
class SummaryComponent = Impl::Public::SummaryComponent;

/** Provides predicates for constructing summary components. */
module SummaryComponent {
  private import Impl::Public::SummaryComponent as SC

  /** An alias for `Impl::Public::SummaryComponent::parameter/1`. */
  predicate parameter = SC::parameter/1;

  /** An alias for `Impl::Public::SummaryComponent::argument/1`. */
  predicate argument = SC::argument/1;

  /** An alias for `Impl::Public::SummaryComponent::content/1`. */
  predicate content = SC::content/1;

  /** Gets a summary component that represents a qualifier (argument position `-1`). */
  SummaryComponent qualifier() { result = SC::argument(-1) }

  /** Gets a summary component that represents a block argument (argument position `-2`). */
  SummaryComponent block() { result = SC::argument(-2) }

  /** Gets a summary component that represents the (normal) return value of a call. */
  SummaryComponent return() { exists(NormalReturnKind rk | result = SC::return(rk)) }
}
/** An alias for `Impl::Public::SummaryComponentStack`. */
class SummaryComponentStack = Impl::Public::SummaryComponentStack;

/** Provides predicates for constructing stacks of summary components. */
module SummaryComponentStack {
  private import Impl::Public::SummaryComponentStack as SCS

  /** An alias for `Impl::Public::SummaryComponentStack::singleton/1`. */
  predicate singleton = SCS::singleton/1;

  /** An alias for `Impl::Public::SummaryComponentStack::push/2`. */
  predicate push = SCS::push/2;

  /** An alias for `Impl::Public::SummaryComponentStack::argument/1`. */
  predicate argument = SCS::argument/1;

  /** Gets a singleton stack representing a qualifier. */
  SummaryComponentStack qualifier() { result = SCS::singleton(SummaryComponent::qualifier()) }

  /** Gets a singleton stack representing a block argument. */
  SummaryComponentStack block() { result = SCS::singleton(SummaryComponent::block()) }

  /** Gets a singleton stack representing the return value of a call. */
  SummaryComponentStack return() { result = SCS::singleton(SummaryComponent::return()) }
}
/**
* A callable with a flow summary, identified by a unique string.
*
* Subclasses describe the summary by overriding `propagatesFlow`,
* `propagatesFlowExt` and/or `clearsContent`; each of these predicates holds
* for nothing unless overridden.
*/
abstract class SummarizedCallable extends LibraryCallable {
// The characteristic predicate places no restriction on `this`; `bindingset[this]`
// declares that `this` must be bound by the context (here: by concrete subclasses).
bindingset[this]
SummarizedCallable() { any() }
/**
* Holds if data may flow from `input` to `output` through this callable.
*
* `preservesValue` indicates whether this is a value-preserving step
* or a taint-step.
*
* Input specifications are restricted to stacks that end with
* `SummaryComponent::argument(_)`, preceded by zero or more
* `SummaryComponent::return()` or `SummaryComponent::content(_)` components.
*
* Output specifications are restricted to stacks that end with
* `SummaryComponent::return()` or `SummaryComponent::argument(_)`.
*
* Output stacks ending with `SummaryComponent::return()` can be preceded by zero
* or more `SummaryComponent::content(_)` components.
*
* Output stacks ending with `SummaryComponent::argument(_)` can be preceded by an
* optional `SummaryComponent::parameter(_)` component, which in turn can be preceded
* by zero or more `SummaryComponent::content(_)` components.
*
* The default implementation holds for nothing.
*/
pragma[nomagic]
predicate propagatesFlow(
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
) {
none()
}
/**
* Same as
*
* ```ql
* propagatesFlow(
* SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
* )
* ```
*
* but uses an external (string) representation of the input and output stacks.
*
* The default implementation holds for nothing.
*/
pragma[nomagic]
predicate propagatesFlowExt(string input, string output, boolean preservesValue) { none() }
/**
* Holds if values stored inside `content` are cleared on objects passed as
* the `i`th argument to this callable.
*
* The default implementation holds for nothing.
*/
pragma[nomagic]
predicate clearsContent(int i, DataFlow::Content content) { none() }
}
/**
 * Adapts every `SummarizedCallable` to the `Impl::Public::SummarizedCallable`
 * interface by forwarding `propagatesFlow` and `clearsContent` to it.
 */
private class SummarizedCallableAdapter extends Impl::Public::SummarizedCallable {
  // The user-facing summarized callable that this library callable wraps.
  private SummarizedCallable adaptee;

  SummarizedCallableAdapter() { TLibraryCallable(adaptee) = this }

  final override predicate propagatesFlow(
    SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
  ) {
    adaptee.propagatesFlow(input, output, preservesValue)
  }

  final override predicate clearsContent(int i, DataFlow::Content content) {
    adaptee.clearsContent(i, content)
  }
}
/** An alias for `Impl::Public::RequiredSummaryComponentStack`. */
class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack;

View File

@@ -0,0 +1,37 @@
/**
* Provides an extension point for modeling user-controlled data.
* Such data is often used as data-flow sources in security queries.
*/
private import codeql.ruby.dataflow.internal.DataFlowPublic as DataFlow
// Need to import since frameworks can extend `RemoteFlowSource::Range`
private import codeql.ruby.Frameworks
/**
* A data flow source of remote user input.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RemoteFlowSource::Range` instead.
*
* Every `RemoteFlowSource::Range` node is a `RemoteFlowSource`, and the
* predicates of this class delegate to the corresponding `Range` predicates.
*/
class RemoteFlowSource extends DataFlow::Node {
// The `Range` view of this very node (see the characteristic predicate).
RemoteFlowSource::Range self;
RemoteFlowSource() { this = self }
/** Gets a string that describes the type of this remote flow source. */
string getSourceType() { result = self.getSourceType() }
}
/** Provides a class for modeling new sources of remote user input. */
module RemoteFlowSource {
/**
* A data flow source of remote user input.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `RemoteFlowSource` instead.
*
* Concrete subclasses must supply a characteristic predicate selecting the
* source nodes and implement `getSourceType`.
*/
abstract class Range extends DataFlow::Node {
/** Gets a string that describes the type of this remote flow source. */
abstract string getSourceType();
}
}

View File

@@ -0,0 +1,385 @@
/**
* Provides the module `Ssa` for working with static single assignment (SSA) form.
*/
/**
* Provides classes for working with static single assignment (SSA) form.
*/
module Ssa {
private import codeql.Locations
private import codeql.ruby.CFG
private import codeql.ruby.ast.Variable
private import internal.SsaImplCommon as SsaImplCommon
private import internal.SsaImpl as SsaImpl
private import CfgNodes::ExprNodes
/** A static single assignment (SSA) definition. */
class Definition extends SsaImplCommon::Definition {
  /**
   * Gets the control flow node of this SSA definition, if any. Phi nodes are
   * examples of SSA definitions without a control flow node, as they are
   * modeled at index `-1` in the relevant basic block.
   */
  final CfgNode getControlFlowNode() {
    exists(BasicBlock block, int index |
      result = block.getNode(index) and
      this.definesAt(_, block, index)
    )
  }

  /**
   * Gets a control-flow node that reads the value of this SSA definition.
   *
   * Example:
   *
   * ```rb
   * def m b            # defines b_0
   *   i = 0            # defines i_0
   *   puts i           # reads i_0
   *   puts i + 1       # reads i_0
   *   if b             # reads b_0
   *     i = 1          # defines i_1
   *     puts i         # reads i_1
   *     puts i + 1     # reads i_1
   *   else
   *     i = 2          # defines i_2
   *     puts i         # reads i_2
   *     puts i + 1     # reads i_2
   *   end
   *                    # defines i_3 = phi(i_1, i_2)
   *   puts i           # reads i_3
   * end
   * ```
   */
  final VariableReadAccessCfgNode getARead() { SsaImpl::getARead(this) = result }

  /**
   * Gets a first control-flow node that reads the value of this SSA definition.
   * That is, a read that can be reached from this definition without passing
   * through other reads.
   *
   * Example:
   *
   * ```rb
   * def m b            # defines b_0
   *   i = 0            # defines i_0
   *   puts i           # first read of i_0
   *   puts i + 1
   *   if b             # first read of b_0
   *     i = 1          # defines i_1
   *     puts i         # first read of i_1
   *     puts i + 1
   *   else
   *     i = 2          # defines i_2
   *     puts i         # first read of i_2
   *     puts i + 1
   *   end
   *                    # defines i_3 = phi(i_1, i_2)
   *   puts i           # first read of i_3
   * end
   * ```
   */
  final VariableReadAccessCfgNode getAFirstRead() { SsaImpl::firstRead(this, result) }

  /**
   * Gets a last control-flow node that reads the value of this SSA definition.
   * That is, a read that can reach the end of the enclosing CFG scope, or another
   * SSA definition for the source variable, without passing through any other read.
   *
   * Example:
   *
   * ```rb
   * def m b            # defines b_0
   *   i = 0            # defines i_0
   *   puts i
   *   puts i + 1       # last read of i_0
   *   if b             # last read of b_0
   *     i = 1          # defines i_1
   *     puts i
   *     puts i + 1     # last read of i_1
   *   else
   *     i = 2          # defines i_2
   *     puts i
   *     puts i + 1     # last read of i_2
   *   end
   *                    # defines i_3 = phi(i_1, i_2)
   *   puts i           # last read of i_3
   * end
   * ```
   */
  final VariableReadAccessCfgNode getALastRead() { SsaImpl::lastRead(this, result) }

  /**
   * Holds if `read1` and `read2` are adjacent reads of this SSA definition.
   * That is, `read2` can be reached from `read1` without passing through
   * another read.
   *
   * Example:
   *
   * ```rb
   * def m b
   *   i = 0            # defines i_0
   *   puts i           # reads i_0 (read1)
   *   puts i + 1       # reads i_0 (read2)
   *   if b
   *     i = 1          # defines i_1
   *     puts i         # reads i_1 (read1)
   *     puts i + 1     # reads i_1 (read2)
   *   else
   *     i = 2          # defines i_2
   *     puts i         # reads i_2 (read1)
   *     puts i + 1     # reads i_2 (read2)
   *   end
   *   puts i
   * end
   * ```
   */
  final predicate hasAdjacentReads(
    VariableReadAccessCfgNode read1, VariableReadAccessCfgNode read2
  ) {
    SsaImpl::adjacentReadPair(this, read1, read2)
  }

  /**
   * Gets an SSA definition whose value can flow to this one in one step. This
   * includes inputs to phi nodes and the prior definitions of uncertain writes.
   */
  private Definition getAPhiInputOrPriorDefinition() {
    result =
      [this.(PhiNode).getAnInput(), this.(CapturedCallDefinition).getPriorDefinition()]
  }

  /**
   * Gets a definition that ultimately defines this SSA definition and is
   * not itself a phi node. The search follows phi inputs and the prior
   * definitions of captured-call definitions, zero or more times.
   *
   * Example:
   *
   * ```rb
   * def m b
   *   i = 0            # defines i_0
   *   puts i
   *   puts i + 1
   *   if b
   *     i = 1          # defines i_1
   *     puts i
   *     puts i + 1
   *   else
   *     i = 2          # defines i_2
   *     puts i
   *     puts i + 1
   *   end
   *                    # defines i_3 = phi(i_1, i_2); ultimate definitions are i_1 and i_2
   *   puts i
   * end
   * ```
   */
  final Definition getAnUltimateDefinition() {
    not result instanceof PhiNode and
    result = this.getAPhiInputOrPriorDefinition*()
  }

  /** Gets a textual representation of this definition: that of its control flow node. */
  override string toString() {
    exists(CfgNode node | node = this.getControlFlowNode() | result = node.toString())
  }

  /** Gets the location of this SSA definition. */
  Location getLocation() {
    exists(CfgNode node | node = this.getControlFlowNode() | result = node.getLocation())
  }
}
/**
 * An SSA definition that corresponds to a write. For example `x = 10` in
 *
 * ```rb
 * x = 10
 * puts x
 * ```
 */
class WriteDefinition extends Definition, SsaImplCommon::WriteDefinition {
  // The write access that gives rise to this definition.
  private VariableWriteAccess access;

  WriteDefinition() {
    exists(Variable v, BasicBlock block, int index |
      SsaImpl::variableWriteActual(block, index, v, access) and
      this.definesAt(v, block, index)
    )
  }

  /** Gets the underlying write access. */
  final VariableWriteAccess getWriteAccess() { result = access }

  /**
   * Holds if this SSA definition represents a direct assignment of `value`
   * to the underlying variable.
   */
  predicate assigns(CfgNodes::ExprCfgNode value) {
    exists(BasicBlock block, int index, CfgNodes::ExprNodes::AssignExprCfgNode assignment |
      assignment = block.getNode(index) and
      this.definesAt(_, block, index) and
      assignment.getRhs() = value
    )
  }

  final override string toString() { result = Definition.super.toString() }

  final override Location getLocation() {
    exists(CfgNode node | node = this.getControlFlowNode() | result = node.getLocation())
  }
}
/**
 * An SSA definition inserted at the beginning of a scope to represent an
 * uninitialized local variable. For example, in
 *
 * ```rb
 * def m
 *   x = 10 if b
 *   puts x
 * end
 * ```
 *
 * since the assignment to `x` is conditional, an uninitialized definition for
 * `x` is inserted at the start of `m`.
 */
class UninitializedDefinition extends Definition, SsaImplCommon::WriteDefinition {
  UninitializedDefinition() {
    exists(Variable v, BasicBlock block, int index |
      SsaImpl::uninitializedWrite(block, index, v) and
      this.definesAt(v, block, index)
    )
  }

  final override string toString() { result = "<uninitialized>" }

  /** Gets the location of this definition: that of its basic block. */
  final override Location getLocation() {
    exists(BasicBlock block | block = this.getBasicBlock() | result = block.getLocation())
  }
}
/**
 * An SSA definition inserted at the beginning of a scope to represent a
 * captured local variable. For example, in
 *
 * ```rb
 * def m x
 *   y = 0
 *   x.times do |x|
 *     y += x
 *   end
 *   return y
 * end
 * ```
 *
 * an entry definition for `y` is inserted at the start of the `do` block.
 */
class CapturedEntryDefinition extends Definition, SsaImplCommon::WriteDefinition {
  CapturedEntryDefinition() {
    exists(Variable v, BasicBlock block, int index |
      SsaImpl::capturedEntryWrite(block, index, v) and
      this.definesAt(v, block, index)
    )
  }

  final override string toString() { result = "<captured>" }

  /** Gets the location of this definition: that of its basic block. */
  override Location getLocation() {
    exists(BasicBlock block | block = this.getBasicBlock() | result = block.getLocation())
  }
}
/**
 * An SSA definition inserted at a call that may update the value of a captured
 * variable. For example, in
 *
 * ```rb
 * def m x
 *   y = 0
 *   x.times do |x|
 *     y += x
 *   end
 *   return y
 * end
 * ```
 *
 * a definition for `y` is inserted at the call to `times`.
 */
class CapturedCallDefinition extends Definition, SsaImplCommon::UncertainWriteDefinition {
  CapturedCallDefinition() {
    exists(BasicBlock block, int index, Variable v |
      SsaImpl::capturedCallWrite(block, index, v) and
      this.definesAt(v, block, index)
    )
  }

  /**
   * Gets the immediately preceding definition. Since this update is uncertain,
   * the value from the preceding definition might still be valid.
   */
  final Definition getPriorDefinition() { SsaImpl::uncertainWriteDefinitionInput(this) = result }

  override string toString() {
    exists(CfgNode node | node = this.getControlFlowNode() | result = node.toString())
  }
}
/**
 * A phi node. For example, in
 *
 * ```rb
 * if b
 *   x = 0
 * else
 *   x = 1
 * end
 * puts x
 * ```
 *
 * a phi node for `x` is inserted just before the call `puts x`.
 */
class PhiNode extends Definition, SsaImplCommon::PhiNode {
  /**
   * Gets an input of this phi node.
   *
   * Example:
   *
   * ```rb
   * def m b
   *   i = 0            # defines i_0
   *   puts i
   *   puts i + 1
   *   if b
   *     i = 1          # defines i_1
   *     puts i
   *     puts i + 1
   *   else
   *     i = 2          # defines i_2
   *     puts i
   *     puts i + 1
   *   end
   *                    # defines i_3 = phi(i_1, i_2); inputs are i_1 and i_2
   *   puts i
   * end
   * ```
   */
  final Definition getAnInput() { exists(BasicBlock pred | this.hasInputFromBlock(result, pred)) }

  /** Holds if `inp` is an input to this phi node along the edge originating in `bb`. */
  predicate hasInputFromBlock(Definition inp, BasicBlock bb) {
    SsaImpl::phiHasInputFromBlock(this, bb) = inp
  }

  /** Gets the splits string of the first node of this phi node's basic block, if any. */
  private string getSplitString() {
    exists(CfgNodes::AstCfgNode first |
      first = this.getBasicBlock().getFirstNode() and
      result = first.getSplitsString()
    )
  }

  /** Gets `phi`, prefixed by the bracketed splits string when there is one. */
  override string toString() {
    if exists(this.getSplitString())
    then result = "[" + this.getSplitString() + "] phi"
    else result = "phi"
  }

  /**
   * Gets the location of this phi node.
   *
   * The location of a phi node is the same as the location of the first node
   * in the basic block in which it is defined.
   *
   * Strictly speaking, the node is *before* the first node, but such a location
   * does not exist in the source program.
   */
  final override Location getLocation() {
    exists(BasicBlock block | block = this.getBasicBlock() |
      result = block.getFirstNode().getLocation()
    )
  }
}
}

View File

@@ -0,0 +1,459 @@
private import ruby
private import codeql.ruby.CFG
private import DataFlowPrivate
private import codeql.ruby.typetracking.TypeTracker
private import codeql.ruby.ast.internal.Module
private import FlowSummaryImpl as FlowSummaryImpl
private import codeql.ruby.dataflow.FlowSummary
/**
* The algebraic data type underlying `ReturnKind`, with one branch for each
* way a value can be returned from a callable (normal return and `break`).
*/
newtype TReturnKind =
TNormalReturnKind() or
TBreakReturnKind()
/**
 * Gets a node that can read the value returned from `call` with return kind
 * `kind`, that is, an `OutNode` whose call for `kind` is `call`.
 */
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { result.getCall(kind) = call }
/**
* A return kind. A return kind describes how a value can be returned
* from a callable.
*/
abstract class ReturnKind extends TReturnKind {
/** Gets a textual representation of this return kind. */
abstract string toString();
}
/**
* A value returned from a callable using a `return` statement or an expression
* body, that is, a "normal" return.
*/
class NormalReturnKind extends ReturnKind, TNormalReturnKind {
/** Gets the string `"return"`. */
override string toString() { result = "return" }
}
/**
* A value returned from a callable using a `break` statement.
*/
class BreakReturnKind extends ReturnKind, TBreakReturnKind {
/** Gets the string `"break"`. */
override string toString() { result = "break" }
}
/**
* A callable defined in library code, identified by a unique string.
*
* Concrete subclasses must implement `getACall` to identify the calls in
* source code that target this callable.
*/
abstract class LibraryCallable extends string {
// The characteristic predicate places no restriction on the string;
// `bindingset[this]` declares that `this` must be bound by the context.
bindingset[this]
LibraryCallable() { any() }
/** Gets a call to this library callable. */
abstract Call getACall();
}
/**
 * A callable. This includes callables from source code, as well as callables
 * defined in library code.
 */
class DataFlowCallable extends TDataFlowCallable {
  /** Gets the underlying source code callable, if any. */
  Callable asCallable() { TCfgScope(result) = this }

  /** Gets the underlying library callable, if any. */
  LibraryCallable asLibraryCallable() { TLibraryCallable(result) = this }

  /**
   * Gets a textual representation of this callable: that of the source code
   * callable, or the identifying string of the library callable.
   */
  string toString() {
    result = this.asCallable().toString()
    or
    result = this.asLibraryCallable()
  }

  /**
   * Gets the location of this callable. Only callables with an underlying
   * source code callable have a location.
   */
  Location getLocation() {
    exists(Callable source | source = this.asCallable() | result = source.getLocation())
  }
}
/**
* A call. This includes calls from source code, as well as call(back)s
* inside library callables with a flow summary.
*
* The member predicates below hold for nothing in this base class (`none()`);
* the subclasses for the individual `TDataFlowCall` branches override them.
*/
class DataFlowCall extends TDataFlowCall {
/** Gets the enclosing callable. */
DataFlowCallable getEnclosingCallable() { none() }
/** Gets the underlying source code call, if any. */
CfgNodes::ExprNodes::CallCfgNode asCall() { none() }
/** Gets a textual representation of this call. */
string toString() { none() }
/** Gets the location of this call. */
Location getLocation() { none() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
*
* This delegates to `getLocation()`, so it holds only for calls that have a location.
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
}
/**
 * A synthesized call inside a callable with a flow summary.
 *
 * For example, in
 * ```rb
 * ints.each do |i|
 *   puts i
 * end
 * ```
 *
 * there is a call to the block argument inside `each`.
 */
class SummaryCall extends DataFlowCall, TSummaryCall {
  // The summarized callable inside which this call is synthesized.
  private FlowSummaryImpl::Public::SummarizedCallable callable;
  // The data flow node being called.
  private DataFlow::Node target;

  SummaryCall() { TSummaryCall(callable, target) = this }

  /** Gets the data flow node that this call targets. */
  DataFlow::Node getReceiver() { result = target }

  override DataFlowCallable getEnclosingCallable() { result = callable }

  override string toString() { result = "[summary] call to " + target + " in " + callable }

  override Location getLocation() { result = callable.getLocation() }
}
/** A call that corresponds to a call node in the control flow graph. */
private class NormalCall extends DataFlowCall, TNormalCall {
  // The underlying control flow call node.
  private CfgNodes::ExprNodes::CallCfgNode callNode;

  NormalCall() { TNormalCall(callNode) = this }

  override CfgNodes::ExprNodes::CallCfgNode asCall() { result = callNode }

  override DataFlowCallable getEnclosingCallable() { result = TCfgScope(callNode.getScope()) }

  override string toString() { result = callNode.toString() }

  override Location getLocation() { result = callNode.getLocation() }
}
/**
 * Holds if `call` is a method call to a method named `method`, and
 * `sourceNode` is a local source that flows to the receiver of `call`.
 */
pragma[nomagic]
private predicate methodCall(
  CfgNodes::ExprNodes::CallCfgNode call, DataFlow::LocalSourceNode sourceNode, string method
) {
  exists(DataFlow::Node receiver |
    receiver.asExpr() = call.getReceiver() and
    sourceNode.flowsTo(receiver) and
    method = call.getExpr().(MethodCall).getMethodName()
  )
}
/**
 * Gets a block that is tracked to a block parameter node of the method
 * enclosing the `yield` call `call`.
 */
private Block yieldCall(CfgNodes::ExprNodes::CallCfgNode call) {
  exists(BlockParameterNode blockParam |
    blockParam = trackBlock(result) and
    blockParam.getMethod() = call.getExpr().getEnclosingMethod()
  ) and
  call.getExpr() instanceof YieldCall
}
/**
 * Holds if `call` is a `super` call inside a method named `method`, and
 * `superClass` is the super class of the module enclosing `call`.
 */
pragma[nomagic]
private predicate superCall(CfgNodes::ExprNodes::CallCfgNode call, Module superClass, string method) {
  exists(Module enclosing |
    call.getExpr() instanceof SuperCall and
    enclosing = call.getExpr().getEnclosingModule().getModule() and
    method = call.getExpr().getEnclosingMethod().getName() and
    superClass = enclosing.getSuperClass()
  )
}
/**
 * Holds if `call` is a call to a method named `method` whose receiver may be
 * an instance of `tp`, as determined by `trackInstance`.
 */
pragma[nomagic]
private predicate instanceMethodCall(CfgNodes::ExprNodes::CallCfgNode call, Module tp, string method) {
  methodCall(call, trackInstance(tp), method)
}
/** Provides the cached types and predicates of this file. */
cached
private module Cached {
/**
* The type underlying `DataFlowCallable`: either a CFG scope from source
* code or a library callable.
*/
cached
newtype TDataFlowCallable =
TCfgScope(CfgScope scope) or
TLibraryCallable(LibraryCallable callable)
/**
* The type underlying `DataFlowCall`: either a call node from source code, or
* a synthesized call to `receiver` inside the summarized callable `c`, restricted
* to the pairs in `FlowSummaryImpl::Private::summaryCallbackRange`.
*/
cached
newtype TDataFlowCall =
TNormalCall(CfgNodes::ExprNodes::CallCfgNode c) or
TSummaryCall(FlowSummaryImpl::Public::SummarizedCallable c, DataFlow::Node receiver) {
FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
}
/**
* Gets a source code scope that `call` may target. The candidates are:
*
* - a method looked up by name in a module that the receiver may be an instance of;
* - a singleton method tracked to the receiver;
* - for `super` calls, a method of the same name looked up in the super class;
* - for `yield` calls, a block tracked to the enclosing method's block parameter.
*
* Calls whose expression is an `Operation` are excluded altogether.
*/
cached
CfgScope getTarget(CfgNodes::ExprNodes::CallCfgNode call) {
// Temporarily disable operation resolution (due to bad performance)
not call.getExpr() instanceof Operation and
(
exists(string method |
exists(Module tp |
instanceMethodCall(call, tp, method) and
result = lookupMethod(tp, method) and
// A private method is only a target when the receiver is `self` and the module
// declaring the method is the module enclosing that `self`, or one of its
// (transitive) super classes.
if result.(Method).isPrivate()
then
exists(Self self |
self = call.getReceiver().getExpr() and
pragma[only_bind_out](self.getEnclosingModule().getModule().getSuperClass*()) =
pragma[only_bind_out](result.getEnclosingModule().getModule())
) and
// For now, we restrict the scope of top-level declarations to their file.
// This may remove some plausible targets, but also removes a lot of
// implausible targets
if result.getEnclosingModule() instanceof Toplevel
then result.getFile() = call.getFile()
else any()
else any()
)
or
// singleton methods whose object is tracked to the receiver
exists(DataFlow::LocalSourceNode sourceNode |
methodCall(call, sourceNode, method) and
sourceNode = trackSingletonMethod(result, method)
)
)
or
// `super` calls resolve to the same-named method in the super class
exists(Module superClass, string method |
superCall(call, superClass, method) and
result = lookupMethod(superClass, method)
)
or
// `yield` calls resolve to blocks passed to the enclosing method
result = yieldCall(call)
)
}
}
import Cached
/**
* Gets a node that may hold an instance of module/class `tp`, tracked with
* type tracker `t`.
*
* The start case (`t.start()`) enumerates the recognized instance sources:
* literals, `Array[...]` calls, method definitions, block parameters, lambdas,
* `new` calls on a tracked module, `self` references, and modules/classes
* themselves. The recursive case extends a previously tracked node by one
* type-tracking step.
*/
private DataFlow::LocalSourceNode trackInstance(Module tp, TypeTracker t) {
t.start() and
(
// literals are instances of the corresponding built-in classes
result.asExpr().getExpr() instanceof NilLiteral and tp = TResolved("NilClass")
or
result.asExpr().getExpr().(BooleanLiteral).isFalse() and tp = TResolved("FalseClass")
or
result.asExpr().getExpr().(BooleanLiteral).isTrue() and tp = TResolved("TrueClass")
or
result.asExpr().getExpr() instanceof IntegerLiteral and tp = TResolved("Integer")
or
result.asExpr().getExpr() instanceof FloatLiteral and tp = TResolved("Float")
or
result.asExpr().getExpr() instanceof RationalLiteral and tp = TResolved("Rational")
or
result.asExpr().getExpr() instanceof ComplexLiteral and tp = TResolved("Complex")
or
result.asExpr().getExpr() instanceof StringlikeLiteral and tp = TResolved("String")
or
// a `[]` call on the global constant `Array`
exists(ConstantReadAccess array, MethodCall mc |
result.asExpr().getExpr() = mc and
mc.getMethodName() = "[]" and
mc.getReceiver() = array and
array.getName() = "Array" and
array.hasGlobalScope() and
tp = TResolved("Array")
)
or
result.asExpr().getExpr() instanceof HashLiteral and tp = TResolved("Hash")
or
// a method definition, used as an expression, is treated as a `Symbol`
result.asExpr().getExpr() instanceof MethodBase and tp = TResolved("Symbol")
or
result.asParameter() instanceof BlockParameter and tp = TResolved("Proc")
or
result.asExpr().getExpr() instanceof Lambda and tp = TResolved("Proc")
or
// a call to `new` on a receiver that module `tp` flows to
exists(CfgNodes::ExprNodes::CallCfgNode call, DataFlow::Node nodeTo |
call.getExpr().(MethodCall).getMethodName() = "new" and
nodeTo.asExpr() = call.getReceiver() and
trackModule(tp).flowsTo(nodeTo) and
result.asExpr() = call
)
or
// `self` in method
exists(Self self, Method enclosing |
self = result.asExpr().getExpr() and
enclosing = self.getEnclosingMethod() and
tp = enclosing.getEnclosingModule().getModule() and
not self.getEnclosingModule().getEnclosingMethod() = enclosing
)
or
// `self` in singleton method
exists(Self self, MethodBase enclosing |
self = result.asExpr().getExpr() and
flowsToSingletonMethodObject(trackInstance(tp), enclosing) and
enclosing = self.getEnclosingMethod() and
not self.getEnclosingModule().getEnclosingMethod() = enclosing
)
or
// `self` in top-level
exists(Self self, Toplevel enclosing |
self = result.asExpr().getExpr() and
enclosing = self.getEnclosingModule() and
tp = TResolved("Object") and
not self.getEnclosingMethod().getEnclosingModule() = enclosing
)
or
// a module or class
exists(Module m |
result = trackModule(m) and
if m.isClass() then tp = TResolved("Class") else tp = TResolved("Module")
)
)
or
// recursive case: extend an already tracked node by one step
exists(TypeTracker t2, StepSummary summary |
result = trackInstanceRec(tp, t2, summary) and t = t2.append(summary)
)
}
/**
* Gets a node reachable in one type-tracking step, summarized by `summary`,
* from a node tracked by `trackInstance(tp, t)`.
*/
pragma[nomagic]
private DataFlow::LocalSourceNode trackInstanceRec(Module tp, TypeTracker t, StepSummary summary) {
StepSummary::step(trackInstance(tp, t), result, summary)
}
/** Gets a node that may hold an instance of `tp`, with tracking ending at `TypeTracker::end()`. */
private DataFlow::LocalSourceNode trackInstance(Module tp) {
result = trackInstance(tp, TypeTracker::end())
}
/**
 * Gets a node that `block` may flow to, tracked with type tracker `t`.
 * Tracking starts at the expression node of `block` itself.
 */
private DataFlow::LocalSourceNode trackBlock(Block block, TypeTracker t) {
  result.asExpr().getExpr() = block and t.start()
  or
  exists(TypeTracker prev, StepSummary step |
    t = prev.append(step) and
    result = trackBlockRec(block, prev, step)
  )
}

/**
 * Gets a node reachable in one type-tracking step, summarized by `summary`,
 * from a node tracked by `trackBlock(block, t)`.
 */
pragma[nomagic]
private DataFlow::LocalSourceNode trackBlockRec(Block block, TypeTracker t, StepSummary summary) {
  StepSummary::step(trackBlock(block, t), result, summary)
}

/** Gets a node that `block` may flow to, with tracking ending at `TypeTracker::end()`. */
private DataFlow::LocalSourceNode trackBlock(Block block) {
  exists(TypeTracker done | done = TypeTracker::end() | result = trackBlock(block, done))
}
/**
 * Holds if `method` is a singleton method on `object`: either `object` is the
 * object of the `SingletonMethod`, or `method` is a `Method` of a singleton
 * class whose value is `object`.
 */
private predicate singletonMethod(MethodBase method, Expr object) {
  exists(SingletonClass cls |
    method = cls.getAMethod() and
    method instanceof Method and
    object = cls.getValue()
  )
  or
  object = method.(SingletonMethod).getObject()
}
/**
 * Holds if `nodeFrom` flows to a local source node whose expression is the
 * object on which the singleton method `method` is defined.
 */
pragma[nomagic]
private predicate flowsToSingletonMethodObject(DataFlow::LocalSourceNode nodeFrom, MethodBase method) {
  exists(DataFlow::LocalSourceNode objectNode |
    singletonMethod(method, objectNode.asExpr().getExpr()) and
    nodeFrom.flowsTo(objectNode)
  )
}
/**
 * Holds if a node tracked from module `m` flows to the object on which the
 * singleton method `method` is defined.
 */
pragma[nomagic]
private predicate moduleFlowsToSingletonMethodObject(Module m, MethodBase method) {
  exists(DataFlow::LocalSourceNode moduleNode |
    moduleNode = trackModule(m) and
    flowsToSingletonMethodObject(moduleNode, method)
  )
}
/**
 * Gets a node on which the singleton method `method` may be called, tracked
 * with type tracker `t`.
 *
 * Tracking starts at nodes that flow to the object of `method`, and at nodes
 * tracked from a module that flows to the object of `method`.
 */
pragma[nomagic]
private DataFlow::LocalSourceNode trackSingletonMethod0(MethodBase method, TypeTracker t) {
  t.start() and
  (
    exists(Module m | moduleFlowsToSingletonMethodObject(m, method) and result = trackModule(m))
    or
    flowsToSingletonMethodObject(result, method)
  )
  or
  exists(TypeTracker prev, StepSummary step |
    t = prev.append(step) and
    result = trackSingletonMethod0Rec(method, prev, step)
  )
}

/**
 * Gets a node reachable in one type-tracking step, summarized by `summary`,
 * from a node tracked by `trackSingletonMethod0(method, t)`.
 */
pragma[nomagic]
private DataFlow::LocalSourceNode trackSingletonMethod0Rec(
  MethodBase method, TypeTracker t, StepSummary summary
) {
  StepSummary::step(trackSingletonMethod0(method, t), result, summary)
}

/**
 * Gets a node on which the singleton method `m`, named `name`, may be called,
 * with tracking ending at `TypeTracker::end()`.
 */
pragma[nomagic]
private DataFlow::LocalSourceNode trackSingletonMethod(MethodBase m, string name) {
  name = m.getName() and
  result = trackSingletonMethod0(m, TypeTracker::end())
}
/**
 * Gets a `self` node whose enclosing module resolves to `tp`, excluding the
 * case where the method enclosing `self` is itself enclosed by that same
 * module declaration.
 */
private DataFlow::Node selfInModule(Module tp) {
  exists(Self selfExpr, ModuleBase scope |
    scope = selfExpr.getEnclosingModule() and
    scope.getModule() = tp and
    result.asExpr().getExpr() = selfExpr and
    not scope = selfExpr.getEnclosingMethod().getEnclosingModule()
  )
}
/**
 * Gets a node that may refer to module `tp`, tracked with type tracker `t`.
 *
 * Tracking starts at expressions that `resolveScopeExpr` resolves to `tp`, and
 * at `self` references in the module (see `selfInModule`).
 */
private DataFlow::LocalSourceNode trackModule(Module tp, TypeTracker t) {
  exists(TypeTracker prev, StepSummary step |
    t = prev.append(step) and
    result = trackModuleRec(tp, prev, step)
  )
  or
  t.start() and
  (
    // `self` reference to Module
    result = selfInModule(tp)
    or
    // ConstantReadAccess to Module
    tp = resolveScopeExpr(result.asExpr().getExpr())
  )
}

/**
 * Gets a node reachable in one type-tracking step, summarized by `summary`,
 * from a node tracked by `trackModule(tp, t)`.
 */
pragma[nomagic]
private DataFlow::LocalSourceNode trackModuleRec(Module tp, TypeTracker t, StepSummary summary) {
  StepSummary::step(trackModule(tp, t), result, summary)
}

/** Gets a node that may refer to module `tp`, with tracking ending at `TypeTracker::end()`. */
private DataFlow::LocalSourceNode trackModule(Module tp) {
  exists(TypeTracker done | done = TypeTracker::end() | result = trackModule(tp, done))
}
/**
 * Gets a viable run-time target for the call `call`: either a library
 * callable that has `call` among its calls, or a source code target found
 * by `getTarget`.
 */
DataFlowCallable viableCallable(DataFlowCall call) {
  exists(LibraryCallable callable |
    call.asCall().getExpr() = callable.getACall() and
    result = TLibraryCallable(callable)
  )
  or
  // `yield` calls are handled by `lambdaCreation`/`lambdaCall`
  not call.asCall().getExpr() instanceof YieldCall and
  result = TCfgScope(getTarget(call.asCall()))
}
/**
* Holds if the set of viable implementations that can be called by `call`
* might be improved by knowing the call context. This is the case if the
* qualifier accesses a parameter of the enclosing callable `c` (including
* the implicit `self` parameter).
*
* NOTE: this is currently implemented as `none()`, so it never holds.
*/
predicate mayBenefitFromCallContext(DataFlowCall call, DataFlowCallable c) { none() }
/**
* Gets a viable dispatch target of `call` in the context `ctx`. This is
* restricted to those `call`s for which a context might make a difference.
*
* NOTE: this is currently implemented as `none()`, so it has no results.
*/
DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) { none() }
/**
 * Holds if `e` is an `ExprNode` that may be returned by a call to `c`: either
 * `e` is itself an `ExprReturnNode` in `c`, or `e` is the returned value of an
 * explicit return in `c`.
 */
predicate exprNodeReturnedFrom(DataFlow::ExprNode e, Callable c) {
  exists(ReturningNode r | r.getEnclosingCallable().asCallable() = c |
    e = r.(ExprReturnNode)
    or
    e.asExpr() = r.(ExplicitReturnNode).getReturningNode().getReturnedValueNode()
  )
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,181 @@
/**
* Provides consistency queries for checking invariants in the language-specific
* data-flow classes and predicates.
*/
private import DataFlowImplSpecific::Private
private import DataFlowImplSpecific::Public
private import tainttracking1.TaintTrackingParameter::Private
private import tainttracking1.TaintTrackingParameter::Public
module Consistency {
private class RelevantNode extends Node {
RelevantNode() {
this instanceof ArgumentNode or
this instanceof ParameterNode or
this instanceof ReturnNode or
this = getAnOutNode(_, _) or
simpleLocalFlowStep(this, _) or
simpleLocalFlowStep(_, this) or
jumpStep(this, _) or
jumpStep(_, this) or
storeStep(this, _, _) or
storeStep(_, _, this) or
readStep(this, _, _) or
readStep(_, _, this) or
defaultAdditionalTaintStep(this, _) or
defaultAdditionalTaintStep(_, this)
}
}
query predicate uniqueEnclosingCallable(Node n, string msg) {
exists(int c |
n instanceof RelevantNode and
c = count(n.getEnclosingCallable()) and
c != 1 and
msg = "Node should have one enclosing callable but has " + c + "."
)
}
query predicate uniqueType(Node n, string msg) {
exists(int c |
n instanceof RelevantNode and
c = count(getNodeType(n)) and
c != 1 and
msg = "Node should have one type but has " + c + "."
)
}
query predicate uniqueNodeLocation(Node n, string msg) {
exists(int c |
c =
count(string filepath, int startline, int startcolumn, int endline, int endcolumn |
n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
) and
c != 1 and
msg = "Node should have one location but has " + c + "."
)
}
query predicate missingLocation(string msg) {
exists(int c |
c =
strictcount(Node n |
not exists(string filepath, int startline, int startcolumn, int endline, int endcolumn |
n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
)
) and
msg = "Nodes without location: " + c
)
}
query predicate uniqueNodeToString(Node n, string msg) {
exists(int c |
c = count(n.toString()) and
c != 1 and
msg = "Node should have one toString but has " + c + "."
)
}
query predicate missingToString(string msg) {
exists(int c |
c = strictcount(Node n | not exists(n.toString())) and
msg = "Nodes without toString: " + c
)
}
query predicate parameterCallable(ParameterNode p, string msg) {
exists(DataFlowCallable c | p.isParameterOf(c, _) and c != p.getEnclosingCallable()) and
msg = "Callable mismatch for parameter."
}
query predicate localFlowIsLocal(Node n1, Node n2, string msg) {
simpleLocalFlowStep(n1, n2) and
n1.getEnclosingCallable() != n2.getEnclosingCallable() and
msg = "Local flow step does not preserve enclosing callable."
}
private DataFlowType typeRepr() { result = getNodeType(_) }
query predicate compatibleTypesReflexive(DataFlowType t, string msg) {
t = typeRepr() and
not compatibleTypes(t, t) and
msg = "Type compatibility predicate is not reflexive."
}
query predicate unreachableNodeCCtx(Node n, DataFlowCall call, string msg) {
isUnreachableInCall(n, call) and
exists(DataFlowCallable c |
c = n.getEnclosingCallable() and
not viableCallable(call) = c
) and
msg = "Call context for isUnreachableInCall is inconsistent with call graph."
}
query predicate localCallNodes(DataFlowCall call, Node n, string msg) {
(
n = getAnOutNode(call, _) and
msg = "OutNode and call does not share enclosing callable."
or
n.(ArgumentNode).argumentOf(call, _) and
msg = "ArgumentNode and call does not share enclosing callable."
) and
n.getEnclosingCallable() != call.getEnclosingCallable()
}
// In some languages a result of `getPreUpdateNode` can never be an instance
// of `PostUpdateNode`. Routing the lookup through this predicate (with its
// vacuous `none()` branch) helps the compiler forget that fact.
private Node getPre(PostUpdateNode n) {
  none()
  or
  n.getPreUpdateNode() = result
}
/** Holds if post-update node `n` is its own pre-update node. */
query predicate postIsNotPre(PostUpdateNode n, string msg) {
  msg = "PostUpdateNode should not equal its pre-update node." and
  n = getPre(n)
}
/**
 * Holds if post-update node `n` does not have exactly one pre-update node;
 * `msg` states how many it has.
 */
query predicate postHasUniquePre(PostUpdateNode n, string msg) {
  exists(int pres |
    pres = count(n.getPreUpdateNode()) and
    pres != 1
  |
    msg = "PostUpdateNode should have one pre-update node but has " + pres + "."
  )
}
/** Holds if `n` is the pre-update node of more than one `PostUpdateNode`. */
query predicate uniquePostUpdate(Node n, string msg) {
  msg = "Node has multiple PostUpdateNodes." and
  strictcount(PostUpdateNode post | n = post.getPreUpdateNode()) > 1
}
/**
 * Holds if post-update node `n` and its pre-update node have different
 * enclosing callables.
 */
query predicate postIsInSameCallable(PostUpdateNode n, string msg) {
  msg = "PostUpdateNode does not share callable with its pre-update node." and
  exists(Node pre | pre = n.getPreUpdateNode() |
    pre.getEnclosingCallable() != n.getEnclosingCallable()
  )
}
/** Holds if `n` is the pre-update node of some `PostUpdateNode`. */
private predicate hasPost(Node n) { n = any(PostUpdateNode post).getPreUpdateNode() }
/**
 * Holds if `n` is the origin of a read step whose target has a
 * `PostUpdateNode`, while `n` itself has none.
 */
query predicate reverseRead(Node n, string msg) {
  msg = "Origin of readStep is missing a PostUpdateNode." and
  not hasPost(n) and
  exists(Node target | readStep(n, _, target) | hasPost(target))
}
/**
 * Holds if argument node `n` has no `PostUpdateNode` and is not exempted by
 * `isImmutableOrUnobservable`.
 */
query predicate argHasPostUpdate(ArgumentNode n, string msg) {
  msg = "ArgumentNode is missing PostUpdateNode." and
  not isImmutableOrUnobservable(n) and
  not hasPost(n)
}
// In some languages a `PostUpdateNode` can never be the target of
// `simpleLocalFlowStep`. Wrapping the type test in this predicate (with its
// vacuous `none()` branch) helps the compiler forget that fact.
private predicate isPostUpdateNode(Node n) {
  none()
  or
  n instanceof PostUpdateNode
}
/** Holds if post-update node `n` is the target of a `simpleLocalFlowStep`. */
query predicate postWithInFlow(Node n, string msg) {
  msg = "PostUpdateNode should not be the target of local flow." and
  simpleLocalFlowStep(_, n) and
  isPostUpdateNode(n)
}
}

View File

@@ -0,0 +1,11 @@
/**
* Provides Ruby-specific definitions for use in the data flow library.
*/
/**
* Bundles the contents of `DataFlowPrivate` and `DataFlowDispatch` under a
* single module name.
*/
module Private {
import DataFlowPrivate
import DataFlowDispatch
}
/** Re-exports the contents of `DataFlowPublic` under a single module name. */
module Public {
import DataFlowPublic
}

View File

@@ -0,0 +1,799 @@
private import ruby
private import codeql.ruby.CFG
private import codeql.ruby.dataflow.SSA
private import DataFlowPublic
private import DataFlowDispatch
private import SsaImpl as SsaImpl
private import FlowSummaryImpl as FlowSummaryImpl
/**
* The internal side of a data-flow node. It declares the hooks that the
* final members of `Node` (`toString()`, `getLocation()`) and
* `getEnclosingCallable()` delegate to.
*/
abstract class NodeImpl extends Node {
/** Do not call: use `getEnclosingCallable()` instead. */
abstract CfgScope getCfgScope();
/** Do not call: use `getLocation()` instead. */
abstract Location getLocationImpl();
/** Do not call: use `toString()` instead. */
abstract string toStringImpl();
}
/**
 * Implements the `NodeImpl` hooks for expression nodes by delegating to the
 * underlying control-flow node.
 */
private class ExprNodeImpl extends ExprNode, NodeImpl {
  override CfgScope getCfgScope() {
    exists(CfgNodes::ExprCfgNode cfg | cfg = this.getExprNode() |
      result = cfg.getExpr().getCfgScope()
    )
  }

  override Location getLocationImpl() {
    exists(CfgNodes::ExprCfgNode cfg | cfg = this.getExprNode() | result = cfg.getLocation())
  }

  override string toStringImpl() {
    exists(CfgNodes::ExprCfgNode cfg | cfg = this.getExprNode() | result = cfg.toString())
  }
}
/** Provides predicates related to local data flow. */
module LocalFlow {
  private import codeql.ruby.dataflow.internal.SsaImpl

  /**
   * Holds if `nodeFrom` is a last node referencing SSA definition `def`, which
   * can reach `next`. The last reference is either the definition `def`
   * itself or a variable read located at that position.
   */
  private predicate localFlowSsaInput(Node nodeFrom, Ssa::Definition def, Ssa::Definition next) {
    exists(BasicBlock block, int idx | lastRefBeforeRedef(def, block, idx, next) |
      // The last reference is a variable read
      exists(CfgNodes::ExprCfgNode readNode |
        readNode = block.getNode(idx) and
        readNode = nodeFrom.asExpr() and
        readNode.getExpr() instanceof VariableReadAccess
      )
      or
      // The last reference is the definition itself
      nodeFrom.(SsaDefinitionNode).getDefinition() = def and
      def.definesAt(_, block, idx)
    )
  }

  /** Gets the SSA definition node corresponding to parameter `p`. */
  SsaDefinitionNode getParameterDefNode(NamedParameter p) {
    exists(BasicBlock block, int idx |
      result.getDefinition().definesAt(_, block, idx) and
      block.getNode(idx).getNode() = p.getDefiningAccess()
    )
  }

  /**
   * Holds if there is a local flow step from `nodeFrom` to `nodeTo` involving
   * SSA definition `def`.
   */
  predicate localSsaFlowStep(Ssa::Definition def, Node nodeFrom, Node nodeTo) {
    // Flow from assignment into SSA definition
    nodeTo.(SsaDefinitionNode).getDefinition() = def and
    def.(Ssa::WriteDefinition).assigns(nodeFrom.asExpr())
    or
    // Flow from SSA definition to first read
    nodeFrom.(SsaDefinitionNode).getDefinition() = def and
    def.getAFirstRead() = nodeTo.asExpr()
    or
    // Flow from read to next read
    exists(
      CfgNodes::ExprNodes::VariableReadAccessCfgNode earlier,
      CfgNodes::ExprNodes::VariableReadAccessCfgNode later
    |
      later = nodeTo.asExpr() and
      def.hasAdjacentReads(earlier, later)
    |
      // ... starting from the post-update node of the earlier read
      earlier = nodeFrom.(PostUpdateNode).getPreUpdateNode().asExpr()
      or
      // ... or from the earlier read itself
      earlier = nodeFrom.asExpr()
    )
    or
    // Flow into phi node
    exists(Ssa::PhiNode phi |
      phi.getAnInput() = def and
      nodeTo.(SsaDefinitionNode).getDefinition() = phi and
      localFlowSsaInput(nodeFrom, def, phi)
    )
    // TODO
    // or
    // // Flow into uncertain SSA definition
    // exists(LocalFlow::UncertainExplicitSsaDefinition uncertain |
    //   localFlowSsaInput(nodeFrom, def, uncertain) and
    //   uncertain = nodeTo.(SsaDefinitionNode).getDefinition() and
    //   def = uncertain.getPriorDefinition()
    // )
  }
}
/** An expression that occurs as an argument of some call. */
private class Argument extends Expr {
  private Call call;
  private int arg;

  Argument() { call.getArgument(arg) = this }

  /** Holds if this expression is the `i`th argument of `c`. */
  predicate isArgumentOf(Expr c, int i) {
    i = arg and
    c = call
  }
}
/** A collection of cached types and predicates to be evaluated in the same stage. */
cached
private module Cached {
/** The type of data-flow nodes; each branch below is one kind of node. */
cached
newtype TNode =
TExprNode(CfgNodes::ExprCfgNode n) or
TReturningNode(CfgNodes::ReturningCfgNode n) or
// One synthetic return node per (scope, kind) pair for which some syntactic
// returning node exists.
TSynthReturnNode(CfgScope scope, ReturnKind kind) {
exists(ReturningNode ret |
ret.(NodeImpl).getCfgScope() = scope and
ret.getKind() = kind
)
} or
TSsaDefinitionNode(Ssa::Definition def) or
// Block parameters are represented by `TBlockParameterNode` instead.
TNormalParameterNode(Parameter p) { not p instanceof BlockParameter } or
TSelfParameterNode(MethodBase m) or
TBlockParameterNode(MethodBase m) or
// Post-update nodes exist only for non-block arguments and for call receivers.
TExprPostUpdateNode(CfgNodes::ExprCfgNode n) {
exists(AstNode node | node = n.getNode() |
node instanceof Argument and
not node instanceof BlockArgument
or
n = any(CfgNodes::ExprNodes::CallCfgNode call).getReceiver()
)
} or
TSummaryNode(
FlowSummaryImpl::Public::SummarizedCallable c,
FlowSummaryImpl::Private::SummaryNodeState state
) {
FlowSummaryImpl::Private::summaryNodeRange(c, state)
} or
TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, int i) {
FlowSummaryImpl::Private::summaryParameterNodeRange(c, i)
}
/** The union of all `TNode` branches that represent parameters. */
class TParameterNode =
TNormalParameterNode or TBlockParameterNode or TSelfParameterNode or TSummaryParameterNode;
/** Holds if `e` is the default-value expression of optional or keyword parameter `p`. */
private predicate defaultValueFlow(NamedParameter p, ExprNode e) {
p.(OptionalParameter).getDefaultValue() = e.getExprNode().getExpr()
or
p.(KeywordParameter).getDefaultValue() = e.getExprNode().getExpr()
}
/**
* Holds for the local flow steps shared by `simpleLocalFlowStep`,
* `localFlowStepImpl` and `localFlowStepTypeTracker`.
*/
private predicate localFlowStepCommon(Node nodeFrom, Node nodeTo) {
// Flow through SSA definitions and variable reads
LocalFlow::localSsaFlowStep(_, nodeFrom, nodeTo)
or
// From the `self` parameter of a method to `self` expressions whose
// enclosing callable is that method
nodeFrom.(SelfParameterNode).getMethod() = nodeTo.asExpr().getExpr().getEnclosingCallable() and
nodeTo.asExpr().getExpr() instanceof Self
or
// From the right-hand side of an assignment to the assignment expression
nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::AssignExprCfgNode).getRhs()
or
// From the value of a block argument to the block argument
nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::BlockArgumentCfgNode).getValue()
or
// From the last statement of a statement sequence to the sequence
nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::StmtSequenceCfgNode).getLastStmt()
or
// From any branch of a conditional expression to the conditional
nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::ConditionalExprCfgNode).getBranch(_)
or
// From any branch of a `case` expression to the `case` expression
nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::CaseExprCfgNode).getBranch(_)
or
// From a `break` statement to the loop that it reaches via a break edge
exists(CfgNodes::ExprCfgNode exprTo, ReturningStatementNode n |
nodeFrom = n and
exprTo = nodeTo.asExpr() and
n.getReturningNode().getNode() instanceof BreakStmt and
exprTo.getNode() instanceof Loop and
nodeTo.asExpr().getAPredecessor(any(SuccessorTypes::BreakSuccessor s)) = n.getReturningNode()
)
or
// From a returned value to the returning statement that carries it
nodeFrom.asExpr() = nodeTo.(ReturningStatementNode).getReturningNode().getReturnedValueNode()
or
// From `for.getValue()` to a `for` expression that has at least one
// predecessor edge that is not a break edge
nodeTo.asExpr() =
any(CfgNodes::ExprNodes::ForExprCfgNode for |
exists(SuccessorType s |
not s instanceof SuccessorTypes::BreakSuccessor and
exists(for.getAPredecessor(s))
) and
nodeFrom.asExpr() = for.getValue()
)
}
/**
* This is the local flow predicate that is used as a building block in global
* data flow.
*/
cached
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
localFlowStepCommon(nodeFrom, nodeTo)
or
defaultValueFlow(nodeTo.(ParameterNode).getParameter(), nodeFrom)
or
nodeTo = LocalFlow::getParameterDefNode(nodeFrom.(ParameterNode).getParameter())
or
// From each syntactic return into the synthetic return node that joins them
nodeTo.(SynthReturnNode).getAnInput() = nodeFrom
or
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, true)
}
/** This is the local flow predicate that is exposed. */
cached
predicate localFlowStepImpl(Node nodeFrom, Node nodeTo) {
localFlowStepCommon(nodeFrom, nodeTo)
or
defaultValueFlow(nodeTo.(ParameterNode).getParameter(), nodeFrom)
or
nodeTo = LocalFlow::getParameterDefNode(nodeFrom.(ParameterNode).getParameter())
or
// Simple flow through library code is included in the exposed local
// step relation, even though flow is technically inter-procedural
FlowSummaryImpl::Private::Steps::summaryThroughStep(nodeFrom, nodeTo, true)
}
/** This is the local flow predicate that is used in type tracking. */
cached
predicate localFlowStepTypeTracker(Node nodeFrom, Node nodeTo) {
localFlowStepCommon(nodeFrom, nodeTo)
or
// Default values flow directly to the SSA definition node of the parameter
exists(NamedParameter p |
defaultValueFlow(p, nodeFrom) and
nodeTo = LocalFlow::getParameterDefNode(p)
)
}
/**
* Holds if `n` is a local source node: a parameter node, a `self` argument
* node, or a node that is not reachable through `localFlowStepTypeTracker`
* steps from any expression or parameter node.
*/
cached
predicate isLocalSourceNode(Node n) {
n instanceof ParameterNode
or
// This case should not be needed once we have proper use-use flow
// for `self`. At that point, the `self`s returned by `trackInstance`
// in `DataFlowDispatch.qll` should refer to the post-update node,
// and we can remove this case.
n instanceof SelfArgumentNode
or
not localFlowStepTypeTracker+(any(Node e |
e instanceof ExprNode
or
e instanceof ParameterNode
), n)
}
/** The type of contents; currently a single placeholder branch. */
cached
newtype TContent = TTodoContent() // stub
}
import Cached
/**
 * Holds if `n` should be hidden from path explanations. This is the case for
 * SSA phi nodes, flow-summary nodes, and synthetic return nodes.
 */
predicate nodeIsHidden(Node n) {
  n instanceof SynthReturnNode
  or
  n instanceof SummaryParameterNode
  or
  n instanceof SummaryNode
  or
  n.(SsaDefinitionNode).getDefinition() instanceof Ssa::PhiNode
}
/** An SSA definition, viewed as a node in a data flow graph. */
class SsaDefinitionNode extends NodeImpl, TSsaDefinitionNode {
  Ssa::Definition def;

  SsaDefinitionNode() { TSsaDefinitionNode(def) = this }

  /** Gets the underlying SSA definition. */
  Ssa::Definition getDefinition() { def = result }

  // The node lives in the scope of the basic block holding the definition.
  override CfgScope getCfgScope() { def.getBasicBlock().getScope() = result }

  override Location getLocationImpl() { def.getLocation() = result }

  override string toStringImpl() { def.toString() = result }
}
/**
 * A value returning statement, viewed as a node in a data flow graph.
 *
 * Note that because of control-flow splitting, one `ReturningStmt` may correspond
 * to multiple `ReturningStatementNode`s, just like it may correspond to multiple
 * `ControlFlow::Node`s.
 */
class ReturningStatementNode extends NodeImpl, TReturningNode {
  CfgNodes::ReturningCfgNode n;

  ReturningStatementNode() { TReturningNode(n) = this }

  /** Gets the returning control-flow node corresponding to this node. */
  CfgNodes::ReturningCfgNode getReturningNode() { n = result }

  override CfgScope getCfgScope() { n.getScope() = result }

  override Location getLocationImpl() { n.getLocation() = result }

  override string toStringImpl() { n.toString() = result }
}
private module ParameterNodes {
  /**
   * A parameter node together with its `NodeImpl` hooks. Subclasses describe
   * their position in terms of source-level callables, which is then lifted
   * to `DataFlowCallable`s by `isParameterOf`.
   */
  abstract class ParameterNodeImpl extends ParameterNode, NodeImpl {
    /** Holds if this node is the parameter at position `i` of source callable `c`. */
    abstract predicate isSourceParameterOf(Callable c, int i);

    override predicate isParameterOf(DataFlowCallable c, int i) {
      this.isSourceParameterOf(c.asCallable(), i)
    }
  }

  /**
   * The value of a normal parameter at function entry, viewed as a node in a data
   * flow graph.
   */
  class NormalParameterNode extends ParameterNodeImpl, TNormalParameterNode {
    private Parameter param;

    NormalParameterNode() { TNormalParameterNode(param) = this }

    override Parameter getParameter() { param = result }

    override predicate isSourceParameterOf(Callable c, int i) { param = c.getParameter(i) }

    override CfgScope getCfgScope() { param.getCallable() = result }

    override Location getLocationImpl() { param.getLocation() = result }

    override string toStringImpl() { param.toString() = result }
  }

  /**
   * The value of the `self` parameter at function entry, viewed as a node in a data
   * flow graph.
   */
  class SelfParameterNode extends ParameterNodeImpl, TSelfParameterNode {
    private MethodBase method;

    SelfParameterNode() { TSelfParameterNode(method) = this }

    /** Gets the method whose `self` parameter this node represents. */
    final MethodBase getMethod() { method = result }

    // `self` is modelled at position -1.
    override predicate isSourceParameterOf(Callable c, int i) {
      i = -1 and
      c = method
    }

    override CfgScope getCfgScope() { method = result }

    override Location getLocationImpl() { method.getLocation() = result }

    override string toStringImpl() { result = "self in " + method.toString() }
  }

  /**
   * The value of a block parameter at function entry, viewed as a node in a data
   * flow graph.
   */
  class BlockParameterNode extends ParameterNodeImpl, TBlockParameterNode {
    private MethodBase method;

    BlockParameterNode() { TBlockParameterNode(method) = this }

    /** Gets the method whose block parameter this node represents. */
    final MethodBase getMethod() { method = result }

    override Parameter getParameter() { result.(BlockParameter) = method.getAParameter() }

    // The block is modelled at position -2.
    override predicate isSourceParameterOf(Callable c, int i) {
      i = -2 and
      method = c
    }

    override CfgScope getCfgScope() { method = result }

    // Falls back to the method's location when no block parameter is declared.
    override Location getLocationImpl() {
      if exists(this.getParameter())
      then result = this.getParameter().getLocation()
      else result = method.getLocation()
    }

    // Falls back to a fixed label when no block parameter is declared.
    override string toStringImpl() {
      if exists(this.getParameter())
      then result = this.getParameter().toString()
      else result = "&block"
    }
  }

  /** A parameter for a library callable with a flow summary. */
  class SummaryParameterNode extends ParameterNodeImpl, TSummaryParameterNode {
    private FlowSummaryImpl::Public::SummarizedCallable sc;
    private int pos;

    SummaryParameterNode() { TSummaryParameterNode(sc, pos) = this }

    override predicate isSourceParameterOf(Callable c, int i) { none() }

    override predicate isParameterOf(DataFlowCallable c, int i) {
      i = pos and
      c = sc
    }

    override CfgScope getCfgScope() { none() }

    override DataFlowCallable getEnclosingCallable() { sc = result }

    override Location getLocationImpl() { none() }

    override string toStringImpl() { result = "parameter " + pos + " of " + sc }
  }
}
import ParameterNodes
/**
 * A data-flow node used to model flow summaries. Such a node has no CFG
 * scope and no location; its enclosing callable is the summarized callable.
 */
private class SummaryNode extends NodeImpl, TSummaryNode {
  private FlowSummaryImpl::Public::SummarizedCallable c;
  private FlowSummaryImpl::Private::SummaryNodeState state;

  SummaryNode() { TSummaryNode(c, state) = this }

  override CfgScope getCfgScope() { none() }

  override DataFlowCallable getEnclosingCallable() { c = result }

  override Location getLocationImpl() { none() }

  override string toStringImpl() { result = "[summary] " + state + " in " + c }
}
/** A data-flow node that represents a call argument. */
abstract class ArgumentNode extends Node {
/**
* Holds if this argument occurs at the given position in the given call.
* By default this lifts `sourceArgumentOf` from CFG calls to `DataFlowCall`s.
*/
predicate argumentOf(DataFlowCall call, int pos) { this.sourceArgumentOf(call.asCall(), pos) }
/** Holds if this argument occurs at position `pos` in the source-level call `call`. */
abstract predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos);
/** Gets the call in which this node is an argument. */
final DataFlowCall getCall() { this.argumentOf(result, _) }
}
private module ArgumentNodes {
  /** A data-flow node that represents an explicit (non-block) call argument. */
  class ExplicitArgumentNode extends ArgumentNode {
    ExplicitArgumentNode() {
      exists(Expr e | e = this.asExpr().getExpr() |
        e instanceof Argument and
        not e instanceof BlockArgument
      )
    }

    override predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos) {
      call.getArgument(pos) = this.asExpr()
    }
  }

  /** A data-flow node that represents the `self` argument of a call. */
  class SelfArgumentNode extends ArgumentNode {
    SelfArgumentNode() { any(CfgNodes::ExprNodes::CallCfgNode call).getReceiver() = this.asExpr() }

    // The receiver is modelled at position -1.
    override predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos) {
      pos = -1 and
      call.getReceiver() = this.asExpr()
    }
  }

  /** A data-flow node that represents a block argument. */
  class BlockArgumentNode extends ArgumentNode {
    BlockArgumentNode() {
      this.asExpr() = any(CfgNodes::ExprNodes::CallCfgNode c).getBlock()
      or
      this.asExpr().getExpr() instanceof BlockArgument
    }

    // Blocks are modelled at position -2, whether given as a literal block
    // or passed among the call's arguments as a `BlockArgument`.
    override predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos) {
      pos = -2 and
      exists(CfgNodes::ExprCfgNode e | e = this.asExpr() |
        e = call.getBlock()
        or
        e = call.getArgument(_) and
        e.getExpr() instanceof BlockArgument
      )
    }
  }

  /** An argument node that belongs to a flow summary. */
  private class SummaryArgumentNode extends SummaryNode, ArgumentNode {
    SummaryArgumentNode() { FlowSummaryImpl::Private::summaryArgumentNode(_, this, _) }

    override predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, int pos) { none() }

    override predicate argumentOf(DataFlowCall call, int pos) {
      FlowSummaryImpl::Private::summaryArgumentNode(call, this, pos)
    }
  }
}
import ArgumentNodes
/**
* A data-flow node that represents a value syntactically returned by a callable.
*
* Nodes of this class are the inputs of `SynthReturnNode`, which groups them
* by CFG scope and return kind.
*/
abstract class ReturningNode extends Node {
/** Gets the kind of this return node. */
abstract ReturnKind getKind();
}
/**
* A data-flow node that represents a value returned by a callable.
*
* In this file the concrete subclasses are `SynthReturnNode` and
* `SummaryReturnNode`.
*/
abstract class ReturnNode extends Node {
/** Gets the kind of this return node. */
abstract ReturnKind getKind();
}
private module ReturnNodes {
  /**
   * Holds if the returning statement of `node` is accepted in the callable it
   * occurs in: `return` in a method, singleton method or lambda; `next` or
   * `break` in a block or lambda.
   */
  private predicate isValid(CfgNodes::ReturningCfgNode node) {
    exists(ReturningStmt stmt, Callable scope |
      scope = node.getScope() and
      stmt = node.getNode()
    |
      (stmt instanceof NextStmt or stmt instanceof BreakStmt) and
      (scope instanceof Lambda or scope instanceof Block)
      or
      stmt instanceof ReturnStmt and
      (scope instanceof Lambda or scope instanceof Method or scope instanceof SingletonMethod)
    )
  }

  /**
   * A data-flow node that represents a value returned by a callable using an
   * explicit returning statement (`return`, `next` or `break`) that leads to
   * the normal exit of the callable.
   */
  class ExplicitReturnNode extends ReturningNode, ReturningStatementNode {
    ExplicitReturnNode() {
      n.getScope() instanceof Callable and
      isValid(n) and
      n.getASuccessor().(CfgNodes::AnnotatedExitNode).isNormal()
    }

    // `break` has its own return kind; everything else is a normal return.
    override ReturnKind getKind() {
      n.getNode() instanceof BreakStmt and
      result instanceof BreakReturnKind
      or
      not n.getNode() instanceof BreakStmt and
      result instanceof NormalReturnKind
    }
  }

  /**
   * A data-flow node that represents an expression in a callable whose
   * control-flow successor is the normal exit, i.e. a value returned without
   * an explicit returning statement.
   */
  class ExprReturnNode extends ReturningNode, ExprNode {
    ExprReturnNode() {
      this.(NodeImpl).getCfgScope() instanceof Callable and
      this.getExprNode().getASuccessor().(CfgNodes::AnnotatedExitNode).isNormal()
    }

    override ReturnKind getKind() { result instanceof NormalReturnKind }
  }

  /**
   * A synthetic data-flow node for joining flow from different syntactic
   * returns into a single node.
   *
   * This node only exists to avoid computing the product of a large fan-in
   * with a large fan-out.
   */
  class SynthReturnNode extends NodeImpl, ReturnNode, TSynthReturnNode {
    private CfgScope scope;
    private ReturnKind kind;

    SynthReturnNode() { TSynthReturnNode(scope, kind) = this }

    /** Gets a syntactic return node that flows into this synthetic node. */
    ReturningNode getAnInput() {
      kind = result.getKind() and
      scope = result.(NodeImpl).getCfgScope()
    }

    override ReturnKind getKind() { kind = result }

    override CfgScope getCfgScope() { scope = result }

    override Location getLocationImpl() { scope.getLocation() = result }

    override string toStringImpl() { result = "return " + kind + " in " + scope }
  }

  /** A return node that belongs to a flow summary. */
  private class SummaryReturnNode extends SummaryNode, ReturnNode {
    private ReturnKind rk;

    SummaryReturnNode() { FlowSummaryImpl::Private::summaryReturnNode(this, rk) }

    override ReturnKind getKind() { rk = result }
  }
}
import ReturnNodes
/**
* A data-flow node that represents the output of a call.
*
* In this file the concrete subclasses are `ExprOutNode` and `SummaryOutNode`.
*/
abstract class OutNode extends Node {
/** Gets the underlying call, where this node is a corresponding output of kind `kind`. */
abstract DataFlowCall getCall(ReturnKind kind);
}
private module OutNodes {
  /**
   * A data-flow node that reads a value returned directly by a callable,
   * either via a call or a `yield` of a block.
   */
  class ExprOutNode extends OutNode, ExprNode {
    private DataFlowCall call;

    ExprOutNode() { this.getExprNode() = call.asCall() }

    // Only normal returns are read at the call expression itself.
    override DataFlowCall getCall(ReturnKind kind) {
      kind instanceof NormalReturnKind and
      call = result
    }
  }

  /** An out node that belongs to a flow summary. */
  private class SummaryOutNode extends SummaryNode, OutNode {
    SummaryOutNode() { FlowSummaryImpl::Private::summaryOutNode(_, this, _) }

    override DataFlowCall getCall(ReturnKind kind) {
      FlowSummaryImpl::Private::summaryOutNode(result, this, kind)
    }
  }
}
import OutNodes
/**
 * Holds if data can flow from `pred` to `succ` in a step that is not a
 * regular local flow step: captured-variable flow between SSA definitions,
 * flow from a method's `self` parameter to a `self` expression inside a
 * nested callable of that method, and flow from the value of a constant to
 * a read of that constant.
 */
predicate jumpStep(Node pred, Node succ) {
  // Captured variables, into and out of the capturing scope
  exists(Ssa::Definition defPred, Ssa::Definition defSucc |
    defPred = pred.(SsaDefinitionNode).getDefinition() and
    defSucc = succ.(SsaDefinitionNode).getDefinition()
  |
    SsaImpl::captureFlowIn(defPred, defSucc)
    or
    SsaImpl::captureFlowOut(defPred, defSucc)
  )
  or
  // `self` inside a callable nested in method `m`
  exists(Self selfExpr, Method m |
    selfExpr = succ.asExpr().getExpr() and
    m = selfExpr.getEnclosingMethod() and
    m != selfExpr.getEnclosingCallable() and
    m = pred.(SelfParameterNode).getMethod()
  )
  or
  // Constant value to constant read
  pred.asExpr().getExpr() = succ.asExpr().getExpr().(ConstantReadAccess).getValue()
}
/**
* Holds if there is a store step from `node1` into content `c` of `node2`.
* Only store steps contributed by flow summaries are modelled here.
*/
predicate storeStep(Node node1, Content c, Node node2) {
FlowSummaryImpl::Private::Steps::summaryStoreStep(node1, c, node2)
}
/**
* Holds if there is a read step of content `c` from `node1` to `node2`.
* Only read steps contributed by flow summaries are modelled here.
*/
predicate readStep(Node node1, Content c, Node node2) {
FlowSummaryImpl::Private::Steps::summaryReadStep(node1, c, node2)
}
/**
 * Holds if values stored inside content `c` are cleared at node `n`. For example,
 * any value stored inside `f` is cleared at the pre-update node associated with `x`
 * in `x.f = newValue`.
 *
 * This covers targets of store steps and content cleared by flow summaries.
 */
predicate clearsContent(Node n, Content c) {
  FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c)
  or
  storeStep(_, c, n)
}
/** The underlying type of `DataFlowType`; it has a single placeholder value. */
private newtype TDataFlowType = TTodoDataFlowType()
/** A type used for type pruning in data flow. Currently there is only one such type. */
class DataFlowType extends TDataFlowType {
/** Gets a textual representation of this type, which is the empty string. */
string toString() { result = "" }
}
/**
* Gets the type of `n` used for type pruning. The body places no constraint
* on the result, so every `DataFlowType` is a type of every node.
*/
DataFlowType getNodeType(NodeImpl n) { any() }
/** Gets the textual form of data-flow type `t`, as given by its `toString()`. */
string ppReprType(DataFlowType t) { t.toString() = result }
/**
* Holds if `t1` and `t2` are compatible, that is, whether data can flow from
* a node of type `t1` to a node of type `t2`.
*
* The body is unconditional, so all pairs of types are compatible.
*/
pragma[inline]
predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() }
/**
* A node associated with an object after an operation that might have
* changed its state.
*
* This can be either the argument to a callable after the callable returns
* (which might have mutated the argument), or the qualifier of a field after
* an update to the field.
*
* Nodes corresponding to AST elements, for example `ExprNode`, usually refer
* to the value before the update.
*
* In this file the concrete subclasses are `ExprPostUpdateNode` and
* `SummaryPostUpdateNode`.
*/
abstract class PostUpdateNode extends Node {
/** Gets the node before the state update. */
abstract Node getPreUpdateNode();
}
private module PostUpdateNodes {
  /** The post-update node of an expression; its pre-update node is the expression node itself. */
  class ExprPostUpdateNode extends PostUpdateNode, NodeImpl, TExprPostUpdateNode {
    private CfgNodes::ExprCfgNode e;

    ExprPostUpdateNode() { TExprPostUpdateNode(e) = this }

    override ExprNode getPreUpdateNode() { result.getExprNode() = e }

    override CfgScope getCfgScope() { e.getExpr().getCfgScope() = result }

    override Location getLocationImpl() { e.getLocation() = result }

    override string toStringImpl() { result = "[post] " + e.toString() }
  }

  /** A post-update node that belongs to a flow summary. */
  private class SummaryPostUpdateNode extends SummaryNode, PostUpdateNode {
    private Node pre;

    SummaryPostUpdateNode() { FlowSummaryImpl::Private::summaryPostUpdateNode(this, pre) }

    override Node getPreUpdateNode() { pre = result }
  }
}
private import PostUpdateNodes
/**
* A node that performs a type cast.
*
* As defined here, this class consists of exactly the `ReturningNode`s.
*/
class CastNode extends Node {
CastNode() { this instanceof ReturningNode }
}
/** The expression type of the data-flow library: an alias for `CfgNodes::ExprCfgNode`. */
class DataFlowExpr = CfgNodes::ExprCfgNode;
/** Gets the access path limit, which is fixed at 5. */
int accessPathLimit() { result = 5 }
/**
* Holds if access paths with `c` at their head always should be tracked at high
* precision. This disables adaptive access path precision for such access paths.
*
* This never holds here: the body is `none()`.
*/
predicate forceHighPrecision(Content c) { none() }
/** The unit type: a newtype with the single value `TMkUnit()`. */
private newtype TUnit = TMkUnit()
/** The trivial type with a single element. */
class Unit extends TUnit {
/** Gets a textual representation of this element, which is always `"unit"`. */
string toString() { result = "unit" }
}
/**
* Holds if `n` does not require a `PostUpdateNode` as it either cannot be
* modified or its modification cannot be observed, for example if it is a
* freshly created object that is not saved in a variable.
*
* This predicate is only used for consistency checks.
*
* Here it holds exactly for block argument nodes.
*/
predicate isImmutableOrUnobservable(Node n) { n instanceof BlockArgumentNode }
/**
* Holds if the node `n` is unreachable when the call context is `call`.
*
* This never holds here: the body is `none()`.
*/
predicate isUnreachableInCall(Node n, DataFlowCall call) { none() }
/**
* The kind of a lambda call. `lambdaCreation` and `lambdaCall` use
* `TYieldCallKind` for blocks and `yield` calls, and `TLambdaCallKind` for
* lambdas and calls to a method named `call`.
*/
newtype LambdaCallKind =
TYieldCallKind() or
TLambdaCallKind()
/**
 * Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`.
 *
 * A block is of the yield kind. A lambda expression, or a call to a method
 * named `lambda` whose block is `c`, is of the lambda kind.
 */
predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) {
  kind = TLambdaCallKind() and
  (
    // `lambda { ... }`: the call node is the creation, its block the callable
    exists(CfgNodes::ExprNodes::MethodCallCfgNode mc | mc = creation.asExpr() |
      mc.getExpr().getMethodName() = "lambda" and
      mc.getBlock().getExpr() = c.asCallable()
    )
    or
    // A lambda expression is its own creation
    c.asCallable().(Lambda) = creation.asExpr().getExpr()
  )
  or
  // A block is its own creation
  kind = TYieldCallKind() and
  c.asCallable().(Block) = creation.asExpr().getExpr()
}
/**
 * Holds if `call` is a lambda call of kind `kind` where `receiver` is the lambda expression.
 *
 * A `yield` call has the block parameter node of its enclosing method as
 * receiver. A call to a method named `call` has its own receiver as the
 * lambda. For a summary call, the kind is the yield kind when the receiver is
 * a parameter at position -2 and the lambda kind otherwise.
 */
predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) {
  // `yield`
  kind = TYieldCallKind() and
  call.asCall().getExpr().(YieldCall).getEnclosingMethod() =
    receiver.(BlockParameterNode).getMethod()
  or
  // `receiver.call(...)`
  kind = TLambdaCallKind() and
  exists(CfgNodes::ExprNodes::MethodCallCfgNode mc | mc = call.asCall() |
    mc.getExpr().getMethodName() = "call" and
    mc.getReceiver() = receiver.asExpr()
  )
  or
  // Calls made inside flow summaries
  receiver = call.(SummaryCall).getReceiver() and
  if receiver.(ParameterNode).isParameterOf(_, -2)
  then kind = TYieldCallKind()
  else kind = TLambdaCallKind()
}
/**
* Extra data-flow steps needed for lambda flow analysis.
*
* No extra steps are defined here: the body is `none()`.
*/
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }

View File

@@ -0,0 +1,210 @@
private import ruby
private import DataFlowDispatch
private import DataFlowPrivate
private import codeql.ruby.CFG
private import codeql.ruby.typetracking.TypeTracker
private import codeql.ruby.dataflow.SSA
private import FlowSummaryImpl as FlowSummaryImpl
/**
 * An element, viewed as a node in a data flow graph. Either an expression
 * (`ExprNode`) or a parameter (`ParameterNode`).
 */
class Node extends TNode {
  /** Gets the expression corresponding to this node, if any. */
  CfgNodes::ExprCfgNode asExpr() { this.(ExprNode).getExprNode() = result }

  /** Gets the parameter corresponding to this node, if any. */
  Parameter asParameter() { this.(ParameterNode).getParameter() = result }

  /** Gets a textual representation of this node. */
  // TODO: cache
  final string toString() { this.(NodeImpl).toStringImpl() = result }

  /** Gets the location of this node. */
  // TODO: cache
  final Location getLocation() { this.(NodeImpl).getLocationImpl() = result }

  /** Gets the callable enclosing this node, derived from the node's CFG scope. */
  DataFlowCallable getEnclosingCallable() {
    exists(CfgScope scope | scope = this.(NodeImpl).getCfgScope() | result = TCfgScope(scope))
  }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
  }

  /**
   * Gets a local source node from which data may flow to this node in zero or more local data-flow steps.
   */
  LocalSourceNode getALocalSource() { result.flowsTo(this) }
}
/** A data-flow node corresponding to a call in the control-flow graph. */
class CallNode extends LocalSourceNode {
  private CfgNodes::ExprNodes::CallCfgNode call;

  CallNode() { this.asExpr() = call }

  /** Gets the data-flow node for the receiver of this call. */
  Node getReceiver() { call.getReceiver() = result.asExpr() }

  /** Gets the data-flow node for the `n`th argument of this call. */
  Node getArgument(int n) { call.getArgument(n) = result.asExpr() }

  /** Gets the data-flow node for the keyword argument named `name` of this call. */
  Node getKeywordArgument(string name) { call.getKeywordArgument(name) = result.asExpr() }
}
/**
 * An expression, viewed as a node in a data flow graph.
 *
 * Note that because of control-flow splitting, one `Expr` may correspond
 * to multiple `ExprNode`s, just like it may correspond to multiple
 * `ControlFlow::Node`s.
 */
class ExprNode extends Node, TExprNode {
  private CfgNodes::ExprCfgNode cfgNode;

  ExprNode() { TExprNode(cfgNode) = this }

  /** Gets the control-flow node of the expression corresponding to this node. */
  CfgNodes::ExprCfgNode getExprNode() { cfgNode = result }
}
/**
* The value of a parameter at function entry, viewed as a node in a data
* flow graph.
*
* Both members below have empty default bodies (`none()`); they only hold
* where a subclass overrides them.
*/
class ParameterNode extends Node, TParameterNode {
/** Gets the parameter corresponding to this node, if any. */
Parameter getParameter() { none() }
/**
* Holds if this node is the parameter of callable `c` at the specified
* (zero-based) position.
*/
predicate isParameterOf(DataFlowCallable c, int i) { none() }
}
/**
* A data-flow node that is a source of local flow, as determined by
* `isLocalSourceNode`.
*/
class LocalSourceNode extends Node {
LocalSourceNode() { isLocalSourceNode(this) }
/**
* Holds if this `LocalSourceNode` can flow to `nodeTo` in zero or more local
* flow steps. In particular, every `LocalSourceNode` flows to itself.
*/
pragma[inline]
predicate flowsTo(Node nodeTo) { hasLocalSource(nodeTo, this) }
/**
* Gets a node that this node may flow to using one heap and/or interprocedural step.
*
* See `TypeTracker` for more details about how to use this.
*/
pragma[inline]
LocalSourceNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
}
/**
 * Holds if `source` is a `LocalSourceNode` that reaches `sink` in zero or
 * more `localFlowStepTypeTracker` steps.
 */
predicate hasLocalSource(Node sink, Node source) {
  // Declaring `source` to be a `LocalSourceNode` currently causes a redundant
  // check in the recursive case, so instead we check it explicitly here.
  source instanceof LocalSourceNode and
  sink = source
  or
  exists(Node prev | localFlowStepTypeTracker(prev, sink) | hasLocalSource(prev, source))
}
/** Gets a data-flow node whose underlying control-flow node is `e`. */
ExprNode exprNode(CfgNodes::ExprCfgNode e) { e = result.getExprNode() }
/**
 * Gets the data-flow node that represents the value of parameter `p` at
 * function entry.
 */
ParameterNode parameterNode(Parameter p) { p = result.getParameter() }
/**
* Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
* (intra-procedural) step.
*
* This is an alias for `localFlowStepImpl`.
*/
predicate localFlowStep = localFlowStepImpl/2;
/**
* Holds if data flows from `source` to `sink` in zero or more local
* (intra-procedural) steps.
*
* This is the reflexive transitive closure of `localFlowStep`.
*/
predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
/**
 * Holds if data can flow from `e1` to `e2` in zero or more
 * local (intra-procedural) steps.
 */
predicate localExprFlow(CfgNodes::ExprCfgNode e1, CfgNodes::ExprCfgNode e2) {
  exists(ExprNode from1, ExprNode to2 |
    from1 = exprNode(e1) and
    to2 = exprNode(e2)
  |
    localFlow(from1, to2)
  )
}
/**
* A reference contained in an object. This is either a field, a property,
* or an element in a collection.
*
* Both members below have empty bodies (`none()`), so as written a content
* has neither a textual representation nor a location.
*/
class Content extends TContent {
/** Gets a textual representation of this content. */
string toString() { none() }
/** Gets the location of this content. */
Location getLocation() { none() }
}
/**
 * A guard that validates some expression.
 *
 * To use this in a configuration, extend the class and provide a
 * characteristic predicate precisely specifying the guard, and override
 * `checks` to specify what is being validated and in which branch.
 *
 * It is important that all extending classes in scope are disjoint.
 */
abstract class BarrierGuard extends CfgNodes::ExprCfgNode {
  private ConditionBlock conditionBlock;

  // A guard is always the last node of a condition block.
  BarrierGuard() { conditionBlock.getLastNode() = this }

  /** Holds if this guard controls block `bb` upon evaluating to `branch`. */
  private predicate controlsBlock(BasicBlock bb, boolean branch) {
    exists(SuccessorTypes::BooleanSuccessor succ |
      conditionBlock.controls(bb, succ) and
      branch = succ.getValue()
    )
  }

  /**
   * Holds if this guard validates `expr` upon evaluating to `branch`.
   * For example, the following code validates `foo` when the condition
   * `foo == "foo"` is true.
   * ```ruby
   * if foo == "foo"
   *   do_something
   * else
   *   do_something_else
   * end
   * ```
   */
  abstract predicate checks(CfgNode expr, boolean branch);

  /**
   * Gets a node that is guarded by this guard: a read of an SSA definition
   * that has another read validated by this guard, located in a basic block
   * that this guard controls on the validating branch.
   */
  final Node getAGuardedNode() {
    exists(
      boolean branch, CfgNodes::ExprCfgNode checked, CfgNodes::ExprCfgNode guarded,
      Ssa::Definition def
    |
      guarded = result.asExpr() and
      checked = def.getARead() and
      guarded = def.getARead()
    |
      this.checks(checked, branch) and
      this.controlsBlock(guarded.getBasicBlock(), branch)
    )
  }
}

View File

@@ -0,0 +1,964 @@
/**
* Provides classes and predicates for defining flow summaries.
*
* The definitions in this file are language-independent, and language-specific
* definitions are passed in via the `DataFlowImplSpecific` and
* `FlowSummaryImplSpecific` modules.
*/
private import FlowSummaryImplSpecific
private import DataFlowImplSpecific::Private
private import DataFlowImplSpecific::Public
private import DataFlowImplCommon
/** Provides classes and predicates for defining flow summaries. */
module Public {
private import Private
/**
 * A single component of a flow summary specification.
 *
 * A component is one of: a content type, a parameter at a given position,
 * an argument at a given position, or a return kind.
 */
class SummaryComponent extends TSummaryComponent {
  /** Gets a human-readable description of this summary component. */
  string toString() {
    exists(ReturnKind kind |
      this = TReturnSummaryComponent(kind) and
      result = "return (" + kind + ")"
    )
    or
    exists(int pos | this = TArgumentSummaryComponent(pos) and result = "argument " + pos)
    or
    exists(int pos | this = TParameterSummaryComponent(pos) and result = "parameter " + pos)
    or
    exists(Content content |
      this = TContentSummaryComponent(content) and
      result = content.toString()
    )
  }
}
/** Provides constructor predicates for the individual kinds of summary component. */
module SummaryComponent {
  /** Gets the summary component that represents content `c`. */
  SummaryComponent content(Content c) { TContentSummaryComponent(c) = result }

  /** Gets the summary component that represents parameter `i`. */
  SummaryComponent parameter(int i) { TParameterSummaryComponent(i) = result }

  /** Gets the summary component that represents argument `i`. */
  SummaryComponent argument(int i) { TArgumentSummaryComponent(i) = result }

  /** Gets the summary component that represents a return of kind `rk`. */
  SummaryComponent return(ReturnKind rk) { TReturnSummaryComponent(rk) = result }
}
/**
 * A (non-empty) stack of summary components.
 *
 * A stack describes where data is read from (input) or where it is written
 * to (output). For example, an input stack `[Field f, Argument 0]` means that
 * data is read from field `f` of the `0`th argument, while an output stack
 * `[Field g, Return]` means that data is written to field `g` of the
 * returned object.
 */
class SummaryComponentStack extends TSummaryComponentStack {
  /** Gets the first (top-most) component of this stack. */
  SummaryComponent head() {
    this = TConsSummaryComponentStack(result, _)
    or
    this = TSingletonSummaryComponentStack(result)
  }

  /** Gets the stack below the head; a singleton stack has no tail. */
  SummaryComponentStack tail() { this = TConsSummaryComponentStack(_, result) }

  /** Gets the number of components in this stack. */
  int length() {
    result = 1 + this.tail().length()
    or
    result = 1 and this = TSingletonSummaryComponentStack(_)
  }

  /** Gets the stack that remains after removing the first `i` components, if any. */
  SummaryComponentStack drop(int i) {
    result = this.tail().drop(i - 1)
    or
    result = this and i = 0
  }

  /** Holds if summary component `c` occurs somewhere in this stack. */
  predicate contains(SummaryComponent c) {
    exists(SummaryComponentStack suffix | suffix = this.drop(_) and c = suffix.head())
  }

  /** Gets a textual representation of this stack. */
  string toString() {
    // singleton: just the component itself
    exists(SummaryComponent only |
      this = TSingletonSummaryComponentStack(only) and
      result = only.toString()
    )
    or
    // longer stacks: "<head> of <tail>"
    exists(SummaryComponent top, SummaryComponentStack rest |
      rest = this.tail() and
      top = this.head() and
      result = top + " of " + rest
    )
  }
}
/** Provides constructor predicates for stacks of summary components. */
module SummaryComponentStack {
  /** Gets the one-element stack that holds just `c`. */
  SummaryComponentStack singleton(SummaryComponent c) {
    TSingletonSummaryComponentStack(c) = result
  }

  /**
   * Gets the stack that has `head` on top of `tail`.
   *
   * Such a stack only exists when it has been declared as required, so make
   * sure to override `RequiredSummaryComponentStack::required()` accordingly.
   */
  SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail) {
    TConsSummaryComponentStack(head, tail) = result
  }

  /** Gets the one-element stack for argument `i`. */
  SummaryComponentStack argument(int i) {
    exists(SummaryComponent arg |
      arg = SummaryComponent::argument(i) and
      result = singleton(arg)
    )
  }

  /** Gets the one-element stack for a return of kind `rk`. */
  SummaryComponentStack return(ReturnKind rk) {
    exists(SummaryComponent ret |
      ret = SummaryComponent::return(rk) and
      result = singleton(ret)
    )
  }
}
/**
* A class that exists for QL technical reasons only (the IPA type used
* to represent component stacks needs to be bounded).
*
* Extend this class with the stacks that are used as the tail of a
* `SummaryComponentStack::push` call.
*/
abstract class RequiredSummaryComponentStack extends SummaryComponentStack {
/**
* Holds if the stack obtained by pushing `c` onto this stack is required.
*
* This stack plays the role of the tail and `c` the role of the head of the
* resulting stack.
*/
abstract predicate required(SummaryComponent c);
}
/**
* A callable with a flow summary.
*
* Both member predicates default to `none()`; concrete subclasses override the
* ones they need.
*/
abstract class SummarizedCallable extends DataFlowCallable {
/**
* Holds if data may flow from `input` to `output` through this callable.
*
* `preservesValue` indicates whether this is a value-preserving step
* or a taint-step.
*
* Input specifications are restricted to stacks that end with
* `SummaryComponent::argument(_)`, preceded by zero or more
* `SummaryComponent::return(_)` or `SummaryComponent::content(_)` components.
*
* Output specifications are restricted to stacks that end with
* `SummaryComponent::return(_)` or `SummaryComponent::argument(_)`.
*
* Output stacks ending with `SummaryComponent::return(_)` can be preceded by zero
* or more `SummaryComponent::content(_)` components.
*
* Output stacks ending with `SummaryComponent::argument(_)` can be preceded by an
* optional `SummaryComponent::parameter(_)` component, which in turn can be preceded
* by zero or more `SummaryComponent::content(_)` components.
*/
pragma[nomagic]
predicate propagatesFlow(
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
) {
none()
}
/**
* Holds if values stored inside `content` are cleared on objects passed as
* the `i`th argument to this callable.
*/
pragma[nomagic]
predicate clearsContent(int i, Content content) { none() }
}
}
/**
* Provides predicates for compiling flow summaries down to atomic local steps,
* read steps, and store steps.
*/
module Private {
private import Public
/**
* The algebraic type behind `SummaryComponent`: a content, a parameter
* position, an argument position, or a return kind.
*
* Parameter and argument positions are limited to those accepted by the
* language-specific `parameterPosition` predicate.
*/
newtype TSummaryComponent =
TContentSummaryComponent(Content c) or
TParameterSummaryComponent(int i) { parameterPosition(i) } or
TArgumentSummaryComponent(int i) { parameterPosition(i) } or
TReturnSummaryComponent(ReturnKind rk)
/**
 * Gets the parameter component for the language-specific instance-parameter
 * position, if the language defines one.
 */
private TSummaryComponent thisParam() {
  exists(int pos |
    pos = instanceParameterPosition() and
    result = TParameterSummaryComponent(pos)
  )
}
/**
* The algebraic type behind `SummaryComponentStack`.
*
* Every component gives rise to a singleton stack. A cons stack only exists
* when its tail is a `RequiredSummaryComponentStack` that either requires the
* given head, or requires some parameter component while the head is the
* instance-parameter component `thisParam()`.
*/
newtype TSummaryComponentStack =
TSingletonSummaryComponentStack(SummaryComponent c) or
TConsSummaryComponentStack(SummaryComponent head, SummaryComponentStack tail) {
tail.(RequiredSummaryComponentStack).required(head)
or
tail.(RequiredSummaryComponentStack).required(TParameterSummaryComponent(_)) and
head = thisParam()
}
/**
* Holds if `c` has a flow summary from `input` to `output`, either declared
* directly via `propagatesFlow` or derived from declared summaries.
*
* Two kinds of derived, value-preserving summaries are added:
* - the reverse of a declared value-preserving summary whose output is a
*   callback parameter and whose input is (content of) an argument, and
* - flow from the stack below a callback reference into the instance
*   parameter (`thisParam()`) pushed onto that same stack.
*/
pragma[nomagic]
private predicate summary(
SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output,
boolean preservesValue
) {
// declared summaries are used as-is
c.propagatesFlow(input, output, preservesValue)
or
// observe side effects of callbacks on input arguments
c.propagatesFlow(output, input, preservesValue) and
preservesValue = true and
isCallbackParameter(input) and
isContentOfArgument(output)
or
// flow from the receiver of a callback into the instance-parameter
exists(SummaryComponentStack s, SummaryComponentStack callbackRef |
c.propagatesFlow(s, _, _) or c.propagatesFlow(_, s, _)
|
// `callbackRef` is any suffix of a declared input or output stack
callbackRef = s.drop(_) and
(isCallbackParameter(callbackRef) or callbackRef.head() = TReturnSummaryComponent(_)) and
input = callbackRef.tail() and
output = TConsSummaryComponentStack(thisParam(), input) and
preservesValue = true
)
}
/**
 * Holds if `s` has a parameter component on top of at least one further
 * component.
 */
private predicate isCallbackParameter(SummaryComponentStack s) {
  exists(s.tail()) and
  s.head() = TParameterSummaryComponent(_)
}
/**
 * Holds if `s` consists of zero or more content components followed by a
 * single argument component.
 */
private predicate isContentOfArgument(SummaryComponentStack s) {
  // base case: a bare argument
  s = TSingletonSummaryComponentStack(TArgumentSummaryComponent(_))
  or
  // recursive case: a content component on top of a valid stack
  isContentOfArgument(s.tail()) and
  s.head() = TContentSummaryComponent(_)
}
/** Holds if `s` is an output state that is needed for summarized callable `c`. */
private predicate outputState(SummarizedCallable c, SummaryComponentStack s) {
  // the output stack of a summary
  summary(c, _, s, _)
  or
  // the tail of an output state whose head is a content component
  exists(SummaryComponentStack prev |
    prev.head() = TContentSummaryComponent(_) and
    s = prev.tail() and
    outputState(c, prev)
  )
  or
  // Add the argument node corresponding to the requested post-update node
  isCallbackParameter(s) and
  inputState(c, s)
}
/** Holds if `s` is an input state that is needed for summarized callable `c`. */
private predicate inputState(SummarizedCallable c, SummaryComponentStack s) {
  // the input stack of a summary
  summary(c, s, _, _)
  or
  // the tail of any input state
  exists(SummaryComponentStack longer |
    s = longer.tail() and
    inputState(c, longer)
  )
  or
  // the tail of an output state whose head is a parameter component
  exists(SummaryComponentStack outStack |
    outStack.head() = TParameterSummaryComponent(_) and
    s = outStack.tail() and
    outputState(c, outStack)
  )
}
/**
* The algebraic type behind `SummaryNodeState`: an input state or an output
* state for a stack that is an input/output state of at least one callable.
*/
private newtype TSummaryNodeState =
TSummaryNodeInputState(SummaryComponentStack s) { inputState(_, s) } or
TSummaryNodeOutputState(SummaryComponentStack s) { outputState(_, s) }
/**
 * A state used to break up (complex) flow summaries into atomic flow steps.
 * For a flow summary
 *
 * ```ql
 * propagatesFlow(
 *   SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
 * )
 * ```
 *
 * the following states are used:
 *
 * - `TSummaryNodeInputState(SummaryComponentStack s)`:
 *   the components in `s` _have been read_ from the input.
 * - `TSummaryNodeOutputState(SummaryComponentStack s)`:
 *   the components in `s` _remain to be written_ to the output.
 */
class SummaryNodeState extends TSummaryNodeState {
  /** Holds if this is the input state for stack `s`, and `s` is an input state of `c`. */
  pragma[nomagic]
  predicate isInputState(SummarizedCallable c, SummaryComponentStack s) {
    inputState(c, s) and
    this = TSummaryNodeInputState(s)
  }

  /** Holds if this is the output state for stack `s`, and `s` is an output state of `c`. */
  pragma[nomagic]
  predicate isOutputState(SummarizedCallable c, SummaryComponentStack s) {
    outputState(c, s) and
    this = TSummaryNodeOutputState(s)
  }

  /** Gets a textual representation of this state. */
  string toString() {
    exists(SummaryComponentStack stack |
      this = TSummaryNodeOutputState(stack) and
      result = "to write: " + stack
      or
      this = TSummaryNodeInputState(stack) and
      result = "read: " + stack
    )
  }
}
/**
 * Holds if `state` is the input state of `c` for the singleton stack
 * consisting of argument `i`, that is, the `i`th argument has been read.
 * No data-flow node is synthesized for such a state; a relevant parameter
 * node is assumed to exist already.
 */
private predicate parameterReadState(SummarizedCallable c, SummaryNodeState state, int i) {
  exists(SummaryComponentStack argStack |
    argStack = SummaryComponentStack::argument(i) and
    state.isInputState(c, argStack)
  )
}
/**
 * Holds if a summary node must be synthesized for `state` inside summarized
 * callable `c`. This is the case for every output state, and for every input
 * state that is not a plain parameter read.
 */
predicate summaryNodeRange(SummarizedCallable c, SummaryNodeState state) {
  state.isOutputState(c, _)
  or
  not parameterReadState(c, state, _) and
  state.isInputState(c, _)
}
/**
 * Gets a node that represents input state `s` of `c`: either the synthesized
 * summary node for that state, or, for a parameter read state, the
 * corresponding parameter node of `c`.
 */
pragma[noinline]
private Node summaryNodeInputState(SummarizedCallable c, SummaryComponentStack s) {
  exists(SummaryNodeState state |
    state.isInputState(c, s) and
    (
      exists(int pos |
        result.(ParamNode).isParameterOf(c, pos) and
        parameterReadState(c, state, pos)
      )
      or
      result = summaryNode(c, state)
    )
  )
}
/** Gets the synthesized summary node that represents output state `s` of `c`. */
pragma[noinline]
private Node summaryNodeOutputState(SummarizedCallable c, SummaryComponentStack s) {
  exists(SummaryNodeState state | state.isOutputState(c, s) | result = summaryNode(c, state))
}
/**
 * Holds if `post` is the output-state node of `c` for the singleton stack
 * consisting of argument `i`, that is, a write targets the post-update node
 * of the `i`th parameter of `c`.
 */
private predicate isParameterPostUpdate(Node post, SummarizedCallable c, int i) {
  exists(SummaryComponentStack argStack |
    argStack = SummaryComponentStack::argument(i) and
    post = summaryNodeOutputState(c, argStack)
  )
}
/**
 * Holds if a parameter node is needed for the `i`th parameter of `c`, either
 * because the parameter is written to (post-update) or because it is read.
 */
predicate summaryParameterNodeRange(SummarizedCallable c, int i) {
  isParameterPostUpdate(_, c, i)
  or
  parameterReadState(c, _, i)
}
/**
 * Holds if `s` is an input state of `c` whose head is a return component of
 * kind `rk`, and `receiver` is the input-state node for the remainder of `s`.
 */
private predicate callbackOutput(
  SummarizedCallable c, SummaryComponentStack s, Node receiver, ReturnKind rk
) {
  exists(SummaryNodeState state | state.isInputState(c, s)) and
  receiver = summaryNodeInputState(c, s.drop(1)) and
  s.head() = TReturnSummaryComponent(rk)
}
/**
 * Holds if `s` is an output state of `c` whose head is the parameter
 * component at position `i`, and `receiver` is the input-state node for the
 * remainder of `s`.
 */
private predicate callbackInput(
  SummarizedCallable c, SummaryComponentStack s, Node receiver, int i
) {
  exists(SummaryNodeState state | state.isOutputState(c, s)) and
  receiver = summaryNodeInputState(c, s.drop(1)) and
  s.head() = TParameterSummaryComponent(i)
}
/**
 * Holds if a call with target `receiver` must be synthesized inside `c`,
 * because `receiver` either takes callback input or produces callback output.
 */
predicate summaryCallbackRange(SummarizedCallable c, Node receiver) {
  callbackInput(c, _, receiver, _)
  or
  callbackOutput(c, _, receiver, _)
}
/**
* Gets the type of synthesized summary node `n`.
*
* The type is computed based on the language-specific predicates
* `getContentType()`, `getReturnType()`, `getCallbackParameterType()`, and
* `getCallbackReturnType()`.
*/
DataFlowType summaryNodeType(Node n) {
// a post-update node has the type of its pre-update node
exists(Node pre |
summaryPostUpdateNode(n, pre) and
result = getNodeType(pre)
)
or
// otherwise the type is determined by the head of the stack that `n` represents
exists(SummarizedCallable c, SummaryComponentStack s, SummaryComponent head | head = s.head() |
// `n` represents an input state
n = summaryNodeInputState(c, s) and
(
exists(Content cont |
head = TContentSummaryComponent(cont) and result = getContentType(cont)
)
or
// value returned by a callback: typed from the node for the rest of the stack
exists(ReturnKind rk |
head = TReturnSummaryComponent(rk) and
result =
getCallbackReturnType(getNodeType(summaryNodeInputState(pragma[only_bind_out](c),
s.drop(1))), rk)
)
)
or
// `n` represents an output state
n = summaryNodeOutputState(c, s) and
(
exists(Content cont |
head = TContentSummaryComponent(cont) and result = getContentType(cont)
)
or
// only a singleton return stack is typed via `getReturnType`
s.length() = 1 and
exists(ReturnKind rk |
head = TReturnSummaryComponent(rk) and
result = getReturnType(c, rk)
)
or
// value passed to a callback: typed from the node for the rest of the stack
exists(int i | head = TParameterSummaryComponent(i) |
result =
getCallbackParameterType(getNodeType(summaryNodeInputState(pragma[only_bind_out](c),
s.drop(1))), i)
)
)
)
}
/**
 * Holds if summary node `out` holds the output of kind `rk` of the
 * synthesized call `c`.
 */
predicate summaryOutNode(DataFlowCall c, Node out, ReturnKind rk) {
  exists(SummarizedCallable sc, SummaryComponentStack stack, Node recv |
    c = summaryDataFlowCall(recv) and
    callbackOutput(sc, stack, recv, rk) and
    out = summaryNodeInputState(sc, stack)
  )
}
/**
 * Holds if summary node `arg` is passed as the `i`th argument of the
 * synthesized call `c`.
 */
predicate summaryArgumentNode(DataFlowCall c, Node arg, int i) {
  exists(SummarizedCallable sc, SummaryComponentStack stack, Node recv |
    c = summaryDataFlowCall(recv) and
    callbackInput(sc, stack, recv, i) and
    arg = summaryNodeOutputState(sc, stack)
  )
}
/**
 * Holds if `post` is a post-update node whose pre-update node is `pre`.
 *
 * This pairs the output-state and input-state nodes of a callback input
 * stack, and pairs a parameter post-update node with its parameter node.
 */
predicate summaryPostUpdateNode(Node post, Node pre) {
  // argument passed to a callback: output state (pre) and input state (post) of the same stack
  exists(SummarizedCallable sc, SummaryComponentStack stack |
    callbackInput(sc, stack, _, _) and
    post = summaryNodeInputState(sc, stack) and
    pre = summaryNodeOutputState(sc, stack)
  )
  or
  // parameter of the summarized callable itself
  exists(SummarizedCallable sc, int pos |
    pre.(ParamNode).isParameterOf(sc, pos) and
    isParameterPostUpdate(post, sc, pos)
  )
}
/**
 * Holds if `ret` is the output-state node, in some summarized callable, for
 * the singleton stack consisting of a return of kind `rk`.
 */
predicate summaryReturnNode(Node ret, ReturnKind rk) {
  exists(SummaryComponentStack retStack |
    retStack = TSingletonSummaryComponentStack(TReturnSummaryComponent(rk)) and
    ret = summaryNodeOutputState(_, retStack)
  )
}
/** Provides a compilation of flow summaries to atomic data-flow steps. */
module Steps {
/**
* Holds if there is a local step from `pred` to `succ`, which is synthesized
* from a flow summary.
*
* `preservesValue` is `true` for value-preserving steps and `false` for taint
* steps.
*/
predicate summaryLocalStep(Node pred, Node succ, boolean preservesValue) {
// A step directly between the nodes for the input and output stacks of a summary.
exists(
SummarizedCallable c, SummaryComponentStack inputContents,
SummaryComponentStack outputContents
|
summary(c, inputContents, outputContents, preservesValue) and
pred = summaryNodeInputState(c, inputContents) and
succ = summaryNodeOutputState(c, outputContents)
|
preservesValue = true
or
// a taint step is only reported when no value step exists for the same stacks
preservesValue = false and not summary(c, inputContents, outputContents, true)
)
or
// If flow through a method updates a parameter from some input A, and that
// parameter also is returned through B, then we'd like a combined flow from A
// to B as well. As an example, this simplifies modeling of fluent methods:
// for `StringBuilder.append(x)` with a specified value flow from qualifier to
// return value and taint flow from argument 0 to the qualifier, then this
// allows us to infer taint flow from argument 0 to the return value.
succ instanceof ParamNode and summaryPostUpdateNode(pred, succ) and preservesValue = true
or
// Similarly we would like to chain together summaries where values get passed
// into callbacks along the way.
pred instanceof ArgNode and summaryPostUpdateNode(succ, pred) and preservesValue = true
}
/**
 * Holds if reading content `c` from `pred` yields `succ`, where the read
 * step is synthesized from a flow summary.
 */
predicate summaryReadStep(Node pred, Content c, Node succ) {
  exists(SummarizedCallable callable, SummaryComponentStack stack |
    stack.head() = SummaryComponent::content(c) and
    succ = summaryNodeInputState(callable, stack) and
    pred = summaryNodeInputState(callable, stack.drop(1))
  )
}
/**
 * Holds if `pred` is stored into content `c` of `succ`, where the store
 * step is synthesized from a flow summary.
 */
predicate summaryStoreStep(Node pred, Content c, Node succ) {
  exists(SummarizedCallable callable, SummaryComponentStack stack |
    stack.head() = SummaryComponent::content(c) and
    succ = summaryNodeOutputState(callable, stack.drop(1)) and
    pred = summaryNodeOutputState(callable, stack)
  )
}
/**
 * Holds if values stored inside content `c` are cleared on `arg`, because
 * `arg` is passed at some position of a call whose viable target is a
 * summarized callable that clears `c` at that position.
 */
predicate summaryClearsContent(ArgNode arg, Content c) {
  exists(DataFlowCall call, int pos, SummarizedCallable target |
    arg.argumentOf(call, pos) and
    target = viableCallable(call) and
    target.clearsContent(pos, c)
  )
}
/**
 * Gets the parameter node of a summarized callable that `arg` is passed to,
 * where `out` is an out node of kind `rk` for the same call.
 */
pragma[nomagic]
private ParamNode summaryArgParam(ArgNode arg, ReturnKindExt rk, OutNodeExt out) {
  exists(DataFlowCall call, int pos, SummarizedCallable target |
    target = viableCallable(call) and
    out = rk.getAnOutNode(call) and
    arg.argumentOf(call, pos) and
    result.isParameterOf(target, pos)
  )
}
/**
 * Holds if `arg` flows to `out` using a simple flow summary, that is, a flow
 * summary without reads and stores.
 *
 * NOTE: This step should not be used in global data-flow/taint-tracking, but may
 * be useful to include in the exposed local data-flow/taint-tracking relations.
 */
predicate summaryThroughStep(ArgNode arg, Node out, boolean preservesValue) {
  exists(ReturnKindExt rk, ReturnNodeExt ret, ParamNode param |
    param = summaryArgParam(arg, rk, out) and
    rk = ret.getKind() and
    summaryLocalStep(param, ret, preservesValue)
  )
}
/**
 * Holds if there is a read(+taint) of `c` from `arg` to `out` using a
 * flow summary: a read step out of the matching parameter, followed by a
 * local summary step into a return node of the matching kind.
 *
 * NOTE: This step should not be used in global data-flow/taint-tracking, but may
 * be useful to include in the exposed local data-flow/taint-tracking relations.
 */
predicate summaryGetterStep(ArgNode arg, Content c, Node out) {
  exists(ReturnKindExt rk, ParamNode param, Node afterRead, ReturnNodeExt ret |
    param = summaryArgParam(arg, rk, out) and
    summaryReadStep(param, c, afterRead) and
    summaryLocalStep(afterRead, ret, _) and
    rk = ret.getKind()
  )
}
/**
 * Holds if there is a (taint+)store of `arg` into content `c` of `out` using a
 * flow summary: a local summary step out of the matching parameter, followed
 * by a store step into a return node of the matching kind.
 *
 * NOTE: This step should not be used in global data-flow/taint-tracking, but may
 * be useful to include in the exposed local data-flow/taint-tracking relations.
 */
predicate summarySetterStep(ArgNode arg, Content c, Node out) {
  exists(ReturnKindExt rk, ParamNode param, Node beforeStore, ReturnNodeExt ret |
    param = summaryArgParam(arg, rk, out) and
    summaryLocalStep(param, beforeStore, _) and
    summaryStoreStep(beforeStore, c, ret) and
    rk = ret.getKind()
  )
}
/**
* Holds if data is written into content `c` of argument `arg` using a flow summary.
*
* Depending on the type of `c`, this predicate may be relevant to include in the
* definition of `clearsContent()`.
*/
predicate summaryStoresIntoArg(Content c, Node arg) {
exists(ParamUpdateReturnKind rk, ReturnNodeExt ret, PostUpdateNode out |
exists(DataFlowCall call, SummarizedCallable callable |
// `ret` is a return node inside a summarized callable that `call` may target
getNodeEnclosingCallable(ret) = callable and
viableCallable(call) = callable and
// some summary store step writes content `c` into `ret`
summaryStoreStep(_, c, ret) and
ret.getKind() = pragma[only_bind_into](rk) and
// `arg` is the pre-update node of the out node of kind `rk` for `call`
out = rk.getAnOutNode(call) and
arg = out.getPreUpdateNode()
)
)
}
}
/**
* Provides a means of translating externally (e.g., CSV) defined flow
* summaries into `SummarizedCallable`s.
*/
module External {
/**
 * Holds if `spec` occurs as an input or output specification of some
 * external summary, source, or sink element.
 */
private predicate relevantSpec(string spec) {
  sinkElement(_, spec, _)
  or
  sourceElement(_, spec, _)
  or
  summaryElement(_, _, spec, _)
  or
  summaryElement(_, spec, _, _)
}
/**
 * Holds if `c` is the `n`th component of the relevant specification `s`,
 * where components are separated by `" of "`.
 */
predicate specSplit(string s, string c, int n) {
  c = s.splitAt(" of ", n) and
  relevantSpec(s)
}
/**
* Holds if specification `s` has length `len`, that is, `len` is one more
* than the largest component index reported by `specSplit` for `s`.
*/
predicate specLength(string s, int len) { len = 1 + max(int n | specSplit(s, _, n)) }
/** Gets the component at the highest index of specification `s`. */
string specLast(string s) {
  exists(int len | specLength(s, len) | specSplit(s, result, len - 1))
}
/**
 * Holds if specification component `c` denotes parameter `n`, written either
 * as a single position `Parameter[i]` or as a range `Parameter[lo..hi]` that
 * covers every position from `lo` to `hi`.
 */
predicate parseParam(string c, int n) {
  specSplit(_, c, _) and
  (
    // range form
    exists(int lo, int hi |
      lo = c.regexpCapture("Parameter\\[([-0-9]+)\\.\\.([0-9]+)\\]", 1).toInt() and
      hi = c.regexpCapture("Parameter\\[([-0-9]+)\\.\\.([0-9]+)\\]", 2).toInt() and
      n = [lo .. hi]
    )
    or
    // single-position form
    n = c.regexpCapture("Parameter\\[([-0-9]+)\\]", 1).toInt()
  )
}
/**
 * Holds if specification component `c` denotes argument `n`, written either
 * as a single position `Argument[i]` or as a range `Argument[lo..hi]` that
 * covers every position from `lo` to `hi`.
 */
predicate parseArg(string c, int n) {
  specSplit(_, c, _) and
  (
    // range form
    exists(int lo, int hi |
      lo = c.regexpCapture("Argument\\[([-0-9]+)\\.\\.([0-9]+)\\]", 1).toInt() and
      hi = c.regexpCapture("Argument\\[([-0-9]+)\\.\\.([0-9]+)\\]", 2).toInt() and
      n = [lo .. hi]
    )
    or
    // single-position form
    n = c.regexpCapture("Argument\\[([-0-9]+)\\]", 1).toInt()
  )
}
/**
 * Gets the summary component denoted by specification component `c`: an
 * argument, a parameter, the `"ReturnValue"` return, or a language-specific
 * component.
 */
private SummaryComponent interpretComponent(string c) {
  specSplit(_, c, _) and
  (
    result = interpretComponentSpecific(c)
    or
    result = SummaryComponent::return(getReturnValueKind()) and c = "ReturnValue"
    or
    exists(int pos | result = SummaryComponent::parameter(pos) and parseParam(c, pos))
    or
    exists(int pos | result = SummaryComponent::argument(pos) and parseArg(c, pos))
  )
}
/**
* Holds if `spec` specifies summary component stack `stack`.
*
* This is the interpretation of `spec` starting at component index 0, i.e.
* covering all of its components.
*/
predicate interpretSpec(string spec, SummaryComponentStack stack) {
interpretSpec(spec, 0, stack)
}
/**
 * Holds if the components of `spec` from index `idx` onwards specify
 * summary component stack `stack`.
 */
private predicate interpretSpec(string spec, int idx, SummaryComponentStack stack) {
  // a component pushed onto the interpretation of the remaining components
  exists(SummaryComponent head, SummaryComponentStack tail |
    stack = SummaryComponentStack::push(head, tail) and
    interpretSpec(spec, idx, head, tail)
  )
  or
  // `idx` is the last component, which yields a singleton stack
  exists(string last |
    specSplit(spec, last, idx) and
    specLength(spec, idx + 1) and
    relevantSpec(spec) and
    stack = SummaryComponentStack::singleton(interpretComponent(last))
  )
}
/**
 * Holds if the component of `output` at index `idx` is interpreted as `head`,
 * and the components from index `idx + 1` onwards are interpreted as `tail`.
 */
private predicate interpretSpec(
  string output, int idx, SummaryComponent head, SummaryComponentStack tail
) {
  exists(string current |
    specSplit(output, current, idx) and
    head = interpretComponent(current) and
    interpretSpec(output, idx + 1, tail)
  )
}
/**
* A stack that occurs as the tail when interpreting some external
* specification; it requires exactly the heads that are pushed onto it during
* that interpretation.
*/
private class MkStack extends RequiredSummaryComponentStack {
MkStack() { interpretSpec(_, _, _, this) }
override predicate required(SummaryComponent c) { interpretSpec(_, _, c, this) }
}
/** A summarized callable that has at least one externally defined summary element. */
private class SummarizedCallableExternal extends SummarizedCallable {
  SummarizedCallableExternal() { summaryElement(this, _, _, _) }

  /**
   * Holds if an external summary element of this callable has input and
   * output specifications that interpret as `input` and `output`. Kind
   * `"value"` gives `preservesValue = true`, kind `"taint"` gives `false`.
   */
  override predicate propagatesFlow(
    SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
  ) {
    exists(string inSpec, string outSpec, string kind |
      summaryElement(this, inSpec, outSpec, kind) and
      interpretSpec(outSpec, output) and
      interpretSpec(inSpec, input) and
      (
        kind = "taint" and preservesValue = false
        or
        kind = "value" and preservesValue = true
      )
    )
  }
}
/**
 * Holds if `c` is a component of specification `spec` for which no summary
 * component can be interpreted.
 */
predicate invalidSpecComponent(string spec, string c) {
  specSplit(spec, c, _) and
  not exists(SummaryComponent parsed | parsed = interpretComponent(c))
}
/**
 * Holds if input specification component `c` is `"Argument"` or parses as
 * an argument position; sink elements ending in such a component are
 * resolved via their call target.
 */
private predicate inputNeedsReference(string c) {
  parseArg(c, _)
  or
  c = "Argument"
}
/**
 * Holds if output specification component `c` is `"ReturnValue"`,
 * `"Argument"`, or parses as an argument position; source elements ending in
 * such a component are resolved via their call target.
 */
private predicate outputNeedsReference(string c) {
  c = "ReturnValue"
  or
  parseArg(c, _)
  or
  c = "Argument"
}
/**
 * Holds if `ref` refers to an element with an external source specification
 * `output` of kind `kind`. When the last component of `output` needs a
 * reference, `ref` is matched through its call target; otherwise `ref` is
 * matched as the element itself.
 */
private predicate sourceElementRef(InterpretNode ref, string output, string kind) {
  exists(SourceOrSinkElement e | sourceElement(e, output, kind) |
    outputNeedsReference(specLast(output)) and
    e = ref.getCallTarget()
    or
    not outputNeedsReference(specLast(output)) and
    e = ref.asElement()
  )
}
/**
 * Holds if `ref` refers to an element with an external sink specification
 * `input` of kind `kind`. When the last component of `input` needs a
 * reference, `ref` is matched through its call target; otherwise `ref` is
 * matched as the element itself.
 */
private predicate sinkElementRef(InterpretNode ref, string input, string kind) {
  exists(SourceOrSinkElement e | sinkElement(e, input, kind) |
    inputNeedsReference(specLast(input)) and
    e = ref.getCallTarget()
    or
    not inputNeedsReference(specLast(input)) and
    e = ref.asElement()
  )
}
/**
* Holds if interpreting the components of source specification `output` from
* index `idx` onwards, starting from reference `ref`, leads to `node`.
*
* Interpretation starts at `idx` equal to the length of `output`, where `node`
* is `ref` itself, and proceeds towards index 0 one component at a time.
*/
private predicate interpretOutput(string output, int idx, InterpretNode ref, InterpretNode node) {
// base case: nothing interpreted yet
sourceElementRef(ref, output, _) and
specLength(output, idx) and
node = ref
or
// recursive case: step from `mid` (index `idx + 1`) via component `c` at index `idx`
exists(InterpretNode mid, string c |
interpretOutput(output, idx + 1, ref, mid) and
specSplit(output, c, idx)
|
// post-update node of an argument of the call `mid`
exists(int pos |
node.asNode().(PostUpdateNode).getPreUpdateNode().(ArgNode).argumentOf(mid.asCall(), pos)
|
c = "Argument" or parseArg(c, pos)
)
or
// parameter of the callable `mid`
exists(int pos | node.asNode().(ParamNode).isParameterOf(mid.asCallable(), pos) |
c = "Parameter" or parseParam(c, pos)
)
or
// value returned by the call `mid`
c = "ReturnValue" and
node.asNode() = getAnOutNodeExt(mid.asCall(), TValueReturn(getReturnValueKind()))
or
// language-specific components
interpretOutputSpecific(c, mid, node)
)
}
/**
* Holds if interpreting the components of sink specification `input` from
* index `idx` onwards, starting from reference `ref`, leads to `node`.
*
* Interpretation starts at `idx` equal to the length of `input`, where `node`
* is `ref` itself, and proceeds towards index 0 one component at a time.
*/
private predicate interpretInput(string input, int idx, InterpretNode ref, InterpretNode node) {
// base case: nothing interpreted yet
sinkElementRef(ref, input, _) and
specLength(input, idx) and
node = ref
or
// recursive case: step from `mid` (index `idx + 1`) via component `c` at index `idx`
exists(InterpretNode mid, string c |
interpretInput(input, idx + 1, ref, mid) and
specSplit(input, c, idx)
|
// argument of the call `mid`
exists(int pos | node.asNode().(ArgNode).argumentOf(mid.asCall(), pos) |
c = "Argument" or parseArg(c, pos)
)
or
// return node, of the `"ReturnValue"` kind, inside the callable `mid`
exists(ReturnNodeExt ret |
c = "ReturnValue" and
ret = node.asNode() and
ret.getKind().(ValueReturnKind).getKind() = getReturnValueKind() and
mid.asCallable() = getNodeEnclosingCallable(ret)
)
or
// language-specific components
interpretInputSpecific(c, mid, node)
)
}
/**
 * Holds if a CSV flow model specifies `node` as a source of the given `kind`,
 * that is, some source specification fully interprets to `node`.
 */
predicate isSourceNode(InterpretNode node, string kind) {
  exists(InterpretNode ref, string output |
    interpretOutput(output, 0, ref, node) and
    sourceElementRef(ref, output, kind)
  )
}
/**
 * Holds if a CSV flow model specifies `node` as a sink of the given `kind`,
 * that is, some sink specification fully interprets to `node`.
 */
predicate isSinkNode(InterpretNode node, string kind) {
  exists(InterpretNode ref, string input |
    interpretInput(input, 0, ref, node) and
    sinkElementRef(ref, input, kind)
  )
}
}
/** Provides a query predicate that lists a chosen set of flow summaries. */
module TestOutput {
  /** A summarized callable whose summaries are reported by `summary/3`. */
  abstract class RelevantSummarizedCallable extends SummarizedCallable {
    /** Gets the string that `summary/3` reports for this callable. */
    string getFullString() { result = this.toString() }
  }

  /**
   * A query predicate for outputting flow summaries in QL tests. Each row
   * holds the callable, the flow rendered as `input -> output`, and whether
   * the flow preserves the value.
   */
  query predicate summary(string callable, string flow, boolean preservesValue) {
    exists(
      RelevantSummarizedCallable sc, SummaryComponentStack inStack,
      SummaryComponentStack outStack
    |
      sc.propagatesFlow(inStack, outStack, preservesValue) and
      flow = inStack + " -> " + outStack and
      callable = sc.getFullString()
    )
  }
}
/**
* Provides query predicates for rendering the generated data flow graph for
* a summarized callable.
*
* Import this module into a `.ql` file of `@kind graph` to render the graph.
* The graph is restricted to callables from `RelevantSummarizedCallable`.
*/
module RenderSummarizedCallable {
/** A summarized callable to include in the graph. */
abstract class RelevantSummarizedCallable extends SummarizedCallable { }
// Graph elements: summary nodes and parameter nodes of relevant callables, plus
// synthesized calls whose enclosing callable is relevant.
private newtype TNodeOrCall =
MkNode(Node n) {
exists(RelevantSummarizedCallable c |
n = summaryNode(c, _)
or
n.(ParamNode).isParameterOf(c, _)
)
} or
MkCall(DataFlowCall call) {
call = summaryDataFlowCall(_) and
call.getEnclosingCallable() instanceof RelevantSummarizedCallable
}
/** A graph element: either a data-flow node or a synthesized call. */
private class NodeOrCall extends TNodeOrCall {
/** Gets the data-flow node wrapped by this element, if any. */
Node asNode() { this = MkNode(result) }
/** Gets the call wrapped by this element, if any. */
DataFlowCall asCall() { this = MkCall(result) }
/** Gets a textual representation of the wrapped node or call. */
string toString() {
result = this.asNode().toString()
or
result = this.asCall().toString()
}
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.asNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
or
this.asCall().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
}
/** Holds if graph element `n` has label `val` under key `"semmle.label"`. */
query predicate nodes(NodeOrCall n, string key, string val) {
key = "semmle.label" and val = n.toString()
}
/**
* Holds if `value` is one label component for an edge from `a` to `b`; an
* edge may have several components.
*/
private predicate edgesComponent(NodeOrCall a, NodeOrCall b, string value) {
// local value or taint step
exists(boolean preservesValue |
Private::Steps::summaryLocalStep(a.asNode(), b.asNode(), preservesValue) and
if preservesValue = true then value = "value" else value = "taint"
)
or
// read, store, and clear of some content
exists(Content c |
Private::Steps::summaryReadStep(a.asNode(), c, b.asNode()) and
value = "read (" + c + ")"
or
Private::Steps::summaryStoreStep(a.asNode(), c, b.asNode()) and
value = "store (" + c + ")"
or
// clearing is rendered as a self-loop
Private::Steps::summaryClearsContent(a.asNode(), c) and
b = a and
value = "clear (" + c + ")"
)
or
// from pre-update node to post-update node
summaryPostUpdateNode(b.asNode(), a.asNode()) and
value = "post-update"
or
// from receiver node to the call synthesized for it
b.asCall() = summaryDataFlowCall(a.asNode()) and
value = "receiver"
or
// from argument node to the call it is passed to
exists(int i |
summaryArgumentNode(b.asCall(), a.asNode(), i) and
value = "argument (" + i + ")"
)
}
/**
* Holds if there is an edge from `a` to `b` whose label, under key
* `"semmle.label"`, is all of its label components joined by `" / "`.
*/
query predicate edges(NodeOrCall a, NodeOrCall b, string key, string value) {
key = "semmle.label" and
value = strictconcat(string s | edgesComponent(a, b, s) | s, " / ")
}
}
}

View File

@@ -0,0 +1,117 @@
/**
* Provides Ruby specific classes and predicates for defining flow summaries.
*/
private import ruby
private import DataFlowDispatch
private import DataFlowPrivate
private import DataFlowPublic
private import DataFlowImplCommon
private import FlowSummaryImpl::Private
private import FlowSummaryImpl::Public
private import codeql.ruby.dataflow.FlowSummary as FlowSummary
/** Holds if `i` is a valid parameter position, that is, an integer from -2 to 10 inclusive. */
predicate parameterPosition(int i) { i = [-2 .. 10] }
/**
* Gets the parameter position of the instance parameter.
*
* This has no result for Ruby, so no instance-parameter component exists.
*/
int instanceParameterPosition() { none() } // disables implicit summary flow to `self` for callbacks
/** Gets the synthesized summary data-flow node for state `state` of callable `c`. */
Node summaryNode(SummarizedCallable c, SummaryNodeState state) { TSummaryNode(c, state) = result }
/** Gets the synthesized data-flow call whose receiver is `receiver`. */
SummaryCall summaryDataFlowCall(Node receiver) { result.getReceiver() = receiver }
/**
* Gets the type of content `c`.
*
* The body is `any()`, so every `DataFlowType` is a result for every content.
*/
DataFlowType getContentType(Content c) { any() }
/**
* Gets the return type of kind `rk` for callable `c`.
*
* The body is `any()`, so every `DataFlowType` is a result once `c` and `rk`
* are bound.
*/
bindingset[c, rk]
DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { any() }
/**
* Gets the type of the `i`th parameter in a synthesized call that targets a
* callback of type `t`.
*
* The body is `any()`, so every `DataFlowType` is a result once `t` and `i`
* are bound.
*/
bindingset[t, i]
DataFlowType getCallbackParameterType(DataFlowType t, int i) { any() }
/**
* Gets the return type of kind `rk` in a synthesized call that targets a
* callback of type `t`.
*
* The body is `any()`, so every `DataFlowType` is a result for every
* combination of `t` and `rk`.
*/
DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { any() }
/**
 * Holds if library callable `c` has an external flow summary with input
 * specification `input` and output specification `output`. `kind` is
 * `"value"` for value-preserving summaries and `"taint"` otherwise.
 */
predicate summaryElement(DataFlowCallable c, string input, string output, string kind) {
  exists(FlowSummary::SummarizedCallable sc, boolean preservesValue |
    c.asLibraryCallable() = sc and
    sc.propagatesFlowExt(input, output, preservesValue) and
    (
      preservesValue = true and kind = "value"
      or
      preservesValue = false and kind = "taint"
    )
  )
}
/**
 * Gets the summary component denoted by the Ruby-specific specification
 * component `c`, if any.
 *
 * The only component currently recognized is `"BlockArgument"`, which
 * denotes the block component.
 */
SummaryComponent interpretComponentSpecific(string c) {
  result = FlowSummary::SummaryComponent::block() and
  c = "BlockArgument"
}
/**
* Gets the return kind corresponding to specification `"ReturnValue"`.
*
* The body is `any()`, so every `NormalReturnKind` value is a result.
*/
NormalReturnKind getReturnValueKind() { any() }
/**
* All definitions in this module are required by the shared implementation
* (for source/sink interpretation), but they are unused for Ruby, where
* we rely on API graphs instead.
*
* Every predicate in this module is defined as `none()`.
*/
private module UnusedSourceSinkInterpretation {
/**
* Holds if an external source specification exists for `n` with output specification
* `output` and kind `kind`.
*/
predicate sourceElement(AstNode n, string output, string kind) { none() }
/**
* Holds if an external sink specification exists for `n` with input specification
* `input` and kind `kind`.
*/
predicate sinkElement(AstNode n, string input, string kind) { none() }
/** The type of element that a source or sink specification can be attached to. */
class SourceOrSinkElement = AstNode;
/** An entity used to interpret a source/sink specification. */
class InterpretNode extends AstNode {
/** Gets the element that this node corresponds to, if any. */
SourceOrSinkElement asElement() { none() }
/** Gets the data-flow node that this node corresponds to, if any. */
Node asNode() { none() }
/** Gets the call that this node corresponds to, if any. */
DataFlowCall asCall() { none() }
/** Gets the callable that this node corresponds to, if any. */
DataFlowCallable asCallable() { none() }
/** Gets the target of this call, if any. */
Callable getCallTarget() { none() }
}
/**
* Provides additional language-specific logic for interpreting component `c`
* of an output specification, stepping from `mid` to `node`.
*
* NOTE(review): the shared implementation calls this from `interpretOutput`,
* which is used for source specifications - confirm.
*/
predicate interpretOutputSpecific(string c, InterpretNode mid, InterpretNode node) { none() }
/**
* Provides additional language-specific logic for interpreting component `c`
* of an input specification, stepping from `mid` to `node`.
*
* NOTE(review): the shared implementation calls this from `interpretInput`,
* which is used for sink specifications - confirm.
*/
predicate interpretInputSpecific(string c, InterpretNode mid, InterpretNode node) { none() }
}
import UnusedSourceSinkInterpretation

View File

@@ -0,0 +1,289 @@
private import SsaImplCommon
private import codeql.ruby.AST
private import codeql.ruby.CFG
private import codeql.ruby.ast.Variable
private import CfgNodes::ExprNodes
/**
 * Holds if `v` is uninitialized at index `i` in entry block `bb`.
 *
 * This is the case when `v` is declared in the scope of `bb`; the index is
 * always `-1`.
 */
predicate uninitializedWrite(EntryBasicBlock bb, int i, LocalVariable v) {
  i = -1 and
  bb.getScope() = v.getDeclaringScope()
}
/** Holds if `bb` contains a captured read of variable `v`. */
pragma[noinline]
private predicate hasCapturedVariableRead(BasicBlock bb, LocalVariable v) {
  exists(LocalVariableReadAccess access |
    access.getVariable() = v and
    access.isCapturedAccess()
  |
    bb.getANode().getNode() = access
  )
}
/**
 * Holds if an entry definition is needed for captured variable `v` at index
 * `i` in entry block `bb`.
 *
 * This is the case when `bb`, or a block reachable from it, contains a
 * captured read of `v`; the index is always `-1`.
 */
predicate capturedEntryWrite(EntryBasicBlock bb, int i, LocalVariable v) {
  i = -1 and
  exists(BasicBlock reachable |
    reachable = bb.getASuccessor*() and
    hasCapturedVariableRead(reachable, v)
  )
}
/** Holds if `bb` contains a captured write to variable `v`. */
pragma[noinline]
private predicate writesCapturedVariable(BasicBlock bb, LocalVariable v) {
  exists(LocalVariableWriteAccess access |
    access.getVariable() = v and
    access.isCapturedAccess()
  |
    bb.getANode().getNode() = access
  )
}
/**
 * Holds if a pseudo read of captured variable `v` should be inserted
 * at index `i` in exit block `bb`.
 *
 * This requires `bb` to be a normal exit block that is reachable from (or is
 * itself) a block writing to `v` through a captured access. The index is the
 * length of `bb`.
 */
private predicate capturedExitRead(AnnotatedExitBasicBlock bb, int i, LocalVariable v) {
  i = bb.length() and
  bb.isNormal() and
  exists(BasicBlock writer | writer = bb.getAPredecessor*() | writesCapturedVariable(writer, v))
}
/**
 * Gets the outer CFG scope of `scope`, provided that `scope` is either a
 * `Block` or a `Lambda`.
 */
private CfgScope getCaptureOuterCfgScope(CfgScope scope) {
  (scope instanceof Block or scope instanceof Lambda) and
  result = scope.getOuterCfgScope()
}
/**
 * Holds if captured variable `v` is read inside `scope`.
 *
 * The read may be located directly in `scope`, or in a block or lambda that
 * is (transitively) nested inside `scope`.
 */
pragma[noinline]
private predicate hasCapturedRead(Variable v, CfgScope scope) {
  exists(LocalVariableReadAccess read |
    read.getVariable() = v and
    scope = getCaptureOuterCfgScope*(read.getCfgScope()) and
    read.isCapturedAccess()
  )
}
/**
 * Holds if `bb` contains a write to `v`, the scope of `bb` is the outer CFG
 * scope of `scope`, and `v` has a captured read inside `scope`.
 */
pragma[noinline]
private predicate hasVariableWriteWithCapturedRead(BasicBlock bb, LocalVariable v, CfgScope scope) {
  exists(VariableWriteAccess write |
    write.getVariable() = v and
    write = bb.getANode().getNode()
  ) and
  scope.getOuterCfgScope() = bb.getScope() and
  hasCapturedRead(v, scope)
}
/**
 * Holds if the call at index `i` in basic block `bb` may reach a callable
 * that reads captured variable `v`.
 *
 * A write to `v` must occur in `bb` or in one of its (transitive)
 * predecessors.
 */
private predicate capturedCallRead(BasicBlock bb, int i, LocalVariable v) {
  bb.getNode(i).getNode() instanceof Call and
  exists(CfgScope scope | hasVariableWriteWithCapturedRead(bb.getAPredecessor*(), v, scope) |
    not scope instanceof Block
    or
    // When the read is located in a block, only the method call that the
    // block is attached to is considered
    exists(MethodCall call |
      call.getAControlFlowNode() = bb.getNode(i) and
      scope = call.getBlock()
    )
  )
}
/**
 * Holds if captured variable `v` is written inside `scope`.
 *
 * The write may be located directly in `scope`, or in a block or lambda that
 * is (transitively) nested inside `scope`.
 */
pragma[noinline]
private predicate hasCapturedWrite(Variable v, CfgScope scope) {
  exists(LocalVariableWriteAccess write |
    write.getVariable() = v and
    scope = getCaptureOuterCfgScope*(write.getCfgScope()) and
    write.isCapturedAccess()
  )
}
/**
 * Holds if the node at index `i` in basic block `bb` is a read access to
 * variable `v`.
 */
private predicate variableReadActual(BasicBlock bb, int i, LocalVariable v) {
  bb.getNode(i).getNode().(VariableReadAccess).getVariable() = v
}
/**
 * Holds if `v` is read at index `i` in basic block `bb`.
 *
 * Actual read accesses are certain reads (`certain = true`). The pseudo reads
 * added for calls that may reach a callable reading captured `v`, and for
 * exit blocks after captured writes, are uncertain (`certain = false`).
 */
predicate variableRead(BasicBlock bb, int i, LocalVariable v, boolean certain) {
  certain = true and
  variableReadActual(bb, i, v)
  or
  certain = false and
  (
    capturedCallRead(bb, i, v)
    or
    capturedExitRead(bb, i, v)
  )
}
/**
 * Holds if `bb` contains a read of `v`, the scope of `bb` is the outer CFG
 * scope of `scope`, and `v` has a captured write inside `scope`.
 */
pragma[noinline]
private predicate hasVariableReadWithCapturedWrite(BasicBlock bb, LocalVariable v, CfgScope scope) {
  exists(VariableReadAccess read |
    read.getVariable() = v and
    read = bb.getANode().getNode()
  ) and
  scope.getOuterCfgScope() = bb.getScope() and
  hasCapturedWrite(v, scope)
}
/**
 * The cached predicates of the Ruby SSA implementation. Several of them are
 * thin cached wrappers around predicates from `SsaImplCommon`.
 */
cached
private module Cached {
/**
 * Holds if the call at index `i` in basic block `bb` may reach a callable
 * that writes captured variable `v`.
 *
 * A read of `v` must occur in `bb` or in one of its (transitive) successors.
 */
cached
predicate capturedCallWrite(BasicBlock bb, int i, LocalVariable v) {
exists(CfgScope scope |
hasVariableReadWithCapturedWrite(bb.getASuccessor*(), v, scope) and
bb.getNode(i).getNode() instanceof Call
|
not scope instanceof Block
or
// If the write happens inside a block, we restrict to the call that
// contains the block
scope = any(MethodCall c | bb.getNode(i) = c.getAControlFlowNode()).getBlock()
)
}
/**
 * Holds if `v` is written at index `i` in basic block `bb`, and the corresponding
 * AST write access is `write`.
 *
 * For an explicit write, the node at index `i` is the node `n` for which
 * `write.isExplicitWrite(n)` holds; for an implicit write it is the write
 * access itself.
 */
cached
predicate variableWriteActual(BasicBlock bb, int i, LocalVariable v, VariableWriteAccess write) {
exists(AstNode n |
write.getVariable() = v and
n = bb.getNode(i).getNode()
|
write.isExplicitWrite(n)
or
write.isImplicitWrite() and
n = write
)
}
/**
 * Gets a control flow node for an actual read access (as opposed to a
 * pseudo read) that is reached by SSA definition `def`.
 */
cached
VariableReadAccessCfgNode getARead(Definition def) {
exists(LocalVariable v, BasicBlock bb, int i |
ssaDefReachesRead(v, def, bb, i) and
variableReadActual(bb, i, v) and
result = bb.getNode(i)
)
}
/**
 * Holds if there is flow for a captured variable from the enclosing scope into a block.
 * ```rb
 * foo = 0
 * bar {
 *   puts foo
 * }
 * ```
 *
 * `def` must reach a captured call read of some variable `v`, and `entry`
 * must be the definition at a captured entry write of that same `v`.
 *
 * NOTE(review): the call and the entry block are related only through `v`
 * here, not through the target of the call - confirm that this is intended.
 */
cached
predicate captureFlowIn(Definition def, Definition entry) {
exists(LocalVariable v, BasicBlock bb, int i |
ssaDefReachesRead(v, def, bb, i) and
capturedCallRead(bb, i, v) and
exists(BasicBlock bb2, int i2 |
capturedEntryWrite(bb2, i2, v) and
entry.definesAt(v, bb2, i2)
)
)
}
/**
 * Holds if there is outgoing flow for a captured variable that is updated in a block.
 * ```rb
 * foo = 0
 * bar {
 *   foo += 10
 * }
 * puts foo
 * ```
 *
 * `def` must reach a captured exit read of some variable `v`, and `exit`
 * must be the definition at a captured call write of that same `v`.
 *
 * NOTE(review): the exit block and the call are related only through `v`
 * here, not through the target of the call - confirm that this is intended.
 */
cached
predicate captureFlowOut(Definition def, Definition exit) {
exists(LocalVariable v, BasicBlock bb, int i |
ssaDefReachesRead(v, def, bb, i) and
capturedExitRead(bb, i, v) and
exists(BasicBlock bb2, int i2 |
capturedCallWrite(bb2, i2, v) and
exit.definesAt(v, bb2, i2)
)
)
}
/**
 * Gets a definition that is an input to phi node `phi` along the edge
 * originating in basic block `bb`.
 *
 * This is a cached wrapper around the three-argument predicate of the same
 * name, with the input definition as the result.
 */
cached
Definition phiHasInputFromBlock(PhiNode phi, BasicBlock bb) {
phiHasInputFromBlock(phi, result, bb)
}
/**
 * Holds if the value defined at SSA definition `def` can reach a read at `read`,
 * without passing through any other non-pseudo read.
 */
cached
predicate firstRead(Definition def, VariableReadAccessCfgNode read) {
exists(BasicBlock bb1, int i1, BasicBlock bb2, int i2 |
def.definesAt(_, bb1, i1) and
adjacentDefNoUncertainReads(def, bb1, i1, bb2, i2) and
read = bb2.getNode(i2)
)
}
/**
 * Holds if the read at `read2` is a read of the same SSA definition `def`
 * as the read at `read1`, and `read2` can be reached from `read1` without
 * passing through another non-pseudo read.
 */
cached
predicate adjacentReadPair(
Definition def, VariableReadAccessCfgNode read1, VariableReadAccessCfgNode read2
) {
exists(BasicBlock bb1, int i1, BasicBlock bb2, int i2 |
read1 = bb1.getNode(i1) and
variableReadActual(bb1, i1, _) and
adjacentDefNoUncertainReads(def, bb1, i1, bb2, i2) and
read2 = bb2.getNode(i2)
)
}
/**
 * Holds if the read of `def` at `read` may be a last read. That is, `read`
 * can either reach another definition of the underlying source variable or
 * the end of the CFG scope, without passing through another non-pseudo read.
 */
cached
predicate lastRead(Definition def, VariableReadAccessCfgNode read) {
exists(BasicBlock bb, int i |
lastRefNoUncertainReads(def, bb, i) and
variableReadActual(bb, i, _) and
read = bb.getNode(i)
)
}
/**
 * Holds if the reference to `def` at index `i` in basic block `bb` can reach
 * another definition `next` of the same underlying source variable, without
 * passing through another write or non-pseudo read.
 *
 * The reference is either a read of `def` or `def` itself.
 */
cached
predicate lastRefBeforeRedef(Definition def, BasicBlock bb, int i, Definition next) {
lastRefRedefNoUncertainReads(def, bb, i, next)
}
/**
 * Gets a definition that is an input to uncertain write definition `def`.
 *
 * This is a cached wrapper around the two-argument predicate of the same
 * name, with the input definition as the result.
 */
cached
Definition uncertainWriteDefinitionInput(UncertainWriteDefinition def) {
uncertainWriteDefinitionInput(def, result)
}
}
import Cached

View File

@@ -0,0 +1,637 @@
/**
* Provides a language-independent implementation of static single assignment
* (SSA) form.
*/
private import SsaImplSpecific
/** Gets a predecessor of basic block `bb`, that is, a block that has `bb` as a successor. */
private BasicBlock getABasicBlockPredecessor(BasicBlock bb) { bb = getABasicBlockSuccessor(result) }
/**
 * Liveness analysis (based on source variables) to restrict the size of the
 * SSA representation.
 */
private module Liveness {
/**
 * A classification of variable references into reads (of a given kind) and
 * (certain or uncertain) writes.
 */
private newtype TRefKind =
Read(boolean certain) { certain in [false, true] } or
Write(boolean certain) { certain in [false, true] }
/** A reference kind: a (certain or uncertain) read or write. */
private class RefKind extends TRefKind {
/** Gets a textual representation of this reference kind. */
string toString() {
exists(boolean certain | this = Read(certain) and result = "read (" + certain + ")")
or
exists(boolean certain | this = Write(certain) and result = "write (" + certain + ")")
}
/**
 * Gets the relative order of this kind among references at the same index:
 * `0` for reads and `1` for writes.
 */
int getOrder() {
this = Read(_) and
result = 0
or
this = Write(_) and
result = 1
}
}
/**
 * Holds if the `i`th node of basic block `bb` is a reference to `v` of kind `k`.
 */
private predicate ref(BasicBlock bb, int i, SourceVariable v, RefKind k) {
exists(boolean certain | variableRead(bb, i, v, certain) | k = Read(certain))
or
exists(boolean certain | variableWrite(bb, i, v, certain) | k = Write(certain))
}
/**
 * A pair of a node index and a reference-kind order tag, used as the
 * element being ranked in `refRank`.
 */
private newtype OrderedRefIndex =
MkOrderedRefIndex(int i, int tag) {
exists(RefKind rk | ref(_, i, _, rk) | tag = rk.getOrder())
}
/**
 * Gets the ordered reference index for the reference to `v` of kind `k` at
 * the `i`th node of basic block `bb`, where `ord` is the order of `k`.
 */
private OrderedRefIndex refOrd(BasicBlock bb, int i, SourceVariable v, RefKind k, int ord) {
ref(bb, i, v, k) and
result = MkOrderedRefIndex(i, ord) and
ord = k.getOrder()
}
/**
 * Gets the (1-based) rank of the reference to `v` at the `i`th node of
 * basic block `bb`, which has the given reference kind `k`.
 *
 * Reads are considered before writes when they happen at the same index.
 */
private int refRank(BasicBlock bb, int i, SourceVariable v, RefKind k) {
refOrd(bb, i, v, k, _) =
rank[result](int j, int ord, OrderedRefIndex res |
res = refOrd(bb, j, v, _, ord)
|
res order by j, ord
)
}
/** Gets the maximum rank among the references to `v` inside basic block `bb`. */
private int maxRefRank(BasicBlock bb, SourceVariable v) {
result = refRank(bb, _, v, _) and
not result + 1 = refRank(bb, _, v, _)
}
/**
 * Gets the (1-based) rank of the first reference to `v` inside basic block `bb`
 * that is either a read or a certain write.
 */
private int firstReadOrCertainWrite(BasicBlock bb, SourceVariable v) {
result =
min(int r, RefKind k |
r = refRank(bb, _, v, k) and
k != Write(false)
|
r
)
}
/**
 * Holds if source variable `v` is live at the beginning of basic block `bb`.
 */
predicate liveAtEntry(BasicBlock bb, SourceVariable v) {
// The first read or certain write to `v` inside `bb` is a read
refRank(bb, _, v, Read(_)) = firstReadOrCertainWrite(bb, v)
or
// There is neither a read of nor a certain write to `v` inside `bb`, but
// `v` is live at entry to a successor basic block of `bb`
not exists(firstReadOrCertainWrite(bb, v)) and
liveAtExit(bb, v)
}
/**
 * Holds if source variable `v` is live at the end of basic block `bb`.
 */
predicate liveAtExit(BasicBlock bb, SourceVariable v) {
liveAtEntry(getABasicBlockSuccessor(bb), v)
}
/**
 * Holds if variable `v` is live in basic block `bb` at index `i`.
 * The rank of `i` is `rnk` as defined by `refRank()`.
 */
private predicate liveAtRank(BasicBlock bb, int i, SourceVariable v, int rnk) {
exists(RefKind kind | rnk = refRank(bb, i, v, kind) |
// This is the last reference to `v` in `bb`, and `v` is live at exit
rnk = maxRefRank(bb, v) and
liveAtExit(bb, v)
or
// The reference itself is a read
ref(bb, i, v, kind) and
kind = Read(_)
or
// `v` is live at the next reference, which is not a certain write
exists(RefKind nextKind |
liveAtRank(bb, _, v, rnk + 1) and
rnk + 1 = refRank(bb, _, v, nextKind) and
nextKind != Write(true)
)
)
}
/**
 * Holds if variable `v` is live after the (certain or uncertain) write at
 * index `i` inside basic block `bb`.
 */
predicate liveAfterWrite(BasicBlock bb, int i, SourceVariable v) {
exists(int rnk | rnk = refRank(bb, i, v, Write(_)) | liveAtRank(bb, i, v, rnk))
}
}
private import Liveness
/**
 * Holds if `bb1` strictly dominates `bb2`, that is, `bb1` is reachable from
 * `bb2` by one or more immediate-dominator steps.
 */
private predicate strictlyDominates(BasicBlock bb1, BasicBlock bb2) {
  getImmediateBasicBlockDominator+(bb2) = bb1
}
/**
 * Holds if `bb1` dominates a predecessor of `bb2`: `bb1` either is that
 * predecessor or strictly dominates it.
 */
private predicate dominatesPredecessor(BasicBlock bb1, BasicBlock bb2) {
  exists(BasicBlock pred |
    getABasicBlockPredecessor(bb2) = pred and
    (strictlyDominates(bb1, pred) or pred = bb1)
  )
}
/**
 * Holds if `df` is in the dominance frontier of `bb`: `bb` dominates a
 * predecessor of `df`, but does not strictly dominate `df` itself.
 */
private predicate inDominanceFrontier(BasicBlock bb, BasicBlock df) {
  not strictlyDominates(bb, df) and
  dominatesPredecessor(bb, df)
}
/**
 * Holds if some SSA definition of `v` is located in a basic block that has
 * `bb` in its dominance frontier.
 */
pragma[noinline]
private predicate inDefDominanceFrontier(BasicBlock bb, SourceVariable v) {
  exists(BasicBlock defBlock |
    any(Definition d).definesAt(v, defBlock, _) and
    inDominanceFrontier(defBlock, bb)
  )
}
/**
 * The underlying representation of SSA definitions.
 *
 * Only definitions of live variables are created: a write definition
 * requires the variable to be live after the write, and a phi node requires
 * the variable to be live at entry to the block.
 */
cached
newtype TDefinition =
// A (certain or uncertain) write to `v` at index `i` in `bb`
TWriteDef(SourceVariable v, BasicBlock bb, int i) {
variableWrite(bb, i, v, _) and
liveAfterWrite(bb, i, v)
} or
// A phi node for `v` at the beginning of `bb`, where `bb` is in the
// dominance frontier of a block containing a definition of `v`
TPhiNode(SourceVariable v, BasicBlock bb) {
inDefDominanceFrontier(bb, v) and
liveAtEntry(bb, v)
}
/**
 * Helper predicates for computing which references an SSA definition
 * reaches, based on a per-block ranking of reads and SSA definitions.
 */
private module SsaDefReaches {
newtype TSsaRefKind =
SsaRead() or
SsaDef()
/**
 * A classification of SSA variable references into reads and definitions.
 */
class SsaRefKind extends TSsaRefKind {
/** Gets a textual representation of this reference kind. */
string toString() {
this = SsaRead() and
result = "SsaRead"
or
this = SsaDef() and
result = "SsaDef"
}
/**
 * Gets the relative order of this kind among references at the same index:
 * `0` for reads and `1` for definitions.
 */
int getOrder() {
this = SsaRead() and
result = 0
or
this = SsaDef() and
result = 1
}
}
/**
 * Holds if the `i`th node of basic block `bb` is a reference to `v`,
 * either a read (when `k` is `SsaRead()`) or an SSA definition (when `k`
 * is `SsaDef()`).
 *
 * Unlike `Liveness::ref`, this includes `phi` nodes.
 */
predicate ssaRef(BasicBlock bb, int i, SourceVariable v, SsaRefKind k) {
variableRead(bb, i, v, _) and
k = SsaRead()
or
exists(Definition def | def.definesAt(v, bb, i)) and
k = SsaDef()
}
/**
 * A pair of a node index and an SSA reference kind, used as the element
 * being ranked in `ssaRefRank`.
 */
private newtype OrderedSsaRefIndex =
MkOrderedSsaRefIndex(int i, SsaRefKind k) { ssaRef(_, i, _, k) }
/**
 * Gets the ordered reference index for the reference to `v` of kind `k` at
 * the `i`th node of basic block `bb`, where `ord` is the order of `k`.
 */
private OrderedSsaRefIndex ssaRefOrd(BasicBlock bb, int i, SourceVariable v, SsaRefKind k, int ord) {
ssaRef(bb, i, v, k) and
result = MkOrderedSsaRefIndex(i, k) and
ord = k.getOrder()
}
/**
 * Gets the (1-based) rank of the reference to `v` at the `i`th node of basic
 * block `bb`, which has the given reference kind `k`.
 *
 * For example, if `bb` is a basic block with a phi node for `v` (considered
 * to be at index -1), reads `v` at node 2, and defines it at node 5, we have:
 *
 * ```ql
 * ssaRefRank(bb, -1, v, SsaDef()) = 1    // phi node
 * ssaRefRank(bb,  2, v, SsaRead()) = 2   // read at node 2
 * ssaRefRank(bb,  5, v, SsaDef()) = 3    // definition at node 5
 * ```
 *
 * Reads are considered before writes when they happen at the same index.
 */
int ssaRefRank(BasicBlock bb, int i, SourceVariable v, SsaRefKind k) {
ssaRefOrd(bb, i, v, k, _) =
rank[result](int j, int ord, OrderedSsaRefIndex res |
res = ssaRefOrd(bb, j, v, _, ord)
|
res order by j, ord
)
}
/** Gets the maximum rank among the SSA references to `v` inside basic block `bb`. */
int maxSsaRefRank(BasicBlock bb, SourceVariable v) {
result = ssaRefRank(bb, _, v, _) and
not result + 1 = ssaRefRank(bb, _, v, _)
}
/**
 * Holds if the SSA definition `def` reaches rank index `rnk` in its own
 * basic block `bb`.
 */
predicate ssaDefReachesRank(BasicBlock bb, Definition def, int rnk, SourceVariable v) {
// `rnk` is the rank of `def` itself
exists(int i |
rnk = ssaRefRank(bb, i, v, SsaDef()) and
def.definesAt(v, bb, i)
)
or
// `def` reaches the previous rank, and the reference at `rnk` is a read
ssaDefReachesRank(bb, def, rnk - 1, v) and
rnk = ssaRefRank(bb, _, v, SsaRead())
}
/**
 * Holds if the SSA definition of `v` at `def` reaches index `i` in the same
 * basic block `bb`, without crossing another SSA definition of `v`.
 */
predicate ssaDefReachesReadWithinBlock(SourceVariable v, Definition def, BasicBlock bb, int i) {
exists(int rnk |
ssaDefReachesRank(bb, def, rnk, v) and
rnk = ssaRefRank(bb, i, v, SsaRead())
)
}
/**
 * Holds if the SSA definition of `v` at `def` reaches uncertain SSA definition
 * `redef` in the same basic block, without crossing another SSA definition of `v`.
 */
predicate ssaDefReachesUncertainDefWithinBlock(
SourceVariable v, Definition def, UncertainWriteDefinition redef
) {
exists(BasicBlock bb, int rnk, int i |
ssaDefReachesRank(bb, def, rnk, v) and
rnk = ssaRefRank(bb, i, v, SsaDef()) - 1 and
redef.definesAt(v, bb, i)
)
}
/**
 * Same as `ssaRefRank()`, but restricted to a particular SSA definition `def`.
 *
 * The reference at index `i` in `bb` is either a read reached by `def` or
 * `def` itself.
 */
int ssaDefRank(Definition def, SourceVariable v, BasicBlock bb, int i, SsaRefKind k) {
v = def.getSourceVariable() and
result = ssaRefRank(bb, i, v, k) and
(
ssaDefReachesRead(_, def, bb, i)
or
def.definesAt(_, bb, i)
)
}
/**
 * Holds if the reference to `def` at index `i` in basic block `bb` is the
 * last reference to `v` inside `bb`.
 */
pragma[noinline]
predicate lastSsaRef(Definition def, SourceVariable v, BasicBlock bb, int i) {
ssaDefRank(def, v, bb, i, _) = maxSsaRefRank(bb, v)
}
/**
 * Holds if basic block `bb` contains a reference to `def`, which is a
 * definition of `v`: either a read reached by `def` or `def` itself.
 */
predicate defOccursInBlock(Definition def, BasicBlock bb, SourceVariable v) {
exists(ssaDefRank(def, v, bb, _, _))
}
/**
 * Holds if `def` reaches the end of basic block `bb`, and `bb` contains no
 * reference to any definition of the source variable underlying `def`.
 */
pragma[noinline]
private predicate ssaDefReachesThroughBlock(Definition def, BasicBlock bb) {
ssaDefReachesEndOfBlock(bb, def, _) and
not defOccursInBlock(_, bb, def.getSourceVariable())
}
/**
 * Holds if `def` is accessed in basic block `bb1` (either a read or a write),
 * `bb2` is a transitive successor of `bb1`, `def` is live at the end of `bb1`,
 * and the underlying variable for `def` is neither read nor written in any block
 * on the path between `bb1` and `bb2`.
 */
predicate varBlockReaches(Definition def, BasicBlock bb1, BasicBlock bb2) {
defOccursInBlock(def, bb1, _) and
bb2 = getABasicBlockSuccessor(bb1)
or
exists(BasicBlock mid |
varBlockReaches(def, bb1, mid) and
ssaDefReachesThroughBlock(def, mid) and
bb2 = getABasicBlockSuccessor(mid)
)
}
/**
 * Holds if `def` is accessed in basic block `bb1` (either a read or a write),
 * `def` is read at index `i2` in basic block `bb2`, `bb2` is in a transitive
 * successor block of `bb1`, and `def` is neither read nor written in any block
 * on a path between `bb1` and `bb2`.
 */
predicate defAdjacentRead(Definition def, BasicBlock bb1, BasicBlock bb2, int i2) {
varBlockReaches(def, bb1, bb2) and
ssaRefRank(bb2, i2, def.getSourceVariable(), SsaRead()) = 1
}
}
private import SsaDefReaches
/**
 * Holds if `v` is live at the end of basic block `bb`, and `bb` contains no
 * SSA definition of `v`.
 */
pragma[nomagic]
predicate liveThrough(BasicBlock bb, SourceVariable v) {
  not ssaRef(bb, _, v, SsaDef()) and
  liveAtExit(bb, v)
}
/**
 * NB: If this predicate is exposed, it should be cached.
 *
 * Holds if the SSA definition of `v` at `def` reaches the end of basic
 * block `bb`, at which point it is still live, without crossing another
 * SSA definition of `v`.
 */
pragma[nomagic]
predicate ssaDefReachesEndOfBlock(BasicBlock bb, Definition def, SourceVariable v) {
// `def` reaches the last reference to `v` inside `bb`, and `v` is live at
// the end of `bb`
exists(int last | last = maxSsaRefRank(bb, v) |
ssaDefReachesRank(bb, def, last, v) and
liveAtExit(bb, v)
)
or
// The construction of SSA form ensures that each read of a variable is
// dominated by its definition. An SSA definition therefore reaches a
// control flow node if it is the _closest_ SSA definition that dominates
// the node. If two definitions dominate a node then one must dominate the
// other, so therefore the definition of _closest_ is given by the dominator
// tree. Thus, reaching definitions can be calculated in terms of dominance.
//
// Here: `def` reaches the end of the immediate dominator of `bb`, and `v`
// is live through `bb`, which contains no SSA definition of `v`.
ssaDefReachesEndOfBlock(getImmediateBasicBlockDominator(bb), def, pragma[only_bind_into](v)) and
liveThrough(bb, pragma[only_bind_into](v))
}
/**
 * NB: If this predicate is exposed, it should be cached.
 *
 * Holds if `inp` is an input to the phi node `phi` along the edge originating
 * in `bb`. That is, `bb` is a predecessor of the block of `phi`, and `inp`
 * reaches the end of `bb`.
 */
pragma[nomagic]
predicate phiHasInputFromBlock(PhiNode phi, Definition inp, BasicBlock bb) {
  exists(SourceVariable v, BasicBlock phiBlock |
    ssaDefReachesEndOfBlock(bb, inp, v) and
    bb = getABasicBlockPredecessor(phiBlock) and
    phi.definesAt(v, phiBlock, _)
  )
}
/**
 * NB: If this predicate is exposed, it should be cached.
 *
 * Holds if the SSA definition of `v` at `def` reaches a read at index `i` in
 * basic block `bb`, without crossing another SSA definition of `v`.
 *
 * The read is reached either from within `bb`, or, when no definition
 * reaches it from within `bb`, from the end of a predecessor of `bb`.
 */
pragma[nomagic]
predicate ssaDefReachesRead(SourceVariable v, Definition def, BasicBlock bb, int i) {
  ssaDefReachesReadWithinBlock(v, def, bb, i)
  or
  not ssaDefReachesReadWithinBlock(v, _, bb, i) and
  ssaDefReachesEndOfBlock(getABasicBlockPredecessor(bb), def, v) and
  variableRead(bb, i, v, _)
}
/**
 * NB: If this predicate is exposed, it should be cached.
 *
 * Holds if `def` is accessed at index `i1` in basic block `bb1` (either a read
 * or a write), `def` is read at index `i2` in basic block `bb2`, and there is a
 * path between them without any read of `def`.
 */
pragma[nomagic]
predicate adjacentDefRead(Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2) {
  // Both references are in the same block, at consecutive ranks
  bb2 = bb1 and
  variableRead(bb1, i2, _, _) and
  ssaDefRank(def, _, bb1, i2, SsaRead()) = ssaDefRank(def, _, bb1, i1, _) + 1
  or
  // The first reference is the last one in `bb1`, and the read is the first
  // reference in `bb2`
  defAdjacentRead(def, bb1, bb2, i2) and
  lastSsaRef(def, _, bb1, i1)
}
/**
 * Same as the five-argument `adjacentDefRead`, but additionally binds `v` to
 * the source variable underlying `def`.
 */
pragma[noinline]
private predicate adjacentDefRead(
  Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2, SourceVariable v
) {
  def.getSourceVariable() = v and
  adjacentDefRead(def, bb1, i1, bb2, i2)
}
/**
 * Holds if the reference to `def` at index `i1` in `bb1`, which is either an
 * SSA definition or a certain read, reaches the read at index `i2` in `bb2`
 * through one or more `adjacentDefRead` steps, where all intermediate
 * references are uncertain reads.
 */
private predicate adjacentDefReachesRead(
  Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2
) {
  exists(SourceVariable v |
    adjacentDefRead(def, bb1, i1, bb2, i2, v) and
    (
      variableRead(bb1, i1, v, true)
      or
      ssaRef(bb1, i1, v, SsaDef())
    )
  )
  or
  exists(BasicBlock mid, int iMid |
    adjacentDefRead(def, mid, iMid, bb2, i2) and
    variableRead(mid, iMid, _, false) and
    adjacentDefReachesRead(def, bb1, i1, mid, iMid)
  )
}
/**
 * NB: If this predicate is exposed, it should be cached.
 *
 * Same as `adjacentDefRead`, but ignores uncertain reads: the read at index
 * `i2` in `bb2` must be certain, and uncertain reads in between are skipped.
 */
pragma[nomagic]
predicate adjacentDefNoUncertainReads(Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2) {
  variableRead(bb2, i2, _, true) and
  adjacentDefReachesRead(def, bb1, i1, bb2, i2)
}
/**
 * NB: If this predicate is exposed, it should be cached.
 *
 * Holds if the node at index `i` in `bb` is a last reference to SSA definition
 * `def`. The reference is last because it can reach another write `next`,
 * without passing through another read or write.
 */
pragma[nomagic]
predicate lastRefRedef(Definition def, BasicBlock bb, int i, Definition next) {
  exists(SourceVariable v |
    // `next` is the reference that immediately follows inside `bb`
    exists(int j |
      next.definesAt(v, bb, j) and
      ssaRefRank(bb, j, v, SsaDef()) = ssaDefRank(def, v, bb, i, _) + 1
    )
    or
    // `next` is the first reference in a block reached in one or more steps
    exists(BasicBlock target |
      varBlockReaches(def, bb, target) and
      ssaDefRank(next, v, target, _, SsaDef()) = 1
    ) and
    lastSsaRef(def, v, bb, i)
  )
}
/**
 * NB: If this predicate is exposed, it should be cached.
 *
 * Holds if `inp` is an immediately preceding definition of uncertain definition
 * `def`. Since `def` is uncertain, the value from the preceding definition might
 * still be valid.
 */
pragma[nomagic]
predicate uncertainWriteDefinitionInput(UncertainWriteDefinition def, Definition inp) {
  exists(BasicBlock bb, int i | lastRefRedef(inp, bb, i, def))
}
/**
 * Same as `adjacentDefReachesRead`, but restricted to the case where the
 * read at index `i2` in `bb2` is uncertain.
 */
private predicate adjacentDefReachesUncertainRead(
  Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2
) {
  variableRead(bb2, i2, _, false) and
  adjacentDefReachesRead(def, bb1, i1, bb2, i2)
}
/**
 * NB: If this predicate is exposed, it should be cached.
 *
 * Same as `lastRefRedef`, but ignores uncertain reads.
 */
pragma[nomagic]
predicate lastRefRedefNoUncertainReads(Definition def, BasicBlock bb, int i, Definition next) {
  // The last reference is an uncertain read, reached from the reference at
  // index `i` in `bb`
  exists(BasicBlock via, int k |
    adjacentDefReachesUncertainRead(def, bb, i, via, k) and
    lastRefRedef(def, via, k, next)
  )
  or
  // The reference at index `i` in `bb` is itself last, and is not an
  // uncertain read
  not variableRead(bb, i, def.getSourceVariable(), false) and
  lastRefRedef(def, bb, i, next)
}
/**
 * NB: If this predicate is exposed, it should be cached.
 *
 * Holds if the node at index `i` in `bb` is a last reference to SSA
 * definition `def`.
 *
 * That is, the node can reach the end of the enclosing callable, or another
 * SSA definition for the underlying source variable, without passing through
 * another read.
 */
pragma[nomagic]
predicate lastRef(Definition def, BasicBlock bb, int i) {
  lastRefRedef(def, bb, i, _)
  or
  lastSsaRef(def, _, bb, i) and
  (
    // The reference is in an exit block
    bb instanceof ExitBasicBlock
    or
    // A block is reached in one or more steps where `def` is no longer live
    exists(BasicBlock dead |
      varBlockReaches(def, bb, dead) and
      not defOccursInBlock(def, dead, _) and
      not ssaDefReachesEndOfBlock(dead, def, _)
    )
  )
}
/**
 * NB: If this predicate is exposed, it should be cached.
 *
 * Same as `lastRef`, but ignores uncertain reads.
 */
pragma[nomagic]
predicate lastRefNoUncertainReads(Definition def, BasicBlock bb, int i) {
  // The last reference is an uncertain read, reached from the reference at
  // index `i` in `bb`
  exists(BasicBlock via, int k |
    adjacentDefReachesUncertainRead(def, bb, i, via, k) and
    lastRef(def, via, k)
  )
  or
  // The reference at index `i` in `bb` is itself last, and is not an
  // uncertain read
  not variableRead(bb, i, def.getSourceVariable(), false) and
  lastRef(def, bb, i)
}
/** A static single assignment (SSA) definition: a write definition or a phi node. */
class Definition extends TDefinition {
  /** Gets the source variable that this SSA definition defines. */
  SourceVariable getSourceVariable() {
    exists(BasicBlock bb, int i | this.definesAt(result, bb, i))
  }

  /**
   * Holds if this SSA definition defines `v` at index `i` in basic block `bb`.
   * Phi nodes are considered to be at index `-1`, while normal variable writes
   * are at the index of the control flow node they wrap.
   */
  final predicate definesAt(SourceVariable v, BasicBlock bb, int i) {
    i = -1 and
    this = TPhiNode(v, bb)
    or
    this = TWriteDef(v, bb, i)
  }

  /** Gets the basic block that contains this SSA definition. */
  final BasicBlock getBasicBlock() {
    exists(SourceVariable v, int i | this.definesAt(v, result, i))
  }

  /** Gets a textual representation of this SSA definition; overridden in subclasses. */
  string toString() { none() }
}
/** An SSA definition that corresponds to a (certain or uncertain) variable write. */
class WriteDefinition extends Definition, TWriteDef {
  // The components of the underlying `TWriteDef` value
  private SourceVariable v;
  private BasicBlock bb;
  private int i;

  WriteDefinition() { TWriteDef(v, bb, i) = this }

  override string toString() { "WriteDef" = result }
}
/** An SSA definition that is a phi node, located at index `-1` of its basic block. */
class PhiNode extends Definition, TPhiNode {
  override string toString() { "Phi" = result }
}
/**
 * An SSA write definition whose underlying variable write is uncertain, that
 * is, it may or may not update the source variable.
 */
class UncertainWriteDefinition extends WriteDefinition {
  UncertainWriteDefinition() {
    exists(SourceVariable v, BasicBlock bb, int i |
      variableWrite(bb, i, v, false) and
      this.definesAt(v, bb, i)
    )
  }
}

View File

@@ -0,0 +1,34 @@
/** Provides the Ruby specific parameters for `SsaImplCommon.qll`. */
private import SsaImpl as SsaImpl
private import codeql.ruby.AST
private import codeql.ruby.ast.Parameter
private import codeql.ruby.ast.Variable
private import codeql.ruby.controlflow.BasicBlocks as BasicBlocks
private import codeql.ruby.controlflow.ControlFlowGraph
/** A basic block; an alias for `BasicBlocks::BasicBlock`. */
class BasicBlock = BasicBlocks::BasicBlock;
/** Gets the immediate dominator of basic block `bb`. */
BasicBlock getImmediateBasicBlockDominator(BasicBlock bb) { result = bb.getImmediateDominator() }
/** Gets a successor of basic block `bb`. */
BasicBlock getABasicBlockSuccessor(BasicBlock bb) { result = bb.getASuccessor() }
/** An exit basic block; an alias for `BasicBlocks::ExitBasicBlock`. */
class ExitBasicBlock = BasicBlocks::ExitBasicBlock;
/** A source variable for SSA construction; an alias for `LocalVariable`. */
class SourceVariable = LocalVariable;
/**
 * Holds if `v` is written at index `i` in basic block `bb`.
 *
 * Uninitialized writes, captured entry writes, and actual writes are certain
 * (`certain = true`); writes through calls that may reach a callable writing
 * captured `v` are uncertain (`certain = false`).
 */
predicate variableWrite(BasicBlock bb, int i, SourceVariable v, boolean certain) {
  certain = false and
  SsaImpl::capturedCallWrite(bb, i, v)
  or
  certain = true and
  (
    SsaImpl::variableWriteActual(bb, i, v, _)
    or
    SsaImpl::capturedEntryWrite(bb, i, v)
    or
    SsaImpl::uninitializedWrite(bb, i, v)
  )
}
/**
 * Holds if `v` is read at index `i` in basic block `bb`, with `certain`
 * indicating whether the read is certain. An alias for the four-argument
 * `SsaImpl::variableRead`.
 */
predicate variableRead = SsaImpl::variableRead/4;

View File

@@ -0,0 +1,41 @@
private import ruby
private import TaintTrackingPublic
private import codeql.ruby.CFG
private import codeql.ruby.DataFlow
private import FlowSummaryImpl as FlowSummaryImpl
/**
 * Holds if `node` should be a sanitizer in all global taint flow configurations
 * but not in local taint.
 *
 * The body is `none()`, so there are currently no default sanitizers.
 */
predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
/**
 * Holds if default `TaintTracking::Configuration`s should allow implicit reads
 * of `c` at sinks and inputs to additional taint steps.
 *
 * The body is `none()`, so no implicit reads are currently allowed. The
 * binding set requires callers to bind `node`.
 */
bindingset[node]
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::Content c) { none() }
/**
 * Holds if the additional step from `nodeFrom` to `nodeTo` should be included
 * in all global taint flow configurations.
 *
 * The steps are: from an operand to its (non-assignment) operation, from a
 * component to its string-like literal, from a receiver to an element
 * reference, and non-value-preserving local steps from flow summaries.
 */
cached
predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
  // `nodeFrom` is an operand of the operation `nodeTo`
  exists(CfgNodes::ExprNodes::OperationCfgNode operation |
    nodeTo.asExpr() = operation and
    nodeFrom.asExpr() = operation.getAnOperand()
  |
    not operation.getExpr() instanceof AssignExpr
  )
  or
  // `nodeFrom` is interpolated into the string-like literal `nodeTo`
  exists(CfgNodes::ExprNodes::StringlikeLiteralCfgNode literal |
    literal = nodeTo.asExpr() and
    literal.getAComponent() = nodeFrom.asExpr()
  )
  or
  // `nodeFrom` is the receiver of the element reference `nodeTo`
  exists(CfgNodes::ExprNodes::ElementReferenceCfgNode elementRef |
    elementRef = nodeTo.asExpr() and
    elementRef.getReceiver() = nodeFrom.asExpr()
  )
  or
  FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, false)
}

View File

@@ -0,0 +1,31 @@
private import ruby
private import TaintTrackingPrivate
private import codeql.ruby.CFG
private import codeql.ruby.DataFlow
private import FlowSummaryImpl as FlowSummaryImpl
/**
 * Holds if taint propagates from `source` to `sink` in zero or more local
 * (intra-procedural) steps.
 *
 * This is the reflexive transitive closure of `localTaintStep`.
 */
predicate localTaint(DataFlow::Node source, DataFlow::Node sink) { localTaintStep*(source, sink) }
/**
 * Holds if taint can flow from expression `e1` to expression `e2` in zero or
 * more local (intra-procedural) steps, that is, `localTaint` holds between
 * their expression nodes.
 */
predicate localExprTaint(CfgNodes::ExprCfgNode e1, CfgNodes::ExprCfgNode e2) {
  exists(DataFlow::Node source, DataFlow::Node sink |
    source = DataFlow::exprNode(e1) and
    sink = DataFlow::exprNode(e2) and
    localTaint(source, sink)
  )
}
/**
 * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
 * (intra-procedural) step.
 */
predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
  // Simple non-value-preserving flow through library code counts as a local
  // step, even though such flow is technically inter-procedural
  FlowSummaryImpl::Private::Steps::summaryThroughStep(nodeFrom, nodeTo, false)
  or
  defaultAdditionalTaintStep(nodeFrom, nodeTo)
}

View File

@@ -0,0 +1,120 @@
/**
* Provides an implementation of global (interprocedural) taint tracking.
* This file re-exports the local (intraprocedural) taint-tracking analysis
* from `TaintTrackingParameter::Public` and adds a global analysis, mainly
* exposed through the `Configuration` class. For some languages, this file
* exists in several identical copies, allowing queries to use multiple
* `Configuration` classes that depend on each other without introducing
* mutual recursion among those configurations.
*/
import TaintTrackingParameter::Public
private import TaintTrackingParameter::Private
/**
 * A configuration of the interprocedural taint tracking analysis. It defines
 * the sources, the sinks, and every other configurable aspect of the
 * analysis. Each use of the taint tracking library must define its own
 * unique extension of this abstract class.
 *
 * A taint-tracking configuration is a special data flow configuration
 * (`DataFlow::Configuration`) that additionally allows flow through nodes
 * that do not necessarily preserve values but are still relevant from a
 * taint tracking perspective (for example, string concatenation where one of
 * the operands is tainted).
 *
 * To create a configuration, extend this class with a subclass whose
 * characteristic predicate is a unique singleton string. For example, write
 *
 * ```ql
 * class MyAnalysisConfiguration extends TaintTracking::Configuration {
 *   MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
 *   // Override `isSource` and `isSink`.
 *   // Optionally override `isSanitizer`.
 *   // Optionally override `isSanitizerIn`.
 *   // Optionally override `isSanitizerOut`.
 *   // Optionally override `isSanitizerGuard`.
 *   // Optionally override `isAdditionalTaintStep`.
 * }
 * ```
 *
 * Then, to query whether there is flow between some `source` and `sink`,
 * write
 *
 * ```ql
 * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
 * ```
 *
 * Multiple configurations can coexist, but it is unsupported to depend on
 * another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
 * overridden predicates that define sources, sinks, or additional steps.
 * Instead, the dependency should go to a `TaintTracking2::Configuration` or a
 * `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
 */
abstract class Configuration extends DataFlow::Configuration {
  bindingset[this]
  Configuration() { any() }

  /**
   * Holds if `source` is a relevant taint source.
   *
   * The smaller this predicate is, the faster `hasFlow()` will converge.
   */
  // overridden to provide taint-tracking specific qldoc
  abstract override predicate isSource(DataFlow::Node source);

  /**
   * Holds if `sink` is a relevant taint sink.
   *
   * The smaller this predicate is, the faster `hasFlow()` will converge.
   */
  // overridden to provide taint-tracking specific qldoc
  abstract override predicate isSink(DataFlow::Node sink);

  /** Holds if the node `node` is a taint sanitizer. */
  predicate isSanitizer(DataFlow::Node node) { none() }

  // A data-flow barrier is either a configuration-specific sanitizer or one
  // of the language's default taint sanitizers.
  final override predicate isBarrier(DataFlow::Node node) {
    this.isSanitizer(node)
    or
    defaultTaintSanitizer(node)
  }

  /** Holds if taint propagation into `node` is prohibited. */
  predicate isSanitizerIn(DataFlow::Node node) { none() }

  final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }

  /** Holds if taint propagation out of `node` is prohibited. */
  predicate isSanitizerOut(DataFlow::Node node) { none() }

  final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }

  /** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
  predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }

  final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
    this.isSanitizerGuard(guard)
  }

  /**
   * Holds if the additional taint propagation step from `node1` to `node2`
   * must be taken into account in the analysis.
   */
  predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }

  // The additional data-flow steps are the configuration-specific taint steps
  // together with the language's default taint steps.
  final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
    this.isAdditionalTaintStep(node1, node2)
    or
    defaultAdditionalTaintStep(node1, node2)
  }

  // Implicit reads are only permitted at sinks and at the start of
  // configuration-specific additional taint steps.
  override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
    (
      this.isSink(node)
      or
      this.isAdditionalTaintStep(node, _)
    ) and
    defaultImplicitTaintRead(node, c)
  }

  /**
   * Holds if taint may flow from `source` to `sink` for this configuration.
   */
  // overridden to provide taint-tracking specific qldoc
  override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
    super.hasFlow(source, sink)
  }
}

View File

@@ -0,0 +1,6 @@
import codeql.ruby.dataflow.internal.TaintTrackingPublic as Public
/**
 * Bundles the imports of `codeql.ruby.DataFlow::DataFlow` (under the alias
 * `DataFlow`) and `codeql.ruby.dataflow.internal.TaintTrackingPrivate` so
 * that they can be pulled in together via this module.
 */
module Private {
import codeql.ruby.DataFlow::DataFlow as DataFlow
import codeql.ruby.dataflow.internal.TaintTrackingPrivate
}

View File

@@ -0,0 +1,43 @@
/** Provides classes for detecting generated code. */
private import ruby
private import codeql.ruby.ast.internal.TreeSitter
/**
 * A source file that contains generated code.
 *
 * This class is abstract: extend it to contribute further ways of
 * recognizing generated files.
 */
abstract class GeneratedCodeFile extends RubyFile { }
/** A file containing a comment that suggests the file holds generated code. */
class GeneratedCommentFile extends GeneratedCodeFile {
  GeneratedCommentFile() {
    exists(GeneratedCodeComment marker | this = marker.getLocation().getFile())
  }
}
/**
 * A comment line that indicates generated code.
 *
 * This class is abstract: extend it to recognize additional marker comments.
 */
abstract class GeneratedCodeComment extends Ruby::Comment { }
/**
 * A generic comment line suggesting that the file is generated, such as
 * "This file was automatically generated".
 */
class GenericGeneratedCodeComment extends GeneratedCodeComment {
  GenericGeneratedCodeComment() {
    exists(string subject, string verb, string adverb, string pattern |
      subject = "file|class|art[ei]fact|module|script" and
      verb = "was|is|has been" and
      adverb = "automatically |mechanically |auto[- ]?" and
      // case-insensitive; the adverb is optional
      pattern =
        "(?i).*\\bThis (" + subject + ") (" + verb + ") (" + adverb + ")?generated\\b.*" and
      this.getValue().regexpMatch(pattern)
    )
  }
}
/** A comment that warns against modifying the file. */
class DontModifyMarkerComment extends GeneratedCodeComment {
  DontModifyMarkerComment() {
    // The comment text matches at least one of these case-insensitive patterns.
    this.getValue()
        .regexpMatch([
            "(?i).*\\bGenerated by\\b.*\\bDo not edit\\b.*",
            "(?i).*\\bAny modifications to this file will be lost\\b.*"
          ])
  }
}
/** Holds if `file` looks like it contains generated code. */
predicate isGeneratedCode(GeneratedCodeFile file) {
  // Every `GeneratedCodeFile` qualifies; the parameter type does the filtering.
  file instanceof GeneratedCodeFile
}

View File

@@ -0,0 +1,259 @@
private import codeql.ruby.AST
private import codeql.ruby.Concepts
private import codeql.ruby.controlflow.CfgNodes
private import codeql.ruby.DataFlow
private import codeql.ruby.dataflow.RemoteFlowSources
private import codeql.ruby.ast.internal.Module
private import ActionView
// A read of the constant `Base` whose scope expression is a constant access
// named `ActionController`, i.e. `ActionController::Base`.
private class ActionControllerBaseAccess extends ConstantReadAccess {
  ActionControllerBaseAccess() {
    exists(ConstantAccess scope |
      scope = this.getScopeExpr() and
      scope.getName() = "ActionController"
    ) and
    this.getName() = "Base"
  }
}
// A read of a constant named `ApplicationController`.
// `ApplicationController` extends `ActionController::Base`, but it is
// recognized by name here in case its definition is not in the database.
private class ApplicationControllerAccess extends ConstantReadAccess {
  ApplicationControllerAccess() { "ApplicationController" = this.getName() }
}
/**
 * A `ClassDeclaration` for a class that extends `ActionController::Base`,
 * either directly or through another controller class.
 * For example,
 *
 * ```rb
 * class FooController < ActionController::Base
 *   def delete_handler
 *     uid = params[:id]
 *     User.delete_by("id = ?", uid)
 *   end
 * end
 * ```
 */
class ActionControllerControllerClass extends ClassDeclaration {
  ActionControllerControllerClass() {
    exists(Expr superclass | superclass = this.getSuperclassExpr() |
      // class FooController < ActionController::Base
      superclass instanceof ActionControllerBaseAccess
      or
      // class FooController < ApplicationController
      superclass instanceof ApplicationControllerAccess
      or
      // class BarController < FooController
      resolveScopeExpr(superclass) = any(ActionControllerControllerClass other).getModule()
    )
  }

  /**
   * Gets an `ActionControllerActionMethod` defined in this class.
   */
  ActionControllerActionMethod getAnAction() { this.getAMethod() = result }
}
/**
 * An instance method defined within an `ActionController` controller class.
 * Such a method may be the target of a route handler, if a route for it is
 * defined.
 */
class ActionControllerActionMethod extends Method, HTTP::Server::RequestHandler::Range {
  private ActionControllerControllerClass controllerClass;

  ActionControllerActionMethod() { controllerClass.getAMethod() = this }

  /**
   * Gets the default template file for this method, mapping a method within
   * the file `<sourcePrefix>app/controllers/<subpath>_controller.rb` to the
   * template file at
   * `<sourcePrefix>app/views/<subpath>/<method_name>.html.erb`.
   */
  ErbFile getDefaultTemplateFile() {
    result.getBaseName() = this.getName() + ".html.erb" and
    controllerTemplateFile(this.getControllerClass(), result)
  }

  // params come from the `params` method rather than from a method parameter
  override Parameter getARoutedParameter() { none() }

  override string getFramework() { result = "ActionController" }

  /** Gets a call to `render` nested anywhere inside this method. */
  RenderCall getARenderCall() { this = result.getParent+() }

  // TODO: model the implicit render call when a path through the method does
  // not end at an explicit render or redirect
  /** Gets the controller class containing this method. */
  ActionControllerControllerClass getControllerClass() { controllerClass = result }
}
// A method call with a `self` receiver whose enclosing module is a
// controller class.
private class ActionControllerContextCall extends MethodCall {
  private ActionControllerControllerClass controllerClass;

  ActionControllerContextCall() {
    controllerClass = this.getEnclosingModule() and
    this.getReceiver() instanceof Self
  }

  // Gets the controller class that encloses this call.
  ActionControllerControllerClass getControllerClass() { controllerClass = result }
}
/**
 * A call to a method named `params`, used to fetch the request parameters.
 */
abstract class ParamsCall extends MethodCall {
  ParamsCall() { "params" = this.getMethodName() }
}
/**
 * A `RemoteFlowSource::Range` representing an access to the ActionController
 * request parameters via a call to the `params` method.
 */
class ParamsSource extends RemoteFlowSource::Range {
  ParamsCall call;

  ParamsSource() { call = this.asExpr().getExpr() }

  override string getSourceType() { result = "ActionController::Metal#params" }
}
// A `self.params` call whose enclosing module is a controller class.
private class ActionControllerParamsCall extends ActionControllerContextCall, ParamsCall { }
// A `self.render` call whose enclosing module is a controller class.
private class ActionControllerRenderCall extends ActionControllerContextCall, RenderCall { }
// A `RenderToCall` with a `self` receiver whose enclosing module is a
// controller class.
private class ActionControllerRenderToCall extends ActionControllerContextCall, RenderToCall { }
// An `HtmlSafeCall` whose enclosing module is a controller class.
private class ActionControllerHtmlSafeCall extends HtmlSafeCall {
  ActionControllerHtmlSafeCall() {
    exists(ActionControllerControllerClass controller | controller = this.getEnclosingModule())
  }
}
// An `HtmlEscapeCall` whose enclosing module is a controller class.
private class ActionControllerHtmlEscapeCall extends HtmlEscapeCall {
  ActionControllerHtmlEscapeCall() {
    exists(ActionControllerControllerClass controller | controller = this.getEnclosingModule())
  }
}
/**
 * A call to the `redirect_to` method, used in an action to redirect to a
 * specific URL/path or to a different action in this controller.
 */
class RedirectToCall extends ActionControllerContextCall {
  RedirectToCall() { "redirect_to" = this.getMethodName() }

  /** Gets the `Expr` representing the URL to redirect to, if any. */
  Expr getRedirectUrl() { this.getArgument(0) = result }

  /**
   * Gets the `ActionControllerActionMethod` to redirect to, if any. This is a
   * method of the enclosing controller class whose name equals the value of
   * the `action` keyword argument.
   */
  ActionControllerActionMethod getRedirectActionMethod() {
    result.getEnclosingModule() = this.getControllerClass() and
    result.getName() = this.getKeywordArgument("action").(StringlikeLiteral).getValueText()
  }
}
/**
 * A call to the `redirect_to` method, viewed as an `HttpRedirectResponse`.
 */
class ActionControllerRedirectResponse extends HTTP::Server::HttpRedirectResponse::Range {
  RedirectToCall redirectToCall;

  ActionControllerRedirectResponse() { redirectToCall = this.asExpr().getExpr() }

  // No body, content-type argument or default mimetype is modeled for redirects.
  override DataFlow::Node getBody() { none() }

  override DataFlow::Node getMimetypeOrContentTypeArg() { none() }

  override string getMimetypeDefault() { none() }

  // The redirect location is the URL argument of the `redirect_to` call.
  override DataFlow::Node getRedirectLocation() {
    redirectToCall.getRedirectUrl() = result.asExpr().getExpr()
  }
}
/**
 * A method in an `ActionController` class that is accessible from within a
 * Rails view as a helper method. For instance, in:
 *
 * ```rb
 * class FooController < ActionController::Base
 *   helper_method :logged_in?
 *   def logged_in?
 *     @current_user != nil
 *   end
 * end
 * ```
 *
 * the `logged_in?` method is a helper method.
 * See also https://api.rubyonrails.org/classes/AbstractController/Helpers/ClassMethods.html#method-i-helper_method
 */
class ActionControllerHelperMethod extends Method {
  private ActionControllerControllerClass controllerClass;

  ActionControllerHelperMethod() {
    controllerClass = this.getEnclosingModule() and
    // the same controller class contains a `helper_method` call naming this method
    exists(MethodCall marker |
      marker.getEnclosingModule() = controllerClass and
      marker.getMethodName() = "helper_method"
    |
      this.getName() = marker.getAnArgument().(StringlikeLiteral).getValueText()
    )
  }

  /** Gets the class containing this helper method. */
  ActionControllerControllerClass getControllerClass() { controllerClass = result }
}
/**
 * Gets an `ActionControllerControllerClass` associated with the given `ErbFile`
 * according to Rails path conventions.
 * For instance, a template file at `app/views/foo/bar/baz.html.erb` will be
 * mapped to a controller class in `app/controllers/foo/bar/baz_controller.rb`,
 * if such a controller class exists.
 */
ActionControllerControllerClass getAssociatedControllerClass(ErbFile f) {
  // Direct mapping from the template file to a controller class.
  controllerTemplateFile(result, f)
  or
  // `f` is a partial rendered from within another template file; it then
  // inherits the controller classes associated with that rendering template.
  f.isPartial() and
  exists(RenderCall renderer |
    renderer.getTemplateFile() = f and
    result = getAssociatedControllerClass(renderer.getLocation().getFile())
  )
}
// TODO: improve layout support, e.g. for `layout` method
// https://guides.rubyonrails.org/layouts_and_rendering.html
/**
 * Holds if `templateFile` is a viable file "belonging" to the given
 * `ActionControllerControllerClass`, according to Rails conventions.
 *
 * This handles mappings between controllers in `app/controllers/`, and
 * templates in `app/views/` and `app/views/layouts/`. For a controller at
 * `<sourcePrefix>app/controllers/<subPath>_controller.rb` the matching
 * templates are those directly inside `<sourcePrefix>app/views/<subPath>`
 * and those whose relative path starts with
 * `<sourcePrefix>app/views/layouts/<subPath>`.
 */
predicate controllerTemplateFile(ActionControllerControllerClass cls, ErbFile templateFile) {
  exists(string templatesPath, string sourcePrefix, string subPath, string controllerPath |
    controllerPath = cls.getLocation().getFile().getRelativePath() and
    templatesPath = templateFile.getParentContainer().getRelativePath() and
    // `sourcePrefix` is either a prefix path ending in a slash, or empty if
    // the rails app is at the source root. The directory part below
    // `app/controllers/` is optional, so that controllers located directly in
    // `<sourcePrefix>app/controllers/` (not only namespaced ones in a
    // subdirectory) also yield their prefix.
    sourcePrefix =
      [controllerPath.regexpCapture("^(.*/)app/controllers/(?:.*/)?(?:[^/]*)$", 1), ""] and
    controllerPath = sourcePrefix + "app/controllers/" + subPath + "_controller.rb" and
    (
      templatesPath = sourcePrefix + "app/views/" + subPath
      or
      templateFile.getRelativePath().matches(sourcePrefix + "app/views/layouts/" + subPath + "%")
    )
  )
}

View File

@@ -0,0 +1,138 @@
private import codeql.ruby.AST
private import codeql.ruby.Concepts
private import codeql.ruby.controlflow.CfgNodes
private import codeql.ruby.DataFlow
private import codeql.ruby.dataflow.RemoteFlowSources
private import codeql.ruby.ast.internal.Module
private import ActionController
/** Holds if `n` is located in an ERB template file. */
predicate inActionViewContext(AstNode n) {
  // Within a template
  exists(ErbFile template | template = n.getLocation().getFile())
}
/**
 * A call to a method named `html_safe`, which marks a string as HTML safe
 * for Rails. Strings marked as such will not be automatically escaped when
 * inserted into HTML.
 */
abstract class HtmlSafeCall extends MethodCall {
  HtmlSafeCall() { "html_safe" = this.getMethodName() }
}
// An `HtmlSafeCall` located in an ERB template file.
private class ActionViewHtmlSafeCall extends HtmlSafeCall {
  ActionViewHtmlSafeCall() { this.getLocation().getFile() instanceof ErbFile }
}
/**
 * A call to a method named "html_escape", "html_escape_once", or "h".
 */
abstract class HtmlEscapeCall extends MethodCall {
  HtmlEscapeCall() {
    exists(string name | name = this.getMethodName() |
      name = "html_escape"
      or
      name = "html_escape_once"
      or
      // "h" is aliased to "html_escape" in ActiveSupport
      name = "h"
    )
  }
}
/**
 * A call node for an `HtmlEscapeCall`, modeled as an HTML `Escaping` whose
 * input is the first argument and whose output is the call itself.
 */
class RailsHtmlEscaping extends Escaping::Range, DataFlow::CallNode {
  RailsHtmlEscaping() { exists(HtmlEscapeCall escape | escape = this.asExpr().getExpr()) }

  override DataFlow::Node getAnInput() { this.getArgument(0) = result }

  override DataFlow::Node getOutput() { this = result }

  override string getKind() { Escaping::getHtmlKind() = result }
}
// An `HtmlEscapeCall` located in an ERB template file.
private class ActionViewHtmlEscapeCall extends HtmlEscapeCall {
  ActionViewHtmlEscapeCall() { this.getLocation().getFile() instanceof ErbFile }
}
// A call with a `self` receiver in a context where some commonly used
// `ActionView` methods are available.
private class ActionViewContextCall extends MethodCall {
  ActionViewContextCall() {
    inActionViewContext(this) and
    this.getReceiver() instanceof Self
  }

  // Holds if this call is located in an ERB template file.
  predicate isInErbFile() { exists(ErbFile f | f = this.getLocation().getFile()) }
}
/** A call to the `raw` method, which outputs a value without HTML escaping. */
class RawCall extends ActionViewContextCall {
  RawCall() { "raw" = this.getMethodName() }
}
// A `self.params` call that is in an ActionView context (see
// `inActionViewContext`), i.e. inside a template.
private class ActionViewParamsCall extends ActionViewContextCall, ParamsCall { }
/**
 * A call to a `render` method that will populate the response body with the
 * rendered content.
 */
abstract class RenderCall extends MethodCall {
  RenderCall() { "render" = this.getMethodName() }

  // Gets an argument that may name the template: one of the `partial`,
  // `template` or `action` keyword arguments, or the first argument.
  private Expr getTemplatePathArgument() {
    // TODO: support other ways of specifying paths (e.g. `file`)
    result = this.getKeywordArgument(["partial", "template", "action"])
    or
    result = this.getArgument(0)
  }

  private string getTemplatePathValue() { this.getTemplatePathArgument().getValueText() = result }

  // everything up to and including the final slash, but ignoring any leading slash
  private string getSubPath() {
    exists(string path | path = this.getTemplatePathValue() |
      result = path.regexpCapture("^/?(.*/)?(?:[^/]*?)$", 1)
    )
  }

  // everything after the final slash, or the whole string if there is no slash
  private string getBaseName() {
    exists(string path | path = this.getTemplatePathValue() |
      result = path.regexpCapture("^/?(?:.*/)?([^/]*?)$", 1)
    )
  }

  /**
   * Gets the template file to be rendered by this call, if any.
   */
  ErbFile getTemplateFile() {
    result.getRelativePath().matches("%app/views/" + this.getSubPath() + "%") and
    result.getTemplateName() = this.getBaseName()
  }

  /**
   * Gets the hash literal passed as the `locals` keyword argument, i.e. the
   * local variables passed as context to the renderer.
   */
  HashLiteral getLocals() { this.getKeywordArgument("locals") = result }
  // TODO: implicit renders in controller actions
}
// A `self.render` call that is in an ActionView context (see
// `inActionViewContext`), i.e. inside a template.
private class ActionViewRenderCall extends RenderCall, ActionViewContextCall { }
/**
 * A render call that does not automatically set the HTTP response body,
 * i.e. a call to `render_to_body` or `render_to_string`.
 */
abstract class RenderToCall extends MethodCall {
  RenderToCall() {
    exists(string name | name = this.getMethodName() |
      name = "render_to_body" or name = "render_to_string"
    )
  }
}
// A `RenderToCall` (`render_to_body` or `render_to_string`) with a `self`
// receiver that is in an ActionView context, i.e. inside a template.
private class ActionViewRenderToCall extends ActionViewContextCall, RenderToCall { }
/**
 * A call to the ActionView `link_to` helper method.
 *
 * This generates an HTML anchor tag. The method is not designed to expect
 * user-input, so provided paths are not automatically HTML escaped.
 */
class LinkToCall extends ActionViewContextCall {
  LinkToCall() { "link_to" = this.getMethodName() }

  /** Gets the argument that specifies the path of the generated link. */
  Expr getPathArgument() {
    // When `link_to` is called with a block, it uses the first argument as the
    // path, and otherwise the second argument.
    if exists(this.getBlock())
    then result = this.getArgument(0)
    else result = this.getArgument(1)
  }
}
// TODO: model flow in/out of template files properly.

View File

@@ -0,0 +1,319 @@
private import codeql.ruby.AST
private import codeql.ruby.Concepts
private import codeql.ruby.controlflow.CfgNodes
private import codeql.ruby.DataFlow
private import codeql.ruby.dataflow.internal.DataFlowDispatch
private import codeql.ruby.ast.internal.Module
private import codeql.ruby.ApiGraphs
private import codeql.ruby.frameworks.StandardLibrary
// A read of the constant `Base` whose scope expression is a constant access
// named `ActiveRecord`, i.e. `ActiveRecord::Base`.
private class ActiveRecordBaseAccess extends ConstantReadAccess {
  ActiveRecordBaseAccess() {
    exists(ConstantAccess scope |
      scope = this.getScopeExpr() and
      scope.getName() = "ActiveRecord"
    ) and
    this.getName() = "Base"
  }
}
// A read of a constant named `ApplicationRecord`.
// `ApplicationRecord` extends `ActiveRecord::Base`, but it is recognized by
// name here in case its definition is not in the database.
private class ApplicationRecordAccess extends ConstantReadAccess {
  ApplicationRecordAccess() { "ApplicationRecord" = this.getName() }
}
// Gets the name of an instance method treated here as an
// `ActiveRecord::Persistence` method.
// See https://api.rubyonrails.org/classes/ActiveRecord/Persistence.html
private string activeRecordPersistenceInstanceMethodName() {
result =
[
"becomes", "becomes!", "decrement", "decrement!", "delete", "delete!", "destroy", "destroy!",
"destroyed?", "increment", "increment!", "new_record?", "persisted?",
"previously_new_record?", "reload", "save", "save!", "toggle", "toggle!", "touch", "update",
"update!", "update_attribute", "update_column", "update_columns"
]
}
// Holds if `methodName` is the name of a method that is defined for all
// active record model instances, and is therefore unlikely to refer to a
// database field.
private predicate isBuiltInMethodForActiveRecordModelInstance(string methodName) {
  methodName =
    [
      activeRecordPersistenceInstanceMethodName(), basicObjectInstanceMethodName(),
      objectInstanceMethodName()
    ]
}
/**
 * A `ClassDeclaration` for a class that extends `ActiveRecord::Base`, either
 * directly, via `ApplicationRecord`, or via another model class. For example,
 *
 * ```rb
 * class UserGroup < ActiveRecord::Base
 *   has_many :users
 * end
 * ```
 */
class ActiveRecordModelClass extends ClassDeclaration {
ActiveRecordModelClass() {
// class Foo < ActiveRecord::Base
this.getSuperclassExpr() instanceof ActiveRecordBaseAccess
or
// class Foo < ApplicationRecord
this.getSuperclassExpr() instanceof ApplicationRecordAccess
or
// class Bar < Foo
exists(ActiveRecordModelClass other |
other.getModule() = resolveScopeExpr(this.getSuperclassExpr())
)
}
// Gets the class declaration for this class and all of its super classes
private ModuleBase getAllClassDeclarations() {
result = this.getModule().getSuperClass*().getADeclaration()
}
/**
 * Gets a method defined in this class (or one of its super classes) that may
 * access a field from the database.
 *
 * This predicate is recursive: a method qualifies when a value it returns
 * comes from a call on a model instance, where that call is not to a
 * built-in method and either has no matching method definition on the
 * receiver's class or targets another potential field access method.
 */
Method getAPotentialFieldAccessMethod() {
// It's a method on this class or one of its super classes
result = this.getAllClassDeclarations().getAMethod() and
// There is a value that can be returned by this method which may include field data
exists(DataFlow::Node returned, ActiveRecordInstanceMethodCall cNode, MethodCall c |
exprNodeReturnedFrom(returned, result) and
cNode.flowsTo(returned) and
c = cNode.asExpr().getExpr()
|
// The referenced method is not built-in, and...
not isBuiltInMethodForActiveRecordModelInstance(c.getMethodName()) and
(
// ...The receiver does not have a matching method definition, or...
// NOTE(review): `getInstance().getClass()` is evaluated inside the negation,
// so this requires that no class of any tracked instance defines the method.
not exists(
cNode.getInstance().getClass().getAllClassDeclarations().getMethod(c.getMethodName())
)
or
// ...the called method can access a field
c.getATarget() = cNode.getInstance().getClass().getAPotentialFieldAccessMethod()
)
)
}
}
/** A class method call whose receiver is an `ActiveRecordModelClass`. */
class ActiveRecordModelClassMethodCall extends MethodCall {
  private ActiveRecordModelClass recvCls;

  ActiveRecordModelClassMethodCall() {
    // e.g. Foo.where(...)
    resolveScopeExpr(this.getReceiver()) = recvCls.getModule()
    or
    // e.g. Foo.joins(:bars).where(...)
    this.getReceiver().(ActiveRecordModelClassMethodCall).getReceiverClass() = recvCls
    or
    // e.g. self.where(...) within an ActiveRecordModelClass
    recvCls = this.getEnclosingModule() and
    this.getReceiver() instanceof Self
  }

  /** Gets the `ActiveRecordModelClass` of the receiver of this method call. */
  ActiveRecordModelClass getReceiverClass() { recvCls = result }
}
// Gets the argument of `call` that is treated as an SQL fragment, selected
// by the name of the called method.
private Expr sqlFragmentArgument(MethodCall call) {
  exists(string name | name = call.getMethodName() |
    // methods taking the fragment as their first argument
    name =
      [
        "delete_all", "delete_by", "destroy_all", "destroy_by", "exists?", "find_by", "find_by!",
        "find_or_create_by", "find_or_create_by!", "find_or_initialize_by", "find_by_sql", "from",
        "group", "having", "joins", "lock", "not", "order", "pluck", "where", "rewhere", "select",
        "reselect", "update_all", "average", "count", "maximum", "minimum", "sum"
      ] and
    result = call.getArgument(0)
    or
    name = "calculate" and
    result = call.getArgument(1)
    or
    // This format was supported until Rails 2.3.8
    name = ["all", "find", "first", "last"] and
    result = call.getKeywordArgument("conditions")
    or
    name = "reload" and
    result = call.getKeywordArgument("lock")
  )
}
// Holds if `sqlFragmentExpr` is an expression that, if tainted by
// unsanitized input, should not be used as part of an argument to an SQL
// executing method.
private predicate unsafeSqlExpr(Expr sqlFragmentExpr) {
  // Literals containing an interpolated value
  sqlFragmentExpr.(StringlikeLiteral).getComponent(_) instanceof StringInterpolationComponent
  or
  // String concatenations
  sqlFragmentExpr instanceof AddExpr
  or
  // Variable reads
  sqlFragmentExpr instanceof VariableReadAccess
  or
  // Method call
  sqlFragmentExpr instanceof MethodCall
}
/**
 * A method call that may result in executing unintended user-controlled SQL
 * queries if the `getSqlFragmentSinkArgument()` expression is tainted by
 * unsanitized user-controlled input. For example, supposing that `User` is an
 * `ActiveRecord` model class, then
 *
 * ```rb
 * User.where("name = '#{user_name}'")
 * ```
 *
 * may be unsafe if `user_name` is from unsanitized user input, as a value such
 * as `"') OR 1=1 --"` could result in the application looking up all users
 * rather than just one with a matching name.
 */
class PotentiallyUnsafeSqlExecutingMethodCall extends ActiveRecordModelClassMethodCall {
  // The SQL fragment argument itself
  private Expr sqlFragmentExpr;

  PotentiallyUnsafeSqlExecutingMethodCall() {
    // The fragment is the argument itself or the first element of an array
    // literal argument.
    exists(Expr arg | arg = sqlFragmentArgument(this) |
      sqlFragmentExpr = [arg, arg.(ArrayLiteral).getElement(0)]
    ) and
    unsafeSqlExpr(sqlFragmentExpr) and
    // Check that method has not been overridden
    not exists(SingletonMethod m |
      m.getName() = this.getMethodName() and
      m.getOuterScope() = this.getReceiverClass()
    )
  }

  /** Gets the SQL fragment expression that acts as the sink of this call. */
  Expr getSqlFragmentSinkArgument() { sqlFragmentExpr = result }
}
/**
 * An `SqlExecution::Range` for an argument to a
 * `PotentiallyUnsafeSqlExecutingMethodCall` that may be vulnerable to being
 * controlled by user input.
 */
class ActiveRecordSqlExecutionRange extends SqlExecution::Range {
  ActiveRecordSqlExecutionRange() {
    this.asExpr().getNode() =
      any(PotentiallyUnsafeSqlExecutingMethodCall mc).getSqlFragmentSinkArgument()
  }

  // The node itself is the SQL being executed.
  override DataFlow::Node getSql() { this = result }
}
// TODO: model `ActiveRecord` sanitizers
// https://api.rubyonrails.org/classes/ActiveRecord/Sanitization/ClassMethods.html
/**
 * A node that may evaluate to one or more `ActiveRecordModelClass` instances.
 */
abstract class ActiveRecordModelInstantiation extends OrmInstantiation::Range,
  DataFlow::LocalSourceNode {
  /** Gets the model class of the instance(s) this node may evaluate to. */
  abstract ActiveRecordModelClass getClass();

  bindingset[methodName]
  override predicate methodCallMayAccessField(string methodName) {
    // The method is not a built-in, and...
    not isBuiltInMethodForActiveRecordModelInstance(methodName) and
    (
      // ...There is no matching method definition in the class, or...
      not exists(this.getClass().getMethod(methodName))
      or
      // ...the called method can access a field.
      this.getClass().getAPotentialFieldAccessMethod().getName() = methodName
    )
  }
}
// Gets the name of a class method on ActiveRecord models that may return one
// or more instances of that model. Each finder name is included both with and
// without a trailing `!`; `new` is included as well.
// See https://api.rubyonrails.org/classes/ActiveRecord/FinderMethods.html
private string finderMethodName() {
  result =
    [
      "fifth", "find", "find_by", "find_or_initialize_by", "find_or_create_by", "first",
      "forty_two", "fourth", "last", "second", "second_to_last", "take", "third", "third_to_last"
    ] + ["", "!"]
  or
  result = "new"
}
// Gets the "final" receiver in a chain of method calls.
// For example, in `Foo.bar`, this would give the `Foo` access, and in
// `foo.bar.baz("arg")` it would give the `foo` variable access
private Expr getUltimateReceiver(MethodCall call) {
  exists(Expr recv | recv = call.getReceiver() |
    // keep walking up the chain while the receiver is itself a method call
    if recv instanceof MethodCall then result = getUltimateReceiver(recv) else result = recv
  )
}
// A call to a finder method (see `finderMethodName`) that may return active
// record model object(s). The ultimate receiver of the call chain must
// resolve to the qualified name of a model class.
private class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode {
  private MethodCall call;
  private ActiveRecordModelClass cls;
  private Expr recv;

  ActiveRecordModelFinderCall() {
    call = this.asExpr().getExpr() and
    call.getMethodName() = finderMethodName() and
    recv = getUltimateReceiver(call) and
    cls.getQualifiedName() = resolveConstant(recv)
  }

  final override ActiveRecordModelClass getClass() { cls = result }
}
// A `self` reference that may resolve to an active record model object: a
// `self` inside a method of a model class.
private class ActiveRecordModelClassSelfReference extends ActiveRecordModelInstantiation {
  private ActiveRecordModelClass cls;

  ActiveRecordModelClassSelfReference() {
    exists(Self s | s = this.asExpr().getExpr() |
      s.getEnclosingMethod() = cls.getAMethod() and
      s.getEnclosingModule() = cls
    )
  }

  final override ActiveRecordModelClass getClass() { cls = result }
}
// A (locally tracked) active record model object: either an instantiation
// itself or a node that an instantiation flows to.
private class ActiveRecordInstance extends DataFlow::Node {
  private ActiveRecordModelInstantiation instantiation;

  ActiveRecordInstance() {
    instantiation.flowsTo(this)
    or
    this = instantiation
  }

  // Gets the model class of the originating instantiation.
  ActiveRecordModelClass getClass() { instantiation.getClass() = result }
}
// A call whose receiver may be an active record model object
private class ActiveRecordInstanceMethodCall extends DataFlow::CallNode {
  private ActiveRecordInstance instance;

  ActiveRecordInstanceMethodCall() { instance = this.getReceiver() }

  // Gets the model object that is the receiver of this call.
  ActiveRecordInstance getInstance() { instance = result }
}

View File

@@ -0,0 +1,299 @@
/**
* Provides classes for working with file system libraries.
*/
private import ruby
private import codeql.ruby.Concepts
private import codeql.ruby.ApiGraphs
private import codeql.ruby.DataFlow
private import codeql.ruby.frameworks.StandardLibrary
// Gets a node that creates an `IO` instance: an instantiation of `IO`, or a
// call to `IO.for_fd`, `IO.open` or `IO.try_convert`.
private DataFlow::Node ioInstanceInstantiation() {
  exists(API::Node io | io = API::getTopLevelMember("IO") |
    result = io.getAnInstantiation()
    or
    result = io.getAMethodCall(["for_fd", "open", "try_convert"])
  )
}
// Gets a node that may hold an `IO` instance: an instantiation, or any node
// that an already known instance (as a local source node) flows to.
private DataFlow::Node ioInstance() {
  result = ioInstanceInstantiation()
  or
  ioInstance().(DataFlow::LocalSourceNode).flowsTo(result)
}
// Holds if `arg` is a string-like literal whose first character is `|`.
// This matches some simple cases where a path argument specifies a shell
// command to be executed. For example, the `"|date"` argument in
// `IO.read("|date")`, which will execute a shell command and read its output
// rather than reading from the filesystem.
private predicate pathArgSpawnsSubprocess(Expr arg) {
  exists(string text | text = arg.(StringlikeLiteral).getValueText() | text.charAt(0) = "|")
}
// Gets a node that creates a `File` instance: an instantiation of `File`, a
// call to `File.open`, or a `Kernel.open` call that does not spawn a
// subprocess.
private DataFlow::Node fileInstanceInstantiation() {
  result = API::getTopLevelMember("File").getAnInstantiation()
  or
  result = API::getTopLevelMember("File").getAMethodCall("open")
  or
  // Calls to `Kernel.open` can yield `File` instances
  exists(KernelMethodCall open | open = result and open.getMethodName() = "open" |
    // Assume that calls that don't invoke shell commands will instead open
    // a file.
    not pathArgSpawnsSubprocess(open.getArgument(0).asExpr().getExpr())
  )
}
// Gets a node that may hold a `File` instance: an instantiation, or any node
// that an already known instance (as a local source node) flows to.
private DataFlow::Node fileInstance() {
  result = fileInstanceInstantiation()
  or
  fileInstance().(DataFlow::LocalSourceNode).flowsTo(result)
}
// Gets the name of a class method that is treated as reading data when
// called on `IO` or `File`.
private string ioFileReaderClassMethodName() {
result = ["binread", "foreach", "read", "readlines", "try_convert"]
}
// Gets the name of an instance method that is treated as reading data when
// called on an `IO` or `File` instance.
private string ioFileReaderInstanceMethodName() {
result =
[
"getbyte", "getc", "gets", "pread", "read", "read_nonblock", "readbyte", "readchar",
"readline", "readlines", "readpartial", "sysread"
]
}
// Gets the name of a reader method: a class method name when
// `classMethodCall` is `true`, and an instance method name when it is `false`.
private string ioFileReaderMethodName(boolean classMethodCall) {
  classMethodCall = false and
  result = ioFileReaderInstanceMethodName()
  or
  classMethodCall = true and
  result = ioFileReaderClassMethodName()
}
/**
 * Classes and predicates for modeling the core `IO` module.
 */
module IO {
/**
 * An instance of the `IO` class, for example in
 *
 * ```rb
 * rand = IO.new(IO.sysopen("/dev/random", "r"), "r")
 * rand_data = rand.read(32)
 * ```
 *
 * there are 3 `IOInstance`s - the call to `IO.new`, the assignment
 * `rand = ...`, and the read access to `rand` on the second line.
 *
 * Nodes holding `File` instances are included as well.
 */
class IOInstance extends DataFlow::Node {
IOInstance() {
this = ioInstance() or
this = fileInstance()
}
}
// "Direct" `IO` instances, i.e. cases where there is no more specific
// subtype such as `File`
private class IOInstanceStrict extends IOInstance {
IOInstanceStrict() { this = ioInstance() }
}
/**
 * A `DataFlow::CallNode` that reads data using the `IO` class. For example,
 * the `IO.read` call in:
 *
 * ```rb
 * IO.read("|date")
 * ```
 *
 * returns the output of the `date` shell command, invoked as a subprocess.
 *
 * This class includes reads both from shell commands and reads from the
 * filesystem. For working with filesystem accesses specifically, see
 * `IOFileReader` or the `FileSystemReadAccess` concept.
 */
class IOReader extends DataFlow::CallNode {
// `true` when this is a call to a class method (e.g. `IO.read`), `false`
// when it is a call on an instance.
private boolean classMethodCall;
// Either "IO" or "File"; see `getAPI`.
private string api;
IOReader() {
// Class methods
api = ["File", "IO"] and
classMethodCall = true and
this = API::getTopLevelMember(api).getAMethodCall(ioFileReaderMethodName(classMethodCall))
or
// IO instance methods
classMethodCall = false and
api = "IO" and
exists(IOInstanceStrict ii |
this.getReceiver() = ii and
this.asExpr().getExpr().(MethodCall).getMethodName() =
ioFileReaderMethodName(classMethodCall)
)
or
// File instance methods
classMethodCall = false and
api = "File" and
exists(File::FileInstance fi |
this.getReceiver() = fi and
this.asExpr().getExpr().(MethodCall).getMethodName() =
ioFileReaderMethodName(classMethodCall)
)
// TODO: enumeration style methods such as `each`, `foreach`, etc.
}
/**
 * Returns the most specific core class used for this read, `IO` or `File`
 */
string getAPI() { result = api }
/** Holds if this read is a call to a class method rather than a call on an instance. */
predicate isClassMethodCall() { classMethodCall = true }
}
/**
 * A `DataFlow::CallNode` that reads data from the filesystem using the `IO`
 * class. For example, the `IO.read` call in:
 *
 * ```rb
 * IO.read("foo.txt")
 * ```
 *
 * reads the file `foo.txt` and returns its contents as a string.
 */
class IOFileReader extends IOReader, FileSystemReadAccess::Range {
IOFileReader() {
// Any read attributed to `File` is treated as a filesystem read.
this.getAPI() = "File"
or
this.isClassMethodCall() and
// Assume that calls that don't invoke shell commands will instead
// read from a file.
not pathArgSpawnsSubprocess(this.getArgument(0).asExpr().getExpr())
}
// TODO: can we infer a path argument for instance method calls?
// e.g. by tracing back to the instantiation of that instance
// Only class method calls have a path argument here: their first argument.
override DataFlow::Node getAPathArgument() {
result = this.getArgument(0) and this.isClassMethodCall()
}
// This class represents calls that return data
override DataFlow::Node getADataNode() { result = this }
}
}
/**
 * Classes and predicates for modeling the core `File` module.
 *
 * Because `File` is a subclass of `IO`, all `FileInstance`s and
 * `FileModuleReader`s are also `IOInstance`s and `IOFileReader`s
 * respectively.
 */
module File {
/**
 * An instance of the `File` class, for example in
 *
 * ```rb
 * f = File.new("foo.txt")
 * puts f.read()
 * ```
 *
 * there are 3 `FileInstance`s - the call to `File.new`, the assignment
 * `f = ...`, and the read access to `f` on the second line.
 */
class FileInstance extends IO::IOInstance {
FileInstance() { this = fileInstance() }
}
/**
 * A read using the `File` module, e.g. the `f.read` call in
 *
 * ```rb
 * f = File.new("foo.txt")
 * puts f.read()
 * ```
 */
class FileModuleReader extends IO::IOFileReader {
FileModuleReader() { this.getAPI() = "File" }
}
/**
 * A call to a `File` method that may return one or more filenames.
 */
class FileModuleFilenameSource extends FileNameSource, DataFlow::CallNode {
FileModuleFilenameSource() {
// Class methods
this =
API::getTopLevelMember("File")
.getAMethodCall([
"absolute_path", "basename", "expand_path", "join", "path", "readlink",
"realdirpath", "realpath"
])
or
// Instance methods
exists(FileInstance fi |
this.getReceiver() = fi and
this.asExpr().getExpr().(MethodCall).getMethodName() = ["path", "to_path"]
)
}
}
/**
 * A call to a `File` class method that takes a permission argument:
 * `chmod`/`lchmod` (argument 0), `mkfifo` (argument 1) and `new`/`open`
 * (argument 2). Calls that omit the permission argument are not included.
 */
private class FileModulePermissionModification extends FileSystemPermissionModification::Range,
DataFlow::CallNode {
// The argument of this call that holds the permission value.
private DataFlow::Node permissionArg;
FileModulePermissionModification() {
exists(string methodName | this = API::getTopLevelMember("File").getAMethodCall(methodName) |
methodName in ["chmod", "lchmod"] and permissionArg = this.getArgument(0)
or
methodName = "mkfifo" and permissionArg = this.getArgument(1)
or
methodName in ["new", "open"] and permissionArg = this.getArgument(2)
// TODO: defaults for optional args? This may depend on the umask
)
}
override DataFlow::Node getAPermissionNode() { result = permissionArg }
}
}
/**
 * Classes and predicates for modeling the `FileUtils` module from the standard
 * library.
 */
module FileUtils {
  /**
   * A call to a `FileUtils` method that may return one or more filenames.
   */
  class FileUtilsFilenameSource extends FileNameSource {
    FileUtilsFilenameSource() {
      // Note that many methods in FileUtils accept a `noop` option that will
      // perform a dry run of the command. This means that, for instance, `rm`
      // and similar methods may not actually delete/unlink a file when called.
      exists(string name |
        name in [
            "chmod", "chmod_R", "chown", "chown_R", "getwd", "makedirs", "mkdir", "mkdir_p",
            "mkpath", "remove", "remove_dir", "remove_entry", "rm", "rm_f", "rm_r", "rm_rf",
            "rmdir", "rmtree", "safe_unlink", "touch"
          ]
      |
        this = API::getTopLevelMember("FileUtils").getAMethodCall(name)
      )
    }
  }

  /**
   * A call to a `FileUtils` method that takes a permission value: the first
   * argument of `chmod`/`chmod_R`, or the `mode` keyword argument of
   * `install`, `makedirs`, `mkdir`, `mkdir_p` and `mkpath`.
   */
  private class FileUtilsPermissionModification extends FileSystemPermissionModification::Range,
    DataFlow::CallNode {
    // The argument of this call that holds the permission value.
    private DataFlow::Node modeNode;

    FileUtilsPermissionModification() {
      exists(string name | this = API::getTopLevelMember("FileUtils").getAMethodCall(name) |
        name = ["install", "makedirs", "mkdir", "mkdir_p", "mkpath"] and
        modeNode = this.getKeywordArgument("mode")
        or
        name = ["chmod", "chmod_R"] and modeNode = this.getArgument(0)
        // TODO: defaults for optional args? This may depend on the umask
      )
    }

    override DataFlow::Node getAPermissionNode() { result = modeNode }
  }
}

View File

@@ -0,0 +1,12 @@
/**
* Helper file that imports all HTTP clients.
*/
private import codeql.ruby.frameworks.http_clients.NetHttp
private import codeql.ruby.frameworks.http_clients.Excon
private import codeql.ruby.frameworks.http_clients.Faraday
private import codeql.ruby.frameworks.http_clients.RestClient
private import codeql.ruby.frameworks.http_clients.Httparty
private import codeql.ruby.frameworks.http_clients.HttpClient
private import codeql.ruby.frameworks.http_clients.OpenURI
private import codeql.ruby.frameworks.http_clients.Typhoeus

View File

@@ -0,0 +1,337 @@
private import codeql.ruby.AST
private import codeql.ruby.Concepts
private import codeql.ruby.DataFlow
private import codeql.ruby.ApiGraphs
/**
 * A call to a method of the `Kernel` module.
 *
 * `Kernel` is included by the `Object` class, so its methods are available
 * in every Ruby object. In addition, its module methods can be called with an
 * explicit receiver, as in `Kernel.exit`.
 */
class KernelMethodCall extends DataFlow::CallNode {
  private MethodCall methodCall;

  KernelMethodCall() {
    this.asExpr().getExpr() = methodCall and
    (
      // A call with no known target whose name is a `Kernel` method: public
      // methods on any receiver, private methods only on `self`.
      methodCall instanceof UnknownMethodCall and
      exists(string name | name = methodCall.getMethodName() |
        isPublicKernelMethod(name)
        or
        isPrivateKernelMethod(name) and
        this.getReceiver().asExpr().getExpr() instanceof Self
      )
      or
      // An explicit `Kernel.<method>` call.
      this = API::getTopLevelMember("Kernel").getAMethodCall(_)
    )
  }

  /** Gets the name of the method being called. */
  string getMethodName() { methodCall.getMethodName() = result }

  /** Gets the number of arguments of the underlying method call. */
  int getNumberOfArguments() { methodCall.getNumberOfArguments() = result }
}
/**
 * Holds if `method` is the name of a public method in the `Kernel` module.
 * Such methods can be invoked on any object via the usual dot syntax.
 * ```ruby
 * arr = []
 * arr.send("push", 5) # => [5]
 * ```
 */
private predicate isPublicKernelMethod(string method) {
  method = ["class", "clone", "frozen?", "tap", "then", "yield_self", "send"]
}
/**
 * Holds if `method` is the name of a private method in the `Kernel` module.
 * These can be invoked on `self`, on `Kernel`, or using a low-level primitive
 * like `send` or `instance_eval`.
 * ```ruby
 * puts "hello world"
 * Kernel.puts "hello world"
 * 5.instance_eval { puts "hello world" }
 * 5.send("puts", "hello world")
 * ```
 */
private predicate isPrivateKernelMethod(string method) {
  method =
    [
      "Array", "Complex", "Float", "Hash", "Integer", "Rational", "String", "__callee__",
      "__dir__", "__method__", "`", "abort", "at_exit", "autoload", "autoload?", "binding",
      "block_given?", "callcc", "caller", "caller_locations", "catch", "chomp", "chop", "eval",
      "exec", "exit", "exit!", "fail", "fork", "format", "gets", "global_variables", "gsub",
      "iterator?", "lambda", "load", "local_variables", "loop", "open", "p", "pp", "print",
      "printf", "proc", "putc", "puts", "raise", "rand", "readline", "readlines", "require",
      "require_relative", "select", "set_trace_func", "sleep", "spawn", "sprintf", "srand", "sub",
      "syscall", "system", "test", "throw", "trace_var", "trap", "untrace_var", "warn"
    ]
}
/**
 * Gets the name of a method that is treated as an instance method of
 * `BasicObject`.
 */
string basicObjectInstanceMethodName() {
  result =
    [
      "equal?", "instance_eval", "instance_exec", "method_missing", "singleton_method_added",
      "singleton_method_removed", "singleton_method_undefined"
    ]
}
/**
 * A call, with no known target, to a method whose name is one of the
 * `BasicObject` instance methods. Those methods are available to all classes.
 */
class BasicObjectInstanceMethodCall extends UnknownMethodCall {
  BasicObjectInstanceMethodCall() {
    exists(string name | name = this.getMethodName() | name = basicObjectInstanceMethodName())
  }
}
/**
 * Gets the name of a method that is treated as an instance method of
 * `Object`.
 */
string objectInstanceMethodName() {
  result =
    [
      "!~", "<=>", "===", "=~", "callable_methods", "define_singleton_method", "display",
      "do_until", "do_while", "dup", "enum_for", "eql?", "extend", "f", "freeze", "h", "hash",
      "inspect", "instance_of?", "instance_variable_defined?", "instance_variable_get",
      "instance_variable_set", "instance_variables", "is_a?", "itself", "kind_of?",
      "matching_methods", "method", "method_missing", "methods", "nil?", "object_id",
      "private_methods", "protected_methods", "public_method", "public_methods", "public_send",
      "remove_instance_variable", "respond_to?", "respond_to_missing?", "send",
      "shortest_abbreviation", "singleton_class", "singleton_method", "singleton_methods",
      "taint", "tainted?", "to_enum", "to_s", "trust", "untaint", "untrust", "untrusted?"
    ]
}
/**
 * A call, with no known target, to a method whose name is one of the `Object`
 * instance methods. Those methods are available to all classes except
 * `BasicObject`.
 */
class ObjectInstanceMethodCall extends UnknownMethodCall {
  ObjectInstanceMethodCall() {
    exists(string name | name = this.getMethodName() | name = objectInstanceMethodName())
  }
}
/**
 * Method calls which have no known target.
 * These will typically be calls to methods inherited from a superclass.
 *
 * A call belongs to this class exactly when `getATarget()` has no result
 * for it.
 */
class UnknownMethodCall extends MethodCall {
UnknownMethodCall() { not exists(this.(Call).getATarget()) }
}
/**
 * A system command executed via subshell literal syntax, for example:
 * ```ruby
 * `cat foo.txt`
 * %x(cat foo.txt)
 * %x[cat foo.txt]
 * %x{cat foo.txt}
 * %x/cat foo.txt/
 * ```
 */
class SubshellLiteralExecution extends SystemCommandExecution::Range {
  SubshellLiteral literal;

  SubshellLiteralExecution() { literal = this.asExpr().getExpr() }

  /** Gets a node for one of the components of the subshell literal. */
  override DataFlow::Node getAnArgument() { literal.getComponent(_) = result.asExpr().getExpr() }

  /** Holds for every argument: all components are treated as shell-interpreted. */
  override predicate isShellInterpreted(DataFlow::Node arg) { this.getAnArgument() = arg }
}
/**
 * A system command executed via shell heredoc syntax, for example:
 * ```ruby
 * <<`EOF`
 * cat foo.text
 * EOF
 * ```
 */
class SubshellHeredocExecution extends SystemCommandExecution::Range {
  HereDoc heredoc;

  SubshellHeredocExecution() { heredoc.isSubShell() and heredoc = this.asExpr().getExpr() }

  /** Gets a node for one of the components of the heredoc. */
  override DataFlow::Node getAnArgument() { heredoc.getComponent(_) = result.asExpr().getExpr() }

  /** Holds for every argument: all components are treated as shell-interpreted. */
  override predicate isShellInterpreted(DataFlow::Node arg) { this.getAnArgument() = arg }
}
/**
 * A system command executed via the `Kernel.system` method.
 *
 * `Kernel.system` accepts three argument forms:
 * - A single string. If it contains no shell meta characters, keywords or
 *   builtins, it is executed directly in a subprocess. Otherwise, it is
 *   executed in a subshell.
 *   ```ruby
 *   system("cat foo.txt | tail")
 *   ```
 * - A command and one or more arguments. The command is executed in a
 *   subprocess.
 *   ```ruby
 *   system("cat", "foo.txt")
 *   ```
 * - An array containing the command name and argv[0], followed by zero or
 *   more arguments. The command is executed in a subprocess.
 *   ```ruby
 *   system(["cat", "cat"], "foo.txt")
 *   ```
 *
 * In addition, `Kernel.system` accepts an optional environment hash as the
 * first argument and an optional options hash as the last argument. These are
 * not yet distinguished from the command arguments.
 * ```ruby
 * system({"FOO" => "BAR"}, "cat foo.txt | tail", {unsetenv_others: true})
 * ```
 *
 * Ruby documentation: https://docs.ruby-lang.org/en/3.0.0/Kernel.html#method-i-system
 */
class KernelSystemCall extends SystemCommandExecution::Range, KernelMethodCall {
  KernelSystemCall() { "system" = this.getMethodName() }

  /** Gets any argument of this call. */
  override DataFlow::Node getAnArgument() { this.getArgument(_) = result }

  override predicate isShellInterpreted(DataFlow::Node arg) {
    // Kernel.system invokes a subshell if you provide a single string as argument
    arg = this.getAnArgument() and
    this.getNumberOfArguments() = 1
  }
}
/**
 * A system command executed via the `Kernel.exec` method.
 *
 * `Kernel.exec` takes the same argument forms as `Kernel.system`. See
 * `KernelSystemCall` for details.
 *
 * Ruby documentation: https://docs.ruby-lang.org/en/3.0.0/Kernel.html#method-i-exec
 */
class KernelExecCall extends SystemCommandExecution::Range, KernelMethodCall {
  KernelExecCall() { "exec" = this.getMethodName() }

  /** Gets any argument of this call. */
  override DataFlow::Node getAnArgument() { this.getArgument(_) = result }

  override predicate isShellInterpreted(DataFlow::Node arg) {
    // Kernel.exec invokes a subshell if you provide a single string as argument
    arg = this.getAnArgument() and
    this.getNumberOfArguments() = 1
  }
}
/**
 * A system command executed via the `Kernel.spawn` method.
 *
 * `Kernel.spawn` takes the same argument forms as `Kernel.system`. See
 * `KernelSystemCall` for details.
 *
 * Ruby documentation: https://docs.ruby-lang.org/en/3.0.0/Kernel.html#method-i-spawn
 *
 * TODO: document and handle the env and option arguments.
 * ```
 * spawn([env,] command... [,options]) -> pid
 * ```
 */
class KernelSpawnCall extends SystemCommandExecution::Range, KernelMethodCall {
  KernelSpawnCall() { "spawn" = this.getMethodName() }

  /** Gets any argument of this call. */
  override DataFlow::Node getAnArgument() { this.getArgument(_) = result }

  override predicate isShellInterpreted(DataFlow::Node arg) {
    // Kernel.spawn invokes a subshell if you provide a single string as argument
    arg = this.getAnArgument() and
    this.getNumberOfArguments() = 1
  }
}
/**
 * A system command executed via one of the `Open3` methods `popen3`,
 * `popen2`, `popen2e`, `capture3`, `capture2` or `capture2e`.
 *
 * These methods take the same argument forms as `Kernel.system`.
 * See `KernelSystemCall` for details.
 */
class Open3Call extends SystemCommandExecution::Range {
  MethodCall methodCall;

  Open3Call() {
    exists(string name |
      name = ["popen3", "popen2", "popen2e", "capture3", "capture2", "capture2e"]
    |
      this = API::getTopLevelMember("Open3").getAMethodCall(name)
    ) and
    methodCall = this.asExpr().getExpr()
  }

  /** Gets a node for any argument of the underlying method call. */
  override DataFlow::Node getAnArgument() { methodCall.getAnArgument() = result.asExpr().getExpr() }

  override predicate isShellInterpreted(DataFlow::Node arg) {
    // These Open3 methods invoke a subshell if you provide a single string as argument
    arg.asExpr().getExpr() = methodCall.getAnArgument() and
    methodCall.getNumberOfArguments() = 1
  }
}
/**
 * A pipeline of system commands constructed via one of the `Open3` methods
 * `pipeline_rw`, `pipeline_r`, `pipeline_w`, `pipeline_start` or `pipeline`.
 *
 * These methods accept a variable argument list of commands. Commands can be
 * in any form supported by `Kernel.system`. See `KernelSystemCall` for details.
 * ```ruby
 * Open3.pipeline("cat foo.txt", "tail")
 * Open3.pipeline(["cat", "foo.txt"], "tail")
 * Open3.pipeline([{}, "cat", "foo.txt"], "tail")
 * Open3.pipeline([["cat", "cat"], "foo.txt"], "tail")
 * ```
 */
class Open3PipelineCall extends SystemCommandExecution::Range {
  MethodCall methodCall;

  Open3PipelineCall() {
    exists(string name |
      name = ["pipeline_rw", "pipeline_r", "pipeline_w", "pipeline_start", "pipeline"]
    |
      this = API::getTopLevelMember("Open3").getAMethodCall(name)
    ) and
    methodCall = this.asExpr().getExpr()
  }

  /** Gets a node for any argument of the underlying method call. */
  override DataFlow::Node getAnArgument() { methodCall.getAnArgument() = result.asExpr().getExpr() }

  override predicate isShellInterpreted(DataFlow::Node arg) {
    // A command in the pipeline is executed in a subshell if it is given as a single string argument.
    exists(Expr argExpr | argExpr = arg.asExpr().getExpr() |
      argExpr = methodCall.getAnArgument() and
      argExpr instanceof StringlikeLiteral
    )
  }
}
/**
 * A call to `Kernel.eval`, which executes its first argument as Ruby code.
 * ```ruby
 * a = 1
 * Kernel.eval("a = 2")
 * a # => 2
 * ```
 */
class EvalCallCodeExecution extends CodeExecution::Range, KernelMethodCall {
  EvalCallCodeExecution() { "eval" = this.getMethodName() }

  /** Gets the first argument, i.e. the code being evaluated. */
  override DataFlow::Node getCode() { this.getArgument(0) = result }
}
/**
 * A call to `Kernel#send`, which executes its first argument as a Ruby method call.
 * ```ruby
 * arr = []
 * arr.send("push", 1)
 * arr # => [1]
 * ```
 */
class SendCallCodeExecution extends CodeExecution::Range, KernelMethodCall {
  SendCallCodeExecution() { "send" = this.getMethodName() }

  /** Gets the first argument, i.e. the name of the method being invoked. */
  override DataFlow::Node getCode() { this.getArgument(0) = result }
}
/**
 * A call to `BasicObject#instance_eval`, which executes its first argument as Ruby code.
 *
 * Only calls with no known target (`UnknownMethodCall`) are matched.
 */
class InstanceEvalCallCodeExecution extends CodeExecution::Range, DataFlow::CallNode {
  InstanceEvalCallCodeExecution() {
    exists(UnknownMethodCall call | call = this.asExpr().getExpr() |
      call.getMethodName() = "instance_eval"
    )
  }

  /** Gets the first argument, i.e. the code being evaluated. */
  override DataFlow::Node getCode() { this.getArgument(0) = result }
}
/**
 * A call to `Module#class_eval`, which executes its first argument as Ruby code.
 *
 * Only calls with no known target (`UnknownMethodCall`) are matched.
 */
class ClassEvalCallCodeExecution extends CodeExecution::Range, DataFlow::CallNode {
  ClassEvalCallCodeExecution() {
    exists(UnknownMethodCall call | call = this.asExpr().getExpr() |
      call.getMethodName() = "class_eval"
    )
  }

  /** Gets the first argument, i.e. the code being evaluated. */
  override DataFlow::Node getCode() { this.getArgument(0) = result }
}
/**
 * A call to `Module#module_eval`, which executes its first argument as Ruby code.
 *
 * Only calls with no known target (`UnknownMethodCall`) are matched.
 */
class ModuleEvalCallCodeExecution extends CodeExecution::Range, DataFlow::CallNode {
  ModuleEvalCallCodeExecution() {
    exists(UnknownMethodCall call | call = this.asExpr().getExpr() |
      call.getMethodName() = "module_eval"
    )
  }

  /** Gets the first argument, i.e. the code being evaluated. */
  override DataFlow::Node getCode() { this.getArgument(0) = result }
}

View File

@@ -0,0 +1,182 @@
private import codeql.ruby.Concepts
private import codeql.ruby.AST
private import codeql.ruby.DataFlow
private import codeql.ruby.typetracking.TypeTracker
private import codeql.ruby.ApiGraphs
private import codeql.ruby.controlflow.CfgNodes as CfgNodes
/**
 * A call to `parse` on `Nokogiri::XML`, on `Nokogiri::XML::Document`, or on
 * an instance of `Nokogiri::XML::SAX::Parser`.
 */
private class NokogiriXmlParserCall extends XmlParserCall::Range, DataFlow::CallNode {
  NokogiriXmlParserCall() {
    exists(API::Node xml | xml = API::getTopLevelMember("Nokogiri").getMember("XML") |
      this =
        [
          xml, xml.getMember("Document"),
          xml.getMember("SAX").getMember("Parser").getInstance()
        ].getAMethodCall("parse")
    )
  }

  /** Gets the first argument, i.e. the XML input being parsed. */
  override DataFlow::Node getInput() { this.getArgument(0) = result }

  override predicate externalEntitiesEnabled() {
    // The options argument (index 3) is a value tracked as having `NOENT` or
    // `DTDLOAD` enabled, or `NONET` disabled.
    exists(DataFlow::Node options | options = this.getArgument(3) |
      options = trackEnableFeature(TNOENT())
      or
      options = trackEnableFeature(TDTDLOAD())
      or
      options = trackDisableFeature(TNONET())
    )
    or
    // calls to methods that enable/disable features in a block argument passed to this parser call.
    // For example:
    // ```ruby
    // doc.parse(...) { |options| options.nononet; options.noent }
    // ```
    exists(MethodCall optionCall |
      optionCall = this.asExpr().getExpr().(MethodCall).getBlock().getAStmt().getAChild*()
    |
      optionCall.getMethodName() = ["noent", "dtdload", "nononet"]
    )
  }
}
/**
 * A call to `file`, `io` or `string` on the `Document` or `Parser` member of
 * `LibXML::XML` or of a top-level `XML`.
 */
private class LibXmlRubyXmlParserCall extends XmlParserCall::Range, DataFlow::CallNode {
  LibXmlRubyXmlParserCall() {
    exists(API::Node xml |
      xml = [API::getTopLevelMember("LibXML").getMember("XML"), API::getTopLevelMember("XML")]
    |
      this = xml.getMember(["Document", "Parser"]).getAMethodCall(["file", "io", "string"])
    )
  }

  /** Gets the first argument, i.e. the XML input being parsed. */
  override DataFlow::Node getInput() { this.getArgument(0) = result }

  override predicate externalEntitiesEnabled() {
    // The second argument is a hash literal whose `options` entry is a value
    // tracked as having `NOENT` or `DTDLOAD` enabled, or `NONET` disabled.
    exists(Pair optionsPair, DataFlow::Node flags |
      optionsPair = this.getArgument(1).asExpr().getExpr().(HashLiteral).getAKeyValuePair() and
      optionsPair.getKey().(Literal).getValueText() = "options" and
      flags =
        [
          trackEnableFeature(TNOENT()), trackEnableFeature(TDTDLOAD()),
          trackDisableFeature(TNONET())
        ] and
      optionsPair.getValue() = flags.asExpr().getExpr()
    )
  }
}
/**
 * The XML parser option flags tracked in this file. Each branch is wrapped by
 * a `Feature` subclass that supplies its constant name and integer value.
 */
private newtype TFeature =
TNOENT() or
TNONET() or
TDTDLOAD()
/**
 * An XML parser option flag, identified by the name of its constant and by
 * its integer value.
 */
class Feature extends TFeature {
  /** Gets the name of the constant for this feature, for example `NOENT`. */
  abstract string getConstantName();

  /** Gets the integer value of this feature's flag. */
  abstract int getValue();

  /** Gets a textual representation of this feature: its constant name. */
  string toString() { result = this.getConstantName() }
}
/** The `NOENT` feature, with flag value 2. */
private class FeatureNOENT extends Feature, TNOENT {
  override string getConstantName() { result = "NOENT" }

  override int getValue() { result = 2 }
}
/** The `NONET` feature, with flag value 2048. */
private class FeatureNONET extends Feature, TNONET {
  override string getConstantName() { result = "NONET" }

  override int getValue() { result = 2048 }
}
/** The `DTDLOAD` feature, with flag value 4. */
private class FeatureDTDLOAD extends Feature, TDTDLOAD {
  override string getConstantName() { result = "DTDLOAD" }

  override int getValue() { result = 4 }
}
/**
 * Gets an API node for a module that holds the parse option constants:
 * `Nokogiri::XML::ParseOptions`, `LibXML::XML::Parser::Options`, or
 * `XML::Parser::Options`.
 */
private API::Node parseOptionsModule() {
  // `LibXML::XML::Parser::Options` and the top-level `XML::Parser::Options`
  result =
    [API::getTopLevelMember("LibXML").getMember("XML"), API::getTopLevelMember("XML")]
        .getMember("Parser")
        .getMember("Options")
  or
  result = API::getTopLevelMember("Nokogiri").getMember("XML").getMember("ParseOptions")
}
/**
 * Holds if `operation` is a bitwise `&` or `|` expression, or one of the
 * corresponding assignment forms `&=` and `|=`.
 */
private predicate bitWiseAndOr(CfgNodes::ExprNodes::OperationCfgNode operation) {
  exists(Expr e | e = operation.getExpr() |
    e instanceof BitwiseOrExpr
    or
    e instanceof AssignBitwiseOrExpr
    or
    e instanceof BitwiseAndExpr
    or
    e instanceof AssignBitwiseAndExpr
  )
}
/**
 * Gets a local source node holding a parser-options value in which feature
 * `f` is considered enabled (`enable = true`) or disabled (`enable = false`),
 * tracked with type tracker `t`.
 *
 * The first disjunct lists the starting points (`t.start()`); the final
 * disjunct performs the type-tracking step from an earlier tracker state.
 * This predicate is mutually recursive with the two-argument `trackFeature`.
 */
private DataFlow::LocalSourceNode trackFeature(Feature f, boolean enable, TypeTracker t) {
t.start() and
(
// An integer literal with the feature-bit enabled/disabled
exists(int bitValue |
bitValue = result.asExpr().getExpr().(IntegerLiteral).getValue().bitAnd(f.getValue())
|
if bitValue = 0 then enable = false else enable = true
)
or
// Use of a constant f
enable = true and
result = parseOptionsModule().getMember(f.getConstantName()).getAUse()
or
// Treat `&`, `&=`, `|` and `|=` operators as if they preserve the on/off states
// of their operands. This is an overapproximation but likely to work well in practice
// because it makes little sense to explicitly set a feature to both `on` and `off` in the
// same code.
exists(CfgNodes::ExprNodes::OperationCfgNode operation |
bitWiseAndOr(operation) and
operation = result.asExpr().(CfgNodes::ExprNodes::OperationCfgNode) and
operation.getAnOperand() = trackFeature(f, enable).asExpr()
)
or
// The complement operator toggles a feature from enabled to disabled and vice-versa
result.asExpr().getExpr() instanceof ComplementExpr and
result.asExpr().(CfgNodes::ExprNodes::OperationCfgNode).getAnOperand() =
trackFeature(f, enable.booleanNot()).asExpr()
or
// Nokogiri has a ParseOptions class that is a wrapper around the bit-fields and
// provides methods for querying and updating the fields.
result =
API::getTopLevelMember("Nokogiri")
.getMember("XML")
.getMember("ParseOptions")
.getAnInstantiation() and
result.asExpr().(CfgNodes::ExprNodes::CallCfgNode).getArgument(0) =
trackFeature(f, enable).asExpr()
or
// The Nokogiri ParseOptions class has methods for setting/unsetting features.
// The method names are the lowercase variants of the constant names, with a "no"
// prefix for unsetting a feature.
exists(CfgNodes::ExprNodes::CallCfgNode call |
enable = true and
call.getExpr().(MethodCall).getMethodName() = f.getConstantName().toLowerCase()
or
enable = false and
call.getExpr().(MethodCall).getMethodName() = "no" + f.getConstantName().toLowerCase()
|
(
// these methods update the receiver
result.flowsTo(any(DataFlow::Node n | n.asExpr() = call.getReceiver()))
or
// in addition they return the (updated) receiver to allow chaining calls.
result.asExpr() = call
)
)
)
or
// Type-tracking step: continue from a node tracked with an earlier state `t2`.
exists(TypeTracker t2 | result = trackFeature(f, enable, t2).track(t2, t))
}
/**
 * Gets a node that a fully tracked source from the three-argument
 * `trackFeature` flows to, for feature `f` in the state given by `enable`.
 */
private DataFlow::Node trackFeature(Feature f, boolean enable) {
  exists(DataFlow::LocalSourceNode source |
    source = trackFeature(f, enable, TypeTracker::end())
  |
    source.flowsTo(result)
  )
}
/** Gets a node holding an options value in which feature `f` is tracked as enabled. */
private DataFlow::Node trackEnableFeature(Feature f) {
  trackFeature(f, true) = result
}
/** Gets a node holding an options value in which feature `f` is tracked as disabled. */
private DataFlow::Node trackDisableFeature(Feature f) {
  trackFeature(f, false) = result
}

View File

@@ -0,0 +1,130 @@
private import ruby
private import codeql.ruby.Concepts
private import codeql.ruby.ApiGraphs
/**
 * A call that makes an HTTP request using `Excon`.
 * ```ruby
 * # one-off request
 * Excon.get("http://example.com").body
 *
 * # connection re-use
 * connection = Excon.new("http://example.com")
 * connection.get(path: "/").body
 * connection.request(method: :get, path: "/")
 * ```
 *
 * TODO: pipelining, streaming responses
 * https://github.com/excon/excon/blob/master/README.md
 */
class ExconHttpRequest extends HTTP::Client::Request::Range {
  DataFlow::Node requestUse;
  API::Node requestNode;
  API::Node connectionNode;

  ExconHttpRequest() {
    connectionNode =
      [
        // one-off requests
        API::getTopLevelMember("Excon"),
        // connection re-use
        API::getTopLevelMember("Excon").getInstance(),
        API::getTopLevelMember("Excon").getMember("Connection").getInstance()
      ] and
    exists(string verb |
      // Excon#request exists but Excon.request doesn't.
      // This shouldn't be a problem - in real code the latter would raise NoMethodError anyway.
      verb = ["get", "head", "delete", "options", "post", "put", "patch", "trace", "request"]
    |
      requestNode = connectionNode.getReturn(verb)
    ) and
    requestUse = requestNode.getAnImmediateUse() and
    this = requestUse.asExpr().getExpr()
  }

  /** Gets a call to `body` on the result of this request. */
  override DataFlow::Node getResponseBody() { requestNode.getAMethodCall("body") = result }

  override predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
    // Check for `ssl_verify_peer: false` in the options hash.
    exists(DataFlow::Node optionArg, int pos |
      optionArg = connectionNode.getAUse().(DataFlow::CallNode).getArgument(pos) and
      pos > 0
    |
      argSetsVerifyPeer(optionArg, false, disablingNode)
    )
    or
    // Or we see a call to `Excon.defaults[:ssl_verify_peer] = false` before the
    // request, and no `ssl_verify_peer: true` in the explicit options hash for
    // the request call.
    exists(DataFlow::CallNode defaultsCall |
      disablingNode = defaultsCall and
      setsDefaultVerification(defaultsCall, false) and
      defaultsCall.asExpr().getASuccessor+() = requestUse.asExpr() and
      not exists(DataFlow::Node optionArg, int pos |
        optionArg = connectionNode.getAUse().(DataFlow::CallNode).getArgument(pos) and
        pos > 0
      |
        argSetsVerifyPeer(optionArg, true, _)
      )
    )
  }

  override string getFramework() { result = "Excon" }
}
/**
 * Holds if `arg` represents an options hash that contains the key
 * `:ssl_verify_peer` with `value`, where `kvNode` is the data-flow node for
 * this key-value pair.
 */
predicate argSetsVerifyPeer(DataFlow::Node arg, boolean value, DataFlow::Node kvNode) {
  // Passed as a single hash argument, e.g.:
  //    Excon.get(..., { ssl_verify_peer: false, ... })
  exists(DataFlow::LocalSourceNode hashSource, Pair entry |
    hashSource.flowsTo(arg) and
    entry = hashSource.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and
    isSslVerifyPeerPair(entry, value) and
    kvNode.asExpr().getExpr() = entry
  )
  or
  // Or passed as an individual key:value argument, e.g.:
  //    Excon.get(..., ssl_verify_peer: false)
  kvNode = arg and
  isSslVerifyPeerPair(arg.asExpr().getExpr(), value)
}
/**
 * Holds if `callNode` sets `Excon.defaults[:ssl_verify_peer]` or
 * `Excon.ssl_verify_peer` to `value`.
 */
private predicate setsDefaultVerification(DataFlow::CallNode callNode, boolean value) {
  // `Excon.ssl_verify_peer = <value>`
  hasBooleanValue(callNode.getArgument(0), value) and
  callNode = API::getTopLevelMember("Excon").getAMethodCall("ssl_verify_peer=")
  or
  // `Excon.defaults[:ssl_verify_peer] = <value>`
  hasBooleanValue(callNode.getArgument(1), value) and
  isSslVerifyPeerLiteral(callNode.getArgument(0)) and
  callNode = API::getTopLevelMember("Excon").getReturn("defaults").getAMethodCall("[]=")
}
/**
 * Holds if a symbol literal with the text `ssl_verify_peer` flows to `node`.
 */
private predicate isSslVerifyPeerLiteral(DataFlow::Node node) {
  exists(DataFlow::LocalSourceNode source, SymbolLiteral symbol |
    symbol = source.asExpr().getExpr() and
    symbol.getValueText() = "ssl_verify_peer" and
    source.flowsTo(node)
  )
}
/**
 * Holds if `node` can contain `value`, that is, if a Boolean literal with
 * that value flows to `node`.
 */
private predicate hasBooleanValue(DataFlow::Node node, boolean value) {
  exists(DataFlow::LocalSourceNode source, BooleanLiteral bool |
    bool = source.asExpr().getExpr() and
    bool.getValue() = value and
    source.flowsTo(node)
  )
}
/**
 * Holds if `p` is the pair `ssl_verify_peer: <value>`.
 */
private predicate isSslVerifyPeerPair(Pair p, boolean value) {
  exists(DataFlow::Node keyNode, DataFlow::Node valNode |
    isSslVerifyPeerLiteral(keyNode) and
    hasBooleanValue(valNode, value) and
    keyNode.asExpr().getExpr() = p.getKey() and
    valNode.asExpr().getExpr() = p.getValue()
  )
}

View File

@@ -0,0 +1,140 @@
private import ruby
private import codeql.ruby.Concepts
private import codeql.ruby.ApiGraphs
/**
 * A call that makes an HTTP request using `Faraday`.
 * ```ruby
 * # one-off request
 * Faraday.get("http://example.com").body
 *
 * # connection re-use
 * connection = Faraday.new("http://example.com")
 * connection.get("/").body
 * ```
 */
class FaradayHttpRequest extends HTTP::Client::Request::Range {
  DataFlow::Node requestUse;
  API::Node requestNode;
  API::Node connectionNode;

  FaradayHttpRequest() {
    this = requestUse.asExpr().getExpr() and
    requestUse = requestNode.getAnImmediateUse() and
    exists(string verb | verb = ["get", "head", "delete", "post", "put", "patch", "trace"] |
      requestNode = connectionNode.getReturn(verb)
    ) and
    connectionNode =
      [
        // one-off requests
        API::getTopLevelMember("Faraday"),
        // connection re-use
        API::getTopLevelMember("Faraday").getInstance()
      ]
  }

  /** Gets a call to `body` on the result of this request. */
  override DataFlow::Node getResponseBody() { requestNode.getAMethodCall("body") = result }

  override predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
    // `Faraday::new` takes an options hash as its second argument, and we're
    // looking for
    //     `{ ssl: { verify: false } }`
    // or
    //     `{ ssl: { verify_mode: OpenSSL::SSL::VERIFY_NONE } }`
    exists(DataFlow::Node optionArg, int pos |
      optionArg = connectionNode.getAUse().(DataFlow::CallNode).getArgument(pos) and
      pos > 0
    |
      // Passed as a single hash argument, e.g.:
      //    Faraday.new(..., { ssl: {...} })
      exists(DataFlow::LocalSourceNode hashSource, Pair entry |
        hashSource.flowsTo(optionArg) and
        entry = hashSource.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and
        isSslOptionsPairDisablingValidation(entry) and
        disablingNode.asExpr().getExpr() = entry
      )
      or
      // Or passed as an individual key:value argument, e.g.:
      //    Faraday.new(..., ssl: {...})
      disablingNode = optionArg and
      isSslOptionsPairDisablingValidation(optionArg.asExpr().getExpr())
    )
  }

  override string getFramework() { result = "Faraday" }
}
/**
 * Holds if the pair `p` contains the key `:ssl` for which the value is a hash
 * containing either `verify: false` or
 * `verify_mode: OpenSSL::SSL::VERIFY_NONE`.
 */
private predicate isSslOptionsPairDisablingValidation(Pair p) {
  exists(DataFlow::Node keyNode, DataFlow::Node valNode |
    isSymbolLiteral(keyNode, "ssl") and
    keyNode.asExpr().getExpr() = p.getKey() and
    valNode.asExpr().getExpr() = p.getValue()
  |
    isHashWithVerifyModeNone(valNode)
    or
    isHashWithVerifyFalse(valNode)
  )
}
/**
 * Holds if `node` represents the symbol literal with the given `valueText`,
 * that is, if such a literal flows to `node`.
 */
private predicate isSymbolLiteral(DataFlow::Node node, string valueText) {
  exists(DataFlow::LocalSourceNode source, SymbolLiteral symbol |
    symbol = source.asExpr().getExpr() and
    symbol.getValueText() = valueText and
    source.flowsTo(node)
  )
}
/**
 * Holds if `node` represents a hash containing the key-value pair
 * `verify: false`, that is, if a hash literal with such a pair flows to
 * `node`.
 */
private predicate isHashWithVerifyFalse(DataFlow::Node node) {
  exists(DataFlow::LocalSourceNode hashSource, Pair entry |
    hashSource.flowsTo(node) and
    entry = hashSource.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and
    isVerifyFalsePair(entry)
  )
}
/**
 * Holds if `node` represents a hash containing the key-value pair
 * `verify_mode: OpenSSL::SSL::VERIFY_NONE`, that is, if a hash literal with
 * such a pair flows to `node`.
 */
private predicate isHashWithVerifyModeNone(DataFlow::Node node) {
  exists(DataFlow::LocalSourceNode hashSource, Pair entry |
    hashSource.flowsTo(node) and
    entry = hashSource.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and
    isVerifyModeNonePair(entry)
  )
}
/**
 * Holds if the pair `p` has the key `:verify_mode` and the value
 * `OpenSSL::SSL::VERIFY_NONE`.
 */
private predicate isVerifyModeNonePair(Pair p) {
  exists(DataFlow::Node keyNode, DataFlow::Node valNode |
    isSymbolLiteral(keyNode, "verify_mode") and
    keyNode.asExpr().getExpr() = p.getKey() and
    valNode.asExpr().getExpr() = p.getValue() and
    valNode =
      API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse()
  )
}
/**
 * Holds if the pair `p` has the key `:verify` and the value `false`.
 */
private predicate isVerifyFalsePair(Pair p) {
  exists(DataFlow::Node keyNode, DataFlow::Node valNode |
    isSymbolLiteral(keyNode, "verify") and
    isFalse(valNode) and
    keyNode.asExpr().getExpr() = p.getKey() and
    valNode.asExpr().getExpr() = p.getValue()
  )
}
/** Holds if the Boolean literal `false` flows to `node`. */
private predicate isFalse(DataFlow::Node node) {
  exists(DataFlow::LocalSourceNode src, BooleanLiteral lit |
    lit = src.asExpr().getExpr() and
    lit.isFalse() and
    src.flowsTo(node)
  )
}

View File

@@ -0,0 +1,55 @@
private import ruby
private import codeql.ruby.Concepts
private import codeql.ruby.ApiGraphs
/**
 * A call that makes an HTTP request using `HTTPClient`.
 * ```ruby
 * HTTPClient.get("http://example.com").body
 * HTTPClient.get_content("http://example.com")
 * ```
 */
class HttpClientRequest extends HTTP::Client::Request::Range {
  /** The API node for the value returned by the request call. */
  API::Node requestNode;
  /** The API node for `HTTPClient` itself or an instance of it. */
  API::Node connectionNode;
  /** The data-flow node of the request call. */
  DataFlow::Node requestUse;
  /** The name of the request method being called. */
  string method;

  HttpClientRequest() {
    connectionNode =
      [
        // One-off requests
        API::getTopLevelMember("HTTPClient"),
        // Connection re-use
        API::getTopLevelMember("HTTPClient").getInstance()
      ] and
    requestNode = connectionNode.getReturn(method) and
    requestUse = requestNode.getAnImmediateUse() and
    method in [
        "get", "head", "delete", "options", "post", "put", "trace", "get_content", "post_content"
      ] and
    this = requestUse.asExpr().getExpr()
  }

  /** Gets a node that holds the body of the response to this request. */
  override DataFlow::Node getResponseBody() {
    // The `get_content` and `post_content` methods return the response body as
    // a string. The other methods return a `HTTPClient::Message` object which
    // has various methods that return the response body.
    method in ["get_content", "post_content"] and result = requestUse
    or
    // The two branches must be mutually exclusive, so the exclusion list has
    // to match the list above exactly.
    not method in ["get_content", "post_content"] and
    result = requestNode.getAMethodCall(["body", "http_body", "content", "dump"])
  }

  /** Holds if `disablingNode` turns off certificate validation for this request. */
  override predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
    // Look for calls to set
    // `c.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE`
    // on an HTTPClient connection object `c`.
    disablingNode =
      connectionNode.getReturn("ssl_config").getReturn("verify_mode=").getAnImmediateUse() and
    disablingNode.(DataFlow::CallNode).getArgument(0) =
      API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse()
  }

  /** Gets the name of the framework, `"HTTPClient"`. */
  override string getFramework() { result = "HTTPClient" }
}

View File

@@ -0,0 +1,95 @@
private import ruby
private import codeql.ruby.Concepts
private import codeql.ruby.ApiGraphs
/**
 * A call that makes an HTTP request using `HTTParty`.
 * ```ruby
 * # one-off request - returns the response body
 * HTTParty.get("http://example.com")
 *
 * # TODO: module inclusion
 * class MyClass
 *   include HTTParty
 * end
 *
 * MyClass.new("http://example.com")
 * ```
 */
class HttpartyRequest extends HTTP::Client::Request::Range {
  API::Node requestNode;
  DataFlow::Node requestUse;

  HttpartyRequest() {
    requestNode =
      API::getTopLevelMember("HTTParty")
          .getReturn(["get", "head", "delete", "options", "post", "put", "patch"]) and
    requestUse = requestNode.getAnImmediateUse() and
    this = requestUse.asExpr().getExpr()
  }

  /**
   * Gets the node holding the response body: a `body` call on the response
   * when there is one, and otherwise the request call itself.
   */
  override DataFlow::Node getResponseBody() {
    // A `#body` call on the value returned by the request.
    result = requestNode.getAMethodCall("body")
    or
    // No `#body` call anywhere: the returned value stands in for the body.
    not exists(requestNode.getAMethodCall("body")) and
    result = requestUse
  }

  /**
   * Holds if `disablingNode` is a `verify: false` / `verify_peer: false`
   * option passed to this request in an argument after the first.
   */
  override predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
    exists(MethodCall call, DataFlow::Node optArg, int idx |
      call = requestUse.asExpr().getExpr() and
      idx > 0 and
      optArg.asExpr().getExpr() = call.getArgument(idx)
    |
      // Keyword-style argument: HTTParty.get(..., verify: false)
      disablingNode = optArg and
      isVerifyFalsePair(optArg.asExpr().getExpr())
      or
      // Hash argument: HTTParty.get(..., { verify: false, ... })
      exists(DataFlow::LocalSourceNode hashSrc, Pair pair |
        hashSrc.flowsTo(optArg) and
        pair = hashSrc.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and
        isVerifyFalsePair(pair) and
        pair = disablingNode.asExpr().getExpr()
      )
    )
  }

  /** Gets the name of the framework, `"HTTParty"`. */
  override string getFramework() { result = "HTTParty" }
}
/** Holds if the symbol literal `:verify` or `:verify_peer` flows to `node`. */
private predicate isVerifyLiteral(DataFlow::Node node) {
  exists(DataFlow::LocalSourceNode src, SymbolLiteral sym |
    sym = src.asExpr().getExpr() and
    sym.getValueText() in ["verify", "verify_peer"] and
    src.flowsTo(node)
  )
}
/** Holds if the Boolean literal `false` flows to `node`. */
private predicate isFalse(DataFlow::Node node) {
  exists(DataFlow::LocalSourceNode src, BooleanLiteral lit |
    lit = src.asExpr().getExpr() and
    lit.isFalse() and
    src.flowsTo(node)
  )
}
/**
 * Holds if `p` is `verify: false` or `verify_peer: false`: the key is one of
 * those two symbols and the value can be the Boolean literal `false`.
 */
private predicate isVerifyFalsePair(Pair p) {
  exists(DataFlow::Node keyNode | keyNode.asExpr().getExpr() = p.getKey() |
    isVerifyLiteral(keyNode)
  ) and
  exists(DataFlow::Node valueNode | valueNode.asExpr().getExpr() = p.getValue() |
    isFalse(valueNode)
  )
}

View File

@@ -0,0 +1,69 @@
private import codeql.ruby.AST
private import codeql.ruby.Concepts
private import codeql.ruby.dataflow.RemoteFlowSources
private import codeql.ruby.ApiGraphs
private import codeql.ruby.dataflow.internal.DataFlowPublic
/**
 * A `Net::HTTP` call which initiates an HTTP request.
 * ```ruby
 * Net::HTTP.get("http://example.com/")
 * Net::HTTP.post("http://example.com/", "some_data")
 * req = Net::HTTP.new("example.com")
 * response = req.get("/")
 * ```
 */
class NetHttpRequest extends HTTP::Client::Request::Range {
  private DataFlow::CallNode request;
  private DataFlow::Node responseBody;

  NetHttpRequest() {
    exists(API::Node netHttp, API::Node requestNode, string method |
      netHttp = API::getTopLevelMember("Net").getMember("HTTP") and
      this = request.asExpr().getExpr() and
      request = requestNode.getAnImmediateUse()
    |
      // Class-level `Net::HTTP.get(...)`: the call itself is the body.
      requestNode = netHttp.getReturn(method) and
      method = "get" and
      responseBody = request
      or
      // Class-level `Net::HTTP.post(...).body`
      requestNode = netHttp.getReturn(method) and
      method in ["post", "post_form"] and
      responseBody = requestNode.getAMethodCall(["body", "read_body", "entity"])
      or
      // Instance-level `Net::HTTP.new(..).get(..).body`
      requestNode = netHttp.getInstance().getReturn(method) and
      method in [
          "get", "get2", "request_get", "head", "head2", "request_head", "delete", "put", "patch",
          "post", "post2", "request_post", "request"
        ] and
      responseBody = requestNode.getAMethodCall(["body", "read_body", "entity"])
    )
  }

  /**
   * Gets the first argument of the request call, which represents the URL.
   * Currently unused, but may be useful in future, e.g. to filter out certain requests.
   */
  DataFlow::Node getURLArgument() { result = request.getArgument(0) }

  /** Gets the node holding the response body, as fixed by the characteristic predicate. */
  override DataFlow::Node getResponseBody() { result = responseBody }

  /**
   * Holds if `disablingNode` is a use of `OpenSSL::SSL::VERIFY_NONE` assigned
   * through a `verify_mode=` setter whose receiver has local flow to the
   * receiver of this request, e.g.
   *
   *     foo.verify_mode = OpenSSL::SSL::VERIFY_NONE
   *     foo.request(...)
   */
  override predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
    exists(DataFlow::CallNode assignment |
      assignment.asExpr().getExpr().(SetterMethodCall).getMethodName() = "verify_mode=" and
      assignment.getArgument(0) = disablingNode and
      localFlow(assignment.getReceiver(), request.getReceiver()) and
      disablingNode =
        API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse()
    )
  }

  /** Gets the name of the framework, `"Net::HTTP"`. */
  override string getFramework() { result = "Net::HTTP" }
}

View File

@@ -0,0 +1,113 @@
private import ruby
private import codeql.ruby.Concepts
private import codeql.ruby.ApiGraphs
private import codeql.ruby.frameworks.StandardLibrary
/**
 * A call that makes an HTTP request using `OpenURI` via `URI.open` or
 * `URI.parse(...).open`.
 *
 * ```ruby
 * URI.open("http://example.com").readlines
 * URI.parse("http://example.com").open.read
 * ```
 */
class OpenUriRequest extends HTTP::Client::Request::Range {
  API::Node requestNode;
  DataFlow::Node requestUse;

  OpenUriRequest() {
    exists(API::Node uri | uri = API::getTopLevelMember("URI") |
      // URI.open(...)
      requestNode = uri.getReturn("open")
      or
      // URI.parse(...).open
      requestNode = uri.getReturn("parse").getReturn("open")
    ) and
    requestUse = requestNode.getAnImmediateUse() and
    this = requestUse.asExpr().getExpr()
  }

  /** Gets a `read` or `readlines` call on the value returned by this request. */
  override DataFlow::Node getResponseBody() {
    result = requestNode.getAMethodCall(["read", "readlines"])
  }

  /**
   * Holds if some argument of this call disables certificate validation, with
   * `disablingNode` being the node reported by `argumentDisablesValidation`.
   */
  override predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
    exists(MethodCall call, DataFlow::Node optArg |
      call = requestUse.asExpr().getExpr() and
      optArg.asExpr().getExpr() = call.getArgument(_) and
      argumentDisablesValidation(optArg, disablingNode)
    )
  }

  /** Gets the name of the framework, `"OpenURI"`. */
  override string getFramework() { result = "OpenURI" }
}
/**
 * A call that makes an HTTP request using `OpenURI` and its `Kernel.open`
 * interface.
 *
 * ```ruby
 * Kernel.open("http://example.com").read
 * ```
 */
class OpenUriKernelOpenRequest extends HTTP::Client::Request::Range {
  DataFlow::Node requestUse;

  OpenUriKernelOpenRequest() {
    this = requestUse.asExpr().getExpr() and
    requestUse instanceof KernelMethodCall and
    this.getMethodName() = "open"
  }

  /**
   * Gets a call to `read` or `readlines` whose receiver is reached by local
   * flow from this request.
   */
  override DataFlow::CallNode getResponseBody() {
    exists(MethodCall reader | reader = result.asExpr().getExpr() |
      reader.getMethodName() in ["read", "readlines"]
    ) and
    requestUse.(DataFlow::LocalSourceNode).flowsTo(result.getReceiver())
  }

  /**
   * Holds if an argument after the first disables certificate validation,
   * with `disablingNode` being the node reported by
   * `argumentDisablesValidation`.
   */
  override predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
    exists(MethodCall call, DataFlow::Node optArg, int idx |
      call = requestUse.asExpr().getExpr() and
      idx > 0 and
      optArg.asExpr().getExpr() = call.getArgument(idx) and
      argumentDisablesValidation(optArg, disablingNode)
    )
  }

  /** Gets the name of the framework, `"OpenURI"`. */
  override string getFramework() { result = "OpenURI" }
}
/**
 * Holds if the argument `arg` disables certificate validation, and
 * `disablingNode` is the node for the responsible
 * `ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE` pair.
 */
private predicate argumentDisablesValidation(DataFlow::Node arg, DataFlow::Node disablingNode) {
  // Keyword-style argument, e.g.:
  //   URI.open(..., ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE)
  disablingNode = arg and
  isSslVerifyModeNonePair(arg.asExpr().getExpr())
  or
  // Hash literal flowing to the argument, e.g.:
  //   URI.open(..., { ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, ... })
  exists(DataFlow::LocalSourceNode hashSrc, Pair pair |
    hashSrc.flowsTo(arg) and
    pair = hashSrc.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and
    isSslVerifyModeNonePair(pair) and
    pair = disablingNode.asExpr().getExpr()
  )
}
/**
 * Holds if `p` is the pair `ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE`.
 */
private predicate isSslVerifyModeNonePair(Pair p) {
  exists(DataFlow::Node keyNode | keyNode.asExpr().getExpr() = p.getKey() |
    isSslVerifyModeLiteral(keyNode)
  ) and
  exists(DataFlow::Node valueNode | valueNode.asExpr().getExpr() = p.getValue() |
    valueNode =
      API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse()
  )
}
/** Holds if the symbol literal `:ssl_verify_mode` flows to `node`. */
private predicate isSslVerifyModeLiteral(DataFlow::Node node) {
  exists(DataFlow::LocalSourceNode src, SymbolLiteral sym |
    sym = src.asExpr().getExpr() and
    sym.getValueText() = "ssl_verify_mode" and
    src.flowsTo(node)
  )
}

View File

@@ -0,0 +1,71 @@
private import ruby
private import codeql.ruby.Concepts
private import codeql.ruby.ApiGraphs
/**
 * A call that makes an HTTP request using `RestClient`.
 * ```ruby
 * RestClient.get("http://example.com").body
 * ```
 */
class RestClientHttpRequest extends HTTP::Client::Request::Range {
  DataFlow::Node requestUse;
  API::Node requestNode;
  API::Node connectionNode;

  RestClientHttpRequest() {
    exists(API::Node restClient | restClient = API::getTopLevelMember("RestClient") |
      // Requests made directly on the `RestClient` module.
      connectionNode = restClient
      or
      // Requests made on a `RestClient::Resource` instance.
      connectionNode = restClient.getMember("Resource").getInstance()
    ) and
    requestNode =
      connectionNode.getReturn(["get", "head", "delete", "options", "post", "put", "patch"]) and
    requestUse = requestNode.getAnImmediateUse() and
    this = requestUse.asExpr().getExpr()
  }

  /** Gets a `body` call on the value returned by this request. */
  override DataFlow::Node getResponseBody() { result = requestNode.getAMethodCall("body") }

  /**
   * Holds if `disablingNode` is a `verify_ssl: OpenSSL::SSL::VERIFY_NONE`
   * option found in an argument (after the first) of a call that is a use of
   * the connection node, e.g. `RestClient::Resource.new(url, ...)`.
   */
  override predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
    exists(DataFlow::CallNode connectionCall, DataFlow::Node optArg, int idx |
      connectionCall = connectionNode.getAUse() and
      idx > 0 and
      optArg = connectionCall.getArgument(idx)
    |
      // Keyword-style argument, e.g.:
      //   RestClient::Resource.new(..., verify_ssl: OpenSSL::SSL::VERIFY_NONE)
      disablingNode = optArg and
      isVerifySslNonePair(optArg.asExpr().getExpr())
      or
      // Hash argument, e.g.:
      //   RestClient::Resource.new(..., { verify_ssl: OpenSSL::SSL::VERIFY_NONE })
      exists(DataFlow::LocalSourceNode hashSrc, Pair pair |
        hashSrc.flowsTo(optArg) and
        pair = hashSrc.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and
        isVerifySslNonePair(pair) and
        pair = disablingNode.asExpr().getExpr()
      )
    )
  }

  /** Gets the name of the framework, `"RestClient"`. */
  override string getFramework() { result = "RestClient" }
}
/**
 * Holds if `p` is the pair `verify_ssl: OpenSSL::SSL::VERIFY_NONE`.
 */
private predicate isVerifySslNonePair(Pair p) {
  exists(DataFlow::Node keyNode | keyNode.asExpr().getExpr() = p.getKey() |
    isSslVerifyModeLiteral(keyNode)
  ) and
  exists(DataFlow::Node valueNode | valueNode.asExpr().getExpr() = p.getValue() |
    valueNode =
      API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse()
  )
}
/**
 * Holds if the symbol literal `:verify_ssl` flows to `node`.
 *
 * Note that, despite the predicate's name, the symbol matched here is
 * `verify_ssl`, not `ssl_verify_mode`.
 */
private predicate isSslVerifyModeLiteral(DataFlow::Node node) {
  exists(DataFlow::LocalSourceNode src, SymbolLiteral sym |
    sym = src.asExpr().getExpr() and
    sym.getValueText() = "verify_ssl" and
    src.flowsTo(node)
  )
}

View File

@@ -0,0 +1,74 @@
private import ruby
private import codeql.ruby.Concepts
private import codeql.ruby.ApiGraphs
/**
 * A call that makes an HTTP request using `Typhoeus`.
 * ```ruby
 * Typhoeus.get("http://example.com").body
 * ```
 */
class TyphoeusHttpRequest extends HTTP::Client::Request::Range {
  DataFlow::Node requestUse;
  API::Node requestNode;

  TyphoeusHttpRequest() {
    requestNode =
      API::getTopLevelMember("Typhoeus")
          .getReturn(["get", "head", "delete", "options", "post", "put", "patch"]) and
    requestUse = requestNode.getAnImmediateUse() and
    this = requestUse.asExpr().getExpr()
  }

  /** Gets a `body` call on the value returned by this request. */
  override DataFlow::Node getResponseBody() { result = requestNode.getAMethodCall("body") }

  /**
   * Holds if `disablingNode` is an `ssl_verifypeer: false` option passed to
   * this request in an argument after the first.
   */
  override predicate disablesCertificateValidation(DataFlow::Node disablingNode) {
    exists(MethodCall call, DataFlow::Node optArg, int idx |
      call = requestUse.asExpr().getExpr() and
      idx > 0 and
      optArg.asExpr().getExpr() = call.getArgument(idx)
    |
      // Keyword-style argument: Typhoeus.get(..., ssl_verifypeer: false)
      disablingNode = optArg and
      isSslVerifyPeerFalsePair(optArg.asExpr().getExpr())
      or
      // Hash argument: Typhoeus.get(..., { ssl_verifypeer: false, ... })
      exists(DataFlow::LocalSourceNode hashSrc, Pair pair |
        hashSrc.flowsTo(optArg) and
        pair = hashSrc.asExpr().getExpr().(HashLiteral).getAKeyValuePair() and
        isSslVerifyPeerFalsePair(pair) and
        pair = disablingNode.asExpr().getExpr()
      )
    )
  }

  /** Gets the name of the framework, `"Typhoeus"`. */
  override string getFramework() { result = "Typhoeus" }
}
/**
 * Holds if `p` is the pair `ssl_verifypeer: false`.
 */
private predicate isSslVerifyPeerFalsePair(Pair p) {
  exists(DataFlow::Node keyNode | keyNode.asExpr().getExpr() = p.getKey() |
    isSslVerifyPeerLiteral(keyNode)
  ) and
  exists(DataFlow::Node valueNode | valueNode.asExpr().getExpr() = p.getValue() |
    isFalse(valueNode)
  )
}
/** Holds if the symbol literal `:ssl_verifypeer` flows to `node`. */
private predicate isSslVerifyPeerLiteral(DataFlow::Node node) {
  exists(DataFlow::LocalSourceNode src, SymbolLiteral sym |
    sym = src.asExpr().getExpr() and
    sym.getValueText() = "ssl_verifypeer" and
    src.flowsTo(node)
  )
}
/** Holds if the Boolean literal `false` flows to `node`. */
private predicate isFalse(DataFlow::Node node) {
  exists(DataFlow::LocalSourceNode src, BooleanLiteral lit |
    lit = src.asExpr().getExpr() and
    lit.isFalse() and
    src.flowsTo(node)
  )
}

View File

@@ -0,0 +1,203 @@
/**
* Provides queries to pretty-print a Ruby abstract syntax tree as a graph.
*
* By default, this will print the AST for all nodes in the database. To change
* this behavior, extend `PrintAstConfiguration` and override `shouldPrintNode`
* to hold for only the AST nodes you wish to view.
*/
private import AST
private import codeql.ruby.regexp.RegExpTreeView as RETV
/**
 * Holds if `n` is the desugaring of some node, or has an ancestor that is.
 */
predicate isDesugared(AstNode n) {
  // Inherited from the parent chain.
  isDesugared(n.getParent())
  or
  // `n` is itself the direct desugaring of another node.
  exists(AstNode sugar | n = sugar.getDesugared())
}
/**
 * A configuration class that queries can extend to control which nodes and
 * edges are printed.
 */
class PrintAstConfiguration extends string {
  PrintAstConfiguration() { this = "PrintAstConfiguration" }

  /**
   * Holds if the given node should be printed.
   *
   * By default this holds for nodes that are not synthesized, nodes that are
   * not part of a desugaring, and synthesized nodes that are not themselves a
   * desugaring and whose parent is a non-synthesized node of which they are
   * not the desugaring.
   */
  predicate shouldPrintNode(AstNode n) {
    not n.isSynthesized()
    or
    not isDesugared(n)
    or
    n.isSynthesized() and
    not exists(AstNode sugar | n = sugar.getDesugared()) and
    exists(AstNode parent | parent = n.getParent() |
      not n = parent.getDesugared() and
      not parent.isSynthesized()
    )
  }

  /**
   * Holds if the edge named `edgeName` from `parent` to `child` should be
   * printed. By default this is every child edge except the one leading to
   * the parent's desugaring.
   */
  predicate shouldPrintAstEdge(AstNode parent, string edgeName, AstNode child) {
    not child = parent.getDesugared() and
    child = parent.getAChild(edgeName)
  }
}
/** Holds if some `PrintAstConfiguration` says that `n` should be printed. */
private predicate shouldPrintNode(AstNode n) {
  exists(PrintAstConfiguration config | config.shouldPrintNode(n))
}
/**
 * Holds if some `PrintAstConfiguration` says that the edge `edgeName` from
 * `parent` to `child` should be printed.
 */
private predicate shouldPrintAstEdge(AstNode parent, string edgeName, AstNode child) {
  exists(PrintAstConfiguration config | config.shouldPrintAstEdge(parent, edgeName, child))
}
/**
 * The nodes of the output tree: printable AST nodes, plus the regexp terms
 * that belong to the parsed tree of a printable regexp literal.
 */
newtype TPrintNode =
  TPrintRegularAstNode(AstNode n) { shouldPrintNode(n) } or
  TPrintRegExpNode(RETV::RegExpTerm term) {
    shouldPrintNode(any(RegExpLiteral lit | lit.getParsed() = term.getRootTerm()))
  }
/**
 * A node in the output tree. The member predicates that return `none()` here
 * are overridden by the concrete node classes.
 */
class PrintAstNode extends TPrintNode {
  /** Gets a textual representation of this node in the PrintAst output tree. */
  string toString() { none() }

  /**
   * Gets the child node reached through the edge named `edgeName`. The edge
   * name is typically the name of the predicate used to access the child.
   */
  PrintAstNode getChild(string edgeName) { none() }

  /** Gets any child of this node, regardless of edge name. */
  final PrintAstNode getAChild() { result = this.getChild(_) }

  /** Gets the parent of this node, if any. */
  final PrintAstNode getParent() { this = result.getAChild() }

  /**
   * Holds if this node is at the specified location. The location spans column
   * `startcolumn` of line `startline` to column `endcolumn` of line `endline`
   * in file `filepath`. For more information, see
   * [LGTM locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    none()
  }

  /** Gets a value used to order this node amongst its siblings. */
  int getOrder() { none() }

  /**
   * Gets the value of the property named `key` of this node. The supported
   * keys are `semmle.label` (the node's `toString()`) and `semmle.order`
   * (the node's `getOrder()` rendered as a string).
   */
  final string getProperty(string key) {
    key = "semmle.order" and
    result = this.getOrder().toString()
    or
    key = "semmle.label" and
    result = this.toString()
  }
}
/** An `AstNode` in the output tree. */
class PrintRegularAstNode extends PrintAstNode, TPrintRegularAstNode {
// The AST node wrapped by this output node.
AstNode astNode;
PrintRegularAstNode() { this = TPrintRegularAstNode(astNode) }
/**
 * Gets the label of this node: the comma-separated primary QL classes of the
 * wrapped AST node in square brackets, followed by the AST node's own
 * `toString()`.
 */
override string toString() {
result = "[" + concat(astNode.getAPrimaryQlClass(), ", ") + "] " + astNode.toString()
}
/**
 * Gets the child for edge `edgeName`: either a printable AST child, or (for a
 * regexp literal) its parsed regexp term under the edge name `getParsed`.
 */
override PrintAstNode getChild(string edgeName) {
exists(AstNode child | shouldPrintAstEdge(astNode, edgeName, child) |
result = TPrintRegularAstNode(child)
)
or
// If this AST node is a regexp literal, add the parsed regexp tree as a
// child.
exists(RETV::RegExpTerm t | t = astNode.(RegExpLiteral).getParsed() |
result = TPrintRegExpNode(t) and edgeName = "getParsed"
)
}
/**
 * Gets the rank of this node among all `PrintRegularAstNode`s, ordered by
 * file base name, absolute file path, start line and start column.
 */
override int getOrder() {
this =
rank[result](PrintRegularAstNode p, Location l, File f |
l = p.getLocation() and
f = l.getFile()
|
p order by f.getBaseName(), f.getAbsolutePath(), l.getStartLine(), l.getStartColumn()
)
}
/** Gets the location of this node. */
Location getLocation() { result = astNode.getLocation() }
/** Holds if the wrapped AST node's location has the given location info. */
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
astNode.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
}
/** A node of a parsed regular expression in the output tree. */
class PrintRegExpNode extends PrintAstNode, TPrintRegExpNode {
  RETV::RegExpTerm regexNode;

  PrintRegExpNode() { this = TPrintRegExpNode(regexNode) }

  /**
   * Gets the label of this node: the comma-separated primary QL classes of the
   * regexp term in square brackets, followed by the term's `toString()`.
   */
  override string toString() {
    result = "[" + concat(regexNode.getAPrimaryQlClass(), ", ") + "] " + regexNode.toString()
  }

  /** Gets the child term at the index whose decimal rendering is `edgeName`. */
  override PrintAstNode getChild(string edgeName) {
    exists(int index |
      edgeName = index.toString() and
      result = TPrintRegExpNode(regexNode.getChild(index))
    )
  }

  /** Gets the index at which some term has this node's term as a child. */
  override int getOrder() { regexNode = any(RETV::RegExpTerm parent).getChild(result) }

  /** Holds if the regexp term has the given location info. */
  override predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    regexNode.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
  }
}
/**
 * Holds if `node` belongs to the output tree and `value` is the value of its
 * property `key`.
 */
query predicate nodes(PrintAstNode node, string key, string value) {
  node.getProperty(key) = value
}
/**
 * Holds if `target` is a child of `source` in the output tree, and property
 * `key` of the edge between them has the given `value`.
 *
 * The edge label is the `/`-separated concatenation of all edge names that
 * lead from `source` to `target`; the edge order is the target's
 * `semmle.order` property.
 */
query predicate edges(PrintAstNode source, PrintAstNode target, string key, string value) {
  source.getChild(_) = target and
  (
    key = "semmle.order" and
    value = target.getProperty(key)
    or
    key = "semmle.label" and
    value = strictconcat(string edgeName | target = source.getChild(edgeName) | edgeName, "/")
  )
}
/**
 * Holds if property `key` of the graph has the given `value`. The only
 * property set is `semmle.graphKind`, with value `tree`.
 */
query predicate graphProperties(string key, string value) {
  value = "tree" and
  key = "semmle.graphKind"
}

View File

@@ -0,0 +1,343 @@
private import ReDoSUtil
private import RegExpTreeView
private import codeql.Locations
/*
* This query implements the analysis described in the following two papers:
*
* James Kirrage, Asiri Rathnayake, Hayo Thielecke: Static Analysis for
* Regular Expression Denial-of-Service Attacks. NSS 2013.
* (http://www.cs.bham.ac.uk/~hxt/research/reg-exp-sec.pdf)
* Asiri Rathnayake, Hayo Thielecke: Static Analysis for Regular Expression
* Exponential Runtime via Substructural Logics. 2014.
* (https://www.cs.bham.ac.uk/~hxt/research/redos_full.pdf)
*
* The basic idea is to search for overlapping cycles in the NFA, that is,
* states `q` such that there are two distinct paths from `q` to itself
* that consume the same word `w`.
*
* For any such state `q`, an attack string can be constructed as follows:
* concatenate a prefix `v` that takes the NFA to `q` with `n` copies of
* the word `w` that leads back to `q` along two different paths, followed
* by a suffix `x` that is _not_ accepted in state `q`. A backtracking
* implementation will need to explore at least 2^n different ways of going
* from `q` back to itself while trying to match the `n` copies of `w`
* before finally giving up.
*
* Now in order to identify overlapping cycles, all we have to do is find
* pumpable forks, that is, states `q` that can transition to two different
* states `r1` and `r2` on the same input symbol `c`, such that there are
* paths from both `r1` and `r2` to `q` that consume the same word. The latter
* condition is equivalent to saying that `(q, q)` is reachable from `(r1, r2)`
* in the product NFA.
*
* This is what the query does. It makes a simple attempt to construct a
* prefix `v` leading into `q`, but only to improve the alert message.
* And the query tries to prove the existence of a suffix that ensures
* rejection. This check might fail, which can cause false positives.
*
* Finally, sometimes it depends on the translation whether the NFA generated
* for a regular expression has a pumpable fork or not. We implement one
* particular translation, which may result in false positives or negatives
* relative to some particular regular expression engine.
*
* More precisely, the query constructs an NFA from a regular expression `r`
* as follows:
*
* * Every sub-term `t` gives rise to an NFA state `Match(t,i)`, representing
* the state of the automaton before attempting to match the `i`th character in `t`.
* * There is one accepting state `Accept(r)`.
* * There is a special `AcceptAnySuffix(r)` state, which accepts any suffix string
* by using an epsilon transition to `Accept(r)` and an any transition to itself.
* * Transitions between states may be labelled with epsilon, or an abstract
* input symbol.
* * Each abstract input symbol represents a set of concrete input characters:
* either a single character, a set of characters represented by a
* character class, or the set of all characters.
* * The product automaton is constructed lazily, starting with pair states
* `(q, q)` where `q` is a fork, and proceeding along an over-approximate
* step relation.
* * The over-approximate step relation allows transitions along pairs of
* abstract input symbols where the symbols have overlap in the characters they accept.
* * Once a trace of pairs of abstract input symbols that leads from a fork
* back to itself has been identified, we attempt to construct a concrete
* string corresponding to it, which may fail.
* * Lastly we ensure that any state reached by repeating `n` copies of `w` has
* a suffix `x` (possibly empty) that is most likely __not__ accepted.
*/
/**
 * Holds if the term represented by state `s`, or one of its ancestors, is a
 * `MaybeBacktrackingRepetition`, i.e. `s` might be inside a backtracking
 * repetition.
 */
pragma[noinline]
private predicate stateInsideBacktracking(State s) {
  exists(MaybeBacktrackingRepetition rep | rep = s.getRepr().getParent*())
}
/**
 * An infinitely repeating quantifier that might backtrack: one that has an
 * alternation or another quantifier somewhere beneath it.
 */
private class MaybeBacktrackingRepetition extends InfiniteRepetitionQuantifier {
  MaybeBacktrackingRepetition() {
    exists(RegExpTerm descendant | descendant.getParent+() = this |
      descendant instanceof RegExpQuantifier
      or
      descendant instanceof RegExpAlt
    )
  }
}
/**
 * A state in the product automaton.
 *
 * We lazily only construct those states that we are actually
 * going to need: `(q, q)` for every fork state `q`, and any
 * pair of states that can be reached from a pair that we have
 * already constructed. To cut down on the number of states,
 * we only represent states `(q1, q2)` where the rank of `q1`
 * (as given by `rankState`) is no bigger than the rank of `q2`.
 *
 * States are only constructed if both states in the pair are
 * inside a repetition that might backtrack (this is enforced by
 * `isFork` and `step`).
 */
private newtype TStatePair =
MkStatePair(State q1, State q2) {
// The diagonal pair `(q, q)` for a fork state `q`.
isFork(q1, _, _, _, _) and q2 = q1
or
// A pair reachable by a step, in either component order, normalised so
// that the lower-ranked state comes first.
(step(_, _, _, q1, q2) or step(_, _, _, q2, q1)) and
rankState(q1) <= rankState(q2)
}
/**
 * Gets a unique number for a `state`.
 * Is used to create an ordering of states, where states with the same `toString()` will be ordered differently.
 *
 * The number is the rank of `state` among all states when ordered by the
 * start line and start column of the location of the state's term, and then
 * by the state's `toString()`.
 */
private int rankState(State state) {
state =
rank[result](State s, Location l |
l = s.getRepr().getLocation()
|
s order by l.getStartLine(), l.getStartColumn(), s.toString()
)
}
/**
 * A pair of NFA states, i.e. a state in the product automaton.
 */
private class StatePair extends TStatePair {
  State q1;
  State q2;

  StatePair() { MkStatePair(q1, q2) = this }

  /** Gets a textual representation of this pair, of the form `(q1, q2)`. */
  string toString() { result = "(" + q1 + ", " + q2 + ")" }

  /** Gets the first component of the state pair. */
  State getLeft() { q1 = result }

  /** Gets the second component of the state pair. */
  State getRight() { q2 = result }
}
/**
 * Holds for every constructed state pair `p`.
 *
 * Only used as the node predicate of `statePairDist`.
 */
private predicate isStatePair(StatePair p) { exists(p) }
/**
 * Holds if the product automaton can step from `q` to `r` on some pair of
 * input symbols.
 *
 * Only used as the edge predicate of `statePairDist`.
 */
private predicate delta2(StatePair q, StatePair r) {
  exists(InputSymbol s1, InputSymbol s2 | step(q, s1, s2, r))
}
/**
 * Gets the minimum length of a path from `q` to `r` in the
 * product automaton.
 *
 * This instantiates the `shortestDistances` higher-order predicate with
 * `isStatePair` as the set of nodes and `delta2` as the edge relation.
 */
private int statePairDist(StatePair q, StatePair r) =
shortestDistances(isStatePair/1, delta2/2)(q, r, result)
/**
 * Holds if there are transitions from `q` to `r1` and from `q` to `r2`
 * labelled with `s1` and `s2`, respectively, where `s1` and `s2` do not
 * trivially have an empty intersection.
 *
 * The transitions may be preceded by epsilon transitions from `q`.
 *
 * This predicate only holds when `q`, `r1` and `r2` are all inside a
 * repetition that might backtrack (see `stateInsideBacktracking`); otherwise
 * the expression cannot be vulnerable to ReDoS attacks anyway.
 */
pragma[noopt]
private predicate isFork(State q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
stateInsideBacktracking(q) and
exists(State q1, State q2 |
q1 = epsilonSucc*(q) and
delta(q1, s1, r1) and
q2 = epsilonSucc*(q) and
delta(q2, s2, r2) and
// Use pragma[noopt] to prevent intersect(s1,s2) from being the starting point of the join.
// From (s1,s2) it would find a huge number of intermediate state pairs (q1,q2) originating from different literals,
// and discover at the end that no `q` can reach both `q1` and `q2` by epsilon transitions.
exists(intersect(s1, s2))
|
// The two transitions differ in their symbol, ...
s1 != s2
or
// ... in their target state, ...
r1 != r2
or
// ... or in the state they start from after the epsilon steps.
r1 = r2 and q1 != q2
or
// If q can reach itself by epsilon transitions, then there are two distinct paths to the q1/q2 state:
// one that uses the loop and one that doesn't. The engine will separately attempt to match with each path,
// despite ending in the same state. The "fork" thus arises from the choice of whether to use the loop or not.
// To avoid every state in the loop becoming a fork state,
// we arbitrarily pick the InfiniteRepetitionQuantifier state as the canonical fork state for the loop
// (every epsilon-loop must contain such a state).
//
// We additionally require that there exists another InfiniteRepetitionQuantifier `mid` on the path from `q` to itself.
// This is done to avoid flagging regular expressions such as `/(a?)*b/` - that only has polynomial runtime, and is detected by `js/polynomial-redos`.
// The below code is therefore a heuristic, that only flags regular expressions such as `/(a*)*b/`,
// and does not flag regular expressions such as `/(a?b?)c/`, but the latter pattern is not used frequently.
r1 = r2 and
q1 = q2 and
epsilonSucc+(q) = q and
exists(RegExpTerm term | term = q.getRepr() | term instanceof InfiniteRepetitionQuantifier) and
// One of the mid states is an infinite quantifier itself
exists(State mid, RegExpTerm term |
mid = epsilonSucc+(q) and
term = mid.getRepr() and
term instanceof InfiniteRepetitionQuantifier and
q = epsilonSucc+(mid) and
not mid = q
)
) and
stateInsideBacktracking(r1) and
stateInsideBacktracking(r2)
}
/**
 * Gets the state pair made of `q1` and `q2`, in whichever of the two
 * component orders has been constructed; note that only one or the other is
 * defined.
 */
private StatePair mkStatePair(State q1, State q2) {
  result = MkStatePair(q2, q1)
  or
  result = MkStatePair(q1, q2)
}
/**
 * Holds if the product automaton can step from `q` to `r`, with the two
 * components consuming `s1` and `s2` respectively.
 */
private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, StatePair r) {
  exists(State target1, State target2 |
    r = mkStatePair(target1, target2) and
    step(q, s1, s2, target1, target2)
  )
}
/**
 * Holds if there are transitions from the components of `q` to `r1` and `r2`
 * labelled with `s1` and `s2`, respectively.
 *
 * We only consider transitions where the resulting states `(r1, r2)` are both
 * inside a repetition that might backtrack.
 */
pragma[noopt]
private predicate step(StatePair q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
exists(State q1, State q2 | q.getLeft() = q1 and q.getRight() = q2 |
// Each component takes its own transition via `deltaClosed`.
deltaClosed(q1, s1, r1) and
deltaClosed(q2, s2, r2) and
// use noopt to force the join on `intersect` to happen last.
exists(intersect(s1, s2))
) and
stateInsideBacktracking(r1) and
stateInsideBacktracking(r2)
}
/**
 * A trace: either the empty trace `Nil()`, or a pair of input symbols
 * prepended to an existing trace. A step is only constructed if it is the
 * first step out of some fork, or if it extends the trace of a state pair
 * that is reachable from a fork.
 */
private newtype TTrace =
  Nil() or
  Step(InputSymbol s1, InputSymbol s2, TTrace t) {
    // First step: the symbol pair of a fork, on top of the empty trace.
    t = Nil() and
    isFork(_, s1, s2, _, _)
    or
    // Later steps: extend the trace of a reachable pair by one product step.
    exists(StatePair p |
      step(p, s1, s2, _) and
      isReachableFromFork(_, p, t, _)
    )
  }
/**
 * A list of pairs of input symbols that describe a path in the product automaton
 * starting from some fork state.
 */
private class Trace extends TTrace {
  /** Gets a textual representation of this trace. */
  string toString() {
    exists(InputSymbol first, InputSymbol second, Trace tail |
      this = Step(first, second, tail) and
      result = "Step(" + first + ", " + second + ", " + tail + ")"
    )
    or
    this = Nil() and
    result = "Nil()"
  }
}
/**
 * Gets a string corresponding to the trace `t`.
 *
 * The empty trace yields the empty string. For `Step(s1, s2, rest)` the result
 * is the string for `rest` followed by `intersect(s1, s2)`.
 */
private string concretise(Trace t) {
  exists(InputSymbol s1, InputSymbol s2, Trace tail |
    t = Step(s1, s2, tail) and
    result = concretise(tail) + intersect(s1, s2)
  )
  or
  t = Nil() and result = ""
}
/**
 * Holds if `r` is reachable from `(fork, fork)` under input `w`, and there is
 * a path from `r` back to `(fork, fork)` with `rem` steps.
 */
private predicate isReachableFromFork(State fork, StatePair r, Trace w, int rem) {
// base case
// `r` is the pair reached by the two diverging transitions of the fork; the
// trace holds that single step and `rem` starts at `statePairDist` from `r`
// to `(fork, fork)`.
exists(InputSymbol s1, InputSymbol s2, State q1, State q2 |
isFork(fork, s1, s2, q1, q2) and
r = MkStatePair(q1, q2) and
w = Step(s1, s2, Nil()) and
rem = statePairDist(r, MkStatePair(fork, fork))
)
or
// recursive case
// Extend a trace `v` ending in `p` by one step to `r`. The predecessor was
// recorded with `rem + 1`, so `rem` drops by one per step, and the step is
// only kept while `rem` is at least `statePairDist` from `r` to `(fork, fork)`.
exists(StatePair p, Trace v, InputSymbol s1, InputSymbol s2 |
isReachableFromFork(fork, p, v, rem + 1) and
step(p, s1, s2, r) and
w = Step(s1, s2, v) and
rem >= statePairDist(r, MkStatePair(fork, fork))
)
}
/**
 * Gets a state in the product automaton from which `(fork, fork)` is
 * reachable in zero or more epsilon transitions.
 *
 * Only defined when `fork` is a fork state according to `isFork`.
 */
private StatePair getAForkPair(State fork) {
  isFork(fork, _, _, _, _) and
  exists(State left, State right |
    left = epsilonPred*(fork) and
    right = epsilonPred*(fork) and
    result = MkStatePair(left, right)
  )
}
/**
 * Holds if `fork` is a pumpable fork with word `w`.
 *
 * That is, some trace recorded by `isReachableFromFork` ends in one of the
 * pairs given by `getAForkPair(fork)`, and `w` is the string for that trace.
 */
private predicate isPumpable(State fork, string w) {
  exists(Trace t |
    isReachableFromFork(fork, getAForkPair(fork), t, _) and
    w = concretise(t)
  )
}
/**
 * An instantiation of `ReDoSConfiguration` for exponential backtracking.
 *
 * A state is a ReDoS candidate with pump string `pump` exactly when
 * `isPumpable(state, pump)` holds.
 */
class ExponentialReDoSConfiguration extends ReDoSConfiguration {
ExponentialReDoSConfiguration() { this = "ExponentialReDoSConfiguration" }
override predicate isReDoSCandidate(State state, string pump) { isPumpable(state, pump) }
}

View File

@@ -0,0 +1,891 @@
/**
 * Library for parsing Ruby regular expressions.
*
* N.B. does not yet handle stripping whitespace and comments in regexes with
* the `x` (free-spacing) flag.
*/
private import codeql.ruby.ast.Literal as AST
private import codeql.Locations
class RegExp extends AST::RegExpLiteral {
/**
 * Helper predicate for `charSetStart(int start, int end)`.
 *
 * In order to identify left brackets ('[') which actually start a character class,
 * we perform a left to right scan of the string.
 *
 * To avoid negative recursion we return a boolean. See `escaping`,
 * the helper for `escapingChar`, for a clean use of this pattern.
 *
 * The result is only defined for positions that `charSetDelimiter` reports as
 * an opening bracket; it is `true` for those that actually mark the start of a
 * char set.
 */
boolean charSetStart(int pos) {
exists(int index |
// is opening bracket
this.charSetDelimiter(index, pos) = true and
(
// if this is the first bracket, `pos` starts a char set
index = 1 and result = true
or
// if the previous char set delimiter was not a closing bracket, `pos` does
// not start a char set. This is needed to handle cases such as `[[]` (a
// char set that matches the `[` char)
index > 1 and
not this.charSetDelimiter(index - 1, _) = false and
result = false
or
// special handling of cases such as `[][]` (the character-set of the characters `]` and `[`).
exists(int prevClosingBracketPos |
// previous bracket is a closing bracket
this.charSetDelimiter(index - 1, prevClosingBracketPos) = false and
if
// check if the character that comes before the previous closing bracket
// is an opening bracket (taking `^` into account)
exists(int posBeforePrevClosingBracket |
if this.getChar(prevClosingBracketPos - 1) = "^"
then posBeforePrevClosingBracket = prevClosingBracketPos - 2
else posBeforePrevClosingBracket = prevClosingBracketPos - 1
|
this.charSetDelimiter(index - 2, posBeforePrevClosingBracket) = true
)
then
// brackets without anything in between are not valid character ranges, so
// the first closing bracket in `[]]` and `[^]]` does not count,
//
// and we should _not_ mark the second opening bracket in `[][]` and `[^][]`
// as starting a new char set.
//
// The answer is the negation of whether the bracket two delimiters back
// started a char set.
exists(int posBeforePrevClosingBracket |
this.charSetDelimiter(index - 2, posBeforePrevClosingBracket) = true
|
result = this.charSetStart(posBeforePrevClosingBracket).booleanNot()
)
else
// if not, `pos` does in fact mark a real start of a character range
result = true
)
)
)
}
/**
 * Helper predicate for chars that could be character-set delimiters.
 *
 * Holds if the (non-escaped) char at `pos` in the string is the (one-based)
 * `index`th occurrence of a bracket (`[` or `]`) in the string, not counting
 * brackets that lie inside a POSIX bracket expression such as `[:alpha:]`.
 *
 * The result is `true` if the char is `[`, and `false` if the char is `]`.
 */
boolean charSetDelimiter(int index, int pos) {
  pos =
    rank[index](int p |
      (this.nonEscapedCharAt(p) = "[" or this.nonEscapedCharAt(p) = "]") and
      // Brackets that are part of POSIX expressions should not count as
      // char-set delimiters. The exclusion must test the ranked candidate `p`
      // (not the outer `pos`); otherwise POSIX brackets still consume rank
      // indices and the `index - 1` / `index - 2` lookups in `charSetStart`
      // no longer refer to the neighbouring delimiters.
      not exists(int x, int y |
        this.posixStyleNamedCharacterProperty(x, y, _) and p >= x and p < y
      )
    ) and
  (
    this.nonEscapedCharAt(pos) = "[" and result = true
    or
    this.nonEscapedCharAt(pos) = "]" and result = false
  )
}
/**
 * Holds if a character set starts at `start` and its contents begin at `end`.
 * `end` is `start + 2` when the opening bracket is followed by `^`, and
 * `start + 1` otherwise.
 */
predicate charSetStart(int start, int end) {
  this.charSetStart(start) = true and
  if this.getChar(start + 1) = "^" then end = start + 2 else end = start + 1
}
/**
 * Holds if there is a character class between `start` (inclusive) and `end` (exclusive).
 *
 * The class is closed by the first non-escaped `]` after the start of its
 * contents that is not part of a POSIX bracket expression.
 */
predicate charSet(int start, int end) {
  exists(int contentStart, int closingBracket |
    this.charSetStart(start, contentStart) and
    // `start` must not itself be the content start of another char set
    not this.charSetStart(_, start) and
    closingBracket =
      min(int e |
        e > contentStart and
        this.nonEscapedCharAt(e) = "]" and
        not exists(int x, int y |
          this.posixStyleNamedCharacterProperty(x, y, _) and e >= x and e < y
        )
      ) and
    end = closingBracket + 1
  )
}
/**
 * Holds if the token spanning `tokenStart` to `tokenEnd` is the (one-based)
 * `index`th token, ordered by start offset, of the character set starting at
 * `charsetStart`.
 */
predicate charSetToken(int charsetStart, int index, int tokenStart, int tokenEnd) {
  this.charSetToken(charsetStart, tokenStart, tokenEnd) and
  tokenStart = rank[index](int s, int e | this.charSetToken(charsetStart, s, e) | s)
}
/**
 * Holds if `start` to `end` is a token (either a char or a `-`) of the
 * character set starting at `charsetStart`.
 *
 * A token is an escaped character, a named character property, or a single
 * non-escaped character. Tokens are chained: the first one begins at the
 * content start of the set, and each later one begins where an earlier one ends.
 */
predicate charSetToken(int charsetStart, int start, int end) {
  // a later token: starts where a previous token of the same set ended
  this.charSetToken(charsetStart, _, start) and
  (
    // a single char that is not `]`
    exists(this.nonEscapedCharAt(start)) and
    not this.getChar(start) = "]" and
    end = start + 1
    or
    this.namedCharacterProperty(start, end, _)
    or
    this.escapedCharacter(start, end)
  )
  or
  // the first token: starts right at the content start of the set
  this.charSetStart(charsetStart, start) and
  (
    exists(this.nonEscapedCharAt(start)) and end = start + 1
    or
    this.namedCharacterProperty(start, end, _)
    or
    this.escapedCharacter(start, end)
  )
}
/**
 * Holds if `start` to `end` is a child of the character set starting at
 * `charsetStart`: either a whole character range, or a token that is not
 * covered by any character range of that set.
 */
predicate charSetChild(int charsetStart, int start, int end) {
  this.charRange(charsetStart, start, _, _, end)
  or
  this.charSetToken(charsetStart, start, end) and
  not exists(int lo, int hi |
    this.charRange(charsetStart, lo, _, _, hi) and
    lo <= start and
    end <= hi
  )
}
/**
 * Holds if the character set starting at `charsetStart` contains a range whose
 * lower bound spans `start` to `lowerEnd` and whose upper bound spans
 * `upperStart` to `end`.
 *
 * The upper bound is a token that `charRangeEnd` marks as a range end, and the
 * lower bound is the token two positions before it.
 */
predicate charRange(int charsetStart, int start, int lowerEnd, int upperStart, int end) {
  exists(int upperIndex |
    this.charSetToken(charsetStart, upperIndex, upperStart, end) and
    this.charSetToken(charsetStart, upperIndex - 2, start, lowerEnd) and
    this.charRangeEnd(charsetStart, upperIndex) = true
  )
}
/**
 * Gets `true` if the `index`th token of the character set starting at
 * `charsetStart` is the upper bound of a character range, and `false` otherwise.
 *
 * A boolean result is used so that the recursion on earlier tokens can be
 * negated with `booleanNot`.
 */
private boolean charRangeEnd(int charsetStart, int index) {
this.charSetToken(charsetStart, index, _, _) and
(
// the first two tokens cannot end a range: a range needs a lower bound and a `-` before it
index in [1, 2] and result = false
or
// the previous token is a non-escaped `-`: this token ends a range unless
// one of the two preceding tokens already ends a range
index > 2 and
exists(int connectorStart |
this.charSetToken(charsetStart, index - 1, connectorStart, _) and
this.nonEscapedCharAt(connectorStart) = "-" and
result =
this.charRangeEnd(charsetStart, index - 2)
.booleanNot()
.booleanAnd(this.charRangeEnd(charsetStart, index - 1).booleanNot())
)
or
// the previous token is not a non-escaped `-`
not exists(int connectorStart |
this.charSetToken(charsetStart, index - 1, connectorStart, _) and
this.nonEscapedCharAt(connectorStart) = "-"
) and
result = false
)
}
/** Holds if the character at `pos` is a backslash that is not itself escaped, as computed by `escaping`. */
predicate escapingChar(int pos) { this.escaping(pos) = true }
/**
 * Gets `true` if the character at `pos` is a backslash that escapes the
 * following character, and `false` otherwise.
 *
 * A backslash is escaping exactly when the position before it is not. The
 * position `-1` serves as the non-escaping base case.
 */
private boolean escaping(int pos) {
  exists(string c | c = this.getChar(pos) |
    c = "\\" and result = this.escaping(pos - 1).booleanNot()
    or
    c != "\\" and result = false
  )
  or
  pos = -1 and result = false
}
/** Gets the text of this regex, taken from `getValueText()`. */
string getText() { result = this.getValueText() }
/** Gets the character at offset `i` of the text of this regex, if any. */
string getChar(int i) { result = this.getText().charAt(i) }
/**
 * Gets the character at offset `i` of the text of this regex, provided that
 * offset is not covered by an escaped character.
 */
string nonEscapedCharAt(int i) {
  not exists(int escStart, int escEnd |
    this.escapedCharacter(escStart, escEnd) and
    i >= escStart and
    i < escEnd
  ) and
  result = this.getText().charAt(i)
}
/** Holds if the character at offset `i` is a non-escaped `|`. */
private predicate isOptionDivider(int i) { this.nonEscapedCharAt(i) = "|" }
/** Holds if the character at offset `i` is a non-escaped `)` outside any character set. */
private predicate isGroupEnd(int i) {
  not this.inCharSet(i) and
  this.nonEscapedCharAt(i) = ")"
}
/** Holds if the character at offset `i` is a non-escaped `(` outside any character set. */
private predicate isGroupStart(int i) {
  not this.inCharSet(i) and
  this.nonEscapedCharAt(i) = "("
}
/**
 * Holds if the character at offset `i` exists but is not covered by any
 * top-level span of the parse.
 */
predicate failedToParse(int i) {
  exists(this.getChar(i)) and
  not exists(int start, int end | this.topLevel(start, end) | i >= start and i < end)
}
/**
 * Holds if `start` to `end` is a named character property called `name`,
 * written either in `\p{...}` style (e.g. `\p{Word}`) or in POSIX bracket
 * style (e.g. the `[:digit:]` in `[[:digit:]]`).
 */
predicate namedCharacterProperty(int start, int end, string name) {
  this.posixStyleNamedCharacterProperty(start, end, name)
  or
  this.pStyleNamedCharacterProperty(start, end, name)
}
/** Gets the name of the named character property spanning `start` to `end`. */
string getCharacterPropertyName(int start, int end) {
this.namedCharacterProperty(start, end, result)
}
/**
 * Matches a POSIX bracket expression such as `[:alnum:]` within a character class.
 *
 * `start` is the offset of the `[` and `end` is one past the first following
 * `:]`. `name` is the text between the delimiters, without a leading `^`.
 */
private predicate posixStyleNamedCharacterProperty(int start, int end, string name) {
  this.getChar(start) = "[" and
  this.getChar(start + 1) = ":" and
  end = min(int e | e > start and this.getChar(e - 2) = ":" and this.getChar(e - 1) = "]") and
  exists(int nameStart |
    // skip the inverting caret, if present
    (if this.getChar(start + 2) = "^" then nameStart = start + 3 else nameStart = start + 2) and
    name = this.getText().substring(nameStart, end - 2)
  )
}
/**
 * Matches named character properties. For example:
 * - `\p{Space}`
 * - `\P{Digit}` upper-case P means inverted
 * - `\p{^Word}` caret also means inverted
 *
 * These can occur both inside and outside of character classes.
 *
 * `start` is the offset of the backslash and `end` is one past the first `}`.
 * `name` is the text inside the braces, without a leading `^`.
 */
private predicate pStyleNamedCharacterProperty(int start, int end, string name) {
  this.escapingChar(start) and
  this.getChar(start + 1) = ["p", "P"] and
  this.getChar(start + 2) = "{" and
  end > start and
  this.getChar(end - 1) = "}" and
  // the closing brace is the first `}` after the opening brace
  not exists(int i | i > start + 2 and i < end - 1 and this.getChar(i) = "}") and
  exists(int nameStart |
    // skip the inverting caret, if present
    (if this.getChar(start + 3) = "^" then nameStart = start + 4 else nameStart = start + 3) and
    name = this.getText().substring(nameStart, end - 1)
  )
}
/**
 * Holds if the named character property is inverted. Examples for which it holds:
 * - `\P{Digit}` upper-case P means inverted
 * - `\p{^Word}` caret also means inverted
 * - `[[:^digit:]]`
 *
 * Examples for which it doesn't hold:
 * - `\P{^Space}` - upper-case P and caret cancel each other out
 * - `\p{Word}`
 * - `[[:alnum:]]`
 */
predicate namedCharacterPropertyIsInverted(int start, int end) {
  this.pStyleNamedCharacterProperty(start, end, _) and
  exists(boolean upperP, boolean caret |
    (if this.getChar(start + 1) = "P" then upperP = true else upperP = false) and
    // in `\p{^Word}` the caret sits right after `\p{`, i.e. at `start + 3`
    (if this.getChar(start + 3) = "^" then caret = true else caret = false)
  |
    upperP.booleanXor(caret) = true
  )
  or
  this.posixStyleNamedCharacterProperty(start, end, _) and
  // For the POSIX style, `start` is the offset of the `[` in `[:^digit:]`, so
  // the caret sits right after `[:`, i.e. at `start + 2`. This is the same
  // offset that `posixStyleNamedCharacterProperty` checks when it skips the caret.
  this.getChar(start + 2) = "^"
}
/**
 * Holds if `start` to `end` is an escape sequence that stands for a character.
 *
 * Backreferences and `\p{...}` properties are excluded. `\x` escapes span four
 * characters, `\u` escapes span six, and any other escape whose second
 * character does not parse as an integer spans two.
 */
predicate escapedCharacter(int start, int end) {
  this.escapingChar(start) and
  not this.pStyleNamedCharacterProperty(start, _, _) and
  not this.namedBackreference(start, _, _) and
  not this.numberedBackreference(start, _, _) and
  (
    // escape not handled below; update when adding a new case
    not this.getChar(start + 1) in ["x", "u"] and
    not exists(this.getChar(start + 1).toInt()) and
    end = start + 2
    or
    // wide hex char \uhhhh
    this.getChar(start + 1) = "u" and end = start + 6
    or
    // hex char \xhh
    this.getChar(start + 1) = "x" and end = start + 4
  )
}
/** Holds if `index` lies strictly between the opening and closing bracket of some character set. */
predicate inCharSet(int index) {
  exists(int x, int y | this.charSet(x, y) | index > x and index < y - 1)
}
/** Holds if `index` lies strictly between the opening and closing bracket of some POSIX bracket expression. */
predicate inPosixBracket(int index) {
  exists(int x, int y | this.posixStyleNamedCharacterProperty(x, y, _) |
    index > x and index < y - 1
  )
}
/**
 * 'Simple' characters are any that don't alter the parsing of the regex.
 *
 * Holds if `start` to `end` is such a single character.
 */
private predicate simpleCharacter(int start, int end) {
end = start + 1 and
// neither the opening bracket of a char set nor its closing bracket
not this.charSet(start, _) and
not this.charSet(_, start + 1) and
// not inside a POSIX bracket expression
not exists(int x, int y |
this.posixStyleNamedCharacterProperty(x, y, _) and
start >= x and
end <= y
) and
exists(string c | c = this.getChar(start) |
// inside the char set spanning `x` to `z`, whose contents begin at `y`
exists(int x, int y, int z |
this.charSet(x, z) and
this.charSetStart(x, y)
|
// the first character of the contents
start = y
or
// the last character before the closing bracket
start = z - 2
or
// any character in between that is not the connector between the two bounds of a range
start > y and start < z - 2 and not this.charRange(_, _, start, end, _)
)
or
// outside char sets: anything but group, bracket and alternation characters and quantifiers
not this.inCharSet(start) and
not c = "(" and
not c = "[" and
not c = ")" and
not c = "|" and
not this.qualifier(start, _, _, _)
)
}
/**
 * Holds if `start` to `end` is a character: either an escaped character, or a
 * simple character that is not part of an escaped character.
 *
 * Spans that lie within a group start, a backreference, or a `\p{...}`
 * property are excluded.
 */
predicate character(int start, int end) {
  not exists(int x, int y | this.groupStart(x, y) | x <= start and end <= y) and
  not exists(int x, int y | this.backreference(x, y) | x <= start and end <= y) and
  not exists(int x, int y | this.pStyleNamedCharacterProperty(x, y, _) |
    x <= start and end <= y
  ) and
  (
    this.escapedCharacter(start, end)
    or
    this.simpleCharacter(start, end) and
    not exists(int x, int y | this.escapedCharacter(x, y) | x <= start and end <= y)
  )
}
/** Holds if `start` to `end` is a character that is not a special character. */
predicate normalCharacter(int start, int end) {
  not this.specialCharacter(start, end, _) and
  this.character(start, end)
}
/**
 * Holds if `start` to `end` is a special character outside any character set,
 * and `char` is its text: one of `$`, `^`, `.`, `\A`, `\Z` or `\z`.
 */
predicate specialCharacter(int start, int end, string char) {
  not this.inCharSet(start) and
  this.character(start, end) and
  (
    // two-character anchors
    this.escapingChar(start) and
    end = start + 2 and
    char = this.getText().substring(start, end) and
    char = ["\\A", "\\Z", "\\z"]
    or
    // single-character specials
    end = start + 1 and
    char = this.getChar(start) and
    char = ["$", "^", "."]
  )
}
/** Holds if the text in the range `start,end` is a group, with or without contents. */
predicate group(int start, int end) {
  this.emptyGroup(start, end)
  or
  this.groupContents(start, end, _, _)
}
/**
 * Gets the number of the group in `start,end`: one more than the number of
 * groups that start before `start` and do not begin with `(?:`.
 */
int getGroupNumber(int start, int end) {
  this.group(start, end) and
  result =
    1 + count(int i | this.group(i, _) and i < start and not this.nonCapturingGroupStart(i, _))
}
/**
 * Gets the name, if it has one, of the group in `start,end`.
 *
 * Both `(?<name>...)` and `(?'name'...)` are supported.
 */
string getGroupName(int start, int end) {
  this.group(start, end) and
  exists(int nameEnd |
    this.namedGroupStart(start, nameEnd) and
    // The opening is `(?<` or `(?'`, which is three characters, so the name
    // begins at `start + 3`. `nameEnd` is one past the closing `>` or `'`, so
    // the name ends just before `nameEnd - 1`.
    result = this.getText().substring(start + 3, nameEnd - 1)
  )
}
/**
 * Holds if the text in the range `start,end` is a group that is empty or is a
 * lookahead or lookbehind assertion (positive or negative).
 */
predicate zeroWidthMatch(int start, int end) {
  this.positiveLookbehindAssertionGroup(start, end)
  or
  this.positiveLookaheadAssertionGroup(start, end)
  or
  this.negativeAssertionGroup(start, end)
  or
  this.emptyGroup(start, end)
}
/** Holds if `start,end` is a group whose opening is immediately followed by the closing `)`. */
predicate emptyGroup(int start, int end) {
  exists(int closeParen |
    this.groupStart(start, closeParen) and
    this.isGroupEnd(closeParen) and
    end = closeParen + 1
  )
}
/**
 * Holds if `start,end` is an empty group, a negative assertion, or a positive
 * lookahead assertion. `firstPart` uses this to step past such groups.
 */
private predicate emptyMatchAtStartGroup(int start, int end) {
  this.positiveLookaheadAssertionGroup(start, end)
  or
  this.negativeAssertionGroup(start, end)
  or
  this.emptyGroup(start, end)
}
/**
 * Holds if `start,end` is an empty group, a negative assertion, or a positive
 * lookbehind assertion. `lastPart` uses this to step past such groups.
 */
private predicate emptyMatchAtEndGroup(int start, int end) {
  this.positiveLookbehindAssertionGroup(start, end)
  or
  this.negativeAssertionGroup(start, end)
  or
  this.emptyGroup(start, end)
}
/** Holds if `start,end` is a negative lookahead or negative lookbehind assertion group. */
private predicate negativeAssertionGroup(int start, int end) {
  exists(int inStart |
    this.groupContents(start, end, inStart, _) and
    (
      this.negativeLookbehindAssertionStart(start, inStart)
      or
      this.negativeLookaheadAssertionStart(start, inStart)
    )
  )
}
/** Holds if `start,end` is a negative lookahead assertion group, i.e. `(?!...)`. */
predicate negativeLookaheadAssertionGroup(int start, int end) {
  exists(int inStart |
    this.groupContents(start, end, inStart, _) and
    this.negativeLookaheadAssertionStart(start, inStart)
  )
}
/** Holds if `start,end` is a negative lookbehind assertion group, i.e. `(?<!...)`. */
predicate negativeLookbehindAssertionGroup(int start, int end) {
  exists(int inStart |
    this.groupContents(start, end, inStart, _) and
    this.negativeLookbehindAssertionStart(start, inStart)
  )
}
/** Holds if `start,end` is a positive lookahead assertion group, i.e. `(?=...)`. */
predicate positiveLookaheadAssertionGroup(int start, int end) {
  exists(int inStart |
    this.groupContents(start, end, inStart, _) and
    this.lookaheadAssertionStart(start, inStart)
  )
}
/** Holds if `start,end` is a positive lookbehind assertion group, i.e. `(?<=...)`. */
predicate positiveLookbehindAssertionGroup(int start, int end) {
  exists(int inStart |
    this.groupContents(start, end, inStart, _) and
    this.lookbehindAssertionStart(start, inStart)
  )
}
/**
 * Holds if `start` to `end` is the opening of a group of any kind: simple,
 * non-capturing, named, comment, or one of the four lookaround assertions.
 */
private predicate groupStart(int start, int end) {
  this.simpleGroupStart(start, end)
  or
  this.nonCapturingGroupStart(start, end)
  or
  this.namedGroupStart(start, end)
  or
  this.commentGroupStart(start, end)
  or
  this.lookaheadAssertionStart(start, end)
  or
  this.negativeLookaheadAssertionStart(start, end)
  or
  this.lookbehindAssertionStart(start, end)
  or
  this.negativeLookbehindAssertionStart(start, end)
}
/** Holds if `start` to `end` is the three-character opening `(?:` of a non-capturing group. */
private predicate nonCapturingGroupStart(int start, int end) {
  end = start + 3 and
  this.isGroupStart(start) and
  this.getChar(start + 1) = "?" and
  this.getChar(start + 2) = ":"
}
/**
 * Holds if `start` to `end` is the opening `(` of a simple group such as
 * `(a+)`, i.e. a `(` followed by a character other than `?`.
 */
private predicate simpleGroupStart(int start, int end) {
  end = start + 1 and
  this.isGroupStart(start) and
  this.getChar(start + 1) != "?"
}
/**
 * Matches the start of a named group, such as:
 * - `(?<name>\w+)`
 * - `(?'name'\w+)`
 *
 * `end` is one past the delimiter (`>` or `'`) that closes the name.
 */
private predicate namedGroupStart(int start, int end) {
  this.isGroupStart(start) and
  this.getChar(start + 1) = "?" and
  exists(int closingDelim | end = closingDelim + 1 |
    // angle-bracket form; `(?<=foo)` and `(?<!foo)` are lookbehind assertions, not names
    this.getChar(start + 2) = "<" and
    not this.getChar(start + 3) = ["=", "!"] and
    closingDelim = min(int i | i > start + 3 and this.getChar(i) = ">")
    or
    // single-quote form
    this.getChar(start + 2) = "'" and
    closingDelim = min(int i | i > start + 2 and this.getChar(i) = "'")
  )
}
/** Holds if `start` to `end` is the three-character opening `(?=` of a positive lookahead assertion. */
private predicate lookaheadAssertionStart(int start, int end) {
  end = start + 3 and
  this.isGroupStart(start) and
  this.getChar(start + 1) = "?" and
  this.getChar(start + 2) = "="
}
/** Holds if `start` to `end` is the three-character opening `(?!` of a negative lookahead assertion. */
private predicate negativeLookaheadAssertionStart(int start, int end) {
  end = start + 3 and
  this.isGroupStart(start) and
  this.getChar(start + 1) = "?" and
  this.getChar(start + 2) = "!"
}
/** Holds if `start` to `end` is the four-character opening `(?<=` of a positive lookbehind assertion. */
private predicate lookbehindAssertionStart(int start, int end) {
  end = start + 4 and
  this.isGroupStart(start) and
  this.getChar(start + 1) = "?" and
  this.getChar(start + 2) = "<" and
  this.getChar(start + 3) = "="
}
/** Holds if `start` to `end` is the four-character opening `(?<!` of a negative lookbehind assertion. */
private predicate negativeLookbehindAssertionStart(int start, int end) {
  end = start + 4 and
  this.isGroupStart(start) and
  this.getChar(start + 1) = "?" and
  this.getChar(start + 2) = "<" and
  this.getChar(start + 3) = "!"
}
/** Holds if `start` to `end` is the three-character opening `(?#` of a comment group. */
private predicate commentGroupStart(int start, int end) {
  end = start + 3 and
  this.isGroupStart(start) and
  this.getChar(start + 1) = "?" and
  this.getChar(start + 2) = "#"
}
/**
 * Holds if `start,end` is a group whose contents span `inStart` to `inEnd`.
 *
 * The contents are a top-level span that begins right after the group opening
 * and ends at the closing `)`, which sits at offset `inEnd`.
 */
predicate groupContents(int start, int end, int inStart, int inEnd) {
  this.groupStart(start, inStart) and
  this.topLevel(inStart, inEnd) and
  this.isGroupEnd(inEnd) and
  end = inEnd + 1
}
/**
 * Holds if `start` to `end` is a named backreference, e.g. `\k<foo>`, and
 * `name` is the text between the angle brackets.
 */
predicate namedBackreference(int start, int end, string name) {
  this.escapingChar(start) and
  this.getChar(start + 1) = "k" and
  this.getChar(start + 2) = "<" and
  exists(int closeAngle |
    closeAngle = min(int i | i > start + 3 and this.getChar(i) = ">") and
    name = this.getText().substring(start + 3, closeAngle) and
    end = closeAngle + 1
  )
}
/**
 * Holds if `start` to `end` is a numbered backreference, e.g. `\1`, and
 * `value` is the number it refers to.
 *
 * The reference has one or two digits, does not start with `0`, and is taken
 * as long as possible: the span is rejected if extending it by one more
 * character still parses as an integer.
 */
predicate numberedBackreference(int start, int end, int value) {
  this.escapingChar(start) and
  not this.getChar(start + 1) = "0" and
  exists(string text, int len |
    text = this.getText() and
    len = [2, 3] and
    end = start + len and
    value = text.substring(start + 1, end).toInt() and
    value > 0 and
    not exists(text.substring(start + 1, end + 1).toInt())
  )
}
/** Holds if the text in the range `start,end` is a back reference, either named or numbered. */
predicate backreference(int start, int end) {
  this.namedBackreference(start, end, _)
  or
  this.numberedBackreference(start, end, _)
}
/** Gets the number of the numbered back reference in `start,end`. */
int getBackRefNumber(int start, int end) { this.numberedBackreference(start, end, result) }
/** Gets the name of the named back reference in `start,end`; there is no result for numbered back references. */
string getBackRefName(int start, int end) { this.namedBackreference(start, end, result) }
/**
 * Holds if `start` to `end` is an unquantified item: a group, a character set,
 * a backreference, a `\p{...}` property, or a character that does not lie
 * within a character set.
 */
private predicate baseItem(int start, int end) {
  this.group(start, end)
  or
  this.charSet(start, end)
  or
  this.backreference(start, end)
  or
  this.pStyleNamedCharacterProperty(start, end, _)
  or
  this.character(start, end) and
  not exists(int x, int y | this.charSet(x, y) | x <= start and end <= y)
}
/**
 * Holds if `start` to `end` is a quantifier, optionally followed by a `?`
 * suffix that is included in the span.
 *
 * `maybeEmpty` and `mayRepeatForever` are those of the underlying short
 * quantifier.
 */
private predicate qualifier(int start, int end, boolean maybeEmpty, boolean mayRepeatForever) {
  exists(int shortEnd | this.shortQualifier(start, shortEnd, maybeEmpty, mayRepeatForever) |
    // Include a trailing `?` if there is one. When there is none, `end` is
    // simply the end of the short quantifier.
    if this.getChar(shortEnd) = "?" then end = shortEnd + 1 else end = shortEnd
  )
}
/**
 * Holds if `start` to `end` is a quantifier without any `?` suffix: one of
 * `+`, `*`, `?`, or a braced repetition matched by `multiples`.
 *
 * `maybeEmpty` is `true` if zero repetitions are allowed. `mayRepeatForever`
 * is `true` if there is no upper bound.
 */
private predicate shortQualifier(int start, int end, boolean maybeEmpty, boolean mayRepeatForever) {
  // braced repetition
  exists(string lower, string upper | this.multiples(start, end, lower, upper) |
    (if upper = "" then mayRepeatForever = true else mayRepeatForever = false) and
    if lower = "" or lower.toInt() = 0 then maybeEmpty = true else maybeEmpty = false
  )
  or
  // single-character quantifier
  end = start + 1 and
  exists(string c | c = this.getChar(start) |
    c = "?" and maybeEmpty = true and mayRepeatForever = false
    or
    c = "*" and maybeEmpty = true and mayRepeatForever = true
    or
    c = "+" and maybeEmpty = false and mayRepeatForever = true
  )
}
/**
 * Holds if `start` to `end` is a braced repetition such as `{3}`, `{2,5}`,
 * `{2,}` or `{,5}`, with `lower` and `upper` being the texts of its bounds.
 *
 * For the single-number form both bounds are that number. For the comma form
 * a missing bound is the empty string.
 */
predicate multiples(int start, int end, string lower, string upper) {
  exists(string braces, string body |
    end = start + braces.length() and
    // strip the surrounding `{` and `}`
    body = braces.substring(1, braces.length() - 1)
  |
    braces = this.getText().regexpFind("\\{[0-9]*,[0-9]*\\}", _, start) and
    exists(int comma | comma = body.indexOf(",") |
      lower = body.prefix(comma) and
      upper = body.suffix(comma + 1)
    )
    or
    braces = this.getText().regexpFind("\\{[0-9]+\\}", _, start) and
    lower = body and
    upper = body
  )
}
/**
 * Whether the text in the range start,end is a qualified item, where item is a character,
 * a character set or a group.
 *
 * `maybeEmpty` and `mayRepeatForever` are those of the quantifier, as reported
 * by `qualifiedPart`.
 */
predicate qualifiedItem(int start, int end, boolean maybeEmpty, boolean mayRepeatForever) {
this.qualifiedPart(start, _, end, maybeEmpty, mayRepeatForever)
}
/**
 * Holds if `start` to `end` is a base item spanning `start` to `partEnd`
 * followed by a quantifier spanning `partEnd` to `end`.
 */
predicate qualifiedPart(
  int start, int partEnd, int end, boolean maybeEmpty, boolean mayRepeatForever
) {
  this.qualifier(partEnd, end, maybeEmpty, mayRepeatForever) and
  this.baseItem(start, partEnd)
}
/**
 * Holds if `start` to `end` is an item: either a quantified item, or a base
 * item that is not followed by a quantifier.
 */
predicate item(int start, int end) {
  this.baseItem(start, end) and not this.qualifier(end, _, _, _)
  or
  this.qualifiedItem(start, end, _, _)
}
/**
 * Holds if `start` to `end` is a run of one or more consecutive items that
 * begins at the start of the regex, right after a group opening, or right
 * after a `|`.
 */
private predicate subsequence(int start, int end) {
  // extend a shorter run by one item
  exists(int mid |
    this.subsequence(start, mid) and
    this.item(mid, end)
  )
  or
  // a single item at a position where a sequence may begin
  this.item(start, end) and
  (
    start = 0
    or
    this.isOptionDivider(start - 1)
    or
    this.groupStart(_, start)
  )
}
/**
 * Whether the text in the range start,end is a sequence of 1 or more items, where an item is a character,
 * a character set or a group.
 *
 * A span that is exactly one quantified item is not reported as a sequence.
 */
predicate sequence(int start, int end) {
  not this.qualifiedItem(start, end, _, _) and
  this.sequenceOrQualified(start, end)
}
/** Holds if `start` to `end` is a run of items that cannot be extended, i.e. no item starts at `end`. */
private predicate sequenceOrQualified(int start, int end) {
  not this.itemStart(end) and
  this.subsequence(start, end)
}
/**
 * Holds if something that can be an item begins at `start`: a character, a
 * group, a character set, a backreference, or a named character property.
 */
private predicate itemStart(int start) {
  this.isGroupStart(start)
  or
  this.charSet(start, _)
  or
  this.character(start, _)
  or
  this.backreference(start, _)
  or
  this.namedCharacterProperty(start, _, _)
}
/**
 * Holds if something that can be an item ends at `end`: a character, a group
 * (one past its `)`), a character set, or a quantifier.
 */
private predicate itemEnd(int end) {
  this.isGroupEnd(end - 1)
  or
  this.charSet(_, end)
  or
  this.character(_, end)
  or
  this.qualifier(_, end, _, _)
}
/**
 * Holds if `start` to `end` is a complete alternation-level span: a
 * subalternation that is not followed by a further `|`.
 */
private predicate topLevel(int start, int end) {
  not this.isOptionDivider(end) and
  this.subalternation(start, end, _)
}
/**
 * Holds if `start` to `end` is a prefix of an alternation made up of one or
 * more complete options, and `itemStart` is the start of the last of those options.
 */
private predicate subalternation(int start, int end, int itemStart) {
// a single, non-empty first option that is not preceded by a `|`
this.sequenceOrQualified(start, end) and
not this.isOptionDivider(start - 1) and
itemStart = start
or
// an empty first option: a `|` at `start`, with nothing that ends an item at `start`
start = end and
not this.itemEnd(start) and
this.isOptionDivider(end) and
itemStart = start
or
// extend a shorter subalternation that ends at a `|` (offset `mid`) by one more option
exists(int mid |
this.subalternation(start, mid, _) and
this.isOptionDivider(mid) and
itemStart = mid + 1
|
// the new option is a non-empty sequence
this.sequenceOrQualified(itemStart, end)
or
// the new option is empty
not this.itemStart(end) and end = itemStart
)
}
/**
 * Whether the text in the range start,end is an alternation, i.e. a top-level
 * span that has a strictly shorter subalternation starting at the same offset.
 */
predicate alternation(int start, int end) {
  this.topLevel(start, end) and
  exists(int less | less < end | this.subalternation(start, less, _))
}
/**
 * Whether the text in the range start,end is an alternation and the text in partStart, partEnd is one of the
 * options in that alternation.
 */
predicate alternationOption(int start, int end, int partStart, int partEnd) {
  this.subalternation(start, partEnd, partStart) and
  this.alternation(start, end)
}
/** A part of the regex that may match the start of the string. */
private predicate firstPart(int start, int end) {
// the whole regex
start = 0 and end = this.getText().length()
or
// skip over a leading piece (from `x` to `start`) that can match without
// consuming anything at the start of the string
exists(int x | this.firstPart(x, end) |
this.emptyMatchAtStartGroup(x, start)
or
this.qualifiedItem(x, start, true, _)
or
// ^ and \A match the start of the string
this.specialCharacter(x, start, ["^", "\\A"])
)
or
// the first item of a first part, or the base item of a quantified first part
exists(int y | this.firstPart(start, y) |
this.item(start, end)
or
this.qualifiedPart(start, end, y, _, _)
)
or
// descend into the contents of a group or the options of an alternation
exists(int x, int y | this.firstPart(x, y) |
this.groupContents(x, y, start, end)
or
this.alternationOption(x, y, start, end)
)
}
/** A part of the regex that may match the end of the string. */
private predicate lastPart(int start, int end) {
// the whole regex
start = 0 and end = this.getText().length()
or
// skip over a trailing piece (from `end` to `y`) that can match without
// consuming anything at the end of the string
exists(int y | this.lastPart(start, y) |
this.emptyMatchAtEndGroup(end, y)
or
this.qualifiedItem(end, y, true, _)
or
// $, \Z, and \z match the end of the string.
this.specialCharacter(end, y, ["$", "\\Z", "\\z"])
)
or
// the last item of a last part
exists(int x |
this.lastPart(x, end) and
this.item(start, end)
)
or
// the base item of a quantified last part
exists(int y | this.lastPart(start, y) | this.qualifiedPart(start, end, y, _, _))
or
// descend into the contents of a group or the options of an alternation
exists(int x, int y | this.lastPart(x, y) |
this.groupContents(x, y, start, end)
or
this.alternationOption(x, y, start, end)
)
}
/**
 * Whether the item at [start, end) is one of the first items
 * to be matched. Only characters, quantified items and character sets are
 * considered.
 */
predicate firstItem(int start, int end) {
  this.firstPart(start, end) and
  (
    this.charSet(start, end)
    or
    this.qualifiedItem(start, end, _, _)
    or
    this.character(start, end)
  )
}
/**
 * Whether the item at [start, end) is one of the last items
 * to be matched. Only characters, quantified items and character sets are
 * considered.
 */
predicate lastItem(int start, int end) {
  this.lastPart(start, end) and
  (
    this.charSet(start, end)
    or
    this.qualifiedItem(start, end, _, _)
    or
    this.character(start, end)
  )
}
}

View File

@@ -0,0 +1,131 @@
/**
* Provides default sources, sinks and sanitizers for reasoning about
* polynomial regular expression denial-of-service attacks, as well
* as extension points for adding your own.
*/
private import codeql.ruby.AST as AST
private import codeql.ruby.CFG
private import codeql.ruby.DataFlow
private import codeql.ruby.dataflow.RemoteFlowSources
private import codeql.ruby.regexp.ParseRegExp as RegExp
private import codeql.ruby.regexp.RegExpTreeView
private import codeql.ruby.regexp.SuperlinearBackTracking
module PolynomialReDoS {
/**
 * A data flow source node for polynomial regular expression denial-of-service vulnerabilities.
 *
 * This class is abstract: extend it to contribute additional sources.
 */
abstract class Source extends DataFlow::Node { }
/**
 * A data flow sink node for polynomial regular expression denial-of-service vulnerabilities.
 *
 * This class is abstract: extend it to contribute additional sinks.
 */
abstract class Sink extends DataFlow::Node {
  /** Gets the regex that is being executed by this node. */
  abstract RegExpTerm getRegExp();

  /** Gets the node to highlight in the alert message; by default, this node itself. */
  DataFlow::Node getHighlight() { this = result }
}
/**
 * A sanitizer for polynomial regular expression denial-of-service vulnerabilities.
 *
 * This class is abstract: extend it to contribute additional sanitizers.
 */
abstract class Sanitizer extends DataFlow::Node { }
/**
 * A sanitizer guard for polynomial regular expression denial of service
 * vulnerabilities.
 *
 * This class is abstract: extend it to contribute additional guards.
 */
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
 * A source of remote user input, considered as a flow source.
 *
 * Every `RemoteFlowSource` is thereby a `Source` for this analysis.
 */
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/**
 * Gets the AST of a regular expression object that can flow to `node`.
 *
 * The regular expression is a regexp literal that is a local source flowing
 * to `node`; the result is its parsed form.
 */
RegExpTerm getRegExpObjectFromNode(DataFlow::Node node) {
  exists(DataFlow::LocalSourceNode regexp, CfgNodes::ExprNodes::RegExpLiteralCfgNode literal |
    literal = regexp.asExpr() and
    regexp.flowsTo(node) and
    result = literal.getExpr().getParsed()
  )
}
/**
 * A regexp match against a superlinear backtracking term, seen as a sink for
 * polynomial regular expression denial-of-service vulnerabilities.
 *
 * The sink is the string being matched. The match expression itself is
 * reported by `getHighlight`.
 */
class PolynomialBackTrackingTermMatch extends Sink {
  PolynomialBackTrackingTerm term;
  DataFlow::ExprNode matchNode;

  PolynomialBackTrackingTermMatch() {
    exists(DataFlow::Node regexp | term.getRootTerm() = getRegExpObjectFromNode(regexp) |
      // A call to `match` or `match?` where the regexp is the receiver.
      exists(CfgNodes::ExprNodes::MethodCallCfgNode call |
        call = matchNode.asExpr() and
        call.getExpr().getMethodName() = ["match", "match?"] and
        call.getReceiver() = regexp.asExpr() and
        call.getArgument(0) = this.asExpr()
      )
      or
      // Any of the methods on `String` that take a regexp.
      exists(CfgNodes::ExprNodes::MethodCallCfgNode call |
        call = matchNode.asExpr() and
        call.getExpr().getMethodName() =
          [
            "[]", "gsub", "gsub!", "index", "match", "match?", "partition", "rindex",
            "rpartition", "scan", "slice!", "split", "sub", "sub!"
          ] and
        call.getReceiver() = this.asExpr() and
        call.getArgument(0) = regexp.asExpr()
      )
      or
      // `=~` or `!~`, with the string and the regexp in either operand position
      exists(CfgNodes::ExprNodes::BinaryOperationCfgNode op |
        op = matchNode.asExpr() and
        (
          op.getExpr() instanceof AST::NoRegExpMatchExpr or
          op.getExpr() instanceof AST::RegExpMatchExpr
        ) and
        (
          op.getRightOperand() = this.asExpr() and op.getLeftOperand() = regexp.asExpr()
          or
          op.getLeftOperand() = this.asExpr() and op.getRightOperand() = regexp.asExpr()
        )
      )
    )
  }

  override RegExpTerm getRegExp() { result = term }

  override DataFlow::Node getHighlight() { result = matchNode }
}
/**
 * A check on the length of a string, seen as a sanitizer guard.
 *
 * This is a relational comparison in which one operand receives the value of
 * a call to `length`; the receiver of that call is the guarded input.
 */
class LengthGuard extends SanitizerGuard, CfgNodes::ExprNodes::RelationalOperationCfgNode {
// the receiver of the `length` call
private DataFlow::Node input;
LengthGuard() {
exists(DataFlow::CallNode length, DataFlow::ExprNode operand |
length.asExpr().getExpr().(AST::MethodCall).getMethodName() = "length" and
length.getReceiver() = input and
length.flowsTo(operand) and
operand.getExprNode() = this.getAnOperand()
)
}
// NOTE(review): the input is treated as checked on the `true` branch only,
// regardless of the comparison operator or of which operand holds the length.
// A guard such as `x.length > 1000` therefore also counts - confirm this is intended.
override predicate checks(CfgNode node, boolean branch) {
node = input.asExpr() and branch = true
}
}
}

View File

@@ -0,0 +1,37 @@
/**
* Provides a taint tracking configuration for reasoning about polynomial
* regular expression denial-of-service attacks.
*
* Note, for performance reasons: only import this file if `Configuration` is
* needed. Otherwise, `PolynomialReDoSCustomizations` should be imported
* instead.
*/
private import codeql.ruby.DataFlow
private import codeql.ruby.TaintTracking
/**
 * Provides a taint-tracking configuration for detecting polynomial regular
 * expression denial of service vulnerabilities.
 *
 * The sources, sinks and sanitizers from `PolynomialReDoSCustomizations` are
 * re-exported by this module.
 */
module PolynomialReDoS {
import PolynomialReDoSCustomizations::PolynomialReDoS
/**
 * A taint-tracking configuration for detecting polynomial regular expression
 * denial of service vulnerabilities.
 *
 * Each predicate below simply delegates to the corresponding extension-point
 * class: `Source`, `Sink`, `Sanitizer` and `SanitizerGuard`.
 */
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "PolynomialReDoS" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard node) {
node instanceof SanitizerGuard
}
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,724 @@
private import codeql.ruby.ast.Literal as AST
private import codeql.Locations
private import ParseRegExp
/**
 * An element containing a regular expression term, that is, either
 * a string literal (parsed as a regular expression)
 * or another regular expression term.
 */
class RegExpParent extends TRegExpParent {
  /** Gets a textual representation of this element. */
  string toString() { result = "RegExpParent" }

  /** Gets the `i`th child term of this element. This base implementation has no children. */
  RegExpTerm getChild(int i) { none() }

  /** Gets any child term of this element. */
  RegExpTerm getAChild() { result = this.getChild(_) }

  /** Gets the number of distinct child terms of this element. */
  int getNumChild() { result = count(RegExpTerm child | child = this.getAChild()) }

  /**
   * Gets the name of a primary CodeQL class to which this regular
   * expression term belongs.
   */
  string getAPrimaryQlClass() { result = "RegExpParent" }

  /**
   * Gets a comma-separated list of the names of the primary CodeQL classes to
   * which this regular expression term belongs.
   */
  final string getPrimaryQlClasses() {
    result = concat(string cls | cls = this.getAPrimaryQlClass() | cls, ",")
  }
}
/**
 * The tree-view element wrapping a whole regular expression `re`. Its only
 * child is the root term of that regular expression.
 */
class RegExpLiteral extends TRegExpLiteral, RegExpParent {
  RegExp re;

  RegExpLiteral() { this = TRegExpLiteral(re) }

  /** Gets the root term of the wrapped regular expression, at index 0. */
  override RegExpTerm getChild(int i) {
    result.isRootTerm() and
    re = result.getRegExp() and
    i = 0
  }

  /** Holds if the underlying regular expression has the multiline flag. */
  predicate isDotAll() { re.hasMultilineFlag() }

  override string getAPrimaryQlClass() { result = "RegExpLiteral" }
}
/**
* A regular expression term: a node of the tree view identified by the regular
* expression `re` it belongs to and the offsets `start` and `end` into the text
* of `re`.
*/
class RegExpTerm extends RegExpParent {
RegExp re;
int start;
int end;
// A term is any `TRegExpParent` branch that carries `(re, start, end)`,
// i.e. every branch except `TRegExpLiteral`.
RegExpTerm() {
this = TRegExpAlt(re, start, end)
or
this = TRegExpBackRef(re, start, end)
or
this = TRegExpCharacterClass(re, start, end)
or
this = TRegExpCharacterRange(re, start, end)
or
this = TRegExpNormalChar(re, start, end)
or
this = TRegExpGroup(re, start, end)
or
this = TRegExpQuantifier(re, start, end)
or
this = TRegExpSequence(re, start, end) and
exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
or
this = TRegExpSpecialChar(re, start, end)
or
this = TRegExpNamedCharacterProperty(re, start, end)
}
/**
* Gets the root term of the tree containing this term. This is the term itself
* if it is a root term, and otherwise the root term of its parent term.
*/
RegExpTerm getRootTerm() {
this.isRootTerm() and result = this
or
result = getParent().(RegExpTerm).getRootTerm()
}
/** Holds if this term is used as a regular expression. This implementation always holds. */
predicate isUsedAsRegExp() { any() }
/** Holds if this term spans the entire text of its regular expression. */
predicate isRootTerm() { start = 0 and end = re.getText().length() }
/**
* Gets the `i`th child of this term, by delegating to the `getChild`
* implementation of whichever concrete term class this term belongs to.
*/
override RegExpTerm getChild(int i) {
result = this.(RegExpAlt).getChild(i)
or
result = this.(RegExpBackRef).getChild(i)
or
result = this.(RegExpCharacterClass).getChild(i)
or
result = this.(RegExpCharacterRange).getChild(i)
or
result = this.(RegExpNormalChar).getChild(i)
or
result = this.(RegExpGroup).getChild(i)
or
result = this.(RegExpQuantifier).getChild(i)
or
result = this.(RegExpSequence).getChild(i)
or
result = this.(RegExpSpecialChar).getChild(i)
or
result = this.(RegExpNamedCharacterProperty).getChild(i)
}
/** Gets an element that has this term as one of its children. */
RegExpParent getParent() { result.getAChild() = this }
/** Gets the regular expression this term belongs to. */
RegExp getRegExp() { result = re }
/** Gets the start offset of this term within the text of its regular expression. */
int getStart() { result = start }
/** Gets the end offset of this term within the text of its regular expression. */
int getEnd() { result = end }
/** Gets the text of this term, i.e. the substring of the regular expression text from `start` to `end`. */
override string toString() { result = re.getText().substring(start, end) }
override string getAPrimaryQlClass() { result = "RegExpTerm" }
/** Gets the location of the regular expression this term belongs to (not of the term alone). */
Location getLocation() { result = re.getLocation() }
/**
* Holds if this term is at the given location.
*
* The file path, start line and base column are taken from the first component
* of `re`; the end line is taken from the last component. Both `startcolumn`
* and `endcolumn` are computed by adding the term offsets to the start column
* of the first component.
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
// NOTE(review): `re_end` is bound by the last component's location but is not
// used in the column computation below.
exists(int re_start, int re_end |
re.getComponent(0).getLocation().hasLocationInfo(filepath, startline, re_start, _, _) and
re.getComponent(re.getNumberOfComponents() - 1)
.getLocation()
.hasLocationInfo(filepath, _, _, endline, re_end)
|
startcolumn = re_start + start and
endcolumn = re_start + end - 1
)
}
/** Gets the file containing this term. */
File getFile() { result = this.getLocation().getFile() }
/** Gets the raw text of this term; this is the same as `toString()`. */
string getRawValue() { result = this.toString() }
/** Gets the `RegExpLiteral` element wrapping the regular expression of this term. */
RegExpLiteral getLiteral() { result = TRegExpLiteral(re) }
/** Gets the regular expression term that is matched (textually) before this one, if any. */
RegExpTerm getPredecessor() {
exists(RegExpTerm parent | parent = getParent() |
result = parent.(RegExpSequence).previousElement(this)
or
// No previous sibling in a sequence: use the predecessor of the parent,
// unless the parent is a `RegExpSubPattern`.
not exists(parent.(RegExpSequence).previousElement(this)) and
not parent instanceof RegExpSubPattern and
result = parent.getPredecessor()
)
}
/** Gets the regular expression term that is matched (textually) after this one, if any. */
RegExpTerm getSuccessor() {
exists(RegExpTerm parent | parent = getParent() |
result = parent.(RegExpSequence).nextElement(this)
or
// No next sibling in a sequence: use the successor of the parent,
// unless the parent is a `RegExpSubPattern`.
not exists(parent.(RegExpSequence).nextElement(this)) and
not parent instanceof RegExpSubPattern and
result = parent.getSuccessor()
)
}
}
/**
* The algebraic datatype underlying the regular expression tree view.
*
* `TRegExpLiteral` wraps a whole regular expression. Every other branch is
* keyed by a regular expression and a `start`/`end` offset pair, and exists
* exactly when the corresponding parser predicate on `RegExp` holds for it.
*/
newtype TRegExpParent =
TRegExpLiteral(RegExp re) or
TRegExpQuantifier(RegExp re, int start, int end) { re.qualifiedItem(start, end, _, _) } or
TRegExpSequence(RegExp re, int start, int end) { re.sequence(start, end) } or
TRegExpAlt(RegExp re, int start, int end) { re.alternation(start, end) } or
TRegExpCharacterClass(RegExp re, int start, int end) { re.charSet(start, end) } or
TRegExpCharacterRange(RegExp re, int start, int end) { re.charRange(_, start, _, _, end) } or
TRegExpGroup(RegExp re, int start, int end) { re.group(start, end) } or
TRegExpSpecialChar(RegExp re, int start, int end) { re.specialCharacter(start, end, _) } or
TRegExpNormalChar(RegExp re, int start, int end) { re.normalCharacter(start, end) } or
TRegExpBackRef(RegExp re, int start, int end) { re.backreference(start, end) } or
TRegExpNamedCharacterProperty(RegExp re, int start, int end) {
re.namedCharacterProperty(start, end, _)
}
/**
 * A quantified term. The quantified part spans `start` to `part_end`, and the
 * quantifier text spans `part_end` to `end`.
 */
class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
  int part_end;
  boolean maybe_empty;
  boolean may_repeat_forever;

  RegExpQuantifier() {
    re.qualifiedPart(start, part_end, end, maybe_empty, may_repeat_forever) and
    this = TRegExpQuantifier(re, start, end)
  }

  /** Gets the quantified term, at index 0. */
  override RegExpTerm getChild(int i) {
    result.getEnd() = part_end and
    result.getStart() = start and
    re = result.getRegExp() and
    i = 0
  }

  /** Holds if the parser marks this quantifier as possibly repeating forever. */
  predicate mayRepeatForever() { true = may_repeat_forever }

  /** Gets the text of the quantifier itself, i.e. the text following the quantified part. */
  string getQualifier() {
    exists(string text | text = re.getText() | result = text.substring(part_end, end))
  }

  override string getAPrimaryQlClass() { result = "RegExpQuantifier" }
}
/** A quantifier for which `mayRepeatForever()` holds. */
class InfiniteRepetitionQuantifier extends RegExpQuantifier {
  InfiniteRepetitionQuantifier() { RegExpQuantifier.super.mayRepeatForever() }

  override string getAPrimaryQlClass() { result = "InfiniteRepetitionQuantifier" }
}
/** An infinitely repeating quantifier whose quantifier text starts with `*`. */
class RegExpStar extends InfiniteRepetitionQuantifier {
  RegExpStar() { "*" = this.getQualifier().charAt(0) }

  override string getAPrimaryQlClass() { result = "RegExpStar" }
}
/** An infinitely repeating quantifier whose quantifier text starts with `+`. */
class RegExpPlus extends InfiniteRepetitionQuantifier {
  RegExpPlus() { "+" = this.getQualifier().charAt(0) }

  override string getAPrimaryQlClass() { result = "RegExpPlus" }
}
/** A quantifier whose quantifier text starts with `?`. */
class RegExpOpt extends RegExpQuantifier {
  RegExpOpt() { "?" = this.getQualifier().charAt(0) }

  override string getAPrimaryQlClass() { result = "RegExpOpt" }
}
/**
 * A quantifier whose quantifier text is recognised by `re.multiples`, which
 * supplies the textual `lower` and `upper` bounds.
 */
class RegExpRange extends RegExpQuantifier {
  string upper;
  string lower;

  RegExpRange() { re.multiples(part_end, end, lower, upper) }

  /** Gets the text of the upper bound. */
  string getUpper() { upper = result }

  /** Gets the text of the lower bound. */
  string getLower() { lower = result }

  /**
   * Gets the upper bound of the range, if any.
   *
   * If there is no upper bound, any number of repetitions is allowed.
   * For a term of the form `r{lo}`, both the lower and the upper bound
   * are `lo`.
   */
  int getUpperBound() { exists(string text | text = this.getUpper() | result = text.toInt()) }

  /** Gets the lower bound of the range. */
  int getLowerBound() { exists(string text | text = this.getLower() | result = text.toInt()) }

  override string getAPrimaryQlClass() { result = "RegExpRange" }
}
/** A sequence of two or more terms matched one after another. */
class RegExpSequence extends RegExpTerm, TRegExpSequence {
  RegExpSequence() {
    // if a sequence does not have more than one element, it should be treated as that element instead.
    exists(seqChild(re, start, end, 1)) and
    this = TRegExpSequence(re, start, end)
  }

  /** Gets the `i`th element of this sequence. */
  override RegExpTerm getChild(int i) { seqChild(re, start, end, i) = result }

  /** Gets the element preceding `element` in this sequence. */
  RegExpTerm previousElement(RegExpTerm element) { this.nextElement(result) = element }

  /** Gets the element following `element` in this sequence. */
  RegExpTerm nextElement(RegExpTerm element) {
    exists(int idx |
      result = this.getChild(idx + 1) and
      element = this.getChild(idx)
    )
  }

  override string getAPrimaryQlClass() { result = "RegExpSequence" }
}
/** Gets the end offset of the `i`th child of the sequence `(re, start, end)`. */
pragma[nomagic]
private int seqChildEnd(RegExp re, int start, int end, int i) {
  exists(RegExpTerm child | child = seqChild(re, start, end, i) | result = child.getEnd())
}
/**
 * Gets the `i`th element of the sequence spanning `start` to `end` in `re`.
 *
 * The first element starts at `start`; every later element starts where the
 * previous one ends. Each element spans an `re.item`.
 *
 * This is moved out of `RegExpSequence` so we can use it in the charpred.
 */
private RegExpTerm seqChild(RegExp re, int start, int end, int i) {
  re.sequence(start, end) and
  result.getRegExp() = re and
  (
    i = 0 and
    result.getStart() = start and
    re.item(start, result.getEnd())
    or
    i > 0 and
    exists(int prevEnd | prevEnd = seqChildEnd(re, start, end, i - 1) |
      re.item(prevEnd, result.getEnd()) and
      result.getStart() = prevEnd
    )
  )
}
/** An alternation term, whose children are the individual alternatives. */
class RegExpAlt extends RegExpTerm, TRegExpAlt {
  RegExpAlt() { this = TRegExpAlt(re, start, end) }

  /**
   * Gets the `i`th alternative. The first alternative starts at `start`; each
   * later one starts one character after the end of the previous alternative.
   */
  override RegExpTerm getChild(int i) {
    result.getRegExp() = re and
    (
      i = 0 and
      result.getStart() = start and
      re.alternationOption(start, end, start, result.getEnd())
      or
      i > 0 and
      exists(int optionStart |
        // allow for the |
        optionStart = this.getChild(i - 1).getEnd() + 1
      |
        re.alternationOption(start, end, optionStart, result.getEnd()) and
        result.getStart() = optionStart
      )
    )
  }

  override string getAPrimaryQlClass() { result = "RegExpAlt" }
}
/**
 * An escaped character in a regular expression, that is, a normal character
 * for which `re.escapedCharacter(start, end)` holds. Examples: `\n`, `\w`,
 * `\u0061`.
 */
class RegExpEscape extends RegExpNormalChar {
  RegExpEscape() { re.escapedCharacter(start, end) }

  /**
   * Gets the value of this escape:
   * - for `\n`, `\r` and `\t`, the corresponding control character;
   * - for a unicode escape that can be decoded, the decoded character
   *   (for example `a` for `\u0061`);
   * - otherwise the text following the backslash (for example `w` for `\w`).
   *
   * TODO: Handle named escapes.
   */
  override string getValue() {
    // A decodable unicode escape is handled by the last disjunct only. Without
    // this guard `\u0061` would have the two values "u0061" and "a", because
    // `isIdentityEscape()` also holds for unicode escapes. If decoding yields no
    // result, the escape keeps its previous (identity) value.
    not (this.isUnicode() and exists(this.getUnicode())) and
    this.isIdentityEscape() and
    result = this.getUnescaped()
    or
    this.getUnescaped() = "n" and result = "\n"
    or
    this.getUnescaped() = "r" and result = "\r"
    or
    this.getUnescaped() = "t" and result = "\t"
    or
    this.isUnicode() and
    result = this.getUnicode()
  }

  /** Holds if the unescaped text of this escape is none of `n`, `r` and `t`. */
  predicate isIdentityEscape() { not this.getUnescaped() in ["n", "r", "t"] }

  /**
   * Gets the text for this escape. That is e.g. "\w".
   */
  private string getText() { result = re.getText().substring(start, end) }

  /**
   * Holds if this is a unicode escape, i.e. its text starts with `\u` or `\U`.
   */
  private predicate isUnicode() { this.getText().prefix(2) = ["\\u", "\\U"] }

  /**
   * Gets the unicode char for this escape.
   * E.g. for `\u0061` this returns "a".
   *
   * Only meaningful when `isUnicode()` holds; callers must check that first.
   */
  private string getUnicode() {
    exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
      result = codepoint.toUnicode()
    )
  }

  /**
   * Gets int value for the `index`th char in the hex number of the unicode escape.
   * E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
   */
  private int getHexValueFromUnicode(int index) {
    this.isUnicode() and
    exists(string hex, string char | hex = this.getText().suffix(2) |
      char = hex.charAt(index) and
      // weight the digit by its position, most significant digit first
      result = 16.pow(hex.length() - index - 1) * toHex(char)
    )
  }

  /** Gets the text of this escape without its first character (the backslash in e.g. "\w"). */
  string getUnescaped() { result = this.getText().suffix(1) }

  override string getAPrimaryQlClass() { result = "RegExpEscape" }
}
/**
 * Gets the numeric value of the single hexadecimal digit `hex`.
 * Both lower-case and upper-case letters are accepted.
 */
private int toHex(string hex) {
  // decimal digits "0" .. "9"
  result = [0 .. 9] and
  hex = result.toString()
  or
  // letters "a" .. "f" and "A" .. "F" map to 10 .. 15
  exists(int offset | offset = [0 .. 5] |
    result = 10 + offset and
    hex = ["abcdef", "ABCDEF"].charAt(offset)
  )
}
/**
 * A character class escape in a regular expression.
 * That is, an escaped character that denotes multiple characters.
 *
 * Examples:
 *
 * ```
 * \w
 * \S
 * ```
 *
 * The name of the character class (for example, `w` for `\w`) is available
 * through the inherited `getValue()`.
 */
class RegExpCharacterClassEscape extends RegExpEscape {
  RegExpCharacterClassEscape() { this.getValue() = ["d", "D", "s", "S", "w", "W", "h", "H"] }

  /** A character class escape has no children. */
  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { result = "RegExpCharacterClassEscape" }
}
/**
 * A character class.
 *
 * Examples:
 *
 * ```rb
 * /[a-fA-F0-9]/
 * /[^abc]/
 * ```
 */
class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
  RegExpCharacterClass() { this = TRegExpCharacterClass(re, start, end) }

  /** Holds if the character directly after the start of this class is `^`. */
  predicate isInverted() { "^" = re.getChar(start + 1) }

  /** Holds if this character class matches every character. */
  predicate isUniversalClass() {
    // [^]
    this.isInverted() and
    not exists(this.getAChild())
    or
    // [\w\W] and similar
    not this.isInverted() and
    exists(string name, string otherName |
      name = this.getAChild().(RegExpCharacterClassEscape).getValue() and
      otherName = this.getAChild().(RegExpCharacterClassEscape).getValue()
    |
      name.toLowerCase() = otherName.toLowerCase() and
      name != otherName
    )
  }

  /**
   * Gets the `i`th member of this character class. The first member starts
   * at the offset given by `re.charSetStart`; every later member starts where
   * the previous one ends.
   */
  override RegExpTerm getChild(int i) {
    result.getRegExp() = re and
    (
      i = 0 and
      exists(int firstStart | re.charSetStart(start, firstStart) |
        result.getStart() = firstStart and
        re.charSetChild(start, firstStart, result.getEnd())
      )
      or
      i > 0 and
      exists(int memberStart | memberStart = this.getChild(i - 1).getEnd() |
        re.charSetChild(start, memberStart, result.getEnd()) and
        result.getStart() = memberStart
      )
    )
  }

  override string getAPrimaryQlClass() { result = "RegExpCharacterClass" }
}
/**
 * A character range. The lower end spans `start` to `lower_end`, and the
 * upper end spans `upper_start` to `end`.
 */
class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
  int lower_end;
  int upper_start;

  RegExpCharacterRange() {
    re.charRange(_, start, lower_end, upper_start, end) and
    this = TRegExpCharacterRange(re, start, end)
  }

  /** Holds if `lo` is the text of the lower end of this range and `hi` the text of the upper end. */
  predicate isRange(string lo, string hi) {
    exists(string text | text = re.getText() |
      hi = text.substring(upper_start, end) and
      lo = text.substring(start, lower_end)
    )
  }

  /** Gets the lower end (index 0) or the upper end (index 1) of this range. */
  override RegExpTerm getChild(int i) {
    result.getRegExp() = re and
    (
      i = 0 and
      result.getStart() = start and
      result.getEnd() = lower_end
      or
      i = 1 and
      result.getStart() = upper_start and
      result.getEnd() = end
    )
  }

  override string getAPrimaryQlClass() { result = "RegExpCharacterRange" }
}
/** A normal character term, which has no children. */
class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar {
  RegExpNormalChar() { this = TRegExpNormalChar(re, start, end) }

  /** Holds always for this class. */
  predicate isCharacter() { any() }

  /** Gets the text of this term. */
  string getValue() { exists(string text | text = re.getText() | result = text.substring(start, end)) }

  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { result = "RegExpNormalChar" }
}
/**
 * A term with a constant value: either a special character inside a
 * character set, or a normal character that is neither a character class
 * escape nor part of the text of a quantifier.
 */
class RegExpConstant extends RegExpTerm {
  string value;

  RegExpConstant() {
    this = TRegExpSpecialChar(re, start, end) and
    re.inCharSet(start) and
    value = this.(RegExpSpecialChar).getChar()
    or
    this = TRegExpNormalChar(re, start, end) and
    not this instanceof RegExpCharacterClassEscape and
    // exclude chars in qualifiers
    // TODO: push this into regex library
    not exists(int quantStart, int quantEnd | re.qualifiedPart(_, quantStart, quantEnd, _, _) |
      end <= quantEnd and
      quantStart <= start
    ) and
    value = this.(RegExpNormalChar).getValue()
  }

  /** Holds always for this class. */
  predicate isCharacter() { any() }

  /** Gets the constant value of this term. */
  string getValue() { value = result }

  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { result = "RegExpConstant" }
}
/** A group term, whose single child is the contents of the group. */
class RegExpGroup extends RegExpTerm, TRegExpGroup {
  RegExpGroup() { this = TRegExpGroup(re, start, end) }

  /**
   * Gets the index of this capture group within the enclosing regular
   * expression literal.
   *
   * For example, in the regular expression `/((a?).)(?:b)/`, the
   * group `((a?).)` has index 1, the group `(a?)` nested inside it
   * has index 2, and the group `(?:b)` has no index, since it is
   * not a capture group.
   */
  int getNumber() { re.getGroupNumber(start, end) = result }

  /** Holds if this is a named capture group. */
  predicate isNamed() { exists(string name | name = this.getName()) }

  /** Gets the name of this capture group, if any. */
  string getName() { re.getGroupName(start, end) = result }

  /** Holds always for this class. */
  predicate isCharacter() { any() }

  /** Gets the text of this group. */
  string getValue() { exists(string text | text = re.getText() | result = text.substring(start, end)) }

  /** Gets the contents of this group, at index 0. */
  override RegExpTerm getChild(int i) {
    i = 0 and
    re.groupContents(start, end, result.getStart(), result.getEnd()) and
    re = result.getRegExp()
  }

  override string getAPrimaryQlClass() { result = "RegExpGroup" }
}
/** A special character term; `char` is the text reported by `re.specialCharacter`. */
class RegExpSpecialChar extends RegExpTerm, TRegExpSpecialChar {
  string char;

  RegExpSpecialChar() {
    re.specialCharacter(start, end, char) and
    this = TRegExpSpecialChar(re, start, end)
  }

  /** Holds always for this class. */
  predicate isCharacter() { any() }

  /** Gets the text of this special character. */
  string getChar() { char = result }

  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { result = "RegExpSpecialChar" }
}
/** The special character `.`. */
class RegExpDot extends RegExpSpecialChar {
  RegExpDot() { "." = this.getChar() }

  override string getAPrimaryQlClass() { result = "RegExpDot" }
}
/** One of the special characters `$`, `\Z` and `\z`. */
class RegExpDollar extends RegExpSpecialChar {
  RegExpDollar() { this.getChar() in ["$", "\\Z", "\\z"] }

  override string getAPrimaryQlClass() { result = "RegExpDollar" }
}
/** One of the special characters `^` and `\A`. */
class RegExpCaret extends RegExpSpecialChar {
  RegExpCaret() { this.getChar() in ["^", "\\A"] }

  override string getAPrimaryQlClass() { result = "RegExpCaret" }
}
/** A group for which `re.zeroWidthMatch(start, end)` holds. It exposes no children. */
class RegExpZeroWidthMatch extends RegExpGroup {
  RegExpZeroWidthMatch() { re.zeroWidthMatch(start, end) }

  /** Holds always for this class. */
  override predicate isCharacter() { any() }

  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { result = "RegExpZeroWidthMatch" }
}
/**
 * A zero-width lookahead or lookbehind assertion.
 *
 * Examples:
 *
 * ```
 * (?=\w)
 * (?!\n)
 * (?<=\.)
 * (?<!\\)
 * ```
 */
class RegExpSubPattern extends RegExpZeroWidthMatch {
  RegExpSubPattern() { not re.emptyGroup(start, end) }

  /** Gets the term inside this assertion, i.e. the term spanning the group contents. */
  RegExpTerm getOperand() {
    re.groupContents(start, end, result.getStart(), result.getEnd()) and
    re = result.getRegExp()
  }
}
/** A lookahead assertion; the concrete subclasses are the positive and negative variants. */
abstract class RegExpLookahead extends RegExpSubPattern { }
/** A lookahead for which `re.positiveLookaheadAssertionGroup(start, end)` holds. */
class RegExpPositiveLookahead extends RegExpLookahead {
  RegExpPositiveLookahead() {
    exists(int s, int e | s = start and e = end | re.positiveLookaheadAssertionGroup(s, e))
  }

  override string getAPrimaryQlClass() { result = "RegExpPositiveLookahead" }
}
/** A lookahead for which `re.negativeLookaheadAssertionGroup(start, end)` holds. */
class RegExpNegativeLookahead extends RegExpLookahead {
  RegExpNegativeLookahead() {
    exists(int s, int e | s = start and e = end | re.negativeLookaheadAssertionGroup(s, e))
  }

  override string getAPrimaryQlClass() { result = "RegExpNegativeLookahead" }
}
/** A lookbehind assertion; the concrete subclasses are the positive and negative variants. */
abstract class RegExpLookbehind extends RegExpSubPattern { }
/** A lookbehind for which `re.positiveLookbehindAssertionGroup(start, end)` holds. */
class RegExpPositiveLookbehind extends RegExpLookbehind {
  RegExpPositiveLookbehind() {
    exists(int s, int e | s = start and e = end | re.positiveLookbehindAssertionGroup(s, e))
  }

  override string getAPrimaryQlClass() { result = "RegExpPositiveLookbehind" }
}
/** A lookbehind for which `re.negativeLookbehindAssertionGroup(start, end)` holds. */
class RegExpNegativeLookbehind extends RegExpLookbehind {
  RegExpNegativeLookbehind() {
    exists(int s, int e | s = start and e = end | re.negativeLookbehindAssertionGroup(s, e))
  }

  override string getAPrimaryQlClass() { result = "RegExpNegativeLookbehind" }
}
/** A back reference to a capture group, by number or by name. It has no children. */
class RegExpBackRef extends RegExpTerm, TRegExpBackRef {
  RegExpBackRef() { this = TRegExpBackRef(re, start, end) }

  /**
   * Gets the number of the capture group this back reference refers to, if any.
   */
  int getNumber() { re.getBackRefNumber(start, end) = result }

  /**
   * Gets the name of the capture group this back reference refers to, if any.
   */
  string getName() { re.getBackRefName(start, end) = result }

  /**
   * Gets the capture group this back reference refers to: a group of the same
   * literal whose number or name equals that of this back reference.
   */
  RegExpGroup getGroup() {
    (
      result.getName() = this.getName()
      or
      result.getNumber() = this.getNumber()
    ) and
    result.getLiteral() = this.getLiteral()
  }

  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { result = "RegExpBackRef" }
}
/**
 * A named character property. For example, the POSIX bracket expression
 * `[[:digit:]]`. It has no children.
 */
class RegExpNamedCharacterProperty extends RegExpTerm, TRegExpNamedCharacterProperty {
  RegExpNamedCharacterProperty() { this = TRegExpNamedCharacterProperty(re, start, end) }

  override RegExpTerm getChild(int i) { none() }

  override string getAPrimaryQlClass() { result = "RegExpNamedCharacterProperty" }

  /**
   * Gets the property name. For example, in `\p{Space}`, the result is
   * `"Space"`.
   */
  string getName() { re.getCharacterPropertyName(start, end) = result }

  /**
   * Holds if the property is inverted. For example, it holds for `\p{^Digit}`,
   * which matches non-digits.
   */
  predicate isInverted() {
    exists(int s, int e | s = start and e = end | re.namedCharacterPropertyIsInverted(s, e))
  }
}
/** Gets the root term of the regular expression literal `re`. */
RegExpTerm getParsedRegExp(AST::RegExpLiteral re) {
  result.isRootTerm() and
  re = result.getRegExp()
}

View File

@@ -0,0 +1,420 @@
/**
* Provides classes for working with regular expressions that can
* perform backtracking in superlinear time.
*/
import ReDoSUtil
/*
* This module implements the analysis described in the paper:
* Valentin Wustholz, Oswaldo Olivo, Marijn J. H. Heule, and Isil Dillig:
* Static Detection of DoS Vulnerabilities in
* Programs that use Regular Expressions
* (Extended Version).
* (https://arxiv.org/pdf/1701.04045.pdf)
*
* Theorem 3 from the paper describes the basic idea.
*
* The following explains the idea using variables and predicate names that are used in the implementation:
* We consider a pair of repetitions, which we will call `pivot` and `succ`.
*
* We create a product automaton of 3-tuples of states (see `StateTuple`).
* There exists a transition `(a,b,c) -> (d,e,f)` in the product automaton
* iff there exists three transitions in the NFA `a->d, b->e, c->f` where those three
* transitions all match a shared character `char`. (see `getAThreewayIntersect`)
*
* We start a search in the product automaton at `(pivot, pivot, succ)`,
* and search for a series of transitions (a `Trace`), such that we end
* at `(pivot, succ, succ)` (see `isReachableFromStartTuple`).
*
* For example, consider the regular expression `/^\d*5\w*$/`.
* The search will start at the tuple `(\d*, \d*, \w*)` and search
* for a path to `(\d*, \w*, \w*)`.
* This path exists, and consists of a single transition in the product automaton,
* where the three corresponding NFA edges all match the character `"5"`.
*
* The start-state in the NFA has an any-transition to itself. This allows us to
* flag regular expressions such as `/a*$/`, which do not have a start anchor
* and can thus start matching anywhere.
*
* The implementation is not perfect.
* It has the same suffix detection issue as the `js/redos` query, which can cause false positives.
* It also doesn't find all transitions in the product automaton, which can cause false negatives.
*/
/**
 * An instantiation of `ReDoSConfiguration` for superlinear ReDoS.
 */
class SuperLinearReDoSConfiguration extends ReDoSConfiguration {
  SuperLinearReDoSConfiguration() { "SuperLinearReDoSConfiguration" = this }

  /** Holds if `state` is the `succ` state of some pair of loops that is pumpable with `pump`. */
  override predicate isReDoSCandidate(State state, string pump) {
    exists(State pivot | isPumpable(pivot, state, pump))
  }
}
/**
 * Gets any root (start) state of a regular expression, that is, the match
 * state of some `RegExpRoot`.
 */
private State getRootState() { exists(RegExpRoot root | result = mkMatch(root)) }
/**
* The 3-tuples of NFA states that make up the product automaton.
*
* A tuple exists if it is a start tuple `(pivot, pivot, succ)`, or if it is the
* target of a product-automaton step and passes the feasibility check.
*/
private newtype TStateTuple =
MkStateTuple(State q1, State q2, State q3) {
// starts at (pivot, pivot, succ)
isStartLoops(q1, q3) and q1 = q2
or
// reachable by a step from an existing tuple, and not pruned as infeasible
step(_, _, _, _, q1, q2, q3) and FeasibleTuple::isFeasibleTuple(q1, q2, q3)
}
/**
 * A state in the product automaton.
 * The product automaton contains 3-tuples of states.
 *
 * We lazily only construct those states that we are actually
 * going to need.
 * Either a start state `(pivot, pivot, succ)`, or a state
 * where there exists a transition from an already existing state.
 *
 * The exponential variant of this query (`js/redos`) uses an optimization
 * trick where `q1 <= q2`. This trick cannot be used here as the order
 * of the elements matters.
 */
class StateTuple extends TStateTuple {
  State q1;
  State q2;
  State q3;

  StateTuple() { MkStateTuple(q1, q2, q3) = this }

  /**
   * Gets a string representation of this tuple.
   */
  string toString() { result = "(" + q1 + ", " + q2 + ", " + q3 + ")" }

  /**
   * Holds if this tuple is `(r1, r2, r3)`.
   */
  pragma[noinline]
  predicate isTuple(State r1, State r2, State r3) {
    q1 = r1 and
    q2 = r2 and
    q3 = r3
  }
}
/**
* A module for determining feasible tuples for the product automaton.
*
* The implementation is split into many predicates for performance reasons.
* Each helper below is `pragma[noinline]`, while the entry point
* `isFeasibleTuple` is `pragma[inline]`.
*/
private module FeasibleTuple {
/**
* Holds if the tuple `(r1, r2, r3)` might be on path from a start-state to an end-state in the product automaton.
*/
pragma[inline]
predicate isFeasibleTuple(State r1, State r2, State r3) {
// The first element is either inside a repetition (or the start state itself)
isRepetitionOrStart(r1) and
// The last element is inside a repetition
stateInsideRepetition(r3) and
// The states are reachable in the NFA in the order r1 -> r2 -> r3
delta+(r1) = r2 and
delta+(r2) = r3 and
// The first element can reach a beginning (the "pivot" state in a `(pivot, succ)` pair).
canReachABeginning(r1) and
// The last element can reach a target (the "succ" state in a `(pivot, succ)` pair).
canReachATarget(r3)
}
/**
* Holds if `s` is either inside a repetition, or is the start state (which is a repetition).
*/
pragma[noinline]
private predicate isRepetitionOrStart(State s) { stateInsideRepetition(s) or s = getRootState() }
/**
* Holds if state `s` might be inside a backtracking repetition, that is, the
* term represented by `s` or one of its transitive parents is an
* `InfiniteRepetitionQuantifier`.
*/
pragma[noinline]
private predicate stateInsideRepetition(State s) {
s.getRepr().getParent*() instanceof InfiniteRepetitionQuantifier
}
/**
* Holds if there exists a path in the NFA from `s` to a "pivot" state
* (from a `(pivot, succ)` pair that starts the search).
*/
pragma[noinline]
private predicate canReachABeginning(State s) {
delta+(s) = any(State pivot | isStartLoops(pivot, _))
}
/**
* Holds if there exists a path in the NFA from `s` to a "succ" state
* (from a `(pivot, succ)` pair that starts the search).
*/
pragma[noinline]
private predicate canReachATarget(State s) { delta+(s) = any(State succ | isStartLoops(_, succ)) }
}
/**
 * Holds if `pivot` and `succ` are a pair of loops that could be the beginning of a quadratic blowup.
 *
 * `pivot` is either an infinite repetition or the match state of a regular
 * expression root, and `succ` is an infinite repetition reachable from `pivot`
 * in the NFA.
 *
 * There is a slight implementation difference compared to the paper: this predicate requires that `pivot != succ`.
 * The case where `pivot = succ` causes exponential backtracking and is handled by the `js/redos` query.
 */
predicate isStartLoops(State pivot, State succ) {
  (
    pivot = mkMatch(any(RegExpRoot root))
    or
    pivot.getRepr() instanceof InfiniteRepetitionQuantifier
  ) and
  succ = delta+(pivot) and
  succ.getRepr() instanceof InfiniteRepetitionQuantifier and
  succ != pivot
}
/**
* Gets a state for which there exists a transition in the NFA from `s`.
* The symbol labelling the transition is ignored.
*/
State delta(State s) { delta(s, _, result) }
/**
 * Holds if there are transitions from the components of `q` to the corresponding
 * components of `r` labelled with `s1`, `s2`, and `s3`, respectively.
 */
pragma[noinline]
predicate step(StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, StateTuple r) {
  exists(State t1, State t2, State t3 |
    r = MkStateTuple(t1, t2, t3) and
    step(q, s1, s2, s3, t1, t2, t3)
  )
}
/**
* Holds if there are transitions from the components of `q` to `r1`, `r2`, and `r3`
* labelled with `s1`, `s2`, and `s3`, respectively, and the three symbols share
* a character according to `getAThreewayIntersect`.
*
* This predicate is `pragma[noopt]`, so the conjuncts are evaluated in the
* order written; do not reorder them.
*/
pragma[noopt]
predicate step(
StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, State r1, State r2, State r3
) {
exists(State q1, State q2, State q3 | q.isTuple(q1, q2, q3) |
deltaClosed(q1, s1, r1) and
deltaClosed(q2, s2, r2) and
deltaClosed(q3, s3, r3) and
// use noopt to force the join on `getAThreewayIntersect` to happen last.
exists(getAThreewayIntersect(s1, s2, s3))
)
}
/**
 * Gets a char that is matched by all the edges `s1`, `s2`, and `s3`.
 *
 * For each pair of symbols, only the minimum and maximum intersecting chars of
 * that pair are considered as candidates. The result is therefore not
 * complete, and might miss some combination of edges that share some character.
 */
pragma[noinline]
string getAThreewayIntersect(InputSymbol s1, InputSymbol s2, InputSymbol s3) {
  result = minAndMaxIntersect(s1, s2) and
  (result = intersect(s2, s3) or result = intersect(s1, s3))
  or
  result = minAndMaxIntersect(s1, s3) and
  (result = intersect(s2, s3) or result = intersect(s1, s2))
  or
  result = minAndMaxIntersect(s2, s3) and
  (result = intersect(s1, s2) or result = intersect(s1, s3))
}
/**
 * Gets the minimum and maximum characters that intersect between `a` and `b`.
 * This predicate is used to limit the size of `getAThreewayIntersect`.
 */
pragma[noinline]
string minAndMaxIntersect(InputSymbol a, InputSymbol b) {
  result = min(intersect(a, b))
  or
  result = max(intersect(a, b))
}
/**
* The datatype of traces: `Nil()` is the empty trace, and
* `Step(s1, s2, s3, t)` extends the trace `t` with one triple of input symbols.
*
* Only those `Step`s are constructed that label a product-automaton step out of
* a tuple reached with trace `t`, or out of a start tuple when `t` is `Nil()`.
*/
private newtype TTrace =
Nil() or
Step(InputSymbol s1, InputSymbol s2, InputSymbol s3, TTrace t) {
// extend the trace of a tuple that is already known to be reachable
exists(StateTuple p |
isReachableFromStartTuple(_, _, p, t, _) and
step(p, s1, s2, s3, _)
)
or
// first step out of a start tuple `(pivot, pivot, succ)`
exists(State pivot, State succ | isStartLoops(pivot, succ) |
t = Nil() and step(MkStateTuple(pivot, pivot, succ), s1, s2, s3, _)
)
}
/**
 * A list of tuples of input symbols that describe a path in the product automaton
 * starting from some start state.
 */
class Trace extends TTrace {
  /**
   * Gets a string representation of this Trace that can be used for debug purposes.
   */
  string toString() {
    exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace rest |
      this = Step(s1, s2, s3, rest)
    |
      result = "Step(" + s1 + ", " + s2 + ", " + s3 + ", " + rest + ")"
    )
    or
    this = Nil() and
    result = "Nil()"
  }
}
/**
 * Gets a string corresponding to the trace `t`: the empty string for `Nil()`,
 * and otherwise a string for the rest of the trace followed by a char shared
 * by the three symbols of the outermost step.
 */
string concretise(Trace t) {
  exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace tail |
    t = Step(s1, s2, s3, tail)
  |
    result = concretise(tail) + getAThreewayIntersect(s1, s2, s3)
  )
  or
  t = Nil() and
  result = ""
}
/**
* Holds if there exists a transition from `r` to `q` in the product automaton.
* Notice that the arguments are flipped, and thus the direction is backwards.
*
* This predicate is passed to `shortestDistances` in `distBackFromEnd` as the
* edge relation.
*/
pragma[noinline]
predicate tupleDeltaBackwards(StateTuple q, StateTuple r) { step(r, _, _, _, q) }
/**
* Holds if `tuple` is an end state in our search.
* That means there exists a pair of loops `(pivot, succ)` such that `tuple = (pivot, succ, succ)`.
*
* This predicate is passed to `shortestDistances` in `distBackFromEnd` as the
* set of start nodes of the backwards search.
*/
predicate isEndTuple(StateTuple tuple) { tuple = getAnEndTuple(_, _) }
/**
* Gets the minimum length of a path from `r` to some end state `end`.
*
* The implementation searches backwards from the end-tuple.
* This approach was chosen because it is way more efficient if the first predicate given to `shortestDistances` is small.
* The `end` argument must always be an end state.
*/
int distBackFromEnd(StateTuple r, StateTuple end) =
shortestDistances(isEndTuple/1, tupleDeltaBackwards/2)(end, r, result)
/**
* Holds if there exists a pair of repetitions `(pivot, succ)` in the regular expression such that:
* `tuple` is reachable from `(pivot, pivot, succ)` in the product automaton,
* and there is a distance of `dist` from `tuple` to the nearest end-tuple `(pivot, succ, succ)`,
* and a path from a start-state to `tuple` follows the transitions in `trace`.
*
* In the recursive case the distance to the end-tuple decreases by exactly one
* per step (the previous tuple has distance `dist + 1`).
*/
predicate isReachableFromStartTuple(State pivot, State succ, StateTuple tuple, Trace trace, int dist) {
// base case. The first step is inlined to start the search after all possible 1-steps, and not just the ones with the shortest path.
// NOTE(review): `q1`, `q2` and `q3` are only bound by `tuple = MkStateTuple(q1, q2, q3)` and not used otherwise.
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, State q1, State q2, State q3 |
isStartLoops(pivot, succ) and
step(MkStateTuple(pivot, pivot, succ), s1, s2, s3, tuple) and
tuple = MkStateTuple(q1, q2, q3) and
trace = Step(s1, s2, s3, Nil()) and
dist = distBackFromEnd(tuple, MkStateTuple(pivot, succ, succ))
)
or
// recursive case
exists(StateTuple p, Trace v, InputSymbol s1, InputSymbol s2, InputSymbol s3 |
isReachableFromStartTuple(pivot, succ, p, v, dist + 1) and
dist = isReachableFromStartTupleHelper(pivot, succ, tuple, p, s1, s2, s3) and
trace = Step(s1, s2, s3, v)
)
}
/**
 * Helper predicate for the recursive case in `isReachableFromStartTuple`.
 *
 * Gets the distance from `r` back to the end-tuple `(pivot, succ, succ)`, where
 * `r` is reached from `p` by a step labelled `s1`, `s2`, `s3`.
 */
pragma[noinline]
private int isReachableFromStartTupleHelper(
  State pivot, State succ, StateTuple r, StateTuple p, InputSymbol s1, InputSymbol s2,
  InputSymbol s3
) {
  step(p, s1, s2, s3, r) and
  exists(StateTuple endTuple | endTuple = MkStateTuple(pivot, succ, succ) |
    result = distBackFromEnd(r, endTuple)
  )
}
/**
 * Gets the tuple `(pivot, succ, succ)` from the product automaton, for a pair
 * of loops `(pivot, succ)`.
 */
StateTuple getAnEndTuple(State pivot, State succ) {
  result = MkStateTuple(pivot, succ, succ) and
  isStartLoops(pivot, succ)
}
/**
 * Holds if matching repetitions of `pump` can:
 * 1) Transition from `pivot` back to `pivot`.
 * 2) Transition from `pivot` to `succ`.
 * 3) Transition from `succ` to `succ`.
 *
 * From theorem 3 in the paper linked in the top of this file we can therefore conclude that
 * the regular expression has polynomial backtracking - if a rejecting suffix exists.
 *
 * This predicate is used by `SuperLinearReDoSConfiguration`, and the final results are
 * available in the `hasReDoSResult` predicate.
 */
predicate isPumpable(State pivot, State succ, string pump) {
  exists(Trace path |
    // the end tuple `(pivot, succ, succ)` is reached by following `path`
    isReachableFromStartTuple(pivot, succ, getAnEndTuple(pivot, succ), path, _) and
    pump = concretise(path)
  )
}
/**
 * Holds if repetitions of `pump` at `t` will cause polynomial backtracking.
 * `prev` is the term of a pivot state that is pumpable together with the state
 * reported for `t`, and `prefixMsg` is the prefix message of that result.
 */
predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
  exists(State loop | hasReDoSResult(t, pump, loop, prefixMsg) |
    exists(State pivot | isPumpable(pivot, loop, _) | prev = pivot.getRepr())
  )
}
/**
 * Gets a message for why `term` can cause polynomial backtracking, built from
 * the pump string, the prefix message and the preceding term `prev`.
 */
string getReasonString(RegExpTerm term, string pump, string prefixMsg, RegExpTerm prev) {
  result =
    "Strings " + prefixMsg + "with many repetitions of '" + pump +
      "' can start matching anywhere after the start of the preceeding " + prev and
  polynimalReDoS(term, pump, prefixMsg, prev)
}
/**
 * A term that may cause a regular expression engine to perform a
 * polynomial number of match attempts, relative to the input length.
 */
class PolynomialBackTrackingTerm extends InfiniteRepetitionQuantifier {
  string reason;
  string pump;
  string prefixMsg;
  RegExpTerm prev;

  PolynomialBackTrackingTerm() {
    // there might be many reasons for this term to have polynomial backtracking - we pick the shortest one.
    reason = min(string msg | msg = getReasonString(this, _, _, _) | msg order by msg.length(), msg) and
    reason = getReasonString(this, pump, prefixMsg, prev)
  }

  /**
   * Holds if all non-empty successors to the polynomial backtracking term match the end of the line.
   */
  predicate isAtEndLine() {
    not exists(RegExpTerm succ |
      succ = this.getSuccessor+() and
      not matchesEpsilon(succ) and
      not succ instanceof RegExpDollar
    )
  }

  /**
   * Gets the string that should be repeated to cause this regular expression to perform polynomially.
   */
  string getPumpString() { pump = result }

  /**
   * Gets a message for which prefix a matching string must start with for this term to cause polynomial backtracking.
   */
  string getPrefixMessage() { prefixMsg = result }

  /**
   * Gets a predecessor to `this`, which also loops on the pump string, and thereby causes polynomial backtracking.
   */
  RegExpTerm getPreviousLoop() { prev = result }

  /**
   * Gets the reason for the number of match attempts.
   */
  string getReason() { reason = result }
}

Some files were not shown because too many files have changed in this diff Show More