Merge remote-tracking branch 'upstream/main' into incomplete-url-string-sanitization

Conflicts:
	config/identical-files.json
	javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.ql
	javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qll
	ruby/ql/src/queries/security/cwe-020/IncompleteUrlSubstringSanitization.qll
This commit is contained in:
Arthur Baars
2022-03-18 16:08:59 +01:00
1308 changed files with 77789 additions and 53601 deletions

View File

@@ -1,3 +1,10 @@
## 0.0.11
### Minor Analysis Improvements
* The `Regex` class is now an abstract class that extends `StringlikeLiteral` with implementations for `RegExpLiteral` and string literals that 'flow' into functions that are known to interpret string arguments as regular expressions such as `Regexp.new` and `String.match`.
* The regular expression parser now groups sequences of normal characters. This reduces the number of instances of `RegExpNormalChar`.
## 0.0.10
### Minor Analysis Improvements

View File

@@ -0,0 +1,5 @@
---
category: deprecated
---
* Many classes/predicates/modules that had upper-case acronyms have been renamed to follow our style-guide.
The old name still exists as a deprecated alias.

View File

@@ -1,4 +0,0 @@
---
category: minorAnalysis
---
* The regular expression parser now groups sequences of normal characters. This reduces the number of instances of `RegExpNormalChar`.

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added `OrmWriteAccess` concept to model data written to a database using an object-relational mapping (ORM) library.

View File

@@ -0,0 +1,4 @@
---
category: feature
---
* The data flow and taint tracking libraries have been extended with versions of `isBarrierIn`, `isBarrierOut`, and `isBarrierGuard`, respectively `isSanitizerIn`, `isSanitizerOut`, and `isSanitizerGuard`, that support flow states.

View File

@@ -0,0 +1,4 @@
---
category: breaking
---
* The flow state variants of `isBarrier` and `isAdditionalFlowStep` are no longer exposed in the taint tracking library. The `isSanitizer` and `isAdditionalTaintStep` predicates should be used instead.

View File

@@ -0,0 +1,5 @@
---
category: minorAnalysis
---
* `getConstantValue()` now returns the contents of strings and symbols after escape sequences have been interpreted. For example, for the Ruby string literal `"\n"`, `getConstantValue().getString()` previously returned a QL string with two characters, a backslash followed by `n`; now it returns the single-character string "\n" (U+000A, known as newline).
* `getConstantValue().getInt()` previously returned incorrect values for integers larger than 2<sup>31</sup>-1 (the largest value that can be represented by the QL `int` type). It now returns no result in those cases.

View File

@@ -1,4 +1,6 @@
---
category: minorAnalysis
---
## 0.0.11
### Minor Analysis Improvements
* The `Regex` class is now an abstract class that extends `StringlikeLiteral` with implementations for `RegExpLiteral` and string literals that 'flow' into functions that are known to interpret string arguments as regular expressions such as `Regexp.new` and `String.match`.
* The regular expression parser now groups sequences of normal characters. This reduces the number of instances of `RegExpNormalChar`.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.0.10
lastReleaseVersion: 0.0.11

View File

@@ -0,0 +1,129 @@
/**
* Provides predicates for working with numeric values and their string
* representations.
*/
/**
 * Parses `binary` as a base-2 numeral and gets the corresponding integer.
 * `binary` is expected to consist solely of the digits 0 and 1. There is no
 * result when the value exceeds 01111111111111111111111111111111 (2^31-1,
 * the largest value an `int` can hold).
 *
 * ```
 * "0" => 0
 * "01" => 1
 * "1010101" => 85
 * ```
 */
bindingset[binary]
int parseBinaryInt(string binary) {
  exists(string digits, int width |
    digits = stripLeadingZeros(binary) and
    width = digits.length()
  |
    // At most 31 significant bits fit in a non-negative `int`.
    width <= 31 and
    result =
      sum(int pos, string ch, int bit |
        ch = digits.charAt(pos) and
        bit = "01".indexOf(ch)
      |
        bit * twoToThe(width - 1 - pos)
      ) and
    result >= 0
  )
}
/**
 * Parses `hex` as a base-16 numeral and gets the corresponding integer.
 * `hex` is expected to be a valid hexadecimal string. There is no result when
 * the value exceeds 7FFFFFFF (2^31-1, the largest value an `int` can hold).
 *
 * ```
 * "0" => 0
 * "FF" => 255
 * "f00d" => 61453
 * ```
 */
bindingset[hex]
int parseHexInt(string hex) {
  exists(string digits, int width |
    digits = stripLeadingZeros(hex) and
    width = digits.length()
  |
    // Eight hex digits cover 32 bits; the sign check below rejects the upper half.
    width <= 8 and
    result =
      sum(int pos, string ch |
        ch = digits.charAt(pos)
      |
        toHex(ch) * sixteenToThe(width - 1 - pos)
      ) and
    result >= 0
  )
}
/**
 * Gets the integer value of `octal` when interpreted as octal. `octal` must be
 * a valid octal string containing only the digits 0-7. For values greater than
 * 17777777777 (2^31-1, the maximum value that `int` can represent), there is no
 * result.
 *
 * ```
 * "0" => 0
 * "77" => 63
 * "76543210" => 16434824
 * ```
 */
bindingset[octal]
int parseOctalInt(string octal) {
  exists(string stripped | stripped = stripLeadingZeros(octal) |
    stripped.length() <= 11 and
    // An 11-digit octal numeral only fits in 31 bits when its leading digit is
    // 1 (17777777777 = 2^31-1). Larger leading digits are rejected explicitly
    // rather than relying on the sign of the sum: 4 * 8^10 = 2^32 is 0 modulo
    // 2^32, so if the 32-bit product wraps, the sum for e.g. "40000000000"
    // would be non-negative and slip past the `result >= 0` check.
    (stripped.length() < 11 or stripped.charAt(0) = "1") and
    result >= 0 and
    result =
      sum(int index, string c, int digit |
        c = stripped.charAt(index) and
        digit = "01234567".indexOf(c)
      |
        eightToThe(stripped.length() - 1 - index) * digit
      )
  )
}
/**
 * Gets the numeric value (0 to 15) of the single hexadecimal digit `hex`.
 * The letters `a` to `f` are accepted in either case.
 */
private int toHex(string hex) {
  exists(string lower |
    // The position of a digit in this alphabet is its value.
    lower = "0123456789abcdef".charAt(result) and
    hex = [lower, lower.toUpperCase()]
  )
}
/**
 * Gets 16 raised to the power `n`. Only defined for `n` between 0 and 7
 * (inclusive).
 */
int sixteenToThe(int n) {
  // A hex digit is four bits wide; 16**7 is the largest power of 16 that fits in an int.
  n = [0 .. 7] and
  result = 1.bitShiftLeft(n * 4)
}
/**
 * Gets 8 raised to the power `n`. Only defined for `n` between 0 and 10
 * (inclusive).
 */
int eightToThe(int n) {
  // An octal digit is three bits wide; 8**10 is the largest power of 8 that fits in an int.
  n = [0 .. 10] and
  result = 1.bitShiftLeft(n * 3)
}
/**
 * Gets 2 raised to the power `n`. Only defined for `n` between 0 and 30
 * (inclusive).
 */
int twoToThe(int n) {
  n = [0 .. 30] and
  result = 1.bitShiftLeft(n)
}
/** Gets what remains of `s` once every leading "0" character has been dropped. */
bindingset[s]
private string stripLeadingZeros(string s) {
  exists(string pattern |
    // Greedy run of zeros, then capture the rest.
    pattern = "0*(.*)"
  |
    result = s.regexpCapture(pattern, 1)
  )
}

View File

@@ -266,6 +266,41 @@ module API {
/** A node corresponding to the method being invoked at a method call. */
class MethodAccessNode extends Node, Impl::MkMethodAccessNode {
override string toString() { result = "MethodAccessNode " + tryGetPath(this) }
/** Gets the call node corresponding to this method access. */
DataFlow::CallNode getCallNode() { this = Impl::MkMethodAccessNode(result) }
}
/**
* An API entry point.
*
* By default, API graph nodes are only created for nodes that come from an external
* library or escape into an external library. The points where values cross the boundary
* between codebases are called "entry points".
*
* Anything in the global scope is considered to be an entry point, but
* additional entry points may be added by extending this class.
*/
abstract class EntryPoint extends string {
// Any string may serve as the name of an entry point; subclasses restrict it.
bindingset[this]
EntryPoint() { any() }
/**
* Gets a data-flow node corresponding to a use-node for this entry point.
* Has no results unless overridden.
*/
DataFlow::LocalSourceNode getAUse() { none() }
/**
* Gets a data-flow node corresponding to a def-node for this entry point.
* Has no results unless overridden.
*/
DataFlow::Node getARhs() { none() }
/**
* Gets a call corresponding to a method access node for this entry point.
* Has no results unless overridden.
*/
DataFlow::CallNode getACall() { none() }
/** Gets an API-node for this entry point. */
API::Node getANode() { result = root().getASuccessor(Label::entryPoint(this)) }
}
// Ensure all entry points are imported from ApiGraphs.qll
private module ImportEntryPoints {
private import codeql.ruby.frameworks.data.ModelsAsData
}
/** Gets the root node. */
@@ -324,7 +359,7 @@ module API {
/**
* Holds if `ref` is a use of a node that should have an incoming edge from the root
* node labeled `lbl` in the API graph.
* node labeled `lbl` in the API graph (not including those from API::EntryPoint).
*/
pragma[nomagic]
private predicate useRoot(Label::ApiLabel lbl, DataFlow::Node ref) {
@@ -371,6 +406,10 @@ module API {
useCandFwd().flowsTo(nd.(DataFlow::CallNode).getReceiver())
or
parameterStep(_, defCand(), nd)
or
nd = any(EntryPoint entry).getAUse()
or
nd = any(EntryPoint entry).getACall()
}
/**
@@ -416,6 +455,8 @@ module API {
private predicate isDef(DataFlow::Node rhs) {
// If a call node is relevant as a use-node, treat its arguments as def-nodes
argumentStep(_, useCandFwd(), rhs)
or
rhs = any(EntryPoint entry).getARhs()
}
/** Gets a data flow node that flows to the RHS of a def-node. */
@@ -590,6 +631,17 @@ module API {
)
)
)
or
exists(EntryPoint entry |
pred = root() and
lbl = Label::entryPoint(entry)
|
succ = MkDef(entry.getARhs())
or
succ = MkUse(entry.getAUse())
or
succ = MkMethodAccessNode(entry.getACall())
)
}
/**
@@ -619,7 +671,8 @@ module API {
or
any(DataFlowDispatch::ParameterPosition c).isPositional(n)
} or
MkLabelBlockParameter()
MkLabelBlockParameter() or
MkLabelEntryPoint(EntryPoint name)
}
/** Provides classes modeling the various edges (labels) in the API graph. */
@@ -710,6 +763,18 @@ module API {
override string toString() { result = "getBlock()" }
}
/** A label from the root node to a custom entry point. */
class LabelEntryPoint extends ApiLabel {
private API::EntryPoint name;
LabelEntryPoint() { this = MkLabelEntryPoint(name) }
override string toString() { result = name }
/** Gets the name of the entry point. */
API::EntryPoint getName() { result = name }
}
}
/** Gets the `member` edge label for member `m`. */
@@ -735,5 +800,8 @@ module API {
/** Gets the label representing the block argument/parameter. */
LabelBlockParameter blockParameter() { any() }
/** Gets the label for the edge from the root node to a custom entry point of the given name. */
LabelEntryPoint entryPoint(API::EntryPoint name) { result.getName() = name }
}
}

View File

@@ -625,6 +625,35 @@ module OrmInstantiation {
}
}
/**
* A data flow node that writes persistent data.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `PersistentWriteAccess::Range` instead.
*/
class PersistentWriteAccess extends DataFlow::Node instanceof PersistentWriteAccess::Range {
/**
* Gets the data flow node corresponding to the written value.
*/
DataFlow::Node getValue() { result = super.getValue() }
}
/** Provides a class for modeling new persistent write access APIs. */
module PersistentWriteAccess {
/**
* A data flow node that writes persistent data.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `PersistentWriteAccess` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the data flow node corresponding to the written value.
*/
abstract DataFlow::Node getValue();
}
}
/**
* A data-flow node that may set or unset Cross-site request forgery protection.
*

View File

@@ -24,21 +24,8 @@ class Expr extends Stmt, TExpr {
}
}
/**
* A reference to the current object. For example:
* - `self == other`
* - `self.method_name`
* - `def self.method_name ... end`
*
* This also includes implicit references to the current object in method
* calls. For example, the method call `foo(123)` has an implicit `self`
* receiver, and is equivalent to the explicit `self.foo(123)`.
*/
class Self extends Expr, TSelf {
final override string getAPrimaryQlClass() { result = "Self" }
final override string toString() { result = "self" }
}
/** DEPRECATED: Use `SelfVariableAccess` instead. */
deprecated class Self = SelfVariableAccess;
/**
* A sequence of expressions in the right-hand side of an assignment or

View File

@@ -226,9 +226,51 @@ class StringComponent extends AstNode, TStringComponent {
* ```
*/
class StringTextComponent extends StringComponent, TStringTextComponentNonRegexp {
final override string getAPrimaryQlClass() { result = "StringTextComponent" }
/** Gets the text of this component as it appears in the source code. */
string getRawText() { none() }
}
private class StringTextComponentStringOrHeredocContent extends StringTextComponent,
TStringTextComponentNonRegexpStringOrHeredocContent {
private Ruby::Token g;
StringTextComponent() { this = TStringTextComponentNonRegexp(g) }
StringTextComponentStringOrHeredocContent() {
this = TStringTextComponentNonRegexpStringOrHeredocContent(g)
}
final override string toString() { result = this.getRawText() }
final override ConstantValue::ConstantStringValue getConstantValue() {
result.isString(this.getUnescapedText())
}
final override string getRawText() { result = g.getValue() }
final private string getUnescapedText() { result = unescapeTextComponent(this.getRawText()) }
}
private class StringTextComponentSimpleSymbol extends StringTextComponent,
TStringTextComponentNonRegexpSimpleSymbol {
private Ruby::SimpleSymbol g;
StringTextComponentSimpleSymbol() { this = TStringTextComponentNonRegexpSimpleSymbol(g) }
final override string toString() { result = getSimpleSymbolValue(g) }
final override ConstantValue::ConstantStringValue getConstantValue() {
result.isString(getSimpleSymbolValue(g))
}
final override string getRawText() { result = getSimpleSymbolValue(g) }
}
private class StringTextComponentHashKeySymbol extends StringTextComponent,
TStringTextComponentNonRegexpHashKeySymbol {
private Ruby::HashKeySymbol g;
StringTextComponentHashKeySymbol() { this = TStringTextComponentNonRegexpHashKeySymbol(g) }
final override string toString() { result = g.getValue() }
@@ -236,7 +278,7 @@ class StringTextComponent extends StringComponent, TStringTextComponentNonRegexp
result.isString(g.getValue())
}
final override string getAPrimaryQlClass() { result = "StringTextComponent" }
final override string getRawText() { result = g.getValue() }
}
/**
@@ -247,13 +289,18 @@ class StringEscapeSequenceComponent extends StringComponent, TStringEscapeSequen
StringEscapeSequenceComponent() { this = TStringEscapeSequenceComponentNonRegexp(g) }
final override string toString() { result = g.getValue() }
final override string toString() { result = this.getRawText() }
final override ConstantValue::ConstantStringValue getConstantValue() {
result.isString(g.getValue())
result.isString(this.getUnescapedText())
}
final override string getAPrimaryQlClass() { result = "StringEscapeSequenceComponent" }
/** Gets the text of this component as it appears in the source code. */
final string getRawText() { result = g.getValue() }
final private string getUnescapedText() { result = unescapeEscapeSequence(this.getRawText()) }
}
/**
@@ -565,18 +612,6 @@ class SymbolLiteral extends StringlikeLiteral, TSymbolLiteral {
}
}
private class SimpleSymbolLiteral extends SymbolLiteral, TSimpleSymbolLiteral {
private Ruby::SimpleSymbol g;
SimpleSymbolLiteral() { this = TSimpleSymbolLiteral(g) }
final override ConstantValue::ConstantSymbolValue getConstantValue() {
result.isSymbol(getSimpleSymbolValue(g))
}
final override string toString() { result = g.getValue() }
}
/**
* A subshell literal.
*

View File

@@ -201,7 +201,16 @@ class ClassVariableWriteAccess extends ClassVariableAccess, VariableWriteAccess
/** An access to a class variable where the value is read. */
class ClassVariableReadAccess extends ClassVariableAccess, VariableReadAccess { }
/** An access to the `self` variable */
/**
* An access to the `self` variable. For example:
* - `self == other`
* - `self.method_name`
* - `def self.method_name ... end`
*
* This also includes implicit references to the current object in method
* calls. For example, the method call `foo(123)` has an implicit `self`
* receiver, and is equivalent to the explicit `self.foo(123)`.
*/
class SelfVariableAccess extends LocalVariableAccess instanceof SelfVariableAccessImpl {
final override string getAPrimaryQlClass() { result = "SelfVariableAccess" }
}

View File

@@ -284,10 +284,12 @@ private module Cached {
TStringInterpolationComponentRegexp(Ruby::Interpolation g) {
g.getParent() instanceof Ruby::Regex
} or
TStringTextComponentNonRegexp(Ruby::Token g) {
TStringTextComponentNonRegexpStringOrHeredocContent(Ruby::Token g) {
(g instanceof Ruby::StringContent or g instanceof Ruby::HeredocContent) and
not g.getParent() instanceof Ruby::Regex
} or
TStringTextComponentNonRegexpSimpleSymbol(Ruby::SimpleSymbol g) or
TStringTextComponentNonRegexpHashKeySymbol(Ruby::HashKeySymbol g) or
TStringTextComponentRegexp(Ruby::Token g) {
(g instanceof Ruby::StringContent or g instanceof Ruby::HeredocContent) and
g.getParent() instanceof Ruby::Regex
@@ -511,7 +513,9 @@ private module Cached {
n = TStringEscapeSequenceComponentRegexp(result) or
n = TStringInterpolationComponentNonRegexp(result) or
n = TStringInterpolationComponentRegexp(result) or
n = TStringTextComponentNonRegexp(result) or
n = TStringTextComponentNonRegexpStringOrHeredocContent(result) or
n = TStringTextComponentNonRegexpSimpleSymbol(result) or
n = TStringTextComponentNonRegexpHashKeySymbol(result) or
n = TStringTextComponentRegexp(result) or
n = TSubExprReal(result) or
n = TSubshellLiteral(result) or
@@ -702,6 +706,10 @@ class TIntegerLiteral = TIntegerLiteralReal or TIntegerLiteralSynth;
class TBooleanLiteral = TTrueLiteral or TFalseLiteral;
class TStringTextComponentNonRegexp =
TStringTextComponentNonRegexpStringOrHeredocContent or
TStringTextComponentNonRegexpSimpleSymbol or TStringTextComponentNonRegexpHashKeySymbol;
class TStringTextComponent = TStringTextComponentNonRegexp or TStringTextComponentRegexp;
class TStringEscapeSequenceComponent =

View File

@@ -3,43 +3,23 @@ private import AST
private import Constant
private import TreeSitter
private import codeql.ruby.controlflow.CfgNodes
private import codeql.NumberUtils
int parseInteger(Ruby::Integer i) {
exists(string s | s = i.getValue().toLowerCase().replaceAll("_", "") |
s.charAt(0) != "0" and
result = s.toInt()
or
exists(string str, string values, int shift |
s.matches("0b%") and
values = "01" and
str = s.suffix(2) and
shift = 1
or
s.matches("0x%") and
values = "0123456789abcdef" and
str = s.suffix(2) and
shift = 4
or
s.charAt(0) = "0" and
not s.charAt(1) = ["b", "x", "o"] and
values = "01234567" and
str = s.suffix(1) and
shift = 3
or
s.matches("0o%") and
values = "01234567" and
str = s.suffix(2) and
shift = 3
|
result =
sum(int index, string c, int v, int exp |
c = str.charAt(index) and
v = values.indexOf(c.toLowerCase()) and
exp = str.length() - index - 1
|
v.bitShiftLeft((str.length() - index - 1) * shift)
)
)
s.matches("0b%") and result = parseBinaryInt(s.suffix(2))
or
s.matches("0x%") and result = parseHexInt(s.suffix(2))
or
s.charAt(0) = "0" and
not s.charAt(1) = ["b", "x", "o"] and
result = parseOctalInt(s.suffix(1))
or
s.matches("0o%") and
result = parseOctalInt(s.suffix(2))
)
}
@@ -146,18 +126,97 @@ private class RequiredFileLiteralConstantValue extends RequiredConstantValue {
}
}
private class RequiredStringTextComponentConstantValue extends RequiredConstantValue {
private class RequiredStringTextComponentNonRegexpStringOrHeredocContentConstantValue extends RequiredConstantValue {
override predicate requiredString(string s) {
s = any(Ruby::Token t | exists(TStringTextComponentNonRegexp(t))).getValue()
s =
unescapeTextComponent(any(Ruby::Token t |
exists(TStringTextComponentNonRegexpStringOrHeredocContent(t))
).getValue())
}
}
private class RequiredStringTextComponentNonRegexpSimpleSymbolConstantValue extends RequiredConstantValue {
override predicate requiredString(string s) { s = getSimpleSymbolValue(_) }
}
private class RequiredStringTextComponentNonRegexpHashKeySymbolConstantValue extends RequiredConstantValue {
override predicate requiredString(string s) { s = any(Ruby::HashKeySymbol h).getValue() }
}
private class RequiredStringEscapeSequenceComponentConstantValue extends RequiredConstantValue {
override predicate requiredString(string s) {
s = any(Ruby::Token t | exists(TStringEscapeSequenceComponentNonRegexp(t))).getValue()
s =
unescapeEscapeSequence(any(Ruby::Token t | exists(TStringEscapeSequenceComponentNonRegexp(t)))
.getValue())
}
}
/**
 * Gets the text that the escape sequence `escaped` stands for. For example:
 *
 * ```
 * \\ => \
 * \141 => a
 * \u0078 => x
 * ```
 */
bindingset[escaped]
string unescapeEscapeSequence(string escaped) {
  if exists(unescapeKnownEscapeSequence(escaped))
  then result = unescapeKnownEscapeSequence(escaped)
  else
    // Any other character following a backslash is just that character.
    result = escaped.suffix(1)
}
/**
* Gets the string represented by `escaped` when it is one of the escape
* sequences listed below (including the leading backslash). Has no result for
* any other input.
*/
bindingset[escaped]
private string unescapeKnownEscapeSequence(string escaped) {
// Escaped backslash and quote characters.
escaped = "\\\\" and result = "\\"
or
escaped = "\\'" and result = "'"
or
escaped = "\\\"" and result = "\""
or
// Single-letter escapes; those without a QL string escape are built from their code point.
escaped = "\\a" and result = 7.toUnicode()
or
escaped = "\\b" and result = 8.toUnicode()
or
escaped = "\\t" and result = "\t"
or
escaped = "\\n" and result = "\n"
or
escaped = "\\v" and result = 11.toUnicode()
or
escaped = "\\f" and result = 12.toUnicode()
or
escaped = "\\r" and result = "\r"
or
escaped = "\\e" and result = 27.toUnicode()
or
escaped = "\\s" and result = " "
or
// `\c?` and `\C-?` both map to code point 127.
escaped = ["\\c?", "\\C-?"] and result = 127.toUnicode()
or
// Octal escape: backslash followed by one to three octal digits.
result = parseOctalInt(escaped.regexpCapture("\\\\([0-7]{1,3})", 1)).toUnicode()
or
// Hex escape: `\x` followed by one or two hex digits.
result = parseHexInt(escaped.regexpCapture("\\\\x([0-9a-fA-F]{1,2})", 1)).toUnicode()
or
// Unicode escape: `\u` followed by exactly four hex digits.
result = parseHexInt(escaped.regexpCapture("\\\\u([0-9a-fA-F]{4})", 1)).toUnicode()
or
// Braced unicode escape: `\u{...}` holding one to six hex digits (a single code point).
result = parseHexInt(escaped.regexpCapture("\\\\u\\{([0-9a-fA-F]{1,6})\\}", 1)).toUnicode()
}
/**
 * Gets `text` with every `\\` turned into `\` and every `\'` turned into `'`,
 * which is how a string text component is unescaped.
 *
 * ```rb
 * 'foo\\bar \'baz\'' # foo\bar 'baz'
 * ```
 */
bindingset[text]
string unescapeTextComponent(string text) {
  exists(string escapedChar, string keepChar |
    // A backslash followed by a quote or another backslash; the second character is captured.
    escapedChar = "\\\\(['\\\\])" and
    keepChar = "$1"
  |
    result = text.regexpReplaceAll(escapedChar, keepChar)
  )
}
class TRegExpComponent =
TStringTextComponentRegexp or TStringEscapeSequenceComponentRegexp or
TStringInterpolationComponentRegexp;
@@ -223,6 +282,8 @@ private class SimpleSymbolLiteral extends SymbolLiteral, TSimpleSymbolLiteral {
}
final override string toString() { result = g.getValue() }
final override StringComponent getComponent(int n) { n = 0 and toGenerated(result) = g }
}
class ComplexSymbolLiteral extends SymbolLiteral, TComplexSymbolLiteral { }
@@ -257,6 +318,8 @@ private class HashKeySymbolLiteral extends SymbolLiteral, THashKeySymbolLiteral
}
final override string toString() { result = ":" + g.getValue() }
final override StringComponent getComponent(int n) { n = 0 and toGenerated(result) = g }
}
private class RequiredCharacterConstantValue extends RequiredConstantValue {

View File

@@ -73,7 +73,7 @@ private module Cached {
m = resolveConstantReadAccess(c.getReceiver())
or
m = enclosingModule(c).getModule() and
c.getReceiver() instanceof Self
c.getReceiver() instanceof SelfVariableAccess
) and
result = resolveConstantReadAccess(c.getAnArgument())
}
@@ -437,7 +437,7 @@ private module ResolveImpl {
encl = enclosingModule(this) and
result = [qualifiedModuleNameNonRec(encl, _, _), qualifiedModuleNameRec(encl, _, _)]
|
this.getReceiver() instanceof Self
this.getReceiver() instanceof SelfVariableAccess
or
not exists(this.getReceiver())
)

View File

@@ -184,9 +184,7 @@ abstract class ScopeImpl extends AstNode, TScopeType {
}
private class ScopeRealImpl extends ScopeImpl, TScopeReal {
private Scope::Range range;
ScopeRealImpl() { range = toGenerated(this) }
ScopeRealImpl() { toGenerated(this) instanceof Scope::Range }
override Variable getAVariableImpl() { result.getDeclaringScope() = this }
}

View File

@@ -366,7 +366,23 @@ private module Cached {
cached
predicate isCapturedAccess(LocalVariableAccess access) {
access.getVariable().getDeclaringScope() != access.getCfgScope()
exists(Scope scope1, Scope scope2 |
scope1 = access.getVariable().getDeclaringScope() and
scope2 = access.getCfgScope() and
scope1 != scope2
|
if access instanceof SelfVariableAccess
then
// ```
// class C
// def self.m // not a captured access
// end
// end
// ```
not scope2 instanceof Toplevel or
not access = any(SingletonMethod m).getObject()
else any()
)
}
cached
@@ -659,10 +675,11 @@ private class ClassVariableAccessSynth extends ClassVariableAccessRealImpl,
abstract class SelfVariableAccessImpl extends LocalVariableAccessImpl, TSelfVariableAccess { }
private class SelfVariableAccessReal extends SelfVariableAccessImpl, TSelfReal {
private Ruby::Self self;
private SelfVariable var;
SelfVariableAccessReal() { this = TSelfReal(self) and var = TSelfVariable(scopeOf(self)) }
SelfVariableAccessReal() {
exists(Ruby::Self self | this = TSelfReal(self) and var = TSelfVariable(scopeOf(self)))
}
final override SelfVariable getVariableImpl() { result = var }

View File

@@ -14,7 +14,7 @@ class EntryNode extends CfgNode, TEntryNode {
EntryNode() { this = TEntryNode(scope) }
final override EntryBasicBlock getBasicBlock() { result = CfgNode.super.getBasicBlock() }
final override EntryBasicBlock getBasicBlock() { result = super.getBasicBlock() }
final override Location getLocation() { result = scope.getLocation() }
@@ -31,7 +31,7 @@ class AnnotatedExitNode extends CfgNode, TAnnotatedExitNode {
/** Holds if this node represents a normal exit. */
final predicate isNormal() { normal = true }
final override AnnotatedExitBasicBlock getBasicBlock() { result = CfgNode.super.getBasicBlock() }
final override AnnotatedExitBasicBlock getBasicBlock() { result = super.getBasicBlock() }
final override Location getLocation() { result = scope.getLocation() }

View File

@@ -1,4 +1,4 @@
private import ruby as rb
private import ruby as RB
private import ControlFlowGraphImpl as Impl
private import Completion as Comp
private import codeql.ruby.ast.internal.Synthesis
@@ -6,11 +6,11 @@ private import Splitting as Splitting
private import codeql.ruby.CFG as CFG
/** The base class for `ControlFlowTree`. */
class ControlFlowTreeBase extends rb::AstNode {
class ControlFlowTreeBase extends RB::AstNode {
ControlFlowTreeBase() { not any(Synthesis s).excludeFromControlFlowTree(this) }
}
class ControlFlowElement = rb::AstNode;
class ControlFlowElement = RB::AstNode;
class Completion = Comp::Completion;
@@ -69,6 +69,6 @@ predicate isAbnormalExitType(SuccessorType t) {
t instanceof CFG::SuccessorTypes::ExitSuccessor
}
class Location = rb::Location;
class Location = RB::Location;
class Node = CFG::CfgNode;

View File

@@ -2,6 +2,8 @@
import ruby
import codeql.ruby.DataFlow
private import codeql.ruby.frameworks.data.ModelsAsData
private import codeql.ruby.ApiGraphs
private import internal.FlowSummaryImpl as Impl
private import internal.DataFlowDispatch
private import internal.DataFlowPrivate
@@ -165,3 +167,33 @@ private class SummarizedCallableAdapter extends Impl::Public::SummarizedCallable
}
class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack;
/**
 * A summarized callable backed by a models-as-data summary row. The callable
 * is named `package;type;path`, built from the row's first three columns.
 */
private class SummarizedCallableFromModel extends SummarizedCallable {
  string package;
  string type;
  string path;

  SummarizedCallableFromModel() {
    this = package + ";" + type + ";" + path and
    ModelOutput::relevantSummaryModel(package, type, path, _, _, _)
  }

  /** Gets a call whose method access node is a resolved base for this summary. */
  override Call getACall() {
    result =
      any(API::MethodAccessNode access |
        ModelOutput::resolvedSummaryBase(package, type, path, access)
      ).getCallNode().asExpr().getExpr()
  }

  /**
   * Holds if the model propagates flow from `input` to `output`. A row of kind
   * "value" preserves the value; a row of kind "taint" does not.
   */
  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    exists(string modelKind |
      ModelOutput::relevantSummaryModel(package, type, path, input, output, modelKind) and
      (
        modelKind = "value" and preservesValue = true
        or
        modelKind = "taint" and preservesValue = false
      )
    )
  }
}

View File

@@ -233,6 +233,8 @@ module Ssa {
)
}
override SelfVariable getSourceVariable() { result = v }
final override string toString() { result = "self (" + v.getDeclaringScope() + ")" }
final override Location getLocation() { result = this.getControlFlowNode().getLocation() }
@@ -314,7 +316,7 @@ module Ssa {
CapturedCallDefinition() {
exists(Variable v, BasicBlock bb, int i |
this.definesAt(v, bb, i) and
SsaImpl::capturedCallWrite(bb, i, v)
SsaImpl::capturedCallWrite(_, bb, i, v)
)
}

View File

@@ -203,7 +203,7 @@ private module Cached {
result = lookupMethod(tp, method) and
if result.(Method).isPrivate()
then
exists(Self self |
exists(SelfVariableAccess self |
self = call.getReceiver().getExpr() and
pragma[only_bind_out](self.getEnclosingModule().getModule().getSuperClass*()) =
pragma[only_bind_out](result.getEnclosingModule().getModule())
@@ -232,6 +232,18 @@ private module Cached {
)
}
/** Gets a viable run-time target for the call `call`. */
cached
DataFlowCallable viableCallable(DataFlowCall call) {
result = TCfgScope(getTarget(call.asCall())) and
not call.asCall().getExpr() instanceof YieldCall // handled by `lambdaCreation`/`lambdaCall`
or
exists(LibraryCallable callable |
result = TLibraryCallable(callable) and
call.asCall().getExpr() = callable.getACall()
)
}
cached
newtype TArgumentPosition =
TSelfArgumentPosition() or
@@ -300,28 +312,14 @@ private DataFlow::LocalSourceNode trackInstance(Module tp, TypeTracker t) {
)
or
// `self` in method
exists(Self self, Method enclosing |
self = result.asExpr().getExpr() and
enclosing = self.getEnclosingMethod() and
tp = enclosing.getEnclosingModule().getModule() and
not self.getEnclosingModule().getEnclosingMethod() = enclosing
)
tp = result.(SsaSelfDefinitionNode).getSelfScope().(Method).getEnclosingModule().getModule()
or
// `self` in singleton method
exists(Self self, MethodBase enclosing |
self = result.asExpr().getExpr() and
flowsToSingletonMethodObject(trackInstance(tp), enclosing) and
enclosing = self.getEnclosingMethod() and
not self.getEnclosingModule().getEnclosingMethod() = enclosing
)
flowsToSingletonMethodObject(trackInstance(tp), result.(SsaSelfDefinitionNode).getSelfScope())
or
// `self` in top-level
exists(Self self, Toplevel enclosing |
self = result.asExpr().getExpr() and
enclosing = self.getEnclosingModule() and
tp = TResolved("Object") and
not self.getEnclosingMethod().getEnclosingModule() = enclosing
)
result.(SsaSelfDefinitionNode).getSelfScope() instanceof Toplevel and
tp = TResolved("Object")
or
// a module or class
exists(Module m |
@@ -371,7 +369,7 @@ private predicate singletonMethod(MethodBase method, Expr object) {
pragma[nomagic]
private predicate flowsToSingletonMethodObject(DataFlow::LocalSourceNode nodeFrom, MethodBase method) {
exists(DataFlow::LocalSourceNode nodeTo |
exists(DataFlow::Node nodeTo |
nodeFrom.flowsTo(nodeTo) and
singletonMethod(method, nodeTo.asExpr().getExpr())
)
@@ -409,13 +407,8 @@ private DataFlow::LocalSourceNode trackSingletonMethod(MethodBase m, string name
name = m.getName()
}
private DataFlow::Node selfInModule(Module tp) {
exists(Self self, ModuleBase enclosing |
self = result.asExpr().getExpr() and
enclosing = self.getEnclosingModule() and
tp = enclosing.getModule() and
not self.getEnclosingMethod().getEnclosingModule() = enclosing
)
private SsaSelfDefinitionNode selfInModule(Module tp) {
tp = result.getSelfScope().(ModuleBase).getModule()
}
private DataFlow::LocalSourceNode trackModule(Module tp, TypeTracker t) {
@@ -442,17 +435,6 @@ private DataFlow::LocalSourceNode trackModule(Module tp) {
result = trackModule(tp, TypeTracker::end())
}
/** Gets a viable run-time target for the call `call`. */
DataFlowCallable viableCallable(DataFlowCall call) {
result = TCfgScope(getTarget(call.asCall())) and
not call.asCall().getExpr() instanceof YieldCall // handled by `lambdaCreation`/`lambdaCall`
or
exists(LibraryCallable callable |
result = TLibraryCallable(callable) and
call.asCall().getExpr() = callable.getACall()
)
}
/**
* Holds if the set of viable implementations that can be called by `call`
* might be improved by knowing the call context. This is the case if the

View File

@@ -87,12 +87,30 @@ abstract class Configuration extends string {
/**
* Holds if data flow into `node` is prohibited.
*
* The default implementation holds for no node.
*/
predicate isBarrierIn(Node node) { none() }
/**
* Holds if data flow into `node` is prohibited when the flow state is
* `state`.
*
* The default implementation holds for no node and state.
*/
predicate isBarrierIn(Node node, FlowState state) { none() }
/**
* Holds if data flow out of `node` is prohibited.
*
* The default implementation holds for no node.
*/
predicate isBarrierOut(Node node) { none() }
/**
* Holds if data flow out of `node` is prohibited when the flow state is
* `state`.
*
* The default implementation holds for no node and state.
*/
predicate isBarrierOut(Node node, FlowState state) { none() }
/**
* Holds if data flow through nodes guarded by `guard` is prohibited.
*
* The default implementation holds for no guard.
*/
predicate isBarrierGuard(BarrierGuard guard) { none() }
/**
* Holds if data flow through nodes guarded by `guard` is prohibited when
* the flow state is `state`.
*
* The default implementation holds for no guard and state.
*/
predicate isBarrierGuard(BarrierGuard guard, FlowState state) { none() }
/**
* Holds if the additional flow step from `node1` to `node2` must be taken
* into account in the analysis.
@@ -305,7 +323,7 @@ private class RetNodeEx extends NodeEx {
ReturnKindExt getKind() { result = this.asNode().(ReturnNodeExt).getKind() }
}
private predicate inBarrier(NodeEx node, Configuration config) {
private predicate fullInBarrier(NodeEx node, Configuration config) {
exists(Node n |
node.asNode() = n and
config.isBarrierIn(n)
@@ -314,7 +332,16 @@ private predicate inBarrier(NodeEx node, Configuration config) {
)
}
private predicate outBarrier(NodeEx node, Configuration config) {
/**
 * Holds if the node underlying `node` is both a source for `state` and an
 * in-barrier for `state` according to `config`.
 */
private predicate stateInBarrier(NodeEx node, FlowState state, Configuration config) {
  exists(Node underlying | underlying = node.asNode() |
    config.isSource(underlying, state) and
    config.isBarrierIn(underlying, state)
  )
}
private predicate fullOutBarrier(NodeEx node, Configuration config) {
exists(Node n |
node.asNode() = n and
config.isBarrierOut(n)
@@ -323,6 +350,15 @@ private predicate outBarrier(NodeEx node, Configuration config) {
)
}
/**
 * Holds if the node underlying `node` is both a sink for `state` and an
 * out-barrier for `state` according to `config`.
 */
private predicate stateOutBarrier(NodeEx node, FlowState state, Configuration config) {
  exists(Node underlying | underlying = node.asNode() |
    config.isSink(underlying, state) and
    config.isBarrierOut(underlying, state)
  )
}
pragma[nomagic]
private predicate fullBarrier(NodeEx node, Configuration config) {
exists(Node n | node.asNode() = n |
@@ -345,9 +381,19 @@ private predicate fullBarrier(NodeEx node, Configuration config) {
pragma[nomagic]
private predicate stateBarrier(NodeEx node, FlowState state, Configuration config) {
exists(Node n |
node.asNode() = n and
exists(Node n | node.asNode() = n |
config.isBarrier(n, state)
or
config.isBarrierIn(n, state) and
not config.isSource(n, state)
or
config.isBarrierOut(n, state) and
not config.isSink(n, state)
or
exists(BarrierGuard g |
config.isBarrierGuard(g, state) and
n = g.getAGuardedNode()
)
)
}
@@ -376,8 +422,8 @@ private predicate sinkNode(NodeEx node, FlowState state, Configuration config) {
/** Provides the relevant barriers for a step from `node1` to `node2`. */
pragma[inline]
private predicate stepFilter(NodeEx node1, NodeEx node2, Configuration config) {
not outBarrier(node1, config) and
not inBarrier(node2, config) and
not fullOutBarrier(node1, config) and
not fullInBarrier(node2, config) and
not fullBarrier(node1, config) and
not fullBarrier(node2, config)
}
@@ -430,6 +476,8 @@ private predicate additionalLocalStateStep(
config.isAdditionalFlowStep(n1, s1, n2, s2) and
getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and
stepFilter(node1, node2, config) and
not stateOutBarrier(node1, s1, config) and
not stateInBarrier(node2, s2, config) and
not stateBarrier(node1, s1, config) and
not stateBarrier(node2, s2, config)
)
@@ -471,6 +519,8 @@ private predicate additionalJumpStateStep(
config.isAdditionalFlowStep(n1, s1, n2, s2) and
getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and
stepFilter(node1, node2, config) and
not stateOutBarrier(node1, s1, config) and
not stateInBarrier(node2, s2, config) and
not stateBarrier(node1, s1, config) and
not stateBarrier(node2, s2, config) and
not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext
@@ -870,8 +920,8 @@ private module Stage1 {
private predicate throughFlowNodeCand(NodeEx node, Configuration config) {
revFlow(node, true, config) and
fwdFlow(node, true, config) and
not inBarrier(node, config) and
not outBarrier(node, config)
not fullInBarrier(node, config) and
not fullOutBarrier(node, config)
}
/** Holds if flow may return from `callable`. */
@@ -966,8 +1016,8 @@ private predicate flowOutOfCallNodeCand1(
) {
viableReturnPosOutNodeCand1(call, ret.getReturnPosition(), out, config) and
Stage1::revFlow(ret, config) and
not outBarrier(ret, config) and
not inBarrier(out, config)
not fullOutBarrier(ret, config) and
not fullInBarrier(out, config)
}
pragma[nomagic]
@@ -988,8 +1038,8 @@ private predicate flowIntoCallNodeCand1(
) {
viableParamArgNodeCand1(call, p, arg, config) and
Stage1::revFlow(p, config) and
not outBarrier(arg, config) and
not inBarrier(p, config)
not fullOutBarrier(arg, config) and
not fullInBarrier(p, config)
}
/**
@@ -1706,18 +1756,31 @@ private module LocalFlowBigStep {
* Holds if `node` can be the first node in a maximal subsequence of local
* flow steps in a dataflow path.
*/
predicate localFlowEntry(NodeEx node, FlowState state, Configuration config) {
private predicate localFlowEntry(NodeEx node, FlowState state, Configuration config) {
Stage2::revFlow(node, state, config) and
(
sourceNode(node, state, config) or
jumpStep(_, node, config) or
additionalJumpStep(_, node, config) or
additionalJumpStateStep(_, _, node, state, config) or
node instanceof ParamNodeEx or
node.asNode() instanceof OutNodeExt or
store(_, _, node, _, config) or
read(_, _, node, config) or
sourceNode(node, state, config)
or
jumpStep(_, node, config)
or
additionalJumpStep(_, node, config)
or
additionalJumpStateStep(_, _, node, state, config)
or
node instanceof ParamNodeEx
or
node.asNode() instanceof OutNodeExt
or
store(_, _, node, _, config)
or
read(_, _, node, config)
or
node instanceof FlowCheckNode
or
exists(FlowState s |
additionalLocalStateStep(_, s, node, state, config) and
s != state
)
)
}
@@ -1737,6 +1800,9 @@ private module LocalFlowBigStep {
or
exists(NodeEx next, FlowState s | Stage2::revFlow(next, s, config) |
additionalJumpStateStep(node, state, next, s, config)
or
additionalLocalStateStep(node, state, next, s, config) and
s != state
)
or
Stage2::revFlow(node, state, config) and
@@ -1770,42 +1836,40 @@ private module LocalFlowBigStep {
*/
pragma[nomagic]
private predicate localFlowStepPlus(
NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue,
DataFlowType t, Configuration config, LocalCallContext cc
NodeEx node1, FlowState state, NodeEx node2, boolean preservesValue, DataFlowType t,
Configuration config, LocalCallContext cc
) {
not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
(
localFlowEntry(node1, pragma[only_bind_into](state1), pragma[only_bind_into](config)) and
localFlowEntry(node1, pragma[only_bind_into](state), pragma[only_bind_into](config)) and
(
localFlowStepNodeCand1(node1, node2, config) and
state1 = state2 and
preservesValue = true and
t = node1.getDataFlowType() // irrelevant dummy value
t = node1.getDataFlowType() and // irrelevant dummy value
Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config))
or
additionalLocalFlowStepNodeCand2(node1, state1, node2, state2, config) and
additionalLocalFlowStepNodeCand2(node1, state, node2, state, config) and
preservesValue = false and
t = node2.getDataFlowType()
) and
node1 != node2 and
cc.relevantFor(node1.getEnclosingCallable()) and
not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
Stage2::revFlow(node2, pragma[only_bind_into](state2), pragma[only_bind_into](config))
not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall())
or
exists(NodeEx mid |
localFlowStepPlus(node1, state1, mid, pragma[only_bind_into](state2), preservesValue, t,
localFlowStepPlus(node1, pragma[only_bind_into](state), mid, preservesValue, t,
pragma[only_bind_into](config), cc) and
localFlowStepNodeCand1(mid, node2, config) and
not mid instanceof FlowCheckNode and
Stage2::revFlow(node2, pragma[only_bind_into](state2), pragma[only_bind_into](config))
Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config))
)
or
exists(NodeEx mid, FlowState st |
localFlowStepPlus(node1, state1, mid, st, _, _, pragma[only_bind_into](config), cc) and
additionalLocalFlowStepNodeCand2(mid, st, node2, state2, config) and
exists(NodeEx mid |
localFlowStepPlus(node1, state, mid, _, _, pragma[only_bind_into](config), cc) and
additionalLocalFlowStepNodeCand2(mid, state, node2, state, config) and
not mid instanceof FlowCheckNode and
preservesValue = false and
t = node2.getDataFlowType() and
Stage2::revFlow(node2, state2, pragma[only_bind_into](config))
t = node2.getDataFlowType()
)
)
}
@@ -1819,9 +1883,19 @@ private module LocalFlowBigStep {
NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue,
AccessPathFrontNil apf, Configuration config, LocalCallContext callContext
) {
localFlowStepPlus(node1, state1, node2, state2, preservesValue, apf.getType(), config,
callContext) and
localFlowExit(node2, state2, config)
localFlowStepPlus(node1, state1, node2, preservesValue, apf.getType(), config, callContext) and
localFlowExit(node2, state1, config) and
state1 = state2
or
additionalLocalFlowStepNodeCand2(node1, state1, node2, state2, config) and
state1 != state2 and
preservesValue = false and
apf = TFrontNil(node2.getDataFlowType()) and
callContext.relevantFor(node1.getEnclosingCallable()) and
not exists(DataFlowCall call | call = callContext.(LocalCallContextSpecificCall).getCall() |
isUnreachableInCallCached(node1.asNode(), call) or
isUnreachableInCallCached(node2.asNode(), call)
)
}
}
@@ -2695,10 +2769,10 @@ private module Stage4 {
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
localFlowEntry(node, _, config) and
result =
getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
node.getEnclosingCallable())
node.getEnclosingCallable()) and
exists(config)
}
private predicate localStep(

View File

@@ -87,12 +87,30 @@ abstract class Configuration extends string {
/**
* Holds if data flow into `node` is prohibited.
*
* The default implementation holds for no node.
*/
predicate isBarrierIn(Node node) { none() }
/**
* Holds if data flow into `node` is prohibited when the flow state is
* `state`.
*
* The default implementation holds for no node and state.
*/
predicate isBarrierIn(Node node, FlowState state) { none() }
/**
* Holds if data flow out of `node` is prohibited.
*
* The default implementation holds for no node.
*/
predicate isBarrierOut(Node node) { none() }
/**
* Holds if data flow out of `node` is prohibited when the flow state is
* `state`.
*
* The default implementation holds for no node and state.
*/
predicate isBarrierOut(Node node, FlowState state) { none() }
/**
* Holds if data flow through nodes guarded by `guard` is prohibited.
*
* The default implementation holds for no guard.
*/
predicate isBarrierGuard(BarrierGuard guard) { none() }
/**
* Holds if data flow through nodes guarded by `guard` is prohibited when
* the flow state is `state`.
*
* The default implementation holds for no guard and state.
*/
predicate isBarrierGuard(BarrierGuard guard, FlowState state) { none() }
/**
* Holds if the additional flow step from `node1` to `node2` must be taken
* into account in the analysis.
@@ -305,7 +323,7 @@ private class RetNodeEx extends NodeEx {
ReturnKindExt getKind() { result = this.asNode().(ReturnNodeExt).getKind() }
}
private predicate inBarrier(NodeEx node, Configuration config) {
private predicate fullInBarrier(NodeEx node, Configuration config) {
exists(Node n |
node.asNode() = n and
config.isBarrierIn(n)
@@ -314,7 +332,16 @@ private predicate inBarrier(NodeEx node, Configuration config) {
)
}
private predicate outBarrier(NodeEx node, Configuration config) {
/**
 * Holds if the node underlying `node` is both a source for `state` and an
 * in-barrier for `state` according to `config`.
 */
private predicate stateInBarrier(NodeEx node, FlowState state, Configuration config) {
  exists(Node underlying | underlying = node.asNode() |
    config.isSource(underlying, state) and
    config.isBarrierIn(underlying, state)
  )
}
private predicate fullOutBarrier(NodeEx node, Configuration config) {
exists(Node n |
node.asNode() = n and
config.isBarrierOut(n)
@@ -323,6 +350,15 @@ private predicate outBarrier(NodeEx node, Configuration config) {
)
}
/**
 * Holds if the node underlying `node` is both a sink for `state` and an
 * out-barrier for `state` according to `config`.
 */
private predicate stateOutBarrier(NodeEx node, FlowState state, Configuration config) {
  exists(Node underlying | underlying = node.asNode() |
    config.isSink(underlying, state) and
    config.isBarrierOut(underlying, state)
  )
}
pragma[nomagic]
private predicate fullBarrier(NodeEx node, Configuration config) {
exists(Node n | node.asNode() = n |
@@ -345,9 +381,19 @@ private predicate fullBarrier(NodeEx node, Configuration config) {
pragma[nomagic]
private predicate stateBarrier(NodeEx node, FlowState state, Configuration config) {
exists(Node n |
node.asNode() = n and
exists(Node n | node.asNode() = n |
config.isBarrier(n, state)
or
config.isBarrierIn(n, state) and
not config.isSource(n, state)
or
config.isBarrierOut(n, state) and
not config.isSink(n, state)
or
exists(BarrierGuard g |
config.isBarrierGuard(g, state) and
n = g.getAGuardedNode()
)
)
}
@@ -376,8 +422,8 @@ private predicate sinkNode(NodeEx node, FlowState state, Configuration config) {
/** Provides the relevant barriers for a step from `node1` to `node2`. */
pragma[inline]
private predicate stepFilter(NodeEx node1, NodeEx node2, Configuration config) {
not outBarrier(node1, config) and
not inBarrier(node2, config) and
not fullOutBarrier(node1, config) and
not fullInBarrier(node2, config) and
not fullBarrier(node1, config) and
not fullBarrier(node2, config)
}
@@ -430,6 +476,8 @@ private predicate additionalLocalStateStep(
config.isAdditionalFlowStep(n1, s1, n2, s2) and
getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and
stepFilter(node1, node2, config) and
not stateOutBarrier(node1, s1, config) and
not stateInBarrier(node2, s2, config) and
not stateBarrier(node1, s1, config) and
not stateBarrier(node2, s2, config)
)
@@ -471,6 +519,8 @@ private predicate additionalJumpStateStep(
config.isAdditionalFlowStep(n1, s1, n2, s2) and
getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and
stepFilter(node1, node2, config) and
not stateOutBarrier(node1, s1, config) and
not stateInBarrier(node2, s2, config) and
not stateBarrier(node1, s1, config) and
not stateBarrier(node2, s2, config) and
not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext
@@ -870,8 +920,8 @@ private module Stage1 {
private predicate throughFlowNodeCand(NodeEx node, Configuration config) {
revFlow(node, true, config) and
fwdFlow(node, true, config) and
not inBarrier(node, config) and
not outBarrier(node, config)
not fullInBarrier(node, config) and
not fullOutBarrier(node, config)
}
/** Holds if flow may return from `callable`. */
@@ -966,8 +1016,8 @@ private predicate flowOutOfCallNodeCand1(
) {
viableReturnPosOutNodeCand1(call, ret.getReturnPosition(), out, config) and
Stage1::revFlow(ret, config) and
not outBarrier(ret, config) and
not inBarrier(out, config)
not fullOutBarrier(ret, config) and
not fullInBarrier(out, config)
}
pragma[nomagic]
@@ -988,8 +1038,8 @@ private predicate flowIntoCallNodeCand1(
) {
viableParamArgNodeCand1(call, p, arg, config) and
Stage1::revFlow(p, config) and
not outBarrier(arg, config) and
not inBarrier(p, config)
not fullOutBarrier(arg, config) and
not fullInBarrier(p, config)
}
/**
@@ -1706,18 +1756,31 @@ private module LocalFlowBigStep {
* Holds if `node` can be the first node in a maximal subsequence of local
* flow steps in a dataflow path.
*/
predicate localFlowEntry(NodeEx node, FlowState state, Configuration config) {
private predicate localFlowEntry(NodeEx node, FlowState state, Configuration config) {
Stage2::revFlow(node, state, config) and
(
sourceNode(node, state, config) or
jumpStep(_, node, config) or
additionalJumpStep(_, node, config) or
additionalJumpStateStep(_, _, node, state, config) or
node instanceof ParamNodeEx or
node.asNode() instanceof OutNodeExt or
store(_, _, node, _, config) or
read(_, _, node, config) or
sourceNode(node, state, config)
or
jumpStep(_, node, config)
or
additionalJumpStep(_, node, config)
or
additionalJumpStateStep(_, _, node, state, config)
or
node instanceof ParamNodeEx
or
node.asNode() instanceof OutNodeExt
or
store(_, _, node, _, config)
or
read(_, _, node, config)
or
node instanceof FlowCheckNode
or
exists(FlowState s |
additionalLocalStateStep(_, s, node, state, config) and
s != state
)
)
}
@@ -1737,6 +1800,9 @@ private module LocalFlowBigStep {
or
exists(NodeEx next, FlowState s | Stage2::revFlow(next, s, config) |
additionalJumpStateStep(node, state, next, s, config)
or
additionalLocalStateStep(node, state, next, s, config) and
s != state
)
or
Stage2::revFlow(node, state, config) and
@@ -1770,42 +1836,40 @@ private module LocalFlowBigStep {
*/
pragma[nomagic]
private predicate localFlowStepPlus(
NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue,
DataFlowType t, Configuration config, LocalCallContext cc
NodeEx node1, FlowState state, NodeEx node2, boolean preservesValue, DataFlowType t,
Configuration config, LocalCallContext cc
) {
not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
(
localFlowEntry(node1, pragma[only_bind_into](state1), pragma[only_bind_into](config)) and
localFlowEntry(node1, pragma[only_bind_into](state), pragma[only_bind_into](config)) and
(
localFlowStepNodeCand1(node1, node2, config) and
state1 = state2 and
preservesValue = true and
t = node1.getDataFlowType() // irrelevant dummy value
t = node1.getDataFlowType() and // irrelevant dummy value
Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config))
or
additionalLocalFlowStepNodeCand2(node1, state1, node2, state2, config) and
additionalLocalFlowStepNodeCand2(node1, state, node2, state, config) and
preservesValue = false and
t = node2.getDataFlowType()
) and
node1 != node2 and
cc.relevantFor(node1.getEnclosingCallable()) and
not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and
Stage2::revFlow(node2, pragma[only_bind_into](state2), pragma[only_bind_into](config))
not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall())
or
exists(NodeEx mid |
localFlowStepPlus(node1, state1, mid, pragma[only_bind_into](state2), preservesValue, t,
localFlowStepPlus(node1, pragma[only_bind_into](state), mid, preservesValue, t,
pragma[only_bind_into](config), cc) and
localFlowStepNodeCand1(mid, node2, config) and
not mid instanceof FlowCheckNode and
Stage2::revFlow(node2, pragma[only_bind_into](state2), pragma[only_bind_into](config))
Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config))
)
or
exists(NodeEx mid, FlowState st |
localFlowStepPlus(node1, state1, mid, st, _, _, pragma[only_bind_into](config), cc) and
additionalLocalFlowStepNodeCand2(mid, st, node2, state2, config) and
exists(NodeEx mid |
localFlowStepPlus(node1, state, mid, _, _, pragma[only_bind_into](config), cc) and
additionalLocalFlowStepNodeCand2(mid, state, node2, state, config) and
not mid instanceof FlowCheckNode and
preservesValue = false and
t = node2.getDataFlowType() and
Stage2::revFlow(node2, state2, pragma[only_bind_into](config))
t = node2.getDataFlowType()
)
)
}
@@ -1819,9 +1883,19 @@ private module LocalFlowBigStep {
NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue,
AccessPathFrontNil apf, Configuration config, LocalCallContext callContext
) {
localFlowStepPlus(node1, state1, node2, state2, preservesValue, apf.getType(), config,
callContext) and
localFlowExit(node2, state2, config)
localFlowStepPlus(node1, state1, node2, preservesValue, apf.getType(), config, callContext) and
localFlowExit(node2, state1, config) and
state1 = state2
or
additionalLocalFlowStepNodeCand2(node1, state1, node2, state2, config) and
state1 != state2 and
preservesValue = false and
apf = TFrontNil(node2.getDataFlowType()) and
callContext.relevantFor(node1.getEnclosingCallable()) and
not exists(DataFlowCall call | call = callContext.(LocalCallContextSpecificCall).getCall() |
isUnreachableInCallCached(node1.asNode(), call) or
isUnreachableInCallCached(node2.asNode(), call)
)
}
}
@@ -2695,10 +2769,10 @@ private module Stage4 {
bindingset[node, cc, config]
private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
localFlowEntry(node, _, config) and
result =
getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)),
node.getEnclosingCallable())
node.getEnclosingCallable()) and
exists(config)
}
private predicate localStep(

View File

@@ -70,6 +70,20 @@ module LocalFlow {
)
}
/**
 * Gets the SSA definition node whose definition is a `self` definition for a
 * variable declared in the scope of `m`, that is, the implicit `self`
 * parameter of `m`.
 */
private SsaDefinitionNode getSelfParameterDefNode(MethodBase m) {
  exists(Ssa::SelfDefinition selfDef |
    selfDef = result.getDefinition() and
    selfDef.getSourceVariable().getDeclaringScope() = m
  )
}
/**
 * Holds if `nodeFrom` is a parameter node (either an ordinary parameter or
 * the implicit `self` parameter of a method), and `nodeTo` is the SSA
 * definition node corresponding to that parameter.
 */
predicate localFlowSsaParamInput(Node nodeFrom, Node nodeTo) {
  exists(ParameterNode param | param = nodeFrom |
    nodeTo = getParameterDefNode(param.getParameter())
  )
  or
  exists(SelfParameterNode selfParam | selfParam = nodeFrom |
    nodeTo = getSelfParameterDefNode(selfParam.getMethod())
  )
}
/**
* Holds if there is a local use-use flow step from `nodeFrom` to `nodeTo`
* involving SSA definition `def`.
@@ -115,9 +129,6 @@ module LocalFlow {
predicate localFlowStepCommon(Node nodeFrom, Node nodeTo) {
localSsaFlowStep(nodeFrom, nodeTo)
or
nodeFrom.(SelfParameterNode).getMethod() = nodeTo.asExpr().getExpr().getEnclosingCallable() and
nodeTo.asExpr().getExpr() instanceof Self
or
nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::AssignExprCfgNode).getRhs()
or
nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::BlockArgumentCfgNode).getValue()
@@ -236,7 +247,7 @@ private module Cached {
or
defaultValueFlow(nodeTo.(ParameterNode).getParameter(), nodeFrom)
or
nodeTo = LocalFlow::getParameterDefNode(nodeFrom.(ParameterNode).getParameter())
LocalFlow::localFlowSsaParamInput(nodeFrom, nodeTo)
or
nodeTo.(SynthReturnNode).getAnInput() = nodeFrom
or
@@ -253,7 +264,7 @@ private module Cached {
or
defaultValueFlow(nodeTo.(ParameterNode).getParameter(), nodeFrom)
or
nodeTo = LocalFlow::getParameterDefNode(nodeFrom.(ParameterNode).getParameter())
LocalFlow::localFlowSsaParamInput(nodeFrom, nodeTo)
or
LocalFlow::localSsaFlowStepUseUse(_, nodeFrom, nodeTo)
or
@@ -275,27 +286,34 @@ private module Cached {
LocalFlow::localSsaFlowStepUseUse(_, nodeFrom, nodeTo)
}
/**
 * Holds if `n` is the SSA node of a parameter definition, of a `self`
 * definition, or of a captured-variable entry definition.
 */
private predicate entrySsaDefinition(SsaDefinitionNode n) {
  n.getDefinition() instanceof Ssa::SelfDefinition
  or
  n.getDefinition() instanceof Ssa::CapturedEntryDefinition
  or
  n = LocalFlow::getParameterDefNode(_)
}
cached
predicate isLocalSourceNode(Node n) {
n instanceof ParameterNode
or
// This case should not be needed once we have proper use-use flow
// for `self`. At that point, the `self`s returned by `trackInstance`
// in `DataFlowDispatch.qll` should refer to the post-update node,
// and we can remove this case.
n.asExpr().getExpr() instanceof Self
n instanceof PostUpdateNodes::ExprPostUpdateNode
or
// Nodes that can't be reached from another parameter or expression.
not localFlowStepTypeTracker+(any(Node e |
e instanceof ExprNode
// Expressions that can't be reached from another entry definition or expression.
not localFlowStepTypeTracker+(any(Node n0 |
n0 instanceof ExprNode
or
e instanceof ParameterNode
), n)
entrySsaDefinition(n0)
), n.(ExprNode))
or
// Ensure all parameter SSA nodes are local sources -- this is needed by type tracking.
// Note that when the parameter has a default value, it will be reachable from an
// expression (the default value) and therefore won't be caught by the rule above.
n = LocalFlow::getParameterDefNode(_)
// Ensure all entry SSA definitions are local sources -- for parameters, this
// is needed by type tracking. Note that when the parameter has a default value,
// it will be reachable from an expression (the default value) and therefore
// won't be caught by the rule above.
entrySsaDefinition(n)
}
cached
@@ -358,6 +376,16 @@ class SsaDefinitionNode extends NodeImpl, TSsaDefinitionNode {
override string toStringImpl() { result = def.toString() }
}
/** An SSA definition node whose source variable is a `self` variable. */
class SsaSelfDefinitionNode extends LocalSourceNode, SsaDefinitionNode {
  private SelfVariable selfVar;

  SsaSelfDefinitionNode() { def.getSourceVariable() = selfVar }

  /** Gets the scope that declares the underlying `self` variable. */
  Scope getSelfScope() { result = selfVar.getDeclaringScope() }
}
/**
* A value returning statement, viewed as a node in a data flow graph.
*
@@ -745,13 +773,6 @@ predicate jumpStep(Node pred, Node succ) {
SsaImpl::captureFlowOut(pred.(SsaDefinitionNode).getDefinition(),
succ.(SsaDefinitionNode).getDefinition())
or
exists(Self s, Method m |
s = succ.asExpr().getExpr() and
pred.(SelfParameterNode).getMethod() = m and
m = s.getEnclosingMethod() and
m != s.getEnclosingCallable()
)
or
succ.asExpr().getExpr().(ConstantReadAccess).getValue() = pred.asExpr().getExpr()
}

View File

@@ -1,4 +1,5 @@
private import SsaImplCommon
private import SsaImplSpecific as SsaImplSpecific
private import codeql.ruby.AST
private import codeql.ruby.CFG
private import codeql.ruby.ast.Variable
@@ -40,58 +41,50 @@ private predicate capturedExitRead(AnnotatedExitBasicBlock bb, int i, LocalVaria
i = bb.length()
}
/** Gets the outer CFG scope of `scope`, provided that `scope` is a block or a lambda. */
private CfgScope getCaptureOuterCfgScope(CfgScope scope) {
  (scope instanceof Lambda or scope instanceof Block) and
  result = scope.getOuterCfgScope()
}
/** Holds if captured variable `v` is read inside `scope`. */
/**
* Holds if captured variable `v` is read directly inside `scope`,
* or inside a (transitively) nested scope of `scope`.
*/
pragma[noinline]
private predicate hasCapturedRead(Variable v, CfgScope scope) {
any(LocalVariableReadAccess read |
read.getVariable() = v and scope = getCaptureOuterCfgScope*(read.getCfgScope())
read.getVariable() = v and scope = read.getCfgScope().getOuterCfgScope*()
).isCapturedAccess()
}
/**
 * Holds if basic block `bb` contains a write to `v`, and the scope of `bb`
 * is the immediate outer CFG scope of `scope`.
 */
pragma[noinline]
private predicate variableWriteInOuterScope(BasicBlock bb, LocalVariable v, CfgScope scope) {
  bb.getScope() = scope.getOuterCfgScope() and
  SsaImplSpecific::variableWrite(bb, _, v, _)
}
pragma[noinline]
private predicate hasVariableWriteWithCapturedRead(BasicBlock bb, LocalVariable v, CfgScope scope) {
hasCapturedRead(v, scope) and
exists(VariableWriteAccess write |
write = bb.getANode().getNode() and
write.getVariable() = v and
bb.getScope() = scope.getOuterCfgScope()
)
variableWriteInOuterScope(bb, v, scope)
}
/**
* Holds if the call at index `i` in basic block `bb` may reach a callable
* that reads captured variable `v`.
* Holds if the call `call` at index `i` in basic block `bb` may reach
* a callable that reads captured variable `v`.
*/
private predicate capturedCallRead(BasicBlock bb, int i, LocalVariable v) {
private predicate capturedCallRead(Call call, BasicBlock bb, int i, LocalVariable v) {
exists(CfgScope scope |
hasVariableWriteWithCapturedRead(bb.getAPredecessor*(), v, scope) and
bb.getNode(i).getNode() instanceof Call
call = bb.getNode(i).getNode()
|
not scope instanceof Block
or
// If the read happens inside a block, we restrict to the call that
// contains the block
scope = any(MethodCall c | bb.getNode(i) = c.getAControlFlowNode()).getBlock()
not scope instanceof Block
or
scope = call.(MethodCall).getBlock()
)
}
/**
 * Holds if there is a captured write access to `v` whose CFG scope is
 * `scope`, or from whose CFG scope `scope` can be reached by repeatedly
 * applying `getCaptureOuterCfgScope`.
 */
pragma[noinline]
private predicate hasCapturedWrite(Variable v, CfgScope scope) {
  exists(LocalVariableWriteAccess access |
    access.isCapturedAccess() and
    access.getVariable() = v and
    scope = getCaptureOuterCfgScope*(access.getCfgScope())
  )
}
/** Holds if `v` is read at index `i` in basic block `bb`. */
private predicate variableReadActual(BasicBlock bb, int i, LocalVariable v) {
exists(VariableReadAccess read |
@@ -104,21 +97,38 @@ predicate variableRead(BasicBlock bb, int i, LocalVariable v, boolean certain) {
variableReadActual(bb, i, v) and
certain = true
or
capturedCallRead(bb, i, v) and
capturedCallRead(_, bb, i, v) and
certain = false
or
capturedExitRead(bb, i, v) and
certain = false
}
/**
 * Holds if there is a captured write access to `v` located directly in
 * `scope`, or in a scope that is (transitively) nested inside `scope`.
 */
pragma[noinline]
private predicate hasCapturedWrite(Variable v, CfgScope scope) {
  exists(LocalVariableWriteAccess access |
    access.isCapturedAccess() and
    access.getVariable() = v and
    scope = access.getCfgScope().getOuterCfgScope*()
  )
}
/**
 * Holds if basic block `bb` contains an actual read of `v`, and the scope of
 * `bb` is the immediate outer CFG scope of `scope`.
 */
pragma[noinline]
private predicate variableReadActualInOuterScope(BasicBlock bb, LocalVariable v, CfgScope scope) {
  scope.getOuterCfgScope() = bb.getScope() and
  variableReadActual(bb, _, v)
}
pragma[noinline]
private predicate hasVariableReadWithCapturedWrite(BasicBlock bb, LocalVariable v, CfgScope scope) {
hasCapturedWrite(v, scope) and
exists(VariableReadAccess read |
read = bb.getANode().getNode() and
read.getVariable() = v and
bb.getScope() = scope.getOuterCfgScope()
)
variableReadActualInOuterScope(bb, v, scope)
}
cached
@@ -134,20 +144,20 @@ private module Cached {
}
/**
* Holds if the call at index `i` in basic block `bb` may reach a callable
* Holds if the call `call` at index `i` in basic block `bb` may reach a callable
* that writes captured variable `v`.
*/
cached
predicate capturedCallWrite(BasicBlock bb, int i, LocalVariable v) {
predicate capturedCallWrite(Call call, BasicBlock bb, int i, LocalVariable v) {
exists(CfgScope scope |
hasVariableReadWithCapturedWrite(bb.getASuccessor*(), v, scope) and
bb.getNode(i).getNode() instanceof Call
call = bb.getNode(i).getNode()
|
not scope instanceof Block
or
// If the write happens inside a block, we restrict to the call that
// contains the block
scope = any(MethodCall c | bb.getNode(i) = c.getAControlFlowNode()).getBlock()
not scope instanceof Block
or
scope = call.(MethodCall).getBlock()
)
}
@@ -177,6 +187,26 @@ private module Cached {
)
}
/**
 * Holds if SSA definition `def` of `v` reaches a read at the position of
 * `call`, where that position is a captured call read of `v`, and the basic
 * block containing `call` belongs to the immediate outer CFG scope of `scope`.
 */
pragma[noinline]
private predicate defReachesCallReadInOuterScope(
  Definition def, Call call, LocalVariable v, CfgScope scope
) {
  exists(BasicBlock block, int idx |
    capturedCallRead(call, block, idx, v) and
    ssaDefReachesRead(v, def, block, idx) and
    block.getScope() = scope.getOuterCfgScope()
  )
}
/**
 * Holds if `entry` is the definition of `v` at a captured entry write, and
 * the scope of the basic block holding that write is `scope` or is
 * (transitively) nested inside `scope`.
 */
pragma[noinline]
private predicate hasCapturedEntryWrite(Definition entry, LocalVariable v, CfgScope scope) {
  exists(BasicBlock block, int idx |
    entry.definesAt(v, block, idx) and
    capturedEntryWrite(block, idx, v) and
    scope = block.getScope().getOuterCfgScope*()
  )
}
/**
* Holds if there is flow for a captured variable from the enclosing scope into a block.
* ```rb
@@ -188,13 +218,35 @@ private module Cached {
*/
cached
predicate captureFlowIn(Definition def, Definition entry) {
exists(LocalVariable v, BasicBlock bb, int i |
exists(Call call, LocalVariable v, CfgScope scope |
defReachesCallReadInOuterScope(def, call, v, scope) and
hasCapturedEntryWrite(entry, v, scope)
|
// If the read happens inside a block, we restrict to the call that
// contains the block
not scope instanceof Block
or
scope = call.(MethodCall).getBlock()
)
}
private import codeql.ruby.dataflow.SSA
pragma[noinline]
private predicate defReachesExitReadInInnerScope(Definition def, LocalVariable v, CfgScope scope) {
exists(BasicBlock bb, int i |
ssaDefReachesRead(v, def, bb, i) and
capturedCallRead(bb, i, v) and
exists(BasicBlock bb2, int i2 |
capturedEntryWrite(bb2, i2, v) and
entry.definesAt(v, bb2, i2)
)
capturedExitRead(bb, i, v) and
scope = bb.getScope().getOuterCfgScope*()
)
}
/**
 * Holds if `exit` is the definition of `v` at the position of `call`, where
 * that position is a captured call write of `v`, and the basic block
 * containing `call` belongs to the immediate outer CFG scope of `scope`.
 */
pragma[noinline]
private predicate hasCapturedExitRead(Definition exit, Call call, LocalVariable v, CfgScope scope) {
  exists(BasicBlock block, int idx |
    exit.definesAt(v, block, idx) and
    capturedCallWrite(call, block, idx, v) and
    scope.getOuterCfgScope() = block.getScope()
  )
}
@@ -210,13 +262,15 @@ private module Cached {
*/
cached
predicate captureFlowOut(Definition def, Definition exit) {
exists(LocalVariable v, BasicBlock bb, int i |
ssaDefReachesRead(v, def, bb, i) and
capturedExitRead(bb, i, v) and
exists(BasicBlock bb2, int i2 |
capturedCallWrite(bb2, i2, v) and
exit.definesAt(v, bb2, i2)
)
exists(Call call, LocalVariable v, CfgScope scope |
defReachesExitReadInInnerScope(def, v, scope) and
hasCapturedExitRead(exit, call, v, _)
|
// If the read happens inside a block, we restrict to the call that
// contains the block
not scope instanceof Block
or
scope = call.(MethodCall).getBlock()
)
}

View File

@@ -287,20 +287,6 @@ private module SsaDefReaches {
)
}
/**
* Holds if the SSA definition of `v` at `def` reaches uncertain SSA definition
* `redef` in the same basic block, without crossing another SSA definition of `v`.
*/
predicate ssaDefReachesUncertainDefWithinBlock(
SourceVariable v, Definition def, UncertainWriteDefinition redef
) {
exists(BasicBlock bb, int rnk, int i |
ssaDefReachesRank(bb, def, rnk, v) and
rnk = ssaRefRank(bb, i, v, SsaDef()) - 1 and
redef.definesAt(v, bb, i)
)
}
/**
* Same as `ssaRefRank()`, but restricted to a particular SSA definition `def`.
*/

View File

@@ -40,7 +40,7 @@ predicate variableWrite(BasicBlock bb, int i, SourceVariable v, boolean certain)
) and
certain = true
or
SsaImpl::capturedCallWrite(bb, i, v) and
SsaImpl::capturedCallWrite(_, bb, i, v) and
certain = false
}

View File

@@ -66,35 +66,53 @@ private CfgNodes::ExprNodes::VariableWriteAccessCfgNode variablesInPattern(
)
}
/**
* Holds if the additional step from `nodeFrom` to `nodeTo` should be included
* in all global taint flow configurations.
*/
cached
predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// value of `case` expression into variables in patterns
exists(CfgNodes::ExprNodes::CaseExprCfgNode case, CfgNodes::ExprNodes::InClauseCfgNode clause |
nodeFrom.asExpr() = case.getValue() and
clause = case.getBranch(_) and
nodeTo.(SsaDefinitionNode).getDefinition().getControlFlowNode() =
variablesInPattern(clause.getPattern())
)
or
// operation involving `nodeFrom`
exists(CfgNodes::ExprNodes::OperationCfgNode op |
op = nodeTo.asExpr() and
op.getAnOperand() = nodeFrom.asExpr() and
not op.getExpr() instanceof AssignExpr
)
or
// string interpolation of `nodeFrom` into `nodeTo`
nodeFrom.asExpr() =
nodeTo.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent()
or
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, false)
or
// Although flow through arrays is modelled precisely using stores/reads, we still
// allow flow out of a _tainted_ array. This is needed in order to support taint-
// tracking configurations where the source is an array.
readStep(nodeFrom, any(DataFlow::Content::ArrayElementContent c), nodeTo)
private module Cached {
  /**
   * Holds if taint can move from `nodeFrom` to `nodeTo` via a step that every
   * global taint flow configuration includes by default.
   */
  cached
  predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    // the value matched by a `case` expression taints the variables bound in its patterns
    exists(
      CfgNodes::ExprNodes::CaseExprCfgNode caseNode, CfgNodes::ExprNodes::InClauseCfgNode inClause
    |
      inClause = caseNode.getBranch(_) and
      caseNode.getValue() = nodeFrom.asExpr() and
      variablesInPattern(inClause.getPattern()) =
        nodeTo.(SsaDefinitionNode).getDefinition().getControlFlowNode()
    )
    or
    // a non-assignment operation that has `nodeFrom` among its operands
    exists(CfgNodes::ExprNodes::OperationCfgNode operation |
      nodeTo.asExpr() = operation and
      nodeFrom.asExpr() = operation.getAnOperand() and
      not operation.getExpr() instanceof AssignExpr
    )
    or
    // `nodeFrom` is a component interpolated into the string-like literal `nodeTo`
    exists(CfgNodes::ExprNodes::StringlikeLiteralCfgNode literal |
      literal = nodeTo.asExpr() and
      literal.getAComponent() = nodeFrom.asExpr()
    )
    or
    FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, false)
    or
    // Array flow is modelled precisely with stores/reads, but reading an element
    // out of a _tainted_ array is still treated as a taint step. This supports
    // taint-tracking configurations whose source is an array.
    exists(DataFlow::Content::ArrayElementContent c | readStep(nodeFrom, c, nodeTo))
  }

  /**
   * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
   * (intra-procedural) step.
   */
  cached
  predicate localTaintStepCached(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    // Simple flow through library code is part of the exposed local step
    // relation, even though such flow is technically inter-procedural
    FlowSummaryImpl::Private::Steps::summaryThroughStep(nodeFrom, nodeTo, false)
    or
    defaultAdditionalTaintStep(nodeFrom, nodeTo)
  }
}
import Cached

View File

@@ -20,14 +20,4 @@ predicate localExprTaint(CfgNodes::ExprCfgNode e1, CfgNodes::ExprCfgNode e2) {
localTaint(DataFlow::exprNode(e1), DataFlow::exprNode(e2))
}
/**
* Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
* (intra-procedural) step.
*/
predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
defaultAdditionalTaintStep(nodeFrom, nodeTo)
or
// Simple flow through library code is included in the exposed local
// step relation, even though flow is technically inter-procedural
FlowSummaryImpl::Private::Steps::summaryThroughStep(nodeFrom, nodeTo, false)
}
predicate localTaintStep = localTaintStepCached/2;

View File

@@ -64,13 +64,30 @@ abstract class Configuration extends DataFlow::Configuration {
override predicate isSource(DataFlow::Node source) { none() }
/**
* Holds if `sink` is a relevant taint sink.
* Holds if `source` is a relevant taint source with the given initial
* `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) { none() }
/**
* Holds if `sink` is a relevant taint sink.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink) { none() }
/**
* Holds if `sink` is a relevant taint sink accepting `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) { none() }
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
@@ -79,9 +96,29 @@ abstract class Configuration extends DataFlow::Configuration {
defaultTaintSanitizer(node)
}
/**
* Holds if the node `node` is a taint sanitizer when the flow state is
* `state`.
*/
predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) { none() }
final override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
this.isSanitizer(node, state)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
/**
* Holds if taint propagation into `node` is prohibited when the flow state is
* `state`.
*/
predicate isSanitizerIn(DataFlow::Node node, DataFlow::FlowState state) { none() }
final override predicate isBarrierIn(DataFlow::Node node, DataFlow::FlowState state) {
this.isSanitizerIn(node, state)
}
final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) }
/** Holds if taint propagation out of `node` is prohibited. */
@@ -89,6 +126,16 @@ abstract class Configuration extends DataFlow::Configuration {
final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }
/**
* Holds if taint propagation out of `node` is prohibited when the flow state is
* `state`.
*/
predicate isSanitizerOut(DataFlow::Node node, DataFlow::FlowState state) { none() }
final override predicate isBarrierOut(DataFlow::Node node, DataFlow::FlowState state) {
this.isSanitizerOut(node, state)
}
/** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
@@ -96,6 +143,16 @@ abstract class Configuration extends DataFlow::Configuration {
this.isSanitizerGuard(guard) or defaultTaintSanitizerGuard(guard)
}
/**
* Holds if taint propagation through nodes guarded by `guard` is prohibited
* when the flow state is `state`.
*/
predicate isSanitizerGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) {
this.isSanitizerGuard(guard, state)
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
* must be taken into account in the analysis.
@@ -107,6 +164,25 @@ abstract class Configuration extends DataFlow::Configuration {
defaultAdditionalTaintStep(node1, node2)
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
* must be taken into account in the analysis. This step is only applicable
* in `state1` and updates the flow state to `state2`.
*/
predicate isAdditionalTaintStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
none()
}
final override predicate isAdditionalFlowStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
this.isAdditionalTaintStep(node1, state1, node2, state2)
}
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
(this.isSink(node) or this.isAdditionalTaintStep(node, _)) and
defaultImplicitTaintRead(node, c)

View File

@@ -105,7 +105,7 @@ private class ActionControllerContextCall extends MethodCall {
private ActionControllerControllerClass controllerClass;
ActionControllerContextCall() {
this.getReceiver() instanceof Self and
this.getReceiver() instanceof SelfVariableAccess and
this.getEnclosingModule() = controllerClass
}
@@ -127,9 +127,7 @@ abstract class ParamsCall extends MethodCall {
* ActionController parameters available via the `params` method.
*/
class ParamsSource extends RemoteFlowSource::Range {
ParamsCall call;
ParamsSource() { this.asExpr().getExpr() = call }
ParamsSource() { this.asExpr().getExpr() instanceof ParamsCall }
override string getSourceType() { result = "ActionController::Metal#params" }
}
@@ -146,9 +144,7 @@ abstract class CookiesCall extends MethodCall {
* ActionController parameters available via the `cookies` method.
*/
class CookiesSource extends RemoteFlowSource::Range {
CookiesCall call;
CookiesSource() { this.asExpr().getExpr() = call }
CookiesSource() { this.asExpr().getExpr() instanceof CookiesCall }
override string getSourceType() { result = "ActionController::Metal#cookies" }
}

View File

@@ -573,15 +573,16 @@ module ActionDispatch {
*/
private class ResourcesRoute extends RouteImpl, TResourcesRoute {
RouteBlock parent;
string resource;
string action;
string httpMethod;
string pathComponent;
ResourcesRoute() {
this = TResourcesRoute(parent, method, action) and
method.getArgument(0).getConstantValue().isStringOrSymbol(resource) and
isDefaultResourceRoute(resource, httpMethod, pathComponent, action)
exists(string resource |
this = TResourcesRoute(parent, method, action) and
method.getArgument(0).getConstantValue().isStringOrSymbol(resource) and
isDefaultResourceRoute(resource, httpMethod, pathComponent, action)
)
}
override string getAPrimaryQlClass() { result = "ResourcesRoute" }
@@ -610,15 +611,16 @@ module ActionDispatch {
*/
private class SingularResourceRoute extends RouteImpl, TResourceRoute {
RouteBlock parent;
string resource;
string action;
string httpMethod;
string pathComponent;
SingularResourceRoute() {
this = TResourceRoute(parent, method, action) and
method.getArgument(0).getConstantValue().isStringOrSymbol(resource) and
isDefaultSingularResourceRoute(resource, httpMethod, pathComponent, action)
exists(string resource |
this = TResourceRoute(parent, method, action) and
method.getArgument(0).getConstantValue().isStringOrSymbol(resource) and
isDefaultSingularResourceRoute(resource, httpMethod, pathComponent, action)
)
}
override string getAPrimaryQlClass() { result = "SingularResourceRoute" }

View File

@@ -61,7 +61,7 @@ private class ActionViewHtmlEscapeCall extends HtmlEscapeCall {
// A call in a context where some commonly used `ActionView` methods are available.
private class ActionViewContextCall extends MethodCall {
ActionViewContextCall() {
this.getReceiver() instanceof Self and
this.getReceiver() instanceof SelfVariableAccess and
inActionViewContext(this)
}

View File

@@ -7,6 +7,7 @@ private import codeql.ruby.Concepts
private import codeql.ruby.controlflow.CfgNodes
private import codeql.ruby.DataFlow
private import codeql.ruby.dataflow.internal.DataFlowDispatch
private import codeql.ruby.dataflow.internal.DataFlowPrivate
private import codeql.ruby.ast.internal.Module
private import codeql.ruby.ApiGraphs
private import codeql.ruby.frameworks.Stdlib
@@ -100,7 +101,7 @@ class ActiveRecordModelClassMethodCall extends MethodCall {
recvCls = this.getReceiver().(ActiveRecordModelClassMethodCall).getReceiverClass()
or
// e.g. self.where(...) within an ActiveRecordModelClass
this.getReceiver() instanceof Self and
this.getReceiver() instanceof SelfVariableAccess and
this.getEnclosingModule() = recvCls
}
@@ -268,29 +269,30 @@ private Expr getUltimateReceiver(MethodCall call) {
// A call to `find`, `where`, etc. that may return active record model object(s)
private class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode {
private MethodCall call;
private ActiveRecordModelClass cls;
private Expr recv;
ActiveRecordModelFinderCall() {
call = this.asExpr().getExpr() and
recv = getUltimateReceiver(call) and
resolveConstant(recv) = cls.getAQualifiedName() and
call.getMethodName() = finderMethodName()
exists(MethodCall call, Expr recv |
call = this.asExpr().getExpr() and
recv = getUltimateReceiver(call) and
resolveConstant(recv) = cls.getAQualifiedName() and
call.getMethodName() = finderMethodName()
)
}
final override ActiveRecordModelClass getClass() { result = cls }
}
// A `self` reference that may resolve to an active record model object
private class ActiveRecordModelClassSelfReference extends ActiveRecordModelInstantiation {
private class ActiveRecordModelClassSelfReference extends ActiveRecordModelInstantiation,
SsaSelfDefinitionNode {
private ActiveRecordModelClass cls;
ActiveRecordModelClassSelfReference() {
exists(Self s |
s.getEnclosingModule() = cls and
s.getEnclosingMethod() = cls.getAMethod() and
s = this.asExpr().getExpr()
exists(MethodBase m |
m = this.getCfgScope() and
m.getEnclosingModule() = cls and
m = cls.getAMethod()
)
}
@@ -314,3 +316,146 @@ private class ActiveRecordInstanceMethodCall extends DataFlow::CallNode {
ActiveRecordInstance getInstance() { result = instance }
}
/**
 * Provides modeling of write operations from the `ActiveRecord::Persistence` module.
 */
private module Persistence {
  /**
   * Holds if argument `argIndex` of `call` is a hash literal that contains a
   * key-value pair whose value is `value`.
   */
  private predicate hashArgumentWithValue(
    DataFlow::CallNode call, int argIndex, DataFlow::ExprNode value
  ) {
    exists(ExprNodes::HashLiteralCfgNode hashLit, ExprNodes::PairCfgNode kv |
      call.getArgument(argIndex).asExpr() = hashLit and
      hashLit.getAKeyValuePair() = kv and
      kv.getValue() = value.asExpr()
    )
  }

  /**
   * Holds if `call` has a keyword argument with value `value`.
   */
  private predicate keywordArgumentWithValue(DataFlow::CallNode call, DataFlow::ExprNode value) {
    exists(ExprNodes::PairCfgNode kv |
      call.getArgument(_).asExpr() = kv and
      kv.getValue() = value.asExpr()
    )
  }

  /**
   * Holds if some element of the array literal `arr` is a hash literal that
   * contains a key-value pair whose value is `value`.
   */
  private predicate arrayOfHashesWithValue(ExprNodes::ArrayLiteralCfgNode arr, DataFlow::Node value) {
    exists(ExprNodes::HashLiteralCfgNode hashLit, ExprNodes::PairCfgNode kv |
      arr.getArgument(_) = hashLit and
      hashLit.getAKeyValuePair() = kv and
      kv.getValue() = value.asExpr()
    )
  }

  /** A call such as `User.create(name: "foo")`. */
  private class CreateLikeCall extends DataFlow::CallNode, PersistentWriteAccess::Range {
    CreateLikeCall() {
      this.getMethodName() =
        [
          "create", "create!", "create_or_find_by", "create_or_find_by!", "find_or_create_by",
          "find_or_create_by!", "insert", "insert!"
        ] and
      exists(this.asExpr().getExpr().(ActiveRecordModelClassMethodCall).getReceiverClass())
    }

    override DataFlow::Node getValue() {
      keywordArgumentWithValue(this, result)
      or
      // attributes given as the entries of a hash literal in argument 0
      hashArgumentWithValue(this, 0, result)
    }
  }

  /** A call such as `User.update(1, name: "foo")`. */
  private class UpdateLikeClassMethodCall extends DataFlow::CallNode, PersistentWriteAccess::Range {
    UpdateLikeClassMethodCall() {
      this.getMethodName() = ["update", "update!", "upsert"] and
      exists(this.asExpr().getExpr().(ActiveRecordModelClassMethodCall).getReceiverClass())
    }

    override DataFlow::Node getValue() {
      keywordArgumentWithValue(this, result)
      or
      // Two array arguments: the first is an array of IDs and the second an
      // array of hashes, where each hash corresponds to an ID in the first array.
      exists(ExprNodes::ArrayLiteralCfgNode hashesArray |
        hashesArray = this.getArgument(1).asExpr() and
        this.getArgument(0).asExpr() instanceof ExprNodes::ArrayLiteralCfgNode and
        arrayOfHashesWithValue(hashesArray, result)
      )
    }
  }

  /** A call such as `User.insert_all([{name: "foo"}, {name: "bar"}])`. */
  private class InsertAllLikeCall extends DataFlow::CallNode, PersistentWriteAccess::Range {
    private ExprNodes::ArrayLiteralCfgNode arr;

    InsertAllLikeCall() {
      arr = this.getArgument(0).asExpr() and
      this.getMethodName() = ["insert_all", "insert_all!", "upsert_all"] and
      exists(this.asExpr().getExpr().(ActiveRecordModelClassMethodCall).getReceiverClass())
    }

    override DataFlow::Node getValue() {
      // attributes given as the entries of hash literals inside the array in argument 0
      arrayOfHashesWithValue(arr, result)
    }
  }

  /** A call such as `user.update(name: "foo")`. */
  private class UpdateLikeInstanceMethodCall extends PersistentWriteAccess::Range,
    ActiveRecordInstanceMethodCall {
    UpdateLikeInstanceMethodCall() {
      this.getMethodName() = ["update", "update!", "update_attributes", "update_attributes!"]
    }

    override DataFlow::Node getValue() {
      // keyword argument
      keywordArgumentWithValue(this, result)
      or
      // attributes given as the entries of a hash literal in argument 0
      hashArgumentWithValue(this, 0, result)
    }
  }

  /** A call such as `user.update_attribute(name, "foo")`. */
  private class UpdateAttributeCall extends PersistentWriteAccess::Range,
    ActiveRecordInstanceMethodCall {
    UpdateAttributeCall() { this.getMethodName() = "update_attribute" }

    override DataFlow::Node getValue() {
      // in `foo.update_attribute(key, value)` the written data is the second argument
      this.getArgument(1) = result
    }
  }

  /**
   * An assignment like `user.name = "foo"`. Though this does not write to the
   * database without a subsequent call to persist the object, it is considered
   * a `PersistentWriteAccess` to avoid missing cases where the path to a
   * subsequent write is not clear.
   */
  private class AssignAttribute extends PersistentWriteAccess::Range {
    private ExprNodes::AssignExprCfgNode assignNode;

    AssignAttribute() {
      this.asExpr() = assignNode and
      exists(ActiveRecordInstanceMethodCall setter |
        setter.getArgument(0) = this and
        setter.asExpr().getExpr() instanceof SetterMethodCall
      )
    }

    override DataFlow::Node getValue() { result.asExpr() = assignNode.getRhs() }
  }
}

View File

@@ -7,6 +7,7 @@ private import codeql.ruby.ApiGraphs
private import codeql.ruby.Concepts
private import codeql.ruby.DataFlow
private import codeql.ruby.dataflow.FlowSummary
private import codeql.ruby.frameworks.data.ModelsAsData
/** A call to `ActiveStorage::Filename#sanitized`, considered as a path sanitizer. */
class ActiveStorageFilenameSanitizedCall extends Path::PathSanitization::Range, DataFlow::CallNode {
@@ -17,43 +18,13 @@ class ActiveStorageFilenameSanitizedCall extends Path::PathSanitization::Range,
}
}
/** The taint summary for `ActiveStorage::Filename.new`. */
class ActiveStorageFilenameNewSummary extends SummarizedCallable {
ActiveStorageFilenameNewSummary() { this = "ActiveStorage::Filename.new" }
override MethodCall getACall() {
result =
API::getTopLevelMember("ActiveStorage")
.getMember("Filename")
.getAnInstantiation()
.asExpr()
.getExpr()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[0]" and
output = "ReturnValue" and
preservesValue = false
}
}
/** The taint summary for `ActiveStorage::Filename#sanitized`. */
class ActiveStorageFilenameSanitizedSummary extends SummarizedCallable {
ActiveStorageFilenameSanitizedSummary() { this = "ActiveStorage::Filename#sanitized" }
override MethodCall getACall() {
result =
API::getTopLevelMember("ActiveStorage")
.getMember("Filename")
.getInstance()
.getAMethodCall("sanitized")
.asExpr()
.getExpr()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[-1]" and
output = "ReturnValue" and
preservesValue = false
/** Taint summaries for `ActiveStorage::Filename`, contributed as CSV summary rows. */
private class Summaries extends ModelInput::SummaryModelCsv {
  override predicate row(string row) {
    // `Filename.new`: taint from argument 0 to the return value
    row =
      "activestorage;;Member[ActiveStorage].Member[Filename].Method[new];Argument[0];ReturnValue;taint"
    or
    // `sanitized` on an instance: taint from the receiver to the return value
    row =
      "activestorage;;Member[ActiveStorage].Member[Filename].Instance.Method[sanitized];Receiver;ReturnValue;taint"
  }
}

View File

@@ -69,7 +69,10 @@ abstract private class IOOrFileMethodCall extends DataFlow::CallNode {
}
/** Gets the API used to perform this call, either "IO" or "File" */
abstract string getAPI();
abstract string getApi();
/** DEPRECATED: Alias for getApi */
deprecated string getAPI() { result = this.getApi() }
/** Gets a node representing the data read or written by this call */
abstract DataFlow::Node getADataNodeImpl();
@@ -110,7 +113,10 @@ private class IOOrFileReadMethodCall extends IOOrFileMethodCall {
)
}
override string getAPI() { result = api }
override string getApi() { result = api }
/** DEPRECATED: Alias for getApi */
deprecated override string getAPI() { result = this.getApi() }
override DataFlow::Node getADataNodeImpl() { result = this }
@@ -151,7 +157,10 @@ private class IOOrFileWriteMethodCall extends IOOrFileMethodCall {
)
}
override string getAPI() { result = api }
override string getApi() { result = api }
/** DEPRECATED: Alias for getApi */
deprecated override string getAPI() { result = this.getApi() }
override DataFlow::Node getADataNodeImpl() { result = dataNode }
@@ -180,12 +189,6 @@ module IO {
}
}
// "Direct" `IO` instances, i.e. cases where there is no more specific
// subtype such as `File`
private class IOInstanceStrict extends IOInstance {
IOInstanceStrict() { this = ioInstance() }
}
/**
* A `DataFlow::CallNode` that reads data using the `IO` class. For example,
* the `read` and `readline` calls in:
@@ -202,7 +205,7 @@ module IO {
* that use a subclass of `IO` such as `File`.
*/
class IOReader extends IOOrFileReadMethodCall {
IOReader() { this.getAPI() = "IO" }
IOReader() { this.getApi() = "IO" }
}
/**
@@ -221,7 +224,7 @@ module IO {
* that use a subclass of `IO` such as `File`.
*/
class IOWriter extends IOOrFileWriteMethodCall {
IOWriter() { this.getAPI() = "IO" }
IOWriter() { this.getApi() = "IO" }
}
/**
@@ -306,7 +309,7 @@ module File {
* ```
*/
class FileModuleReader extends IO::FileReader {
FileModuleReader() { this.getAPI() = "File" }
FileModuleReader() { this.getApi() = "File" }
override DataFlow::Node getADataNode() { result = this.getADataNodeImpl() }

View File

@@ -221,7 +221,7 @@ private class GraphqlSchemaObjectClassMethodCall extends MethodCall {
recvCls.getModule() = resolveConstantReadAccess(this.getReceiver())
or
// e.g. self.some_method(...) within a graphql Object or Interface
this.getReceiver() instanceof Self and
this.getReceiver() instanceof SelfVariableAccess and
this.getEnclosingModule() = recvCls
}

View File

@@ -18,20 +18,15 @@ module Kernel {
* providing a specific receiver as in `Kernel.exit`.
*/
class KernelMethodCall extends DataFlow::CallNode {
private MethodCall methodCall;
KernelMethodCall() {
methodCall = this.asExpr().getExpr() and
this = API::getTopLevelMember("Kernel").getAMethodCall(_)
or
this.asExpr().getExpr() instanceof UnknownMethodCall and
(
this = API::getTopLevelMember("Kernel").getAMethodCall(_)
this.getReceiver().asExpr().getExpr() instanceof SelfVariableAccess and
isPrivateKernelMethod(this.getMethodName())
or
methodCall instanceof UnknownMethodCall and
(
this.getReceiver().asExpr().getExpr() instanceof Self and
isPrivateKernelMethod(methodCall.getMethodName())
or
isPublicKernelMethod(methodCall.getMethodName())
)
isPublicKernelMethod(this.getMethodName())
)
}
}

View File

@@ -4,24 +4,16 @@
private import codeql.ruby.ApiGraphs
private import codeql.ruby.dataflow.FlowSummary
private import codeql.ruby.frameworks.data.ModelsAsData
/**
* Provides modeling for the `Regexp` class.
*/
module Regexp {
/** A flow summary for `Regexp.escape` and its alias, `Regexp.quote`. */
class RegexpEscapeSummary extends SummarizedCallable {
RegexpEscapeSummary() { this = "Regexp.escape" }
override MethodCall getACall() {
result =
API::getTopLevelMember("Regexp").getAMethodCall(["escape", "quote"]).asExpr().getExpr()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[0]" and
output = "ReturnValue" and
preservesValue = false
class RegexpEscapeSummary extends ModelInput::SummaryModelCsv {
override predicate row(string row) {
row = ";;Member[Regexp].Method[escape,quote];Argument[0];ReturnValue;taint"
}
}
}

View File

@@ -0,0 +1,31 @@
/**
* Provides classes for contributing a model, or using the interpreted results
* of a model represented as data.
*
* - Use the `ModelInput` module to contribute new models.
* - Use the `ModelOutput` module to access the model results in terms of API nodes.
*
* The `package` part of a CSV row should be the name of a Ruby gem, or the empty
* string if it's referring to the standard library.
*
* The `type` part can be one of the following:
* - the empty string, referring to the global scope,
* - the string `any`, referring to any expression, or
* - the name of a type definition from `ModelInput::TypeModelCsv`
*/
private import ruby
private import internal.ApiGraphModels as Shared
private import internal.ApiGraphModelsSpecific as Specific
import Shared::ModelInput as ModelInput
import Shared::ModelOutput as ModelOutput
private import codeql.ruby.dataflow.RemoteFlowSources
/**
 * A remote flow source that comes from a CSV source row of kind `remote`.
 */
private class RemoteFlowSourceFromCsv extends RemoteFlowSource::Range {
  RemoteFlowSourceFromCsv() {
    // an immediate use of any API node that the models mark as a `remote` source
    ModelOutput::getASourceNode("remote").getAnImmediateUse() = this
  }

  override string getSourceType() { "Remote flow (from model)" = result }
}

View File

@@ -0,0 +1,522 @@
/**
* INTERNAL use only. This is an experimental API subject to change without notice.
*
* Provides classes and predicates for dealing with flow models specified in CSV format.
*
* The CSV specification has the following columns:
* - Sources:
* `package; type; path; kind`
* - Sinks:
* `package; type; path; kind`
* - Summaries:
* `package; type; path; input; output; kind`
* - Types:
* `package1; type1; package2; type2; path`
*
* The interpretation of a row is similar to API-graphs with a left-to-right
* reading.
* 1. The `package` column selects a package name, as it would be referenced in the source code,
* such as an NPM package, PIP package, or Ruby gem. (See `ModelsAsData.qll` for language-specific details).
* It may also be a synthetic package used for a type definition (see type definitions below).
* 2. The `type` column selects all instances of a named type originating from that package,
* or the empty string if referring to the package itself.
* It can also be a synthetic type name defined by a type definition (see type definitions below).
* 3. The `path` column is a `.`-separated list of "access path tokens" to resolve, starting at the node selected by `package` and `type`.
*
* Every language supports the following tokens:
* - Argument[n]: the n-th argument to a call. May be a range of form `x..y` (inclusive) and/or a comma-separated list.
* Additionally, `N-1` refers to the last argument, `N-2` refers to the second-last, and so on.
* - Parameter[n]: the n-th parameter of a callback. May be a range of form `x..y` (inclusive) and/or a comma-separated list.
* - ReturnValue: the value returned by a function call
* - WithArity[n]: match a call with the given arity. May be a range of form `x..y` (inclusive) and/or a comma-separated list.
*
* The following tokens are common and should be implemented for languages where it makes sense:
* - Member[x]: a member named `x`; exactly what a "member" is depends on the language. May be a comma-separated list of names.
* - Instance: an instance of a class
* - Subclass: a subclass of a class
* - ArrayElement: an element of an array
* - Element: an element of a collection-like object
* - MapKey: a key in map-like object
* - MapValue: a value in a map-like object
* - Awaited: the value from a resolved promise/future-like object
*
* For the time being, please consult `ApiGraphModelsSpecific.qll` to see which language-specific tokens are currently supported.
*
* 4. The `input` and `output` columns specify how data enters and leaves the element selected by the
* first `(package, type, path)` tuple. Both strings are `.`-separated access paths
* of the same syntax as the `path` column.
* 5. The `kind` column is a tag that can be referenced from QL to determine to
* which classes the interpreted elements should be added. For example, for
* sources `"remote"` indicates a default remote flow source, and for summaries
* `"taint"` indicates a default additional taint step and `"value"` indicates a
* globally applicable value-preserving step.
*
* ### Types
*
* A type row of form `package1; type1; package2; type2; path` indicates that `package2; type2; path`
* should be seen as an instance of the type `package1; type1`.
*
* A `(package,type)` pair may refer to a static type or a synthetic type name used internally in the model.
* Synthetic type names can be used to reuse intermediate sub-paths, when there are multiple ways to access the same
* element.
* See `ModelsAsData.qll` for the language-specific interpretation of packages and static type names.
*
* By convention, if one wants to avoid clashes with static types from the package, the type name
* should be prefixed with a tilde character (`~`). For example, `(foo, ~Bar)` can be used to indicate that
* the type is related to the `foo` package but is not intended to match a static type.
*/
private import ApiGraphModelsSpecific as Specific
private class Unit = Specific::Unit;
private module API = Specific::API;
private import Specific::AccessPathSyntax
/**
* Module containing hooks for providing input data to be interpreted as a model.
*
* Each class below declares an abstract `row` predicate; a model is contributed by
* extending the class and overriding `row` to hold for each `;`-separated CSV row.
*/
module ModelInput {
/**
* A unit class for adding additional source model rows.
*
* Extend this class to add additional source definitions.
*/
class SourceModelCsv extends Unit {
/**
* Holds if `row` specifies a source definition.
*
* A row of form
* ```
* package;type;path;kind
* ```
* indicates that the value at `(package, type, path)` should be seen as a flow
* source of the given `kind`.
*
* The kind `remote` represents a general remote flow source.
*/
abstract predicate row(string row);
}
/**
* A unit class for adding additional sink model rows.
*
* Extend this class to add additional sink definitions.
*/
class SinkModelCsv extends Unit {
/**
* Holds if `row` specifies a sink definition.
*
* A row of form
* ```
* package;type;path;kind
* ```
* indicates that the value at `(package, type, path)` should be seen as a sink
* of the given `kind`.
*/
abstract predicate row(string row);
}
/**
* A unit class for adding additional summary model rows.
*
* Extend this class to add additional flow summary definitions.
*/
class SummaryModelCsv extends Unit {
/**
* Holds if `row` specifies a summary definition.
*
* A row of form
* ```
* package;type;path;input;output;kind
* ```
* indicates that for each call to `(package, type, path)`, the value referred to by `input`
* can flow to the value referred to by `output`.
*
* `kind` should be either `value` or `taint`, for value-preserving or taint-preserving steps,
* respectively.
*/
abstract predicate row(string row);
}
/**
* A unit class for adding additional type model rows.
*
* Extend this class to add additional type definitions.
*/
class TypeModelCsv extends Unit {
/**
* Holds if `row` specifies a type definition.
*
* A row of form
* ```
* package1;type1;package2;type2;path
* ```
* indicates that `(package2, type2, path)` should be seen as an instance of `(package1, type1)`.
*/
abstract predicate row(string row);
}
}
private import ModelInput
/**
 * A class that has no instances, except in specific tests.
 *
 * As soon as an instance exists, every model is parsed, including those whose
 * package is not considered relevant for the current database.
 */
abstract class TestAllModels extends Unit { }
/**
 * Gets the string that becomes `s` once `;dummy` is appended to it.
 *
 * Callers pass the padded string as `s` and use the result as the original row, which in
 * effect appends `;dummy` to each row. The padding works around the fact that
 * `string.split(delim,n)` does not preserve empty trailing substrings.
 */
bindingset[result]
private string inversePad(string s) { result + ";dummy" = s }
/** Holds if `row` is a contributed source row with `;dummy` appended. */
private predicate sourceModel(string row) {
  exists(SourceModelCsv csv | csv.row(inversePad(row)))
}

/** Holds if `row` is a contributed sink row with `;dummy` appended. */
private predicate sinkModel(string row) {
  exists(SinkModelCsv csv | csv.row(inversePad(row)))
}

/** Holds if `row` is a contributed summary row with `;dummy` appended. */
private predicate summaryModel(string row) {
  exists(SummaryModelCsv csv | csv.row(inversePad(row)))
}

/** Holds if `row` is a contributed type row with `;dummy` appended. */
private predicate typeModel(string row) {
  exists(TypeModelCsv csv | csv.row(inversePad(row)))
}
/** Holds if some source model row has the columns `package`, `type`, `path` and `kind`. */
predicate sourceModel(string package, string type, string path, string kind) {
  exists(string paddedRow | sourceModel(paddedRow) |
    package = paddedRow.splitAt(";", 0) and
    type = paddedRow.splitAt(";", 1) and
    path = paddedRow.splitAt(";", 2) and
    kind = paddedRow.splitAt(";", 3)
  )
}
/** Holds if some sink model row has the columns `package`, `type`, `path` and `kind`. */
private predicate sinkModel(string package, string type, string path, string kind) {
  exists(string paddedRow | sinkModel(paddedRow) |
    package = paddedRow.splitAt(";", 0) and
    type = paddedRow.splitAt(";", 1) and
    path = paddedRow.splitAt(";", 2) and
    kind = paddedRow.splitAt(";", 3)
  )
}
/**
 * Holds if some summary model row has the columns `package`, `type`, `path`, `input`,
 * `output` and `kind`.
 */
private predicate summaryModel(
  string package, string type, string path, string input, string output, string kind
) {
  exists(string paddedRow | summaryModel(paddedRow) |
    package = paddedRow.splitAt(";", 0) and
    type = paddedRow.splitAt(";", 1) and
    path = paddedRow.splitAt(";", 2) and
    input = paddedRow.splitAt(";", 3) and
    output = paddedRow.splitAt(";", 4) and
    kind = paddedRow.splitAt(";", 5)
  )
}
/**
 * Holds if some type model row has the columns `package1`, `type1`, `package2`, `type2`
 * and `path`.
 */
private predicate typeModel(
  string package1, string type1, string package2, string type2, string path
) {
  exists(string paddedRow | typeModel(paddedRow) |
    package1 = paddedRow.splitAt(";", 0) and
    type1 = paddedRow.splitAt(";", 1) and
    package2 = paddedRow.splitAt(";", 2) and
    type2 = paddedRow.splitAt(";", 3) and
    path = paddedRow.splitAt(";", 4)
  )
}
/**
 * Gets `package` itself, or a package that should be seen as an alias for `package`.
 *
 * An alias is declared by a type row whose `type1`, `type2` and `path` columns are all
 * empty.
 */
bindingset[package]
bindingset[result]
string getAPackageAlias(string package) {
  result = package
  or
  typeModel(package, "", result, "", "")
}
/**
 * Holds if CSV rows involving `package` might be relevant for the analysis of this database.
 */
private predicate isRelevantPackage(string package) {
  // A package is relevant when a type row defines one of its types in terms of a
  // package that is already relevant.
  exists(string relevantPackage |
    typeModel(package, _, relevantPackage, _, _) and
    isRelevantPackage(relevantPackage)
  )
  or
  // Otherwise the package must be used (or all models are forced on by a test) ...
  (
    exists(TestAllModels t)
    or
    Specific::isPackageUsed(package)
  ) and
  // ... and must occur in the first column of some row.
  (
    typeModel(package, _, _, _, _) or
    summaryModel(package, _, _, _, _, _) or
    sinkModel(package, _, _, _) or
    sourceModel(package, _, _, _)
  )
}
/**
 * Holds if the tuple `package,type,path` occurs in some CSV row and `package` is relevant.
 *
 * For type rows, the tuple is the `package2;type2;path` part of the row.
 */
pragma[nomagic]
predicate isRelevantFullPath(string package, string type, string path) {
  (
    typeModel(_, _, package, type, path) or
    summaryModel(package, type, path, _, _, _) or
    sinkModel(package, type, path, _) or
    sourceModel(package, type, path, _)
  ) and
  isRelevantPackage(package)
}
/**
 * A string taken from a CSV row that should be parsed as an access path: either the
 * `path` of a relevant row, or the `input` or `output` column of a summary row for a
 * relevant package.
 */
private class AccessPathRange extends AccessPath::Range {
  AccessPathRange() {
    exists(string pkg |
      isRelevantPackage(pkg) and
      (
        summaryModel(pkg, _, _, _, this, _)
        or
        summaryModel(pkg, _, _, this, _, _)
      )
    )
    or
    isRelevantFullPath(_, _, this)
  }
}
/**
 * Gets a successor of `node` in the API graph, reached by following `token`.
 */
bindingset[token]
API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) {
  // Language-specific tokens
  result = Specific::getExtraSuccessorFromNode(node, token)
  or
  token.getName() = "ReturnValue" and
  result = node.getReturn()
  or
  // API graphs use the same label for arguments and parameters. An edge originating from a
  // use-node represents an argument, and an edge originating from a def-node represents a
  // parameter. Both token names are therefore mapped to the same thing.
  token.getName() in ["Argument", "Parameter"] and
  result = node.getParameter(AccessPath::parseIntUnbounded(token.getAnArgument()))
}
/**
 * Gets an API-graph successor of the invocation `invoke`, reached by following `token`.
 */
bindingset[token]
API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathToken token) {
  // Language-specific tokens
  result = Specific::getExtraSuccessorFromInvoke(invoke, token)
  or
  token.getName() = "ReturnValue" and
  result = invoke.getReturn()
  or
  token.getName() = "Argument" and
  exists(int index |
    // The argument is parsed using the number of arguments of this particular call.
    index = AccessPath::parseIntWithArity(token.getAnArgument(), invoke.getNumArgument())
  |
    result = invoke.getParameter(index)
  )
}
/**
 * Holds if `invoke` satisfies the call-site filter denoted by `token`.
 *
 * `WithArity` is handled here; other filters are delegated to the language-specific part.
 */
pragma[inline]
private predicate invocationMatchesCallSiteFilter(Specific::InvokeNode invoke, AccessPathToken token) {
  Specific::invocationMatchesExtraCallSiteFilter(invoke, token)
  or
  token.getName() = "WithArity" and
  AccessPath::parseIntUnbounded(token.getAnArgument()) = invoke.getNumArgument()
}
/**
 * Gets the API node reached after resolving the first `n` tokens of `path` in the given
 * `(package, type, path)` tuple.
 */
pragma[nomagic]
private API::Node getNodeFromPath(string package, string type, AccessPath path, int n) {
  // Recursive case: follow token `n - 1` from the node reached by the first `n - 1` tokens.
  result = getSuccessorFromNode(getNodeFromPath(package, type, path, n - 1), path.getToken(n - 1))
  or
  // Recursive case: as above, but the path may have stepped through one or more
  // call-site filters, so the step starts from an invocation.
  result =
    getSuccessorFromInvoke(getInvocationFromPath(package, type, path, n - 1), path.getToken(n - 1))
  or
  // Base cases, only for tuples that occur in a relevant row.
  isRelevantFullPath(package, type, path) and
  (
    // Language-specific cases, such as handling of global variables
    result = Specific::getExtraNodeFromPath(package, type, path, n)
    or
    // `(package, type)` is defined by a type row: start from the node of its definition.
    n = 0 and
    exists(string basePackage, string baseType, AccessPath basePath |
      typeModel(package, type, basePackage, baseType, basePath) and
      result = getNodeFromPath(basePackage, baseType, basePath, basePath.getNumToken())
    )
  )
}
/** Gets the node reached after resolving every token of `path` in `(package, type, path)`. */
API::Node getNodeFromPath(string package, string type, AccessPath path) {
  exists(int numTokens | numTokens = path.getNumToken() |
    result = getNodeFromPath(package, type, path, numTokens)
  )
}
/**
 * Gets an invocation reached after resolving the first `n` tokens of `path` in the given
 * `(package, type, path)` tuple.
 *
 * Unlike `getNodeFromPath`, the `path` may end with one or more call-site filters.
 */
Specific::InvokeNode getInvocationFromPath(string package, string type, AccessPath path, int n) {
  // Token `n - 1` is a call-site filter that the invocation found so far satisfies.
  invocationMatchesCallSiteFilter(result, path.getToken(n - 1)) and
  result = getInvocationFromPath(package, type, path, n - 1)
  or
  // An invocation of the node reached by the first `n` tokens.
  result = Specific::getAnInvocationOf(getNodeFromPath(package, type, path, n))
}
/** Gets an invocation reached after resolving every token of `path` in `(package, type, path)`. */
Specific::InvokeNode getInvocationFromPath(string package, string type, AccessPath path) {
  exists(int numTokens | numTokens = path.getNumToken() |
    result = getInvocationFromPath(package, type, path, numTokens)
  )
}
/**
 * Holds if an access path token called `name` may appear in an identifying access path.
 */
bindingset[name]
predicate isValidTokenNameInIdentifyingAccessPath(string name) {
  Specific::isExtraValidTokenNameInIdentifyingAccessPath(name)
  or
  name in ["Argument", "Parameter", "ReturnValue", "WithArity"]
}
/**
 * Holds if an access path token called `name` may appear without arguments in an
 * identifying access path.
 */
bindingset[name]
predicate isValidNoArgumentTokenInIdentifyingAccessPath(string name) {
  Specific::isExtraValidNoArgumentTokenInIdentifyingAccessPath(name)
  or
  name = "ReturnValue"
}
/**
 * Holds if `argument` is acceptable as an argument of an access path token called `name`
 * occurring in an identifying access path.
 */
bindingset[name, argument]
predicate isValidTokenArgumentInIdentifyingAccessPath(string name, string argument) {
  Specific::isExtraValidTokenArgumentInIdentifyingAccessPath(name, argument)
  or
  // A non-negative integer, or a range of two of them.
  name = "WithArity" and
  argument.regexpMatch("\\d+(\\.\\.\\d+)?")
  or
  // An integer with an optional `N-` or `-` prefix, or a range of two of them.
  name in ["Argument", "Parameter"] and
  argument.regexpMatch("(N-|-)?\\d+(\\.\\.(N-|-)?\\d+)?")
}
/**
 * Module exposing the imported models in terms of API graph nodes.
 */
module ModelOutput {
  /**
   * Gets a node that a CSV source model marks as a source of the given `kind`.
   */
  API::Node getASourceNode(string kind) {
    exists(string package, string type, string path | sourceModel(package, type, path, kind) |
      result = getNodeFromPath(package, type, path)
    )
  }

  /**
   * Gets a node that a CSV sink model marks as a sink of the given `kind`.
   */
  API::Node getASinkNode(string kind) {
    exists(string package, string type, string path | sinkModel(package, type, path, kind) |
      result = getNodeFromPath(package, type, path)
    )
  }

  /**
   * Holds if a CSV summary row with these columns exists and its `package` is relevant.
   */
  predicate relevantSummaryModel(
    string package, string type, string path, string input, string output, string kind
  ) {
    summaryModel(package, type, path, input, output, kind) and
    isRelevantPackage(package)
  }

  /**
   * Holds if `baseNode` is an invocation identified by the `package,type,path` part of a
   * summary row.
   */
  predicate resolvedSummaryBase(
    string package, string type, string path, Specific::InvokeNode baseNode
  ) {
    baseNode = getInvocationFromPath(package, type, path) and
    summaryModel(package, type, path, _, _, _)
  }

  /**
   * Gets a node that is seen as an instance of `(package,type)` due to a type definition
   * contributed by a CSV model.
   */
  API::Node getATypeNode(string package, string type) {
    exists(string basePackage, string baseType, AccessPath basePath |
      typeModel(package, type, basePackage, baseType, basePath)
    |
      result = getNodeFromPath(basePackage, baseType, basePath)
    )
  }

  /**
   * Gets an error message relating to an invalid CSV row in a model.
   */
  string getAWarning() {
    // Column count: compare the number of `;`-separated columns with what the row kind expects.
    exists(string csvRow, string rowKind, int expected, int actual |
      rowKind = "source" and expected = 4 and any(SourceModelCsv csv).row(csvRow)
      or
      rowKind = "sink" and expected = 4 and any(SinkModelCsv csv).row(csvRow)
      or
      rowKind = "summary" and expected = 6 and any(SummaryModelCsv csv).row(csvRow)
      or
      rowKind = "type" and expected = 5 and any(TypeModelCsv csv).row(csvRow)
    |
      actual = count(csvRow.indexOf(";")) + 1 and
      expected != actual and
      result =
        "CSV " + rowKind + " row should have " + expected + " columns but has " + actual + ": " +
          csvRow
    )
    or
    // Token names and arguments in the access paths of relevant rows.
    exists(AccessPath path, AccessPathToken token |
      token = path.getToken(_) and
      isRelevantFullPath(_, _, path)
    |
      // Unknown token name.
      not isValidTokenNameInIdentifyingAccessPath(token.getName()) and
      result = "Invalid token name '" + token.getName() + "' in access path: " + path
      or
      isValidTokenNameInIdentifyingAccessPath(token.getName()) and
      (
        // Known token name with an argument that is not accepted for it.
        exists(string argument |
          argument = token.getAnArgument() and
          not isValidTokenArgumentInIdentifyingAccessPath(token.getName(), argument)
        |
          result =
            "Invalid argument '" + argument + "' in token '" + token + "' in access path: " + path
        )
        or
        // Known token name that requires arguments but has none.
        token.getNumArgument() = 0 and
        not isValidNoArgumentTokenInIdentifyingAccessPath(token.getName()) and
        result = "Invalid token '" + token + "' is missing its arguments, in access path: " + path
      )
    )
  }
}

View File

@@ -0,0 +1,168 @@
/**
* Contains the language-specific part of the models-as-data implementation found in `ApiGraphModels.qll`.
*
* It must export the following members:
* ```ql
* class Unit // a unit type
* class InvokeNode // a type representing an invocation connected to the API graph
* module API // the API graph module
* predicate isPackageUsed(string package)
* API::Node getExtraNodeFromPath(string package, string type, string path, int n)
* API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token)
* API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathToken token)
* predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathToken token)
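* InvokeNode getAnInvocationOf(API::Node node)
* predicate isExtraValidTokenNameInIdentifyingAccessPath(string name)
* predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name)
* predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string argument)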
* ```
*/
private import ruby
private import codeql.ruby.DataFlow
private import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate
private import ApiGraphModels
class Unit = DataFlowPrivate::Unit;
// Re-export libraries needed by ApiGraphModels.qll
import codeql.ruby.ApiGraphs
import codeql.ruby.dataflow.internal.AccessPathSyntax as AccessPathSyntax
private import AccessPathSyntax
/**
 * Holds if models describing `package` may be relevant for the analysis of this database.
 *
 * In the context of Ruby, `package` is the name of a Ruby gem.
 */
bindingset[package]
predicate isPackageUsed(string package) {
  // Every model is currently expressed as an access path starting at any top-level, so the
  // package name has no effect on matching.
  //
  // Arbitrary package names are accepted so that a model can still record which package it
  // describes, in case that is needed in the future.
  //
  // In principle a package should count as "used" if there is a transitive dependency on
  // it, but only the direct dependencies can be seen reliably.
  //
  // In practice, packages try to use unique top-level module names, which mitigates the
  // precision loss of not checking the package name.
  any()
}
/**
 * Gets a Ruby-specific interpretation of the `(package, type, path)` tuple after resolving
 * the first `n` access path tokens.
 */
bindingset[package, type, path]
API::Node getExtraNodeFromPath(string package, string type, AccessPath path, int n) {
  // A row of form `;any;Method[foo]` should match any method named `foo`. The leading
  // `Method[...]` token is consumed here, hence `n = 1`.
  type = "any" and
  n = 1 and
  exists(package) and
  exists(EntryPointFromAnyType entryPoint |
    result = entryPoint.getANode() and
    methodMatchedByName(path, entryPoint.getName())
  )
  or
  // An empty type means the path starts at the root of the API graph.
  type = "" and
  n = 0 and
  exists(package) and // Allow any package name, see `isPackageUsed`.
  isRelevantFullPath(package, type, path) and
  result = API::root()
}
/**
 * Holds if `path` occurs in a CSV row with type `any`, meaning it can start
 * matching anywhere, and its first token is `Method[methodName]`.
 */
private predicate methodMatchedByName(AccessPath path, string methodName) {
  exists(AccessPathToken firstToken |
    firstToken = path.getToken(0) and
    firstToken.getName() = "Method"
  |
    methodName = firstToken.getAnArgument()
  ) and
  isRelevantFullPath(_, "any", path)
}
/**
 * An API graph entry point for a method name such as `foo` in `;any;Method[foo]`.
 *
 * This ensures that the API graph rooted in calls to that method is materialized.
 */
private class EntryPointFromAnyType extends API::EntryPoint {
  string name;

  EntryPointFromAnyType() {
    methodMatchedByName(_, name) and
    this = "AnyMethod[" + name + "]"
  }

  /** Gets a call to a method called `name`. */
  override DataFlow::CallNode getACall() { name = result.getMethodName() }

  /** Gets the method name this entry point stands for. */
  string getName() { result = name }
}
/**
 * Gets a Ruby-specific API graph successor of `node`, reached by following `token`.
 */
bindingset[token]
API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) {
  token.getName() = "BlockArgument" and
  result = node.getBlock()
  or
  token.getName() = "Instance" and
  result = node.getInstance()
  or
  token.getName() = "Method" and
  result = node.getMethod(token.getAnArgument())
  or
  token.getName() = "Member" and
  result = node.getMember(token.getAnArgument())
  // Note: The "ArrayElement" token is not implemented yet, as it ultimately requires type-tracking and
  // API graphs to be aware of the steps involving ArrayElement contributed by the standard library model.
  // Type-tracking cannot summarize function calls on its own, so it doesn't benefit from synthesized callables.
}
/**
 * Gets a Ruby-specific API graph successor of the invocation `node`, reached by
 * following `token`.
 *
 * There are currently no such successors.
 */
bindingset[token]
API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathToken token) { none() }
/**
 * Holds if `invoke` satisfies the Ruby-specific call-site filter denoted by `token`:
 * `WithBlock` requires the invocation to have a block, `WithoutBlock` requires it to have none.
 */
bindingset[token]
predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathToken token) {
  token.getName() = "WithoutBlock" and
  not exists(invoke.getBlock())
  or
  token.getName() = "WithBlock" and
  exists(invoke.getBlock())
}
/** An API graph node that represents a method call. */
class InvokeNode extends API::MethodAccessNode {
  /** Gets the number of arguments passed at the underlying call node. */
  int getNumArgument() { result = this.getCallNode().getNumberOfArguments() }
}
/**
 * Gets the `InvokeNode` corresponding to a specific invocation of `node`.
 *
 * This is `node` itself whenever `node` is an `InvokeNode`.
 */
InvokeNode getAnInvocationOf(API::Node node) { node = result }
/**
 * Holds if `name` is a Ruby-specific token name that may appear in an identifying access path.
 */
bindingset[name]
predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) {
  name in ["Member", "Method", "Instance", "WithBlock", "WithoutBlock", "BlockArgument"]
}
/**
 * Holds if `name` is a Ruby-specific token name that may appear without arguments in an
 * identifying access path.
 */
predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) {
  name in ["Instance", "WithBlock", "WithoutBlock", "BlockArgument"]
}
/**
 * Holds if `argument` is acceptable as an argument of a Ruby-specific access path token
 * called `name` occurring in an identifying access path.
 *
 * `Member` and `Method` accept any argument.
 */
bindingset[name, argument]
predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string argument) {
  exists(argument) and
  name in ["Member", "Method"]
}

View File

@@ -176,8 +176,8 @@ private module RegexpMatching {
}
/** A class to test whether a regular expression matches certain HTML tags. */
class HTMLMatchingRegExp extends RegexpMatching::MatchedRegExp {
HTMLMatchingRegExp() {
class HtmlMatchingRegExp extends RegexpMatching::MatchedRegExp {
HtmlMatchingRegExp() {
// the regexp must mention "<" and ">" explicitly.
forall(string angleBracket | angleBracket = ["<", ">"] |
any(RegExpConstant term | term.getValue().matches("%" + angleBracket + "%")).getRootTerm() =
@@ -204,12 +204,15 @@ class HTMLMatchingRegExp extends RegexpMatching::MatchedRegExp {
}
}
/** DEPRECATED: Alias for HtmlMatchingRegExp */
deprecated class HTMLMatchingRegExp = HtmlMatchingRegExp;
/**
* Holds if `regexp` matches some HTML tags, but misses some HTML tags that it should match.
*
* When adding a new case to this predicate, make sure the test strings used in `matches(..)` calls are present in `HtmlMatchingRegExp::test` / `HtmlMatchingRegExp::testWithGroups`.
*/
predicate isBadRegexpFilter(HTMLMatchingRegExp regexp, string msg) {
predicate isBadRegexpFilter(HtmlMatchingRegExp regexp, string msg) {
// CVE-2021-33829 - matching both "<!-- foo -->" and "<!-- foo --!>", but in different capture groups
regexp.matches("<!-- foo -->") and
regexp.matches("<!-- foo --!>") and

View File

@@ -13,8 +13,8 @@ import codeql.ruby.TaintTracking
/**
* Provides a taint-tracking configuration for detecting "reflected server-side cross-site scripting" vulnerabilities.
*/
module ReflectedXSS {
import XSS::ReflectedXSS
module ReflectedXss {
import XSS::ReflectedXss
/**
* A taint-tracking configuration for detecting "reflected server-side cross-site scripting" vulnerabilities.
@@ -33,7 +33,10 @@ module ReflectedXSS {
}
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
isAdditionalXSSTaintStep(node1, node2)
isAdditionalXssTaintStep(node1, node2)
}
}
}
/** DEPRECATED: Alias for ReflectedXss */
deprecated module ReflectedXSS = ReflectedXss;

View File

@@ -11,8 +11,9 @@ import ruby
import codeql.ruby.DataFlow
import codeql.ruby.TaintTracking
module StoredXSS {
import XSS::StoredXSS
/** Provides a taint-tracking configuration for cross-site scripting vulnerabilities. */
module StoredXss {
import XSS::StoredXss
/**
* A taint-tracking configuration for reasoning about Stored XSS.
@@ -34,7 +35,10 @@ module StoredXSS {
}
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
isAdditionalXSSTaintStep(node1, node2)
isAdditionalXssTaintStep(node1, node2)
}
}
}
/** DEPRECATED: Alias for StoredXss */
deprecated module StoredXSS = StoredXss;

View File

@@ -245,7 +245,7 @@ private module Shared {
/**
* An additional step that preserves dataflow in the context of XSS.
*/
predicate isAdditionalXSSFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
predicate isAdditionalXssFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isFlowFromLocals(node1, node2)
or
isFlowFromControllerInstanceVariable(node1, node2)
@@ -254,6 +254,9 @@ private module Shared {
or
isFlowFromHelperMethod(node1, node2)
}
/** DEPRECATED: Alias for isAdditionalXssFlowStep */
deprecated predicate isAdditionalXSSFlowStep = isAdditionalXssFlowStep/2;
}
/**
@@ -261,7 +264,7 @@ private module Shared {
* "reflected cross-site scripting" vulnerabilities, as well as
* extension points for adding your own.
*/
module ReflectedXSS {
module ReflectedXss {
/** A data flow source for reflected XSS vulnerabilities. */
abstract class Source extends Shared::Source { }
@@ -277,7 +280,10 @@ module ReflectedXSS {
/**
* An additional step that preserves dataflow in the context of reflected XSS.
*/
predicate isAdditionalXSSTaintStep = Shared::isAdditionalXSSFlowStep/2;
predicate isAdditionalXssTaintStep = Shared::isAdditionalXssFlowStep/2;
/** DEPRECATED: Alias for isAdditionalXssTaintStep */
deprecated predicate isAdditionalXSSTaintStep = isAdditionalXssTaintStep/2;
/**
* A source of remote user input, considered as a flow source.
@@ -285,6 +291,9 @@ module ReflectedXSS {
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
}
/** DEPRECATED: Alias for ReflectedXss */
deprecated module ReflectedXSS = ReflectedXss;
private module OrmTracking {
/**
* A data flow configuration to track flow from finder calls to field accesses.
@@ -298,7 +307,7 @@ private module OrmTracking {
override predicate isSink(DataFlow2::Node sink) { sink instanceof DataFlow2::CallNode }
override predicate isAdditionalFlowStep(DataFlow2::Node node1, DataFlow2::Node node2) {
Shared::isAdditionalXSSFlowStep(node1, node2)
Shared::isAdditionalXssFlowStep(node1, node2)
or
// Propagate flow through arbitrary method calls
node2.(DataFlow2::CallNode).getReceiver() = node1
@@ -309,7 +318,8 @@ private module OrmTracking {
}
}
module StoredXSS {
/** Provides default sources, sinks and sanitizers for detecting stored cross-site scripting (XSS) vulnerabilities. */
module StoredXss {
/** A data flow source for stored XSS vulnerabilities. */
abstract class Source extends Shared::Source { }
@@ -325,7 +335,10 @@ module StoredXSS {
/**
* An additional step that preserves dataflow in the context of stored XSS.
*/
predicate isAdditionalXSSTaintStep = Shared::isAdditionalXSSFlowStep/2;
predicate isAdditionalXssTaintStep = Shared::isAdditionalXssFlowStep/2;
/** DEPRECATED: Alias for isAdditionalXssTaintStep */
deprecated predicate isAdditionalXSSTaintStep = isAdditionalXssTaintStep/2;
private class OrmFieldAsSource extends Source instanceof DataFlow2::CallNode {
OrmFieldAsSource() {
@@ -341,3 +354,6 @@ module StoredXSS {
private class FileSystemReadAccessAsSource extends Source instanceof FileSystemReadAccess { }
// TODO: Consider `FileNameSource` flowing to script tag `src` attributes and similar
}
/** DEPRECATED: Alias for StoredXss */
deprecated module StoredXSS = StoredXss;

View File

@@ -402,7 +402,8 @@ abstract class RegExp extends AST::StringlikeLiteral {
not exists(int x, int y | this.backreference(x, y) and x <= start and y >= end) and
not exists(int x, int y |
this.pStyleNamedCharacterProperty(x, y, _) and x <= start and y >= end
)
) and
not exists(int x, int y | this.multiples(x, y, _, _) and x <= start and y >= end)
}
predicate normalCharacter(int start, int end) {
@@ -479,6 +480,7 @@ abstract class RegExp extends AST::StringlikeLiteral {
/** Gets the number of the group in start,end */
int getGroupNumber(int start, int end) {
this.group(start, end) and
not this.nonCapturingGroupStart(start, _) and
result =
count(int i | this.group(i, _) and i < start and not this.nonCapturingGroupStart(i, _)) + 1
}
@@ -488,7 +490,7 @@ abstract class RegExp extends AST::StringlikeLiteral {
this.group(start, end) and
exists(int nameEnd |
this.namedGroupStart(start, nameEnd) and
result = this.getText().substring(start + 4, nameEnd - 1)
result = this.getText().substring(start + 3, nameEnd - 1)
)
}
@@ -582,7 +584,7 @@ abstract class RegExp extends AST::StringlikeLiteral {
private predicate nonCapturingGroupStart(int start, int end) {
this.isGroupStart(start) and
this.getChar(start + 1) = "?" and
this.getChar(start + 2) = ":" and
this.getChar(start + 2) = [":", "=", "<", "!", "#"] and
end = start + 3
}
@@ -861,6 +863,7 @@ abstract class RegExp extends AST::StringlikeLiteral {
* Whether the text in the range start,end is an alternation
*/
predicate alternation(int start, int end) {
not this.inCharSet(start) and
this.topLevel(start, end) and
exists(int less | this.subalternation(start, less, _) and less < end)
}

View File

@@ -119,18 +119,18 @@ class EmptyPositiveSubPatttern extends RegExpSubPattern {
* whose root node is not a disjunction.
*/
class RegExpRoot extends RegExpTerm {
RegExpParent parent;
RegExpRoot() {
exists(RegExpAlt alt |
alt.isRootTerm() and
this = alt.getAChild() and
parent = alt.getParent()
exists(RegExpParent parent |
exists(RegExpAlt alt |
alt.isRootTerm() and
this = alt.getAChild() and
parent = alt.getParent()
)
or
this.isRootTerm() and
not this instanceof RegExpAlt and
parent = this.getParent()
)
or
this.isRootTerm() and
not this instanceof RegExpAlt and
parent = this.getParent()
}
/**
@@ -466,13 +466,14 @@ private module CharacterClasses {
* An implementation of `CharacterClass` for \d, \s, and \w.
*/
private class PositiveCharacterClassEscape extends CharacterClass {
RegExpTerm cc;
string charClass;
PositiveCharacterClassEscape() {
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
charClass = ["d", "s", "w"]
exists(RegExpTerm cc |
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
charClass = ["d", "s", "w"]
)
}
override string getARelevantChar() {
@@ -504,13 +505,14 @@ private module CharacterClasses {
* An implementation of `CharacterClass` for \D, \S, and \W.
*/
private class NegativeCharacterClassEscape extends CharacterClass {
RegExpTerm cc;
string charClass;
NegativeCharacterClassEscape() {
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
charClass = ["D", "S", "W"]
exists(RegExpTerm cc |
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
charClass = ["D", "S", "W"]
)
}
override string getARelevantChar() {

View File

@@ -1,5 +1,6 @@
private import codeql.ruby.ast.Literal as AST
private import ParseRegExp
private import codeql.NumberUtils
import codeql.Locations
private import codeql.ruby.DataFlow
@@ -60,13 +61,13 @@ module RegExpFlags {
}
/**
* Provides regular expression patterns.
* Provides utility predicates related to regular expressions.
*/
module RegExpPatterns {
/**
* Gets a pattern that matches common top-level domain names in lower case.
*/
string commonTLD() {
string getACommonTld() {
// according to ranking by http://google.com/search?q=site:.<<TLD>>
result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
}
@@ -82,7 +83,7 @@ class RegExpParent extends TRegExpParent {
RegExpTerm getChild(int i) { none() }
RegExpTerm getAChild() { result = this.getChild(_) }
final RegExpTerm getAChild() { result = this.getChild(_) }
int getNumChild() { result = count(this.getAChild()) }
@@ -254,12 +255,11 @@ newtype TRegExpParent =
class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
int part_end;
boolean maybe_empty;
boolean may_repeat_forever;
RegExpQuantifier() {
this = TRegExpQuantifier(re, start, end) and
re.qualifiedPart(start, part_end, end, maybe_empty, may_repeat_forever)
re.qualifiedPart(start, part_end, end, _, may_repeat_forever)
}
override RegExpTerm getChild(int i) {
@@ -398,6 +398,8 @@ class RegExpAlt extends RegExpTerm, TRegExpAlt {
override string getAPrimaryQlClass() { result = "RegExpAlt" }
}
class RegExpCharEscape = RegExpEscape;
class RegExpEscape extends RegExpNormalChar {
RegExpEscape() { re.escapedCharacter(start, end) }
@@ -418,7 +420,9 @@ class RegExpEscape extends RegExpNormalChar {
result = this.getUnicode()
}
predicate isIdentityEscape() { not this.getUnescaped() in ["n", "r", "t"] }
predicate isIdentityEscape() {
not this.getUnescaped() in ["n", "r", "t"] and not this.isUnicode()
}
/**
* Gets the text for this escape. That is e.g. "\w".
@@ -435,21 +439,8 @@ class RegExpEscape extends RegExpNormalChar {
* E.g. for `\u0061` this returns "a".
*/
private string getUnicode() {
exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
result = codepoint.toUnicode()
)
}
/**
* Gets int value for the `index`th char in the hex number of the unicode escape.
* E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
*/
private int getHexValueFromUnicode(int index) {
this.isUnicode() and
exists(string hex, string char | hex = this.getText().suffix(2) |
char = hex.charAt(index) and
result = 16.pow(hex.length() - index - 1) * toHex(char)
)
result = parseHexInt(this.getText().suffix(2)).toUnicode()
}
string getUnescaped() { result = this.getText().suffix(1) }
@@ -457,26 +448,6 @@ class RegExpEscape extends RegExpNormalChar {
override string getAPrimaryQlClass() { result = "RegExpEscape" }
}
/**
* Gets the hex number for the `hex` char.
*/
private int toHex(string hex) {
hex = [0 .. 9].toString() and
result = hex.toInt()
or
result = 10 and hex = ["a", "A"]
or
result = 11 and hex = ["b", "B"]
or
result = 12 and hex = ["c", "C"]
or
result = 13 and hex = ["d", "D"]
or
result = 14 and hex = ["e", "E"]
or
result = 15 and hex = ["f", "F"]
}
/**
* A word boundary, that is, a regular expression term of the form `\b`.
*/
@@ -637,6 +608,9 @@ class RegExpGroup extends RegExpTerm, TRegExpGroup {
*/
int getNumber() { result = re.getGroupNumber(start, end) }
/** Holds if this is a capture group. */
predicate isCapture() { exists(this.getNumber()) }
/** Holds if this is a named capture group. */
predicate isNamed() { exists(this.getName()) }

View File

@@ -34,7 +34,8 @@ private module Cached {
CallStep() or
ReturnStep() or
StoreStep(ContentName content) or
LoadStep(ContentName content)
LoadStep(ContentName content) or
JumpStep()
/** Gets the summary resulting from appending `step` to type-tracking summary `tt`. */
cached
@@ -49,6 +50,9 @@ private module Cached {
step = LoadStep(content) and result = MkTypeTracker(hasCall, "")
or
exists(string p | step = StoreStep(p) and content = "" and result = MkTypeTracker(hasCall, p))
or
step = JumpStep() and
result = MkTypeTracker(false, content)
)
}
@@ -67,6 +71,9 @@ private module Cached {
)
or
step = StoreStep(content) and result = MkTypeBackTracker(hasReturn, "")
or
step = JumpStep() and
result = MkTypeBackTracker(false, content)
)
}
@@ -110,12 +117,17 @@ class StepSummary extends TStepSummary {
exists(string content | this = StoreStep(content) | result = "store " + content)
or
exists(string content | this = LoadStep(content) | result = "load " + content)
or
this instanceof JumpStep and result = "jump"
}
}
pragma[noinline]
private predicate smallstepNoCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
jumpStep(nodeFrom, nodeTo) and
summary = JumpStep()
or
levelStep(nodeFrom, nodeTo) and
summary = LevelStep()
or
exists(string content |

View File

@@ -11,10 +11,32 @@ class Node = DataFlowPublic::Node;
/** An alias for `DataFlowPublic::LocalSourceNode`. */
class TypeTrackingNode = DataFlowPublic::LocalSourceNode;
/**
 * Holds if there is a simple local flow step from `nodeFrom` to `nodeTo`.
 *
 * This is an alias for `DataFlowPrivate::localFlowStepTypeTracker/2`.
 */
predicate simpleLocalFlowStep = DataFlowPrivate::localFlowStepTypeTracker/2;
/**
 * Holds if data can flow from `node1` to `node2` in a way that discards call contexts.
 *
 * This is an alias for `DataFlowPrivate::jumpStep/2`.
 */
predicate jumpStep = DataFlowPrivate::jumpStep/2;
/**
 * Holds if there is a summarized local flow step from `nodeFrom` to `nodeTo`:
 * `nodeFrom` is passed into the call `nodeTo`, and the receiving parameter
 * flows directly to a returning node.
 *
 * Only direct parameter-to-return flow is considered; summarized steps are
 * not applied recursively.
 */
pragma[nomagic]
private predicate summarizedLocalStep(Node nodeFrom, Node nodeTo) {
  exists(DataFlowPublic::ParameterNode param |
    // `nodeTo` is the call itself; `nodeFrom` is the argument bound to `param`.
    callStep(nodeTo.asExpr(), nodeFrom, param) and
    exists(TypeTrackingNode paramDef |
      paramDef = DataFlowPrivate::LocalFlow::getParameterDefNode(param.getParameter()) and
      paramDef.flowsTo(any(DataFlowPrivate::ReturningNode ret))
    )
  )
}
/**
 * Holds if there is a level step from `nodeFrom` to `nodeTo`.
 *
 * The only level steps are the ones given by `summarizedLocalStep`.
 */
predicate levelStep(Node nodeFrom, Node nodeTo) {
  summarizedLocalStep(nodeFrom, nodeTo)
}
/**
* Gets the name of a possible piece of content. This will usually include things like
*
@@ -45,6 +67,13 @@ private predicate viableParam(
)
}
/**
 * Holds if `argumentPositionMatch` relates `nodeFrom` to some parameter
 * position of `call`, and `viableParam` relates `nodeTo` to `call` at that
 * same position.
 *
 * NOTE(review): presumably `nodeFrom` is an argument of `call` and `nodeTo`
 * the matching parameter of a possible callee; confirm against
 * `argumentPositionMatch` and `viableParam`.
 */
private predicate callStep(ExprNodes::CallCfgNode call, Node nodeFrom, Node nodeTo) {
  exists(DataFlowDispatch::ParameterPosition ppos |
    viableParam(call, nodeTo, ppos) and
    argumentPositionMatch(call, nodeFrom, ppos)
  )
}
/**
* Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call.
*
@@ -53,19 +82,13 @@ private predicate viableParam(
* methods is done using API graphs (which uses type tracking).
*/
predicate callStep(Node nodeFrom, Node nodeTo) {
exists(ExprNodes::CallCfgNode call, DataFlowDispatch::ParameterPosition pos |
argumentPositionMatch(call, nodeFrom, pos) and
viableParam(call, nodeTo, pos)
)
callStep(_, nodeFrom, nodeTo)
or
// In normal data-flow, this will be a local flow step. But for type tracking
// we model it as a call step, in order to avoid computing a potential
// self-cross product of all calls to a function that returns one of its parameters
// (only to later filter that flow out using `TypeTracker::append`).
nodeTo =
DataFlowPrivate::LocalFlow::getParameterDefNode(nodeFrom
.(DataFlowPublic::ParameterNode)
.getParameter())
DataFlowPrivate::LocalFlow::localFlowSsaParamInput(nodeFrom, nodeTo)
}
/**

View File

@@ -1,5 +1,5 @@
name: codeql/ruby-all
version: 0.0.11-dev
version: 0.0.12-dev
groups: ruby
extractor: ruby
dbscheme: ruby.dbscheme