Merge branch 'main' into rbPoly

This commit is contained in:
erik-krogh
2023-01-24 20:51:36 +01:00
4021 changed files with 448846 additions and 146690 deletions

View File

@@ -5,78 +5,25 @@
* @id rb/alert-suppression
*/
import codeql.ruby.AST
import codeql.ruby.ast.internal.TreeSitter
private import codeql.util.suppression.AlertSuppression as AS
private import codeql.ruby.ast.internal.TreeSitter
/**
* An alert suppression comment.
*/
class SuppressionComment extends Ruby::Comment {
string annotation;
SuppressionComment() {
// suppression comments must be single-line
this.getLocation().getStartLine() = this.getLocation().getEndLine() and
exists(string text | text = commentText(this) |
// match `lgtm[...]` anywhere in the comment
annotation = text.regexpFind("(?i)\\blgtm\\s*\\[[^\\]]*\\]", _, _)
or
// match `lgtm` at the start of the comment and after semicolon
annotation = text.regexpFind("(?i)(?<=^|;)\\s*lgtm(?!\\B|\\s*\\[)", _, _).trim()
)
}
/**
* Gets the text of this suppression comment.
*/
string getText() { result = commentText(this) }
/** Gets the suppression annotation in this comment. */
string getAnnotation() { result = annotation }
/**
* Holds if this comment applies to the range from column `startcolumn` of line `startline`
* to column `endcolumn` of line `endline` in file `filepath`.
*/
predicate covers(string filepath, int startline, int startcolumn, int endline, int endcolumn) {
this.getLocation().hasLocationInfo(filepath, startline, _, endline, endcolumn) and
startcolumn = 1
}
/** Gets the scope of this suppression. */
SuppressionScope getScope() { this = result.getSuppressionComment() }
}
private string commentText(Ruby::Comment comment) { result = comment.getValue().suffix(1) }
/**
* The scope of an alert suppression comment.
*/
class SuppressionScope extends @ruby_token_comment {
SuppressionScope() { this instanceof SuppressionComment }
/** Gets a suppression comment with this scope. */
SuppressionComment getSuppressionComment() { result = this }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
class AstNode extends Ruby::Token {
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.(SuppressionComment).covers(filepath, startline, startcolumn, endline, endcolumn)
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets a textual representation of this element. */
string toString() { result = "suppression range" }
}
from SuppressionComment c
select c, // suppression comment
c.getText(), // text of suppression comment (excluding delimiters)
c.getAnnotation(), // text of suppression annotation
c.getScope() // scope of suppression
class SingleLineComment extends Ruby::Comment, AstNode {
SingleLineComment() {
// suppression comments must be single-line
this.getLocation().getStartLine() = this.getLocation().getEndLine()
}
/** Gets the suppression annotation in this comment. */
string getText() { result = this.getValue().suffix(1) }
}
import AS::Make<AstNode, SingleLineComment>

View File

@@ -1,3 +1,32 @@
## 0.5.1
### New Queries
* Added a new query, `rb/unsafe-code-construction`, to detect libraries that unsafely construct code from their inputs.
### Minor Analysis Improvements
* The `rb/unsafe-deserialization` query now recognizes input from STDIN as a source.
## 0.5.0
### New Queries
* Added a new query, `rb/stack-trace-exposure`, to detect exposure of stack-traces to users via HTTP responses.
### Minor Analysis Improvements
* The `AlertSuppression.ql` query has been updated to support the new `# codeql[query-id]` supression comments. These comments can be used to suppress an alert and must be placed on a blank line before the alert. In addition the legacy `# lgtm` and `# lgtm[query-id]` comments can now also be placed on the line before an alert.
* Extended the `rb/kernel-open` query with following sinks: `IO.write`, `IO.binread`, `IO.binwrite`, `IO.foreach`, `IO.readlines`, and `URI.open`.
## 0.4.6
No user-facing changes.
## 0.4.5
No user-facing changes.
## 0.4.4
### New Queries

View File

@@ -0,0 +1,4 @@
---
category: newQuery
---
* Added a new query, `rb/html-constructed-from-input`, to detect libraries that unsafely construct HTML from their inputs.

View File

@@ -0,0 +1,3 @@
## 0.4.5
No user-facing changes.

View File

@@ -0,0 +1,3 @@
## 0.4.6
No user-facing changes.

View File

@@ -0,0 +1,10 @@
## 0.5.0
### New Queries
* Added a new query, `rb/stack-trace-exposure`, to detect exposure of stack-traces to users via HTTP responses.
### Minor Analysis Improvements
* The `AlertSuppression.ql` query has been updated to support the new `# codeql[query-id]` supression comments. These comments can be used to suppress an alert and must be placed on a blank line before the alert. In addition the legacy `# lgtm` and `# lgtm[query-id]` comments can now also be placed on the line before an alert.
* Extended the `rb/kernel-open` query with following sinks: `IO.write`, `IO.binread`, `IO.binwrite`, `IO.foreach`, `IO.readlines`, and `URI.open`.

View File

@@ -0,0 +1,9 @@
## 0.5.1
### New Queries
* Added a new query, `rb/unsafe-code-construction`, to detect libraries that unsafely construct code from their inputs.
### Minor Analysis Improvements
* The `rb/unsafe-deserialization` query now recognizes input from STDIN as a source.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.4.4
lastReleaseVersion: 0.5.1

View File

@@ -0,0 +1,4 @@
- description: Extended and experimental security queries for Ruby
- queries: .
- apply: security-experimental-selectors.yml
from: codeql/suite-helpers

View File

@@ -7,6 +7,7 @@
* @precision medium
* @id rb/user-controlled-bypass
* @tags security
* experimental
* external/cwe/cwe-807
* external/cwe/cwe-290
*/

View File

@@ -6,7 +6,9 @@
* @security-severity 7.8
* @precision medium
* @id rb/user-controlled-file-decompression
* @tags security external/cwe/cwe-409
* @tags security
* experimental
* external/cwe/cwe-409
*/
import codeql.ruby.AST

View File

@@ -5,6 +5,7 @@
* @problem.severity warning
* @precision high
* @tags security
* experimental
* @id rb/improper-memoization
*/

View File

@@ -7,6 +7,7 @@
* @precision low
* @id rb/manually-checking-http-verb
* @tags security
* experimental
*/
import codeql.ruby.AST

View File

@@ -7,6 +7,7 @@
* @precision medium
* @id rb/weak-params
* @tags security
* experimental
*/
import codeql.ruby.AST

View File

@@ -1,5 +1,5 @@
name: codeql/ruby-queries
version: 0.4.5-dev
version: 0.5.2-dev
groups:
- ruby
- queries
@@ -8,3 +8,4 @@ defaultSuiteFile: codeql-suites/ruby-code-scanning.qls
dependencies:
codeql/ruby-all: ${workspace}
codeql/suite-helpers: ${workspace}
codeql/util: ${workspace}

View File

@@ -12,6 +12,7 @@
import codeql.ruby.AST
import codeql.ruby.dataflow.SSA
import codeql.ruby.dataflow.internal.DataFlowDispatch
from DefLoc loc, Expr src, Expr target, string kind
where
@@ -38,7 +39,12 @@ newtype DefLoc =
write = definitionOf(read.getAQualifiedName())
} or
/** A method call. */
MethodLoc(MethodCall call, Method meth) { meth = call.getATarget() } or
MethodLoc(MethodCall call, Method meth) {
meth = call.getATarget()
or
// include implicit `initialize` calls
meth = getInitializeTarget(call.getAControlFlowNode())
} or
/** A local variable. */
LocalVariableLoc(VariableReadAccess read, VariableWriteAccess write) {
exists(Ssa::WriteDefinition w |

View File

@@ -1,5 +1,5 @@
private import ruby
private import codeql.files.FileSystem
private import codeql.ruby.DataFlow
private import codeql.ruby.dataflow.RemoteFlowSources
private import codeql.ruby.security.CodeInjectionCustomizations
private import codeql.ruby.security.CommandInjectionCustomizations
@@ -34,6 +34,12 @@ DataFlow::Node relevantTaintSink(string kind) {
kind = "UnsafeDeserialization" and result instanceof UnsafeDeserialization::Sink
or
kind = "UrlRedirect" and result instanceof UrlRedirect::Sink
) and
// the sink is not a string literal
not exists(Ast::StringLiteral str |
str = result.asExpr().getExpr() and
// ensure there is no interpolation, as that is not a literal
not str.getComponent(_) instanceof Ast::StringInterpolationComponent
)
}

View File

@@ -3,200 +3,6 @@
* that match URLs and hostname patterns.
*/
private import HostnameRegexpSpecific
/**
* Holds if the given constant is unlikely to occur in the origin part of a URL.
*/
predicate isConstantInvalidInsideOrigin(RegExpConstant term) {
// Look for any of these cases:
// - A character that can't occur in the origin
// - Two dashes in a row
// - A colon that is not part of port or scheme separator
// - A slash that is not part of scheme separator
term.getValue().regexpMatch(".*(?:[^a-zA-Z0-9.:/-]|--|:[^0-9/]|(?<![/:]|^)/).*")
}
/** Holds if `term` is a dot constant of form `\.` or `[.]`. */
predicate isDotConstant(RegExpTerm term) {
term.(RegExpCharEscape).getValue() = "."
or
exists(RegExpCharacterClass cls |
term = cls and
not cls.isInverted() and
cls.getNumChild() = 1 and
cls.getAChild().(RegExpConstant).getValue() = "."
)
}
/** Holds if `term` is a wildcard `.` or an actual `.` character. */
predicate isDotLike(RegExpTerm term) {
term instanceof RegExpDot
or
isDotConstant(term)
}
/** Holds if `term` will only ever be matched against the beginning of the input. */
predicate matchesBeginningOfString(RegExpTerm term) {
term.isRootTerm()
or
exists(RegExpTerm parent | matchesBeginningOfString(parent) |
term = parent.(RegExpSequence).getChild(0)
or
parent.(RegExpSequence).getChild(0) instanceof RegExpCaret and
term = parent.(RegExpSequence).getChild(1)
or
term = parent.(RegExpAlt).getAChild()
or
term = parent.(RegExpGroup).getAChild()
)
}
/**
* Holds if the given sequence `seq` contains top-level domain preceded by a dot, such as `.com`,
* excluding cases where this is at the very beginning of the regexp.
*
* `i` is bound to the index of the last child in the top-level domain part.
*/
predicate hasTopLevelDomainEnding(RegExpSequence seq, int i) {
seq.getChild(i)
.(RegExpConstant)
.getValue()
.regexpMatch("(?i)" + RegExpPatterns::getACommonTld() + "(:\\d+)?([/?#].*)?") and
isDotLike(seq.getChild(i - 1)) and
not (i = 1 and matchesBeginningOfString(seq))
}
/**
* Holds if the given regular expression term contains top-level domain preceded by a dot,
* such as `.com`.
*/
predicate hasTopLevelDomainEnding(RegExpSequence seq) { hasTopLevelDomainEnding(seq, _) }
/**
* Holds if `term` will always match a hostname, that is, all disjunctions contain
* a hostname pattern that isn't inside a quantifier.
*/
predicate alwaysMatchesHostname(RegExpTerm term) {
hasTopLevelDomainEnding(term, _)
or
// `localhost` is considered a hostname pattern, but has no TLD
term.(RegExpConstant).getValue().regexpMatch("\\blocalhost\\b")
or
not term instanceof RegExpAlt and
not term instanceof RegExpQuantifier and
alwaysMatchesHostname(term.getAChild())
or
alwaysMatchesHostnameAlt(term)
}
/** Holds if every child of `alt` contains a hostname pattern. */
predicate alwaysMatchesHostnameAlt(RegExpAlt alt) {
alwaysMatchesHostnameAlt(alt, alt.getNumChild() - 1)
}
/**
* Holds if the first `i` children of `alt` contains a hostname pattern.
*
* This is used instead of `forall` to avoid materializing the set of alternatives
* that don't contains hostnames, which is much larger.
*/
predicate alwaysMatchesHostnameAlt(RegExpAlt alt, int i) {
alwaysMatchesHostname(alt.getChild(0)) and i = 0
or
alwaysMatchesHostnameAlt(alt, i - 1) and
alwaysMatchesHostname(alt.getChild(i))
}
/**
* Holds if `term` occurs inside a quantifier or alternative (and thus
* can not be expected to correspond to a unique match), or as part of
* a lookaround assertion (which are rarely used for capture groups).
*/
predicate isInsideChoiceOrSubPattern(RegExpTerm term) {
exists(RegExpParent parent | parent = term.getParent() |
parent instanceof RegExpAlt
or
parent instanceof RegExpQuantifier
or
parent instanceof RegExpSubPattern
or
isInsideChoiceOrSubPattern(parent)
)
}
/**
* Holds if `group` is likely to be used as a capture group.
*/
predicate isLikelyCaptureGroup(RegExpGroup group) {
group.isCapture() and
not isInsideChoiceOrSubPattern(group)
}
/**
* Holds if `seq` contains two consecutive dots `..` or escaped dots.
*
* At least one of these dots is not intended to be a subdomain separator,
* so we avoid flagging the pattern in this case.
*/
predicate hasConsecutiveDots(RegExpSequence seq) {
exists(int i |
isDotLike(seq.getChild(i)) and
isDotLike(seq.getChild(i + 1))
)
}
predicate isIncompleteHostNameRegExpPattern(RegExpTerm regexp, RegExpSequence seq, string msg) {
seq = regexp.getAChild*() and
exists(RegExpDot unescapedDot, int i, string hostname |
hasTopLevelDomainEnding(seq, i) and
not isConstantInvalidInsideOrigin(seq.getChild([0 .. i - 1]).getAChild*()) and
not isLikelyCaptureGroup(seq.getChild([i .. seq.getNumChild() - 1]).getAChild*()) and
unescapedDot = seq.getChild([0 .. i - 1]).getAChild*() and
unescapedDot != seq.getChild(i - 1) and // Should not be the '.' immediately before the TLD
not hasConsecutiveDots(unescapedDot.getParent()) and
hostname =
seq.getChild(i - 2).getRawValue() + seq.getChild(i - 1).getRawValue() +
seq.getChild(i).getRawValue()
|
if unescapedDot.getParent() instanceof RegExpQuantifier
then
// `.*\.example.com` can match `evil.com/?x=.example.com`
//
// This problem only occurs when the pattern is applied against a full URL, not just a hostname/origin.
// We therefore check if the pattern includes a suffix after the TLD, such as `.*\.example.com/`.
// Note that a post-anchored pattern (`.*\.example.com$`) will usually fail to match a full URL,
// and patterns with neither a suffix nor an anchor fall under the purview of MissingRegExpAnchor.
seq.getChild(0) instanceof RegExpCaret and
not seq.getAChild() instanceof RegExpDollar and
seq.getChild([i .. i + 1]).(RegExpConstant).getValue().regexpMatch(".*[/?#].*") and
msg =
"has an unrestricted wildcard '" + unescapedDot.getParent().(RegExpQuantifier).getRawValue()
+ "' which may cause '" + hostname +
"' to be matched anywhere in the URL, outside the hostname."
else
msg =
"has an unescaped '.' before '" + hostname +
"', so it might match more hosts than expected."
)
}
predicate incompleteHostnameRegExp(
RegExpSequence hostSequence, string message, DataFlow::Node aux, string label
) {
exists(RegExpPatternSource re, RegExpTerm regexp, string msg, string kind |
regexp = re.getRegExpTerm() and
isIncompleteHostNameRegExpPattern(regexp, hostSequence, msg) and
(
if re.getAParse() != re
then (
kind = "string, which is used as a regular expression $@," and
aux = re.getAParse()
) else (
kind = "regular expression" and aux = re
)
)
|
message = "This " + kind + " " + msg and label = "here"
)
}
// HostnameRegexp should be used directly from the shared regex pack, and not from this file.
deprecated import codeql.ruby.security.regexp.HostnameRegexp as Dep
import Dep

View File

@@ -1,2 +0,0 @@
import codeql.ruby.Regexp
import codeql.ruby.DataFlow

View File

@@ -11,6 +11,6 @@
* external/cwe/cwe-020
*/
import HostnameRegexpShared
private import codeql.ruby.security.regexp.HostnameRegexp as HostnameRegxp
query predicate problems = incompleteHostnameRegExp/4;
query predicate problems = HostnameRegxp::incompleteHostnameRegExp/4;

View File

@@ -3,6 +3,7 @@
*/
private import IncompleteUrlSubstringSanitizationSpecific
private import codeql.regex.HostnameRegexp::Utils
/**
* A check on a string for whether it contains a given substring, possibly with restrictions on the location of the substring.
@@ -30,9 +31,7 @@ query predicate problems(
mayHaveStringValue(substring, target) and
(
// target contains a domain on a common TLD, and perhaps some other URL components
target
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::getACommonTld() +
"(:[0-9]+)?/?")
target.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + getACommonTld() + "(:[0-9]+)?/?")
or
// target is a HTTP URL to a domain on any TLD
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")

View File

@@ -1,6 +1,5 @@
import codeql.ruby.DataFlow
import codeql.ruby.StringOps
import codeql.ruby.Regexp::RegExpPatterns as RegExpPatterns
/** Holds if `node` may evaluate to `value` */
predicate mayHaveStringValue(DataFlow::Node node, string value) {

View File

@@ -11,238 +11,32 @@
* external/cwe/cwe-020
*/
import HostnameRegexpShared
import codeql.ruby.DataFlow
import codeql.ruby.regexp.RegExpTreeView
import codeql.ruby.Regexp
private import codeql.ruby.security.regexp.HostnameRegexp as HostnameRegexp
private import codeql.regex.MissingRegExpAnchor as MissingRegExpAnchor
private import codeql.ruby.regexp.RegExpTreeView::RegexTreeView as TreeImpl
/**
* Holds if `term` is a final term, that is, no term will match anything after this one.
*/
predicate isFinalRegExpTerm(RegExpTerm term) {
term.isRootTerm()
or
exists(RegExpSequence seq |
isFinalRegExpTerm(seq) and
term = seq.getLastChild()
)
or
exists(RegExpTerm parent |
isFinalRegExpTerm(parent) and
term = parent.getAChild() and
not parent instanceof RegExpSequence and
not parent instanceof RegExpQuantifier
)
}
/** Holds if `term` is one of the transitive left children of a regexp. */
predicate isLeftArmTerm(RegExpTerm term) {
term.isRootTerm()
or
exists(RegExpTerm parent |
term = parent.getChild(0) and
isLeftArmTerm(parent)
)
}
/** Holds if `term` is one of the transitive right children of a regexp. */
predicate isRightArmTerm(RegExpTerm term) {
term.isRootTerm()
or
exists(RegExpTerm parent |
term = parent.getLastChild() and
isRightArmTerm(parent)
)
}
/**
* Holds if `term` is an anchor that is not the first or last node
* in its tree.
*/
predicate isInteriorAnchor(RegExpAnchor term) {
not isLeftArmTerm(term) and
not isRightArmTerm(term)
}
/**
* Holds if `term` contains an anchor that is not the first or last node
* in its tree, such as `(foo|bar$|baz)`.
*/
predicate containsInteriorAnchor(RegExpTerm term) { isInteriorAnchor(term.getAChild*()) }
/**
* Holds if `term` starts with a word boundary or lookbehind assertion,
* indicating that it's not intended to be anchored on that side.
*/
predicate containsLeadingPseudoAnchor(RegExpSequence term) {
exists(RegExpTerm child | child = term.getChild(0) |
child instanceof RegExpWordBoundary or
child instanceof RegExpNonWordBoundary or
child instanceof RegExpLookbehind
)
}
/**
* Holds if `term` ends with a word boundary or lookahead assertion,
* indicating that it's not intended to be anchored on that side.
*/
predicate containsTrailingPseudoAnchor(RegExpSequence term) {
exists(RegExpTerm child | child = term.getLastChild() |
child instanceof RegExpWordBoundary or
child instanceof RegExpNonWordBoundary or
child instanceof RegExpLookahead
)
}
/**
* Holds if `term` is an empty sequence, usually arising from
* literals with a trailing alternative such as `foo|`.
*/
predicate isEmpty(RegExpSequence term) { term.getNumChild() = 0 }
/**
* Holds if `term` contains a letter constant.
*
* We use this as a heuristic to filter out uninteresting results.
*/
predicate containsLetters(RegExpTerm term) {
term.getAChild*().(RegExpConstant).getValue().regexpMatch(".*[a-zA-Z].*")
}
/**
* Holds if `alt` has an explicitly anchored group, such as `^(foo|bar)|baz`
* and doesn't have any unnecessary groups, such as in `^(foo)|(bar)`.
*/
predicate hasExplicitAnchorPrecedence(RegExpAlt alt) {
isAnchoredGroup(alt.getAChild()) and
not alt.getAChild() instanceof RegExpGroup
}
/**
* Holds if `term` consists only of an anchor and a parenthesized term,
* such as the left side of `^(foo|bar)|baz`.
*
* The precedence of the anchor is likely to be intentional in this case,
* as the group wouldn't be needed otherwise.
*/
predicate isAnchoredGroup(RegExpSequence term) {
term.getNumChild() = 2 and
term.getAChild() instanceof RegExpAnchor and
term.getAChild() instanceof RegExpGroup
}
/**
* Holds if `src` is a pattern for a collection of alternatives where
* only the first or last alternative is anchored, indicating a
* precedence mistake explained by `msg`.
*
* The canonical example of such a mistake is: `^a|b|c`, which is
* parsed as `(^a)|(b)|(c)`.
*/
predicate hasMisleadingAnchorPrecedence(RegExpPatternSource src, string msg) {
exists(RegExpAlt root, RegExpSequence anchoredTerm, string direction |
root = src.getRegExpTerm() and
not containsInteriorAnchor(root) and
not isEmpty(root.getAChild()) and
not hasExplicitAnchorPrecedence(root) and
containsLetters(anchoredTerm) and
(
anchoredTerm = root.getChild(0) and
anchoredTerm.getChild(0) instanceof RegExpCaret and
not containsLeadingPseudoAnchor(root.getChild([1 .. root.getNumChild() - 1])) and
containsLetters(root.getChild([1 .. root.getNumChild() - 1])) and
direction = "beginning"
or
anchoredTerm = root.getLastChild() and
anchoredTerm.getLastChild() instanceof RegExpDollar and
not containsTrailingPseudoAnchor(root.getChild([0 .. root.getNumChild() - 2])) and
containsLetters(root.getChild([0 .. root.getNumChild() - 2])) and
direction = "end"
) and
// that is not used for string replacement
not exists(DataFlow::CallNode mcn, DataFlow::Node arg, string name |
private module Impl implements
MissingRegExpAnchor::MissingRegExpAnchorSig<TreeImpl, HostnameRegexp::Impl> {
predicate isUsedAsReplace(RegExpPatternSource pattern) {
exists(DataFlow::CallNode mcn, DataFlow::Node arg, string name |
name = mcn.getMethodName() and
arg = mcn.getArgument(0)
|
(
src.getAParse().(DataFlow::LocalSourceNode).flowsTo(arg) or
src.getAParse() = arg
pattern.getAParse().(DataFlow::LocalSourceNode).flowsTo(arg) or
pattern.getAParse() = arg
) and
name = ["sub", "sub!", "gsub", "gsub!"]
) and
msg =
"Misleading operator precedence. The subexpression '" + anchoredTerm.getRawValue() +
"' is anchored at the " + direction +
", but the other parts of this regular expression are not"
)
)
}
string getEndAnchorText() { result = "\\z" }
}
/**
* Holds if `src` contains a hostname pattern that uses the `^/$` line anchors
* rather than `\A/\z` which match the start/end of the whole string.
*/
predicate isLineAnchoredHostnameRegExp(RegExpPatternSource src, string msg) {
// avoid double reporting
not (
isSemiAnchoredHostnameRegExp(src, _) or
hasMisleadingAnchorPrecedence(src, _)
) and
exists(RegExpTerm term, RegExpSequence tld, int i | term = src.getRegExpTerm() |
not isConstantInvalidInsideOrigin(term.getAChild*()) and
tld = term.getAChild*() and
hasTopLevelDomainEnding(tld, i) and
isFinalRegExpTerm(tld.getChild(i)) and // nothing is matched after the TLD
(
tld.getChild(0).(RegExpCaret).getChar() = "^" or
tld.getLastChild().(RegExpDollar).getChar() = "$"
) and
msg =
"This hostname pattern uses anchors such as '^' and '$', which match the start and end of a line, not the whole string. Use '\\A' and '\\z' instead."
)
}
/**
* Holds if `src` contains a hostname pattern that is missing a `$` anchor.
*/
predicate isSemiAnchoredHostnameRegExp(RegExpPatternSource src, string msg) {
not hasMisleadingAnchorPrecedence(src, _) and // avoid double reporting
exists(RegExpTerm term, RegExpSequence tld, int i | term = src.getRegExpTerm() |
not isConstantInvalidInsideOrigin(term.getAChild*()) and
tld = term.getAChild*() and
hasTopLevelDomainEnding(tld, i) and
isFinalRegExpTerm(tld.getChild(i)) and // nothing is matched after the TLD
tld.getChild(0) instanceof RegExpCaret and
msg =
"This hostname pattern may match any domain name, as it is missing a '\\z' or '/' at the end."
)
}
/**
* Holds if `src` is an unanchored pattern for a URL, indicating a
* mistake explained by `msg`.
*/
predicate isUnanchoredHostnameRegExp(RegExpPatternSource src, string msg) {
exists(RegExpTerm term, RegExpSequence tld | term = src.getRegExpTerm() |
alwaysMatchesHostname(term) and
tld = term.getAChild*() and
hasTopLevelDomainEnding(tld) and
not isConstantInvalidInsideOrigin(term.getAChild*()) and
not term.getAChild*() instanceof RegExpAnchor and
// that is not used for string replacement
not exists(DataFlow::CallNode mcn, DataFlow::Node arg, string name |
name = mcn.getMethodName() and
arg = mcn.getArgument(0)
|
(
src.getAParse().(DataFlow::LocalSourceNode).flowsTo(arg) or
src.getAParse() = arg
) and
name = ["sub", "sub!", "gsub", "gsub!"]
) and
msg =
"When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it."
)
}
import MissingRegExpAnchor::Make<TreeImpl, HostnameRegexp::Impl, Impl>
from DataFlow::Node nd, string msg
where

View File

@@ -15,9 +15,8 @@
* external/cwe/cwe-099
*/
import codeql.ruby.AST
import ruby
import codeql.ruby.security.PathInjectionQuery
import codeql.ruby.DataFlow
import DataFlow::PathGraph
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink

View File

@@ -6,15 +6,19 @@
<p>If <code>Kernel.open</code> is given a file name that starts with a <code>|</code>
character, it will execute the remaining string as a shell command. If a
malicious user can control the file name, they can execute arbitrary code.
The same vulnerability applies to <code>IO.read</code>.
The same vulnerability applies to <code>IO.read</code>, <code>IO.write</code>,
<code>IO.binread</code>, <code>IO.binwrite</code>, <code>IO.foreach</code>,
<code>IO.readlines</code> and <code>URI.open</code>.
</p>
</overview>
<recommendation>
<p>Use <code>File.open</code> instead of <code>Kernel.open</code>, as the former
does not have this vulnerability. Similarly, use <code>File.read</code> instead
does not have this vulnerability. Similarly, use the methods from the <code>File</code>
class instead of the <code>IO</code> class e.g. <code>File.read</code> instead
of <code>IO.read</code>.</p>
<p>Instead of <code>URI.open</code> use <code>URI(..).open</code> or an HTTP Client.</p>
</recommendation>
<example>
@@ -36,6 +40,7 @@ user-supplied file path.
<li>
OWASP:
<a href="https://www.owasp.org/index.php/Command_Injection">Command Injection</a>.
<a href="https://cheatsheetseries.owasp.org/cheatsheets/Ruby_on_Rails_Cheat_Sheet.html#command-injection">Ruby on Rails Cheat Sheet: Command Injection</a>.
</li>
<li>

View File

@@ -1,6 +1,7 @@
/**
* @name Use of `Kernel.open` or `IO.read` with user-controlled input
* @description Using `Kernel.open` or `IO.read` may allow a malicious
* @name Use of `Kernel.open`, `IO.read` or similar sinks with user-controlled input
* @description Using `Kernel.open`, `IO.read`, `IO.write`, `IO.binread`, `IO.binwrite`,
* `IO.foreach`, `IO.readlines`, or `URI.open` may allow a malicious
* user to execute arbitrary system commands.
* @kind path-problem
* @problem.severity error
@@ -32,7 +33,8 @@ class Configuration extends TaintTracking::Configuration {
override predicate isSanitizer(DataFlow::Node node) {
node instanceof StringConstCompareBarrier or
node instanceof StringConstArrayInclusionCallBarrier
node instanceof StringConstArrayInclusionCallBarrier or
node instanceof Sanitizer
}
}

View File

@@ -1,6 +1,7 @@
/**
* @name Use of `Kernel.open` or `IO.read` with a non-constant value
* @description Using `Kernel.open` or `IO.read` may allow a malicious
* @name Use of `Kernel.open` or `IO.read` or similar sinks with a non-constant value
* @description Using `Kernel.open`, `IO.read`, `IO.write`, `IO.binread`, `IO.binwrite`,
* `IO.foreach`, `IO.readlines`, or `URI.open` may allow a malicious
* user to execute arbitrary system commands.
* @kind problem
* @problem.severity warning
@@ -15,15 +16,25 @@
*/
import codeql.ruby.security.KernelOpenQuery
import codeql.ruby.ast.Literal
import codeql.ruby.AST
import codeql.ruby.ApiGraphs
from AmbiguousPathCall call
where
// there is not a constant string argument
not exists(call.getPathArgument().getConstantValue()) and
// if it's a format string, then the first argument is not a constant string
not call.getPathArgument().getALocalSource().asExpr().getExpr().(StringLiteral).getComponent(0)
instanceof StringTextComponent
not hasConstantPrefix(call.getPathArgument().getALocalSource().asExpr().getExpr()) and
not call.getPathArgument().getALocalSource() =
API::getTopLevelMember("File").getAMethodCall("join")
select call,
"Call to " + call.getName() + " with a non-constant value. Consider replacing it with " +
call.getReplacement() + "."
predicate hasConstantPrefix(Expr e) {
// if it's a format string, then the first argument is not a constant string
e.(StringlikeLiteral).getComponent(0) instanceof StringTextComponent
or
// it is not a constant string argument
exists(e.getConstantValue())
or
// not a concatenation that starts with a constant string
hasConstantPrefix(e.(AddExpr).getLeftOperand())
}

View File

@@ -0,0 +1,73 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
When a library function dynamically constructs HTML in a potentially unsafe
way, then it's important to document to clients of the library that the function
should only be used with trusted inputs.
If the function is not documented as being potentially unsafe, then a client
may inadvertently use inputs containing unsafe HTML fragments, and thereby leave
the client vulnerable to cross-site scripting attacks.
</p>
</overview>
<recommendation>
<p>
Document all library functions that can lead to cross-site scripting
attacks, and guard against unsafe inputs where dynamic HTML
construction is not intended.
</p>
</recommendation>
<example>
<p>
The following example has a library function that renders a boldface name
by creating a string containing a <code>&lt;b&gt;</code> with the name
embedded in it.
</p>
<sample src="examples/unsafe-html-construction.rb" />
<p>
This library function, however, does not escape unsafe HTML, and a client
that calls the function with user-supplied input may be vulnerable to
cross-site scripting attacks.
</p>
<p>
The library could either document that this function should not be used
with unsafe inputs, or escape the input before embedding it in the
HTML fragment.
</p>
<sample src="examples/unsafe-html-construction_safe.rb" />
</example>
<references>
<li>
OWASP:
<a href="https://www.owasp.org/index.php/DOM_based_XSS_Prevention_Cheat_Sheet">DOM based
XSS Prevention Cheat Sheet</a>.
</li>
<li>
OWASP:
<a href="https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet">XSS
(Cross Site Scripting) Prevention Cheat Sheet</a>.
</li>
<li>
OWASP
<a href="https://www.owasp.org/index.php/DOM_Based_XSS">DOM Based XSS</a>.
</li>
<li>
OWASP
<a href="https://www.owasp.org/index.php/Types_of_Cross-Site_Scripting">Types of Cross-Site
Scripting</a>.
</li>
<li>
Wikipedia: <a href="http://en.wikipedia.org/wiki/Cross-site_scripting">Cross-site scripting</a>.
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,22 @@
/**
* @name Unsafe HTML constructed from library input
* @description Using externally controlled strings to construct HTML might allow a malicious
* user to perform a cross-site scripting attack.
* @kind path-problem
* @problem.severity error
* @security-severity 6.1
* @precision high
* @id rb/html-constructed-from-input
* @tags security
* external/cwe/cwe-079
* external/cwe/cwe-116
*/
import codeql.ruby.security.UnsafeHtmlConstructionQuery
import DataFlow::PathGraph
from DataFlow::Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink, Sink sinkNode
where cfg.hasFlowPath(source, sink) and sink.getNode() = sinkNode
select sinkNode, source, sink,
"This " + sinkNode.getSinkType() + " which depends on $@ might later allow $@.", source.getNode(),
"library input", sinkNode.getXssSink(), "cross-site scripting"

View File

@@ -0,0 +1,6 @@
class UsersController < ActionController::Base
# BAD - create a user description, where the name is not escaped
def create_user_description (name)
"<b>#{name}</b>".html_safe
end
end

View File

@@ -0,0 +1,6 @@
class UsersController < ActionController::Base
# Good - create a user description, where the name is escaped
def create_user_description (name)
"<b>#{ERB::Util.html_escape(name)}</b>".html_safe
end
end

View File

@@ -0,0 +1,111 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
When a library function dynamically constructs code in a potentially unsafe way,
it's important to document to clients of the library that the function should only be
used with trusted inputs.
If the function is not documented as being potentially unsafe, then a client may
incorrectly use inputs containing unsafe code fragments, and thereby leave the
client vulnerable to code-injection attacks.
</p>
</overview>
<recommendation>
<p>
Properly document library functions that construct code from unsanitized
inputs, or avoid constructing code in the first place.
</p>
</recommendation>
<example>
<p>
The following example shows two methods implemented using <code>eval</code>: a simple
deserialization routine and a getter method.
If untrusted inputs are used with these methods,
then an attacker might be able to execute arbitrary code on the system.
</p>
<sample src="examples/UnsafeCodeConstruction.rb" />
<p>
To avoid this problem, either properly document that the function is potentially
unsafe, or use an alternative solution such as <code>JSON.parse</code> or another library
that does not allow arbitrary code to be executed.
</p>
<sample src="examples/UnsafeCodeConstructionSafe.rb" />
</example>
<example>
<p>
As another example, consider the below code which dynamically constructs
a class that has a getter method with a custom name.
</p>
<sample src="examples/UnsafeCodeConstruction2.rb" />
<p>
The example dynamically constructs a string which is then executed using <code>module_eval</code>.
This code will break if the specified name is not a valid Ruby identifier, and
if the value is controlled by an attacker, then this could lead to code-injection.
</p>
<p>
A more robust implementation, that is also immune to code-injection,
can be made by using <code>module_eval</code> with a block and using <code>define_method</code>
to define the getter method.
</p>
<sample src="examples/UnsafeCodeConstruction2Safe.rb" />
</example>
<example>
<p>
This example dynamically registers a method on another class which
forwards its arguments to a target object. This approach uses
<code>module_eval</code> and string interpolation to construct class variables
and methods.
</p>
<sample src="examples/UnsafeCodeConstruction3.rb" />
<p>
A safer approach is to use <code>class_variable_set</code> and
<code>class_variable_get</code> along with <code>define_method</code>. String
interpolation is still used to construct the class variable name, but this is
safe because <code>class_variable_set</code> is not susceptible to code-injection.
</p>
<p>
<code>send</code> is used to dynamically call the method specified by <code>name</code>.
This is a more robust alternative than the previous example, because it does not allow
arbitrary code to be executed, but it does still allow for any method to be called
on the target object.
</p>
<sample src="examples/UnsafeCodeConstruction3Safe.rb" />
</example>
<references>
<li>
OWASP:
<a href="https://www.owasp.org/index.php/Code_Injection">Code Injection</a>.
</li>
<li>
Wikipedia: <a href="https://en.wikipedia.org/wiki/Code_injection">Code Injection</a>.
</li>
<li>
Ruby documentation: <a href="https://docs.ruby-lang.org/en/3.2/Module.html#method-i-define_method">define_method</a>.
</li>
<li>
Ruby documentation: <a href="https://docs.ruby-lang.org/en/3.2/Module.html#method-i-class_variable_set">class_variable_set</a>.
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,23 @@
/**
* @name Unsafe code constructed from library input
* @description Using externally controlled strings to construct code may allow a malicious
* user to execute arbitrary code.
* @kind path-problem
* @problem.severity warning
* @security-severity 6.1
* @precision medium
* @id rb/unsafe-code-construction
* @tags security
* external/cwe/cwe-094
* external/cwe/cwe-079
* external/cwe/cwe-116
*/
import codeql.ruby.security.UnsafeCodeConstructionQuery
import DataFlow::PathGraph
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink, Sink sinkNode
where cfg.hasFlowPath(source, sink) and sinkNode = sink.getNode()
select sink.getNode(), source, sink,
"This " + sinkNode.getSinkType() + " which depends on $@ is later $@.", source.getNode(),
"library input", sinkNode.getCodeSink(), "interpreted as code"

View File

@@ -0,0 +1,10 @@
module MyLib
def unsafeDeserialize(value)
eval("foo = #{value}")
foo
end
def unsafeGetter(obj, path)
eval("obj.#{path}")
end
end

View File

@@ -0,0 +1,17 @@
require 'json'
module BadMakeGetter
# Makes a class with a method named `getter_name` that returns `val`
def self.define_getter_class(getter_name, val)
new_class = Class.new
new_class.module_eval <<-END
def #{getter_name}
#{JSON.dump(val)}
end
END
new_class
end
end
one = BadMakeGetter.define_getter_class(:one, "foo")
puts "One is #{one.new.one}"

View File

@@ -0,0 +1,13 @@
# Uses `define_method` instead of constructing a string
module GoodMakeGetter
def self.define_getter_class(getter_name, val)
new_class = Class.new
new_class.module_eval do
define_method(getter_name) { val }
end
new_class
end
end
two = GoodMakeGetter.define_getter_class(:two, "bar")
puts "Two is #{two.new.two}"

View File

@@ -0,0 +1,11 @@
module Invoker
def attach(klass, name, target)
klass.module_eval <<-CODE
@@#{name} = target
def #{name}(*args)
@@#{name}.#{name}(*args)
end
CODE
end
end

View File

@@ -0,0 +1,9 @@
module Invoker
def attach(klass, name, target)
var = :"@@#{name}"
klass.class_variable_set(var, target)
klass.define_method(name) do |*args|
self.class.class_variable_get(var).send(name, *args)
end
end
end

View File

@@ -0,0 +1,11 @@
require 'json'
module MyLib
def safeDeserialize(value)
JSON.parse(value)
end
def safeGetter(obj, path)
obj.dig(*path.split("."))
end
end

View File

@@ -0,0 +1,49 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Software developers often add stack traces to error messages, as a debugging
aid. Whenever that error message occurs for an end user, the developer can use
the stack trace to help identify how to fix the problem. In particular, stack
traces can tell the developer more about the sequence of events that led to a
failure, as opposed to merely the final state of the software when the error
occurred.
</p>
<p>
Unfortunately, the same information can be useful to an attacker. The sequence
of class or method names in a stack trace can reveal the structure of the
application as well as any internal components it relies on. Furthermore, the
error message at the top of a stack trace can include information such as
server-side file names and SQL code that the application relies on, allowing an
attacker to fine-tune a subsequent injection attack.
</p>
</overview>
<recommendation>
<p>
Send the user a more generic error message that reveals less information.
Either suppress the stack trace entirely, or log it only on the server.
</p>
</recommendation>
<example>
<p>
In the following example, an exception is handled in two different ways. In the
first version, labeled BAD, the exception is exposed to the remote user by
rendering it as an HTTP response. As such, the user is able to see a detailed
stack trace, which may contain sensitive information. In the second version, the
error message is logged only on the server, and a generic error message is
displayed to the user. That way, the developers can still access and use the
error log, but remote users will not see the information. </p>
<sample src="examples/StackTraceExposure.rb" />
</example>
<references>
<li>OWASP: <a href="https://owasp.org/www-community/Improper_Error_Handling">Improper Error Handling</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,23 @@
/**
* @name Information exposure through an exception
* @description Leaking information about an exception, such as messages and stack traces, to an
* external user can expose implementation details that are useful to an attacker for
* developing a subsequent exploit.
* @kind path-problem
* @problem.severity error
* @security-severity 5.4
* @precision high
* @id rb/stack-trace-exposure
* @tags security
* external/cwe/cwe-209
* external/cwe/cwe-497
*/
import codeql.ruby.DataFlow
import codeql.ruby.security.StackTraceExposureQuery
import DataFlow::PathGraph
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ can be exposed to an external user.", source.getNode(),
"Error information"

View File

@@ -0,0 +1,18 @@
class UsersController < ApplicationController
def update_bad(id)
do_computation()
rescue => e
# BAD
render body: e.backtrace, content_type: "text/plain"
end
def update_good(id)
do_computation()
rescue => e
# GOOD
logger.error e.backtrace
render body: "Computation failed", content_type: "text/plain"
end
end

View File

@@ -11,12 +11,11 @@
* external/cwe/cwe-502
*/
import codeql.ruby.AST
import DataFlow::PathGraph
import codeql.ruby.DataFlow
import ruby
import codeql.ruby.security.UnsafeDeserializationQuery
import DataFlow::PathGraph
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
where cfg.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Unsafe deserialization depends on a $@.", source.getNode(),
"user-provided value"
source.getNode().(UnsafeDeserialization::Source).describe()

View File

@@ -92,9 +92,9 @@ private predicate maybeCredentialName(string name) {
// Positional parameter
private DataFlow::Node credentialParameter() {
exists(Method m, NamedParameter p, int idx |
exists(Method m, NamedParameter p |
result.asParameter() = p and
p = m.getParameter(idx) and
p = m.getAParameter() and
maybeCredentialName(p.getName())
)
}