Merge pull request #11824 from erik-krogh/secondMissAnchor

RB: add query detecting validators that use badly anchored regular expressions on library/remote input
This commit is contained in:
Erik Krogh Kristensen
2023-02-13 11:26:05 +01:00
committed by GitHub
11 changed files with 225 additions and 0 deletions

View File

@@ -0,0 +1,79 @@
/**
* Provides default sources, sinks and sanitizers for reasoning about
* missing full-anchored regular expressions, as well as extension
* points for adding your own.
*/
private import ruby
private import codeql.ruby.dataflow.RemoteFlowSources
private import codeql.ruby.frameworks.core.Gem::Gem as Gem
private import codeql.ruby.Regexp as Regexp
private import codeql.ruby.security.regexp.HostnameRegexp
private import codeql.ruby.Concepts
private class RegExpTerm = Regexp::RegExpTerm;
/**
* Provides default sources, sinks and sanitizers for detecting
* missing full-anchored regular expressions, as well as extension
* points for adding your own.
*/
module MissingFullAnchor {
/** A data flow source for missing full-anchored regular expressions. */
abstract class Source extends DataFlow::Node {
/** Gets a description of the source. */
string describe() { result = "user-provided value" }
}
/** A data flow sink for missing full-anchored regular expressions. */
abstract class Sink extends DataFlow::Node {
/** Gets the node where the regexp computation happens. */
abstract DataFlow::Node getCallNode();
/** Gets the regular expression term. */
abstract RegExpTerm getRegex();
}
/** A sanitizer for missing full-anchored regular expressions. */
abstract class Sanitizer extends DataFlow::Node { }
private class RemoteFlowAsSource extends Source instanceof RemoteFlowSource { }
private class LibrayInputAsSource extends Source {
LibrayInputAsSource() { this = Gem::getALibraryInput() }
override string describe() { result = "library input" }
}
private RegExpTerm getABadlyAnchoredTerm() {
exists(RegExpTerm left | left.getRootTerm() = result |
left.(Regexp::RegExpAnchor).getChar() = "^" and
isLeftArmTerm(left)
) and
exists(RegExpTerm right | right.getRootTerm() = result |
right.(Regexp::RegExpAnchor).getChar() = "$" and
isRightArmTerm(right)
)
}
private class DefaultSink extends Sink {
RegexExecution exec;
RegExpTerm term;
DefaultSink() {
exec.getString() = this and
term = Regexp::getTermForExecution(exec) and
term = getABadlyAnchoredTerm() and
// looks like a sanitizer, not just input transformation
exists(Ast::ConditionalExpr ifExpr |
[ifExpr.getCondition(), ifExpr.getCondition().(Ast::UnaryLogicalOperation).getOperand()] =
exec.asExpr().getExpr() and
ifExpr.getBranch(_).(Ast::MethodCall).getMethodName() = "raise"
)
}
override DataFlow::Node getCallNode() { result = exec }
override RegExpTerm getRegex() { result = term }
}
}

View File

@@ -0,0 +1,26 @@
/**
* Provides a taint tracking configuration for reasoning about
* missing full-anchored regular expressions.
*
* Note, for performance reasons: only import this file if
* `MissingFullAnchor::Configuration` is needed, otherwise
* `MissingFullAnchorCustomizations` should be imported instead.
*/
import ruby
import codeql.ruby.TaintTracking
import MissingFullAnchorCustomizations::MissingFullAnchor
/**
* A taint tracking configuration for reasoning about
* missing full-anchored regular expressions.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "MissingFullAnchor" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
}

View File

@@ -0,0 +1,5 @@
---
category: newQuery
---
* Added a new query, `rb/regex/badly-anchored-regexp`, to detect regular expression validators that use `^` and `$`
as anchors and therefore might match only a single line of a multi-line string.

View File

@@ -0,0 +1,45 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Regular expressions in Ruby can use anchors to match the beginning and end of a string.
However, if the <code>^</code> and <code>$</code> anchors are used,
the regular expression can match a single line of a multi-line string.
This allows bad actors to bypass your regular expression checks and inject malicious input.
</p>
</overview>
<recommendation>
<p>
Use the <code>\A</code> and <code>\z</code> anchors since these anchors will always
match the beginning and end of the string, even if the string contains newlines.
</p>
</recommendation>
<example>
<p>
The following (bad) example code uses a regular expression to check that a string contains only digits.
</p>
<sample src="examples/missing_full_anchor_bad.rb" />
<p>
The regular expression <code>/^[0-9]+$/</code> will match a single line of a multi-line string,
which may not be the intended behavior.
The following (good) example code uses the regular expression <code>\A[0-9]+\z</code> to match the entire input string.
</p>
<sample src="examples/missing_full_anchor_good.rb" />
</example>
<references>
<li>
Ruby documentation: <a href="https://ruby-doc.org/3.2.0/Regexp.html#class-Regexp-label-Anchors">Anchors</a>
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,21 @@
/**
* @name Badly anchored regular expression
* @description Regular expressions anchored using `^` or `$` are vulnerable to bypassing.
* @kind path-problem
* @problem.severity warning
* @security-severity 7.8
* @precision high
* @id rb/regex/badly-anchored-regexp
* @tags correctness
* security
* external/cwe/cwe-020
*/
import codeql.ruby.security.regexp.MissingFullAnchorQuery
import DataFlow::PathGraph
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, Sink sinkNode
where config.hasFlowPath(source, sink) and sink.getNode() = sinkNode
select sink, source, sink, "This value depends on $@, and is $@ against a $@.", source.getNode(),
source.getNode().(Source).describe(), sinkNode.getCallNode(), "checked", sinkNode.getRegex(),
"badly anchored regular expression"

View File

@@ -0,0 +1,5 @@
def bad(input)
raise "Bad input" unless input =~ /^[0-9]+$/
# ....
end

View File

@@ -0,0 +1,5 @@
def good(input)
raise "Bad input" unless input =~ /\A[0-9]+\z/
# ....
end

View File

@@ -0,0 +1,16 @@
edges
| impl/miss-anchor.rb:2:12:2:15 | name : | impl/miss-anchor.rb:3:39:3:42 | name |
| impl/miss-anchor.rb:6:12:6:15 | name : | impl/miss-anchor.rb:7:43:7:46 | name |
| impl/miss-anchor.rb:14:12:14:15 | name : | impl/miss-anchor.rb:15:47:15:50 | name |
nodes
| impl/miss-anchor.rb:2:12:2:15 | name : | semmle.label | name : |
| impl/miss-anchor.rb:3:39:3:42 | name | semmle.label | name |
| impl/miss-anchor.rb:6:12:6:15 | name : | semmle.label | name : |
| impl/miss-anchor.rb:7:43:7:46 | name | semmle.label | name |
| impl/miss-anchor.rb:14:12:14:15 | name : | semmle.label | name : |
| impl/miss-anchor.rb:15:47:15:50 | name | semmle.label | name |
subpaths
#select
| impl/miss-anchor.rb:3:39:3:42 | name | impl/miss-anchor.rb:2:12:2:15 | name : | impl/miss-anchor.rb:3:39:3:42 | name | This value depends on $@, and is $@ against a $@. | impl/miss-anchor.rb:2:12:2:15 | name | library input | impl/miss-anchor.rb:3:39:3:89 | ... !~ ... | checked | impl/miss-anchor.rb:3:48:3:88 | ^[A-Za-z0-9\\+\\-_]+(\\/[A-Za-z0-9\\+\\-_]+)*$ | badly anchored regular expression |
| impl/miss-anchor.rb:7:43:7:46 | name | impl/miss-anchor.rb:6:12:6:15 | name : | impl/miss-anchor.rb:7:43:7:46 | name | This value depends on $@, and is $@ against a $@. | impl/miss-anchor.rb:6:12:6:15 | name | library input | impl/miss-anchor.rb:7:43:7:93 | ... !~ ... | checked | impl/miss-anchor.rb:7:52:7:92 | ^[A-Za-z0-9\\+\\-_]+(\\/[A-Za-z0-9\\+\\-_]+)*$ | badly anchored regular expression |
| impl/miss-anchor.rb:15:47:15:50 | name | impl/miss-anchor.rb:14:12:14:15 | name : | impl/miss-anchor.rb:15:47:15:50 | name | This value depends on $@, and is $@ against a $@. | impl/miss-anchor.rb:14:12:14:15 | name | library input | impl/miss-anchor.rb:15:47:15:97 | ... !~ ... | checked | impl/miss-anchor.rb:15:56:15:96 | ^[A-Za-z0-9\\+\\-_]+(\\/[A-Za-z0-9\\+\\-_]+)*$ | badly anchored regular expression |

View File

@@ -0,0 +1 @@
queries/security/cwe-020/MissingFullAnchor.ql

View File

@@ -0,0 +1,17 @@
class Foobar
def foo1(name)
raise Blabity, 'Invalid thing' if name !~ /^[A-Za-z0-9\+\-_]+(\/[A-Za-z0-9\+\-_]+)*$/ # NOT OK
end
def foo2(name)
raise Blabity, 'Invalid thing' unless name !~ /^[A-Za-z0-9\+\-_]+(\/[A-Za-z0-9\+\-_]+)*$/ # NOT OK
end
def foo3(name)
raise Blabity, 'Invalid thing' unless name !~ /\A[A-Za-z0-9\+\-_]+(\/[A-Za-z0-9\+\-_]+)*\z/ # OK
end
def foo4(name)
raise Blabity, 'Invalid thing' unless not name !~ /^[A-Za-z0-9\+\-_]+(\/[A-Za-z0-9\+\-_]+)*$/ # NOT OK
end
end

View File

@@ -0,0 +1,5 @@
Gem::Specification.new do |s|
s.name = 'miss-anchor'
s.require_path = "impl"
end