Merge pull request #8607 from github/nickrolfe/incomplete_sanitization

Ruby: port of `js/incomplete-sanitization`
2025-12-22 19:56:32 +01:00 · 2022-04-26 17:10:24 +01:00
parent eacfceb6ce 9b2a98326c
commit 649d7dd022
14 changed files with 774 additions and 1 deletions
--- a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll
+++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll
@@ -40,9 +40,20 @@ class Node extends TNode {
  }
  /**
-   * Gets a local source node from which data may flow to this node in zero or more local data-flow steps.
+   * Gets a local source node from which data may flow to this node in zero or
   * more local data-flow steps.
   */
  LocalSourceNode getALocalSource() { result.flowsTo(this) }
  /**
   * Gets a data flow node from which data may flow to this node in one local step.
   */
  Node getAPredecessor() { localFlowStep(result, this) }
  /**
   * Gets a data flow node to which data may flow from this node in one local step.
   */
  Node getASuccessor() { localFlowStep(this, result) }
 }
 /** A data-flow node corresponding to a call in the control-flow graph. */
--- a/ruby/ql/lib/codeql/ruby/frameworks/core/String.qll
+++ b/ruby/ql/lib/codeql/ruby/frameworks/core/String.qll
@@ -5,6 +5,81 @@ private import codeql.ruby.ApiGraphs
 private import codeql.ruby.DataFlow
 private import codeql.ruby.dataflow.FlowSummary
 private import codeql.ruby.dataflow.internal.DataFlowDispatch
 private import codeql.ruby.controlflow.CfgNodes
 private import codeql.ruby.Regexp as RE
 /**
 * A call to a string substitution method, i.e. `String#sub`, `String#sub!`,
 * `String#gsub`, or `String#gsub!`.
 *
 * We heuristically include any call to a method matching the above names,
 * provided it has exactly two arguments and a receiver.
 */
 class StringSubstitutionCall extends DataFlow::CallNode {
  StringSubstitutionCall() {
    this.getMethodName() = ["sub", "sub!", "gsub", "gsub!"] and
    exists(this.getReceiver()) and
    this.getNumberOfArguments() = 2
  }
  /**
   * Holds if this is a global substitution, i.e. this is a call to `gsub` or
   * `gsub!`.
   */
  predicate isGlobal() { this.getMethodName() = ["gsub", "gsub!"] }
  /**
   * Holds if this is a destructive substitution, i.e. this is a call to `sub!`
   * or `gsub!`.
   */
  predicate isDestructive() { this.getMethodName() = ["sub!", "gsub!"] }
  /** Gets the first argument to this call. */
  DataFlow::Node getPatternArgument() { result = this.getArgument(0) }
  /** Gets the second argument to this call. */
  DataFlow::Node getReplacementArgument() { result = this.getArgument(1) }
  /**
   * Gets the regular expression passed as the first (pattern) argument in this
   * call, if any.
   */
  RE::RegExpPatternSource getPatternRegExp() {
    // TODO: using local flow means we miss regexps defined as constants outside
    // of the function scope.
    result.(DataFlow::LocalSourceNode).flowsTo(this.getPatternArgument())
  }
  /**
   * Gets the string value passed as the first (pattern) argument in this call,
   * if any.
   */
  string getPatternString() {
    result = this.getPatternArgument().asExpr().getConstantValue().getString()
  }
  /**
   * Gets the string value passed as the second (replacement) argument in this
   * call, if any.
   */
  string getReplacementString() {
    result = this.getReplacementArgument().asExpr().getConstantValue().getString()
  }
  /** Gets a string that is being replaced by this call. */
  string getAReplacedString() {
    result = this.getPatternRegExp().getRegExpTerm().getAMatchedString()
    or
    result = this.getPatternString()
  }
  /** Holds if this call to `replace` replaces `old` with `new`. */
  predicate replaces(string old, string new) {
    old = this.getAReplacedString() and
    new = this.getReplacementString()
    // TODO: handle block-variant of the call
  }
 }
 /**
 * Provides flow summaries for the `String` class.
--- a/ruby/ql/lib/codeql/ruby/regexp/RegExpTreeView.qll
+++ b/ruby/ql/lib/codeql/ruby/regexp/RegExpTreeView.qll
@@ -246,6 +246,23 @@ class RegExpTerm extends RegExpParent {
    )
  }
  /**
   * Gets the single string this regular-expression term matches.
   *
   * This predicate is only defined for (sequences/groups of) constant regular
   * expressions.  In particular, terms involving zero-width assertions like `^`
   * or `\b` are not considered to have a constant value.
   *
   * Note that this predicate does not take flags of the enclosing
   * regular-expression literal into account.
   */
  string getConstantValue() { none() }
  /**
   * Gets a string that is matched by this regular-expression term.
   */
  string getAMatchedString() { result = this.getConstantValue() }
  /** Gets the primary QL class for this term. */
  override string getAPrimaryQlClass() { result = "RegExpTerm" }
 }
@@ -406,6 +423,19 @@ class RegExpSequence extends RegExpTerm, TRegExpSequence {
    )
  }
  override string getConstantValue() { result = this.getConstantValue(0) }
  /**
   * Gets the single string matched by the `i`th child and all following
   * children of this sequence, if any.
   */
  private string getConstantValue(int i) {
    i = this.getNumChild() and
    result = ""
    or
    result = this.getChild(i).getConstantValue() + this.getConstantValue(i + 1)
  }
  override string getAPrimaryQlClass() { result = "RegExpSequence" }
 }
@@ -466,6 +496,11 @@ class RegExpAlt extends RegExpTerm, TRegExpAlt {
    )
  }
  /** Gets an alternative of this term. */
  RegExpTerm getAlternative() { result = this.getAChild() }
  override string getAMatchedString() { result = this.getAlternative().getAMatchedString() }
  override string getAPrimaryQlClass() { result = "RegExpAlt" }
 }
@@ -611,6 +646,10 @@ class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
    )
  }
  override string getAMatchedString() {
    not this.isInverted() and result = this.getAChild().getAMatchedString()
  }
  override string getAPrimaryQlClass() { result = "RegExpCharacterClass" }
 }
@@ -719,6 +758,8 @@ class RegExpConstant extends RegExpTerm {
  override RegExpTerm getChild(int i) { none() }
  override string getConstantValue() { result = this.getValue() }
  override string getAPrimaryQlClass() { result = "RegExpConstant" }
 }
@@ -762,6 +803,10 @@ class RegExpGroup extends RegExpTerm, TRegExpGroup {
    re.groupContents(start, end, result.getStart(), result.getEnd())
  }
  override string getConstantValue() { result = this.getAChild().getConstantValue() }
  override string getAMatchedString() { result = this.getAChild().getAMatchedString() }
  override string getAPrimaryQlClass() { result = "RegExpGroup" }
 }
--- a/ruby/ql/src/change-notes/2022-04-13-incomplete-sanitization.md
+++ b/ruby/ql/src/change-notes/2022-04-13-incomplete-sanitization.md
@@ -0,0 +1,4 @@
 ---
 category: newQuery
 ---
 * Added a new query, `rb/incomplete-sanitization`. The query finds string transformations that do not replace or escape all occurrences of a meta-character.
--- a/ruby/ql/src/queries/security/cwe-116/IncompleteSanitization.qhelp
+++ b/ruby/ql/src/queries/security/cwe-116/IncompleteSanitization.qhelp
@@ -0,0 +1,87 @@
 <!DOCTYPE qhelp PUBLIC
  "-//Semmle//qhelp//EN"
  "qhelp.dtd">
 <qhelp>
 <overview>
 <p>
 Sanitizing untrusted input is a common technique for preventing injection attacks such as SQL
 injection or cross-site scripting. Usually, this is done by escaping meta-characters such as quotes
 in a domain-specific way so that they are treated as normal characters.
 </p>
 <p>
 However, directly using the <code>String#sub</code> method to perform escaping is notoriously
 error-prone. Common mistakes include only replacing the first occurrence of a meta-character, or
 backslash-escaping various meta-characters but not the backslash itself.
 </p>
 <p>
 In the former case, later meta-characters are left undisturbed and can be used to subvert the
 sanitization. In the latter case, preceding a meta-character with a backslash leads to the backslash
 being escaped, but the meta-character appearing un-escaped, which again makes the sanitization
 ineffective.
 </p>
 <p>
 Even if the escaped string is not used in a security-critical context, incomplete escaping may still
 have undesirable effects, such as badly rendered or confusing output.
 </p>
 </overview>
 <recommendation>
 <p>
 Use a (well-tested) sanitization library if at all possible. These libraries are much more likely to
 handle corner cases correctly than a custom implementation.
 </p>
 <p>
 An even safer alternative is to design the application so that sanitization is not needed.
 Otherwise, make sure to use <code>String#gsub</code> rather than <code>String#sub</code>, to ensure
 that all occurrences are replaced, and remember to escape backslashes if applicable.
 </p>
 <p>
 Note, however, that this is generally <i>not</i> sufficient for replacing multi-character strings:
 the <code>String#gsub</code> method performs only one pass over the input string, and will not
 replace further instances of the string that result from earlier replacements.
 </p>
 <p>
 For example, consider the code snippet <code>s.gsub /\/\.\.\//, ""</code>, which attempts to strip
 out all occurences of <code>/../</code> from <code>s</code>. This will not work as expected: for the
 string <code>/./.././</code>, for example, it will remove the single occurrence of <code>/../</code>
 in the middle, but the remainder of the string then becomes <code>/../</code>, which is another
 instance of the substring we were trying to remove.
 </p>
 </recommendation>
 <example>
 <p>
 As an example, assume that we want to embed a user-controlled string <code>account_number</code> into
 a SQL query as part of a string literal. To avoid SQL injection, we need to ensure that the string
 does not contain un-escaped single-quote characters. The following method attempts to ensure this by
 doubling single quotes, and thereby escaping them:
 </p>
 <sample src="examples/IncompleteSanitization.rb" />
 <p>
 As written, this sanitizer is ineffective: <code>String#sub</code> will replace only the
 <i>first</i> occurrence of that string.
 </p>
 <p>
 As mentioned above, the method <code>escape_quotes</code> should be replaced with a purpose-built
 sanitizer, such as <code>ActiveRecord::Base::sanitize_sql</code> in Rails, or by using ORM methods
 that automatically sanitize parameters.
 </p>
 <p>
 If this is not an option, <code>escape_quotes</code> should be rewritten to use the
 <code>String#gsub</code> method instead:
 </p>
 <sample src="examples/IncompleteSanitizationGood.rb" />
 </example>
 <references>
 <li>OWASP Top 10: <a href="https://www.owasp.org/index.php/Top_10-2017_A1-Injection">A1 Injection</a>.</li>
 <li>Rails: <a href="https://api.rubyonrails.org/classes/ActiveRecord/Sanitization/ClassMethods.html">ActiveRecord::Base::sanitize_sql</a>.</li>
 </references>
 </qhelp>
--- a/ruby/ql/src/queries/security/cwe-116/IncompleteSanitization.ql
+++ b/ruby/ql/src/queries/security/cwe-116/IncompleteSanitization.ql
@@ -0,0 +1,220 @@
 /**
 * @name Incomplete string escaping or encoding
 * @description A string transformer that does not replace or escape all occurrences of a
 *              meta-character may be ineffective.
 * @kind problem
 * @problem.severity warning
 * @security-severity 7.8
 * @precision high
 * @id rb/incomplete-sanitization
 * @tags correctness
 *       security
 *       external/cwe/cwe-020
 *       external/cwe/cwe-080
 *       external/cwe/cwe-116
 */
 import ruby
 import codeql.ruby.DataFlow
 import codeql.ruby.controlflow.CfgNodes
 import codeql.ruby.frameworks.core.String
 import codeql.ruby.Regexp as RE
 import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate
 /** Gets a character that is commonly used as a meta-character. */
 string metachar() { result = "'\"\\&<>\n\r\t*|{}[]%$".charAt(_) }
 /** Holds if `t` is simple, that is, a union of constants. */
 predicate isSimple(RE::RegExpTerm t) {
  t instanceof RE::RegExpConstant
  or
  isSimple(t.(RE::RegExpGroup).getAChild()) // N.B. a group has only one child
  or
  isSimpleCharacterClass(t)
  or
  isSimpleAlt(t)
 }
 /** Holds if `t` is a non-inverted character class that contains no ranges. */
 predicate isSimpleCharacterClass(RE::RegExpCharacterClass t) {
  not t.isInverted() and
  forall(RE::RegExpTerm ch | ch = t.getAChild() | isSimple(ch))
 }
 /** Holds if `t` is an alternation of simple terms. */
 predicate isSimpleAlt(RE::RegExpAlt t) {
  forall(RE::RegExpTerm ch | ch = t.getAChild() | isSimple(ch))
 }
 /**
 * Holds if `sub` is of the form `x.gsub[!](pattern, replacement)`, where
 * `pattern` has a known constant value (either a string or a regexp), and
 * `replacement` prefixes the matched string with a backslash.
 */
 predicate isBackslashEscape(StringSubstitutionCall sub) {
  sub.isGlobal() and
  (exists(sub.getPatternString()) or exists(sub.getPatternRegExp().getRegExpTerm())) and
  (
    // Replacement with `\` followed by a backref such as `\&`, `\1`, etc. The
    // replacement argument to the substitution call will look like '\\\\\0',
    // '\\\\\\0', or "\\\\\\0". Those examples all have the same string value
    // (i.e. after Ruby's unescaping) of `\\\0`. Then, to account for the
    // backslash escaping in both QL's string syntax and its regexp engine, each
    // of those three backslashes becomes `\\\\` in the following:
    sub.getReplacementString().regexpMatch("\\\\\\\\\\\\(&|\\d)")
    or
    // replacement of `c` with `\c`
    exists(string c | sub.replaces(c, "\\" + c))
  )
 }
 /**
 * Holds if data flowing into `node` has no un-escaped backslashes.
 */
 predicate allBackslashesEscaped(DataFlow::Node node) {
  exists(StringSubstitutionCall sub | node = sub |
    // check whether `sub` itself escapes backslashes
    isBackslashEscape(sub) and
    (
      sub.getAReplacedString() = "\\"
      or
      // if it's a complex regexp, we conservatively assume that it probably escapes backslashes
      exists(RE::RegExpTerm root | root = sub.getPatternRegExp().getRegExpTerm() |
        not isSimple(root)
      )
    )
  )
  or
  // flow through string methods
  exists(ExprNodes::MethodCallCfgNode mc, string m, DataFlow::Node receiver |
    m =
      [
        "sub", "gsub", "slice", "[]", "strip", "lstrip", "rstrip", "chomp", "chop", "downcase",
        "upcase", "sub!", "gsub!", "slice!", "strip!", "lstrip!", "rstrip!", "chomp!", "chop!",
        "downcase!", "upcase!",
      ]
  |
    mc = node.asExpr() and
    m = mc.getExpr().getMethodName() and
    receiver.asExpr() = mc.getReceiver() and
    allBackslashesEscaped(receiver)
  )
  or
  // general data flow
  allBackslashesEscaped(node.getAPredecessor())
  or
  // general data flow from a (destructive) [g]sub!
  exists(DataFlow::PostUpdateNode post, StringSubstitutionCall sub |
    sub.isDestructive() and
    allBackslashesEscaped(sub) and
    post.getPreUpdateNode() = sub.getReceiver() and
    post.getASuccessor() = node
  )
 }
 /**
 * Holds if `sub` looks like a string substitution call that deliberately
 * removes the first occurrence of `str`.
 */
 predicate removesFirstOccurence(StringSubstitutionCall sub, string str) {
  not sub.isGlobal() and sub.replaces(str, "")
 }
 /**
 * Gets a method call where the receiver is the result of a string substitution
 * call.
 */
 DataFlow::CallNode getAMethodCall(StringSubstitutionCall call) {
  exists(DataFlow::Node receiver |
    receiver = result.getReceiver() and
    (
      // for a non-destructive string substitution, is there flow from it to the
      // receiver of another method call?
      not call.isDestructive() and call.(DataFlow::LocalSourceNode).flowsTo(receiver)
      or
      // for a destructive string substitution, is there flow from its
      // post-update receiver to the receiver of another method call?
      call.isDestructive() and
      exists(DataFlow::PostUpdateNode post | post.getPreUpdateNode() = call.getReceiver() |
        post.(DataFlow::LocalSourceNode).flowsTo(receiver)
      )
    )
  )
 }
 /**
 * Holds if `leftUnwrap` and `rightUnwrap` unwraps a string from a pair of
 * surrounding delimiters.
 */
 predicate isDelimiterUnwrapper(StringSubstitutionCall leftUnwrap, StringSubstitutionCall rightUnwrap) {
  exists(string left, string right |
    left = "[" and right = "]"
    or
    left = "{" and right = "}"
    or
    left = "(" and right = ")"
    or
    left = "\"" and right = "\""
    or
    left = "'" and right = "'"
  |
    removesFirstOccurence(leftUnwrap, left) and
    removesFirstOccurence(rightUnwrap, right) and
    rightUnwrap = getAMethodCall(leftUnwrap)
  )
 }
 /**
 * Holds if `sub` is a standalone use of a string substitution to remove a single
 * newline, dollar or percent character.
 *
 * This is often done on inputs that are known to only contain a single instance
 * of the character, such as output from a shell command that is known to end
 * with a single newline, or strings like "$1.20" or "50%".
 */
 predicate whitelistedRemoval(StringSubstitutionCall sub) {
  not sub.isGlobal() and
  exists(string s | s = "\n" or s = "%" or s = "$" |
    sub.replaces(s, "") and
    not exists(StringSubstitutionCall other |
      other = getAMethodCall(sub) or
      sub = getAMethodCall(other)
    )
  )
 }
 from StringSubstitutionCall sub, DataFlow::Node old, string msg
 where
  old = sub.getPatternArgument() and
  (
    not sub.isGlobal() and
    msg = "This replaces only the first occurrence of " + old + "." and
    // only flag if this is likely to be a sanitizer or URL encoder or decoder
    exists(string m | m = sub.getAReplacedString() |
      // sanitizer
      m = metachar()
      or
      exists(string urlEscapePattern | urlEscapePattern = "(%[0-9A-Fa-f]{2})+" |
        // URL decoder
        m.regexpMatch(urlEscapePattern)
        or
        // URL encoder
        sub.getReplacementString().regexpMatch(urlEscapePattern)
      )
      or
      // path sanitizer
      (m = ".." or m = "/.." or m = "../" or m = "/../")
    ) and
    // don't flag replace operations in a loop
    not sub.asExpr().(ExprNodes::MethodCallCfgNode).getReceiver() = sub.asExpr().getASuccessor+() and
    // don't flag unwrapper
    not isDelimiterUnwrapper(sub, _) and
    not isDelimiterUnwrapper(_, sub) and
    // don't flag replacements of certain characters with whitespace
    not whitelistedRemoval(sub)
    or
    isBackslashEscape(sub) and
    not allBackslashesEscaped(sub) and
    msg = "This does not escape backslash characters in the input."
  )
 select sub, msg
--- a/ruby/ql/src/queries/security/cwe-116/examples/IncompleteSanitization.rb
+++ b/ruby/ql/src/queries/security/cwe-116/examples/IncompleteSanitization.rb
@@ -0,0 +1,3 @@
 def escape_quotes(s)
  s.sub "'", "''"
 end
--- a/ruby/ql/src/queries/security/cwe-116/examples/IncompleteSanitizationGood.rb
+++ b/ruby/ql/src/queries/security/cwe-116/examples/IncompleteSanitizationGood.rb
@@ -0,0 +1,3 @@
 def escape_quotes(s)
  s.gsub "'", "''"
 end
--- a/ruby/ql/test/query-tests/security/cwe-116/BadTagFilter/BadTagFilter.expected
+++ b/ruby/ql/test/query-tests/security/cwe-116/BadTagFilter/BadTagFilter.expected
--- a/ruby/ql/test/query-tests/security/cwe-116/BadTagFilter/BadTagFilter.qlref
+++ b/ruby/ql/test/query-tests/security/cwe-116/BadTagFilter/BadTagFilter.qlref
--- a/ruby/ql/test/query-tests/security/cwe-116/BadTagFilter/test.rb
+++ b/ruby/ql/test/query-tests/security/cwe-116/BadTagFilter/test.rb
--- a/ruby/ql/test/query-tests/security/cwe-116/IncompleteSanitization/IncompleteSanitization.expected
+++ b/ruby/ql/test/query-tests/security/cwe-116/IncompleteSanitization/IncompleteSanitization.expected
@@ -0,0 +1,54 @@
 | tst.rb:3:3:3:16 | call to sub | This replaces only the first occurrence of "'". |
 | tst.rb:4:3:4:16 | call to sub! | This replaces only the first occurrence of "'". |
 | tst.rb:8:3:8:16 | call to sub | This replaces only the first occurrence of /'/. |
 | tst.rb:9:3:9:16 | call to sub! | This replaces only the first occurrence of /'/. |
 | tst.rb:13:3:13:21 | call to gsub | This does not escape backslash characters in the input. |
 | tst.rb:14:3:14:22 | call to gsub | This does not escape backslash characters in the input. |
 | tst.rb:15:3:15:21 | call to gsub! | This does not escape backslash characters in the input. |
 | tst.rb:16:3:16:22 | call to gsub! | This does not escape backslash characters in the input. |
 | tst.rb:20:3:20:25 | call to gsub | This does not escape backslash characters in the input. |
 | tst.rb:21:3:21:24 | call to gsub | This does not escape backslash characters in the input. |
 | tst.rb:22:3:22:25 | call to gsub! | This does not escape backslash characters in the input. |
 | tst.rb:23:3:23:24 | call to gsub! | This does not escape backslash characters in the input. |
 | tst.rb:27:3:27:26 | call to gsub | This does not escape backslash characters in the input. |
 | tst.rb:28:3:28:26 | call to gsub! | This does not escape backslash characters in the input. |
 | tst.rb:32:3:32:29 | call to gsub | This does not escape backslash characters in the input. |
 | tst.rb:33:3:33:29 | call to gsub! | This does not escape backslash characters in the input. |
 | tst.rb:37:3:37:27 | call to gsub | This does not escape backslash characters in the input. |
 | tst.rb:38:3:38:27 | call to gsub! | This does not escape backslash characters in the input. |
 | tst.rb:42:3:42:16 | call to sub | This replaces only the first occurrence of "\|". |
 | tst.rb:43:3:43:16 | call to sub! | This replaces only the first occurrence of "\|". |
 | tst.rb:47:3:47:22 | call to gsub | This does not escape backslash characters in the input. |
 | tst.rb:48:3:48:21 | call to gsub | This does not escape backslash characters in the input. |
 | tst.rb:49:3:49:21 | call to gsub | This does not escape backslash characters in the input. |
 | tst.rb:50:3:50:22 | call to gsub! | This does not escape backslash characters in the input. |
 | tst.rb:51:3:51:21 | call to gsub! | This does not escape backslash characters in the input. |
 | tst.rb:52:3:52:21 | call to gsub! | This does not escape backslash characters in the input. |
 | tst.rb:56:3:56:19 | call to sub | This replaces only the first occurrence of "/". |
 | tst.rb:57:3:57:19 | call to sub! | This replaces only the first occurrence of "/". |
 | tst.rb:61:3:61:19 | call to sub | This replaces only the first occurrence of "%25". |
 | tst.rb:62:3:62:19 | call to sub! | This replaces only the first occurrence of "%25". |
 | tst.rb:66:3:66:20 | call to sub | This replaces only the first occurrence of "'". |
 | tst.rb:67:3:67:20 | call to sub! | This replaces only the first occurrence of "'". |
 | tst.rb:71:3:71:21 | call to sub | This replaces only the first occurrence of ... + .... |
 | tst.rb:72:3:72:21 | call to sub! | This replaces only the first occurrence of ... + .... |
 | tst.rb:76:3:76:21 | call to sub | This replaces only the first occurrence of "'". |
 | tst.rb:77:3:77:21 | call to sub! | This replaces only the first occurrence of "'". |
 | tst.rb:81:3:81:26 | call to sub | This replaces only the first occurrence of ... + .... |
 | tst.rb:82:3:82:26 | call to sub! | This replaces only the first occurrence of ... + .... |
 | tst.rb:87:3:87:21 | call to sub | This replaces only the first occurrence of indirect. |
 | tst.rb:88:3:88:22 | call to sub! | This replaces only the first occurrence of indirect. |
 | tst.rb:215:3:215:16 | call to sub | This replaces only the first occurrence of "<". |
 | tst.rb:215:3:215:29 | call to sub | This replaces only the first occurrence of ">". |
 | tst.rb:217:3:217:19 | call to sub | This replaces only the first occurrence of "[". |
 | tst.rb:217:3:217:35 | call to sub | This replaces only the first occurrence of "]". |
 | tst.rb:218:3:218:19 | call to sub | This replaces only the first occurrence of "{". |
 | tst.rb:218:3:218:35 | call to sub | This replaces only the first occurrence of "}". |
 | tst.rb:223:3:223:16 | call to sub | This replaces only the first occurrence of "]". |
 | tst.rb:223:3:223:29 | call to sub | This replaces only the first occurrence of "[". |
 | tst.rb:248:3:248:17 | call to sub | This replaces only the first occurrence of "\\n". |
 | tst.rb:249:3:249:27 | call to sub | This replaces only the first occurrence of "\\n". |
 | tst.rb:258:3:258:18 | call to sub! | This replaces only the first occurrence of "\\n". |
 | tst.rb:263:3:263:18 | call to sub! | This replaces only the first occurrence of "\\n". |
 | tst.rb:268:3:268:20 | call to sub! | This replaces only the first occurrence of "/../". |
 | tst.rb:269:3:269:20 | call to sub | This replaces only the first occurrence of "/../". |
--- a/ruby/ql/test/query-tests/security/cwe-116/IncompleteSanitization/IncompleteSanitization.qlref
+++ b/ruby/ql/test/query-tests/security/cwe-116/IncompleteSanitization/IncompleteSanitization.qlref
@@ -0,0 +1 @@
 queries/security/cwe-116/IncompleteSanitization.ql
--- a/ruby/ql/test/query-tests/security/cwe-116/IncompleteSanitization/tst.rb
+++ b/ruby/ql/test/query-tests/security/cwe-116/IncompleteSanitization/tst.rb
@@ -0,0 +1,270 @@
 def bad1(s)
  s.sub  "'", "" # NOT OK
  s.sub! "'", "" # NOT OK
 end
 def bad2(s)
  s.sub  /'/, "" # NOT OK
  s.sub! /'/, "" # NOT OK
 end
 def bad3(s1, s2, s3)
  s1.gsub  /'/, "\\'"  # NOT OK
  s1.gsub  /'/, '\\\'' # NOT OK
  s2.gsub! /'/, "\\'"  # NOT OK
  s3.gsub! /'/, '\\\'' # NOT OK
 end
 def bad4(s1, s2, s3)
  s1.gsub  /'/, "\\\\\\&" # NOT OK
  s1.gsub  /'/, '\\\\\&'  # NOT OK
  s2.gsub! /'/, "\\\\\\&" # NOT OK
  s3.gsub! /'/, '\\\\\&'  # NOT OK
 end
 def bad5(s)
  s.gsub  /['"]/, '\\\\\&' # NOT OK
  s.gsub! /['"]/, '\\\\\&' # NOT OK
 end
 def bad6(s)
  s.gsub  /(['"])/, '\\\\\\1' # NOT OK
  s.gsub! /(['"])/, '\\\\\\1' # NOT OK
 end
 def bad7(s)
  s.gsub  /('|")/, '\\\\\1' # NOT OK
  s.gsub! /('|")/, '\\\\\1' # NOT OK
 end
 def bad8(s)
  s.sub  '|', '' # NOT OK
  s.sub! '|', '' # NOT OK
 end
 def bad9(s1, s2, s3, s4)
  s1.gsub  /"/, "\\\"" # NOT OK
  s1.gsub  /"/, '\\"'  # NOT OK
  s1.gsub  '"', '\\"'  # NOT OK
  s2.gsub! /"/, "\\\"" # NOT OK
  s3.gsub! /"/, '\\"'  # NOT OK
  s4.gsub! '"', '\\"'  # NOT OK
 end
 def bad10(s)
  s.sub  "/", "%2F" # NOT OK
  s.sub! "/", "%2F" # NOT OK
 end
 def bad11(s)
  s.sub  "%25", "%" # NOT OK
  s.sub! "%25", "%" # NOT OK
 end
 def bad12(s)
  s.sub  %q['], %q[] # NOT OK
  s.sub! %q['], %q[] # NOT OK
 end
 def bad13(s)
  s.sub  "'" + "", "" # NOT OK
  s.sub! "'" + "", "" # NOT OK
 end
 def bad14(s)
  s.sub  "'", "" + "" # NOT OK
  s.sub! "'", "" + "" # NOT OK
 end
 def bad15(s)
  s.sub  "'" + "", "" + "" # NOT OK
  s.sub! "'" + "", "" + "" # NOT OK
 end
 def bad16(s)
  indirect = /'/
  s.sub(indirect, "")  # NOT OK
  s.sub!(indirect, "") # NOT OK
 end
 def good1a(s)
  until s.index("'").nil?
    s = s.sub "'", "" # OK
  end
  s
 end
 def good1b(s)
  until s.index("'").nil?
    s.sub! "'", "" # OK
  end
  s
 end
 def good2a(s)
  while s.index("'") != nil
    s = s.sub /'/, "" # OK
  end
  s
 end
 def good2b(s)
  while s.index("'") != nil
    s.sub! /'/, "" # OK
  end
  s
 end
 def good3a(s)
  s.sub  "@user", "alice" # OK
 end
 def good3b(s)
  s.sub! "@user", "bob" # OK
 end
 def good4a(s)
  s.gsub  /#/, "\\d+" # OK
 end
 def good4b(s)
  s.gsub! /#/, "\\d+" # OK
 end
 def good5a(s)
  s.gsub(/\\/, "\\\\").gsub(/['"]/, '\\\\\&') # OK
 end
 def good5b(s)
  s.gsub!(/\\/, "\\\\")
  s.gsub!(/['"]/, '\\\\\&') # OK
 end
 def good6a(s)
  s.gsub(/[\\]/, '\\\\').gsub(/[\"]/, '\\"') # OK
 end
 def good6b(s)
  s.gsub!(/[\\]/, '\\\\')
  s.gsub!(/[\"]/, '\\"') # OK
 end
 def good7a(s)
  s = s.gsub /[\\]/, '\\\\'
  s.gsub /[\"]/, '\\"' # OK
 end
 def good7b(s)
  s.gsub! /[\\]/, '\\\\'
  s.gsub! /[\"]/, '\\"' # OK
 end
 def good8a(s)
  s.gsub /\W/, '\\\\\&' # OK
 end
 def good8b(s)
  s.gsub! /\W/, '\\\\\&' # OK
 end
 def good9a(s)
  s.gsub /[^\w\s]/, '\\\\\&' # OK
 end
 def good9b(s)
  s.gsub! /[^\w\s]/, '\\\\\&' # OK
 end
 def good10a(s)
  s = s.gsub '\\', '\\\\'
  s = s.slice 1..(-1)
  s = s.gsub /\\"/, '"'
  s = s.gsub /'/, "\\'" # OK
  "'" + s + "'"
 end
 def good10b(s)
  s.gsub! '\\', '\\\\'
  s.slice! 1..(-1)
  s.gsub! /\\"/, '"'
  s.gsub! /'/, "\\'" # OK
  "'" + s + "'"
 end
 def good11a(s)
  s.gsub '#', '💩' # OK
 end
 def good11b(s)
  s.gsub! '#', '💩' # OK
 end
 def good12a(s)
  s.sub "%d", "42" # OK
 end
 def good12b(s)
  s.sub! "%d", "42" # OK
 end
 def good13a(s)
  s.sub('[', '').sub(']', '') # OK
  s.sub('(', '').sub(')', '') # OK
  s.sub('{', '').sub('}', '') # OK
  s.sub('<', '').sub('>', '') # NOT OK: too common as a bad HTML sanitizer
  s.sub('[', '\\[').sub(']', '\\]') # NOT OK
  s.sub('{', '\\{').sub('}', '\\}') # NOT OK
  s = s.sub('[', '') # OK
  s = s.sub(']', '') # OK
  s.sub(/{/, '').sub(/}/, '') # OK
  s.sub(']', '').sub('[', '') # probably OK, but still flagged
 end
 def good13b(s1)
  s1.sub! '[', ''
  s1.sub! ']', '' # OK
 end
 def good14a(s)
  s.sub('"', '').sub('"', '') # OK
  s.sub("'", "").sub("'", "") # OK
 end
 def good14b(s1, s2)
  s1.sub!('"', '')
  s1.sub!('"', '') # OK
  s2.sub!("'", "")
  s2.sub!("'", "") # OK
 end
 def newlines_a(a, b, c)
  # motivation for whitelist
  `which emacs`.sub("\n", "") # OK
  a.sub("\n", "").sub(b, c) # NOT OK
  a.sub(b, c).sub("\n", "") # NOT OK
 end
 def newlines_b(a, b, c)
  # motivation for whitelist
  output = `which emacs`
  output.sub!("\n", "") # OK
  d = a.dup
  d.sub!("\n", "") # NOT OK
  d.sub!(b, c)
  e = a.dup
  d.sub!(b, c)
  d.sub!("\n", "") # NOT OK
 end
 def bad_path_sanitizer(p1, p2)
  # attempt at path sanitization
  p1.sub! "/../", "" # NOT OK
  p2.sub  "/../", "" # NOT OK
 end
		`@@ -0,0 +1 @@`
							`queries/security/cwe-116/IncompleteSanitization.ql`