Ruby: improve handling of [g]sub!

rb/incomplete-sanitization has a few cases where we find flow from one one string substitution call to another, e.g. a.sub(...).sub(...) But this didn't find typical chained uses of the destructive variants, e.g. a.sub!(...) a.sub!(...) We now handle those cases by tracking flow from the post-update node for the receiver of the first call.
2025-12-18 01:33:15 +01:00 · 2022-04-13 17:19:25 +01:00
parent bbb8177176
commit fdca896614
3 changed files with 40 additions and 24 deletions
--- a/ruby/ql/src/queries/security/cwe-116/IncompleteSanitization.ql
+++ b/ruby/ql/src/queries/security/cwe-116/IncompleteSanitization.ql
@@ -19,6 +19,7 @@ import codeql.ruby.DataFlow
 import codeql.ruby.controlflow.CfgNodes
 import codeql.ruby.frameworks.core.String
 import codeql.ruby.Regexp as RE
+import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate

 /** Gets a character that is commonly used as a meta-character. */
 string metachar() { result = "'\"\\&<>\n\r\t*|{}[]%$".charAt(_) }
@@ -89,7 +90,8 @@ predicate allBackslashesEscaped(DataFlow::Node node) {
    m =
      [
        "sub", "gsub", "slice", "[]", "strip", "lstrip", "rstrip", "chomp", "chop", "downcase",
-        "upcase",
+        "upcase", "sub!", "gsub!", "slice!", "strip!", "lstrip!", "rstrip!", "chomp!", "chop!",
+        "downcase!", "upcase!",
      ]
  |
    mc = node.asExpr() and
@@ -100,6 +102,14 @@ predicate allBackslashesEscaped(DataFlow::Node node) {
  or
  // general data flow
  allBackslashesEscaped(node.getAPredecessor())
+  or
+  // general data flow from a (destructive) [g]sub!
+  exists(DataFlowPrivate::PostUpdateNode post, StringSubstitutionCall sub |
+    sub.isDestructive() and
+    allBackslashesEscaped(sub) and
+    post.getPreUpdateNode() = sub.getReceiver() and
+    post.getASuccessor() = node
+  )
 }

 /**
@@ -110,11 +120,25 @@ predicate removesFirstOccurence(StringSubstitutionCall sub, string str) {
  not sub.isGlobal() and sub.replaces(str, "")
 }

-/** Gets a method call where `node` is the receiver. */
-DataFlow::Node getAMethodCall(DataFlow::LocalSourceNode node) {
+/**
+ * Gets a method call where the receiver is the result of a string subtitution
+ * call.
+ */
+DataFlow::Node getAMethodCall(StringSubstitutionCall call) {
  exists(DataFlow::Node receiver |
    receiver.asExpr() = result.asExpr().(ExprNodes::MethodCallCfgNode).getReceiver() and
-    node.flowsTo(receiver)
+    (
+      // for a non-destructive string substitution, is there flow from it to the
+      // receiver of another method call?
+      not call.isDestructive() and call.(DataFlow::LocalSourceNode).flowsTo(receiver)
+      or
+      // for a destructive string substitution, is there flow from its
+      // post-update receivver to the receiver of another method call?
+      call.isDestructive() and
+      exists(DataFlowPrivate::PostUpdateNode post | post.getPreUpdateNode() = call.getReceiver() |
+        post.(DataFlow::LocalSourceNode).flowsTo(receiver)
+      )
+    )
  )
 }

--- a/ruby/ql/test/query-tests/security/cwe-116/IncompleteSanitization/IncompleteSanitization.expected
+++ b/ruby/ql/test/query-tests/security/cwe-116/IncompleteSanitization/IncompleteSanitization.expected
@@ -38,10 +38,6 @@
 | tst.rb:82:3:82:26 | call to sub! | This replaces only the first occurrence of ... + .... |
 | tst.rb:87:3:87:21 | call to sub | This replaces only the first occurrence of indirect. |
 | tst.rb:88:3:88:22 | call to sub! | This replaces only the first occurrence of indirect. |
-| tst.rb:141:3:141:27 | call to gsub! | This does not escape backslash characters in the input. |
-| tst.rb:150:3:150:24 | call to gsub! | This does not escape backslash characters in the input. |
-| tst.rb:160:3:160:23 | call to gsub! | This does not escape backslash characters in the input. |
-| tst.rb:191:3:191:20 | call to gsub! | This does not escape backslash characters in the input. |
 | tst.rb:215:3:215:16 | call to sub | This replaces only the first occurrence of "<". |
 | tst.rb:215:3:215:29 | call to sub | This replaces only the first occurrence of ">". |
 | tst.rb:217:3:217:19 | call to sub | This replaces only the first occurrence of "[". |
@@ -50,13 +46,9 @@
 | tst.rb:218:3:218:35 | call to sub | This replaces only the first occurrence of "}". |
 | tst.rb:223:3:223:16 | call to sub | This replaces only the first occurrence of "]". |
 | tst.rb:223:3:223:29 | call to sub | This replaces only the first occurrence of "[". |
-| tst.rb:227:3:227:17 | call to sub! | This replaces only the first occurrence of "[". |
-| tst.rb:228:3:228:17 | call to sub! | This replaces only the first occurrence of "]". |
-| tst.rb:237:3:237:18 | call to sub! | This replaces only the first occurrence of """. |
-| tst.rb:238:3:238:18 | call to sub! | This replaces only the first occurrence of """. |
-| tst.rb:240:3:240:18 | call to sub! | This replaces only the first occurrence of "'". |
-| tst.rb:241:3:241:18 | call to sub! | This replaces only the first occurrence of "'". |
 | tst.rb:248:3:248:17 | call to sub | This replaces only the first occurrence of "\\n". |
 | tst.rb:249:3:249:27 | call to sub | This replaces only the first occurrence of "\\n". |
+| tst.rb:258:3:258:18 | call to sub! | This replaces only the first occurrence of "\\n". |
+| tst.rb:263:3:263:18 | call to sub! | This replaces only the first occurrence of "\\n". |
 | tst.rb:268:3:268:20 | call to sub! | This replaces only the first occurrence of "/../". |
 | tst.rb:269:3:269:20 | call to sub | This replaces only the first occurrence of "/../". |
--- a/ruby/ql/test/query-tests/security/cwe-116/IncompleteSanitization/tst.rb
+++ b/ruby/ql/test/query-tests/security/cwe-116/IncompleteSanitization/tst.rb
@@ -138,7 +138,7 @@ end

 def good5b(s)
  s.gsub!(/\\/, "\\\\")
-  s.gsub!(/['"]/, '\\\\\&') # OK, but still flagged
+  s.gsub!(/['"]/, '\\\\\&') # OK
 end

 def good6a(s)
@@ -147,7 +147,7 @@ end

 def good6b(s)
  s.gsub!(/[\\]/, '\\\\')
-  s.gsub!(/[\"]/, '\\"') # OK, but still flagged
+  s.gsub!(/[\"]/, '\\"') # OK
 end

 def good7a(s)
@@ -157,7 +157,7 @@ end

 def good7b(s)
  s.gsub! /[\\]/, '\\\\'
-  s.gsub! /[\"]/, '\\"' # OK, but still flagged
+  s.gsub! /[\"]/, '\\"' # OK
 end

 def good8a(s)
@@ -188,7 +188,7 @@ def good10b(s)
  s.gsub! '\\', '\\\\'
  s.slice! 1..(-1)
  s.gsub! /\\"/, '"'
-  s.gsub! /'/, "\\'" # OK, but still flagged
+  s.gsub! /'/, "\\'" # OK
  "'" + s + "'"
 end

@@ -225,7 +225,7 @@ end

 def good13b(s1)
  s1.sub! '[', ''
-  s1.sub! ']', '' # OK, but still flagged
+  s1.sub! ']', '' # OK
 end

 def good14a(s)
@@ -235,10 +235,10 @@ end

 def good14b(s1, s2)
  s1.sub!('"', '')
-  s1.sub!('"', '') # OK, but still flagged
+  s1.sub!('"', '') # OK

  s2.sub!("'", "")
-  s2.sub!("'", "") # OK, but still flagged
+  s2.sub!("'", "") # OK
 end

 def newlines_a(a, b, c)
@@ -255,12 +255,12 @@ def newlines_b(a, b, c)
  output.sub!("\n", "") # OK

  d = a.dup
-  d.sub!("\n", "")
-  d.sub!(b, c) # NOT OK, but not flagged
+  d.sub!("\n", "") # NOT OK
+  d.sub!(b, c)

  e = a.dup
  d.sub!(b, c)
-  d.sub!("\n", "") # NOT OK, but not flagged
+  d.sub!("\n", "") # NOT OK
 end

 def bad_path_sanitizer(p1, p2)