add a rb/unsafe-code-construction query

rebase
2026-07-21 03:08:25 +02:00 · 2022-10-17 16:52:08 +02:00
parent 5f6cb1684b
commit f1668801d3
10 changed files with 267 additions and 0 deletions
--- a/ruby/ql/lib/codeql/ruby/security/UnsafeCodeConstructionCustomizations.qll
+++ b/ruby/ql/lib/codeql/ruby/security/UnsafeCodeConstructionCustomizations.qll
@@ -0,0 +1,94 @@
+/**
+ * Provides default sources, sinks and sanitizers for reasoning about
+ * code constructed from library input vulnerabilities, as
+ * well as extension points for adding your own.
+ */
+
+private import codeql.ruby.DataFlow
+private import codeql.ruby.ApiGraphs
+private import codeql.ruby.frameworks.core.Gem::Gem as Gem
+private import codeql.ruby.AST as Ast
+private import codeql.ruby.Concepts as Concepts
+
+/**
+ * Module containing sources, sinks, and sanitizers for code constructed from library input.
+ */
+module UnsafeCodeConstruction {
+  /** A data-flow source for code constructed from library input; extend this class to add sources. */
+  abstract class Source extends DataFlow::Node { }
+
+  /** A parameter node that `Gem::getALibraryInput()` reports as a library input, seen as a source. */
+  private class LibraryInputAsSource extends Source instanceof DataFlow::ParameterNode {
+    LibraryInputAsSource() { this = Gem::getALibraryInput() }
+  }
+
+  /** A data-flow sink for code constructed from library input; extend this class to add sinks. */
+  abstract class Sink extends DataFlow::Node {
+    /**
+     * Gets the node where the constructed code is executed (the second link in the alert message).
+     */
+    abstract DataFlow::Node getCodeSink();
+
+    /**
+     * Gets a short description of this kind of sink, used as the subject of the alert message.
+     */
+    string getSinkType() { result = "code construction" }
+  }
+
+  /** Gets a node that reaches `codeExec.getCode()`, as found by type back-tracking. */
+  DataFlow::Node getANodeExecutedAsCode(Concepts::CodeExecution codeExec) {
+    result = getANodeExecutedAsCode(TypeTracker::TypeBackTracker::end(), codeExec)
+  }
+
+  import codeql.ruby.typetracking.TypeTracker as TypeTracker
+
+  /** Gets a local source node that reaches `codeExec.getCode()`, back-tracked with state `t`. */
+  private DataFlow::LocalSourceNode getANodeExecutedAsCode(
+    TypeTracker::TypeBackTracker t, Concepts::CodeExecution codeExec
+  ) {
+    t.start() and // base case: a local source of the executed code itself
+    result = codeExec.getCode().getALocalSource()
+    or // recursive case: go one back-tracking step further from an already-found node
+    exists(TypeTracker::TypeBackTracker t2 |
+      result = getANodeExecutedAsCode(t2, codeExec).backtrack(t2, t)
+    )
+  }
+
+  /**
+   * A component of a string literal (e.g. the `#{sink}` in `"foo #{sink}"`),
+   * where the resulting string ends up being executed as code.
+   */
+  class StringFormatAsSink extends Sink {
+    Concepts::CodeExecution s; // the code execution that the literal reaches
+    Ast::StringLiteral lit; // the string literal that contains this component
+
+    StringFormatAsSink() {
+      any(DataFlow::Node n | n.asExpr().getExpr() = lit) = getANodeExecutedAsCode(s) and
+      this.asExpr().getExpr() = lit.getComponent(_)
+    }
+
+    override DataFlow::Node getCodeSink() { result = s }
+
+    override string getSinkType() { result = "string interpolation" }
+  }
+
+  import codeql.ruby.security.TaintedFormatStringSpecific as TaintedFormat
+
+  /**
+   * The format string or a format argument of a printf-style call,
+   * where the resulting string ends up being executed as code.
+   */
+  class TaintedFormatStringAsSink extends Sink {
+    Concepts::CodeExecution s; // the code execution that the call's result reaches
+    TaintedFormat::PrintfStyleCall call; // the printf-style call this node is passed to
+
+    TaintedFormatStringAsSink() {
+      call = getANodeExecutedAsCode(s) and
+      this = [call.getFormatArgument(_), call.getFormatString()]
+    }
+
+    override DataFlow::Node getCodeSink() { result = s }
+
+    override string getSinkType() { result = "string format" }
+  }
+}
--- a/ruby/ql/lib/codeql/ruby/security/UnsafeCodeConstructionQuery.qll
+++ b/ruby/ql/lib/codeql/ruby/security/UnsafeCodeConstructionQuery.qll
@@ -0,0 +1,33 @@
+/**
+ * Provides a taint-tracking configuration for reasoning about code
+ * constructed from library input vulnerabilities.
+ *
+ * Note, for performance reasons: only import this file if `Configuration` is needed,
+ * otherwise `UnsafeCodeConstructionCustomizations` should be imported instead.
+ */
+
+import codeql.ruby.DataFlow
+import UnsafeCodeConstructionCustomizations::UnsafeCodeConstruction
+private import codeql.ruby.TaintTracking
+private import codeql.ruby.dataflow.BarrierGuards
+
+/**
+ * A taint-tracking configuration for detecting code constructed from library input vulnerabilities.
+ */
+class Configuration extends TaintTracking::Configuration {
+  Configuration() { this = "UnsafeCodeConstruction" }
+
+  override predicate isSource(DataFlow::Node source) { source instanceof Source }
+
+  override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
+
+  override predicate isSanitizer(DataFlow::Node node) {
+    node instanceof StringConstCompareBarrier or
+    node instanceof StringConstArrayInclusionCallBarrier
+  }
+
+  // override to require the path doesn't have unmatched return steps
+  override DataFlow::FlowFeature getAFeature() {
+    result instanceof DataFlow::FeatureHasSourceCallContext
+  }
+}
--- a/ruby/ql/src/queries/security/cwe-094/UnsafeCodeConstruction.qhelp
+++ b/ruby/ql/src/queries/security/cwe-094/UnsafeCodeConstruction.qhelp
@@ -0,0 +1,55 @@
+<!DOCTYPE qhelp PUBLIC
+  "-//Semmle//qhelp//EN"
+  "qhelp.dtd">
+<qhelp>
+
+<overview>
+<p>
+When a library function dynamically constructs code in a potentially unsafe way,
+it is important to document for clients of the library that the function should only be
+used with trusted inputs.
+
+If the function is not documented as being potentially unsafe, then a client may
+incorrectly use inputs containing unsafe code fragments, and thereby leave the
+client vulnerable to code-injection attacks.
+</p>
+
+</overview>
+
+<recommendation>
+<p>
+Properly document library functions that construct code from unsanitized 
+inputs, or avoid constructing code in the first place.
+</p>
+</recommendation>
+
+<example>
+<p>
+The following example shows two methods implemented using <code>eval</code>: a simple
+deserialization routine and a getter method.
+If untrusted inputs are used with these methods,
+then an attacker might be able to execute arbitrary code on the system. 
+</p>
+
+<sample src="examples/UnsafeCodeConstruction.rb" />
+
+<p>
+To avoid this problem, either properly document that the function is potentially
+unsafe, or use an alternative solution that does not allow arbitrary code to be
+executed, such as <code>JSON.parse</code> or another library, as in the examples below.
+</p>
+
+<sample src="examples/UnsafeCodeConstructionSafe.rb" />
+
+</example>
+
+<references>
+<li>
+OWASP:
+<a href="https://www.owasp.org/index.php/Code_Injection">Code Injection</a>.
+</li>
+<li>
+Wikipedia: <a href="https://en.wikipedia.org/wiki/Code_injection">Code Injection</a>.
+</li>
+</references>
+</qhelp>
--- a/ruby/ql/src/queries/security/cwe-094/UnsafeCodeConstruction.ql
+++ b/ruby/ql/src/queries/security/cwe-094/UnsafeCodeConstruction.ql
@@ -0,0 +1,23 @@
+/**
+ * @name Unsafe code constructed from library input
+ * @description Using externally controlled strings to construct code may allow a malicious
+ *              user to execute arbitrary code.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 6.1
+ * @precision medium
+ * @id rb/unsafe-code-construction
+ * @tags security
+ *       external/cwe/cwe-094
+ *       external/cwe/cwe-079
+ *       external/cwe/cwe-116
+ */
+
+import codeql.ruby.security.UnsafeCodeConstructionQuery
+import DataFlow::PathGraph
+
+from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink, Sink sinkNode
+where cfg.hasFlowPath(source, sink) and sinkNode = sink.getNode()
+select sink.getNode(), source, sink,
+  "This " + sinkNode.getSinkType() + " which depends on $@ is later $@.", source.getNode(),
+  "library input", sinkNode.getCodeSink(), "interpreted as code"
--- a/ruby/ql/src/queries/security/cwe-094/examples/UnsafeCodeConstruction.rb
+++ b/ruby/ql/src/queries/security/cwe-094/examples/UnsafeCodeConstruction.rb
@@ -0,0 +1,10 @@
+module MyLib
+    def unsafeDeserialize(value)
+        eval("foo = #{value}")
+        foo
+    end
+
+    def unsafeGetter(obj, path)
+        eval("obj.#{path}")
+    end
+end
--- a/ruby/ql/src/queries/security/cwe-094/examples/UnsafeCodeConstructionSafe.rb
+++ b/ruby/ql/src/queries/security/cwe-094/examples/UnsafeCodeConstructionSafe.rb
@@ -0,0 +1,11 @@
+require 'json'
+
+module MyLib
+    def safeDeserialize(value)
+        JSON.parse(value) # parses `value` as JSON data; it is never evaluated as Ruby code
+    end
+
+    def safeGetter(obj, path)
+        obj.dig(*path.split(".")) # looks up each "."-separated key with `dig`; no code is built
+    end
+end
--- a/ruby/ql/test/query-tests/security/cwe-094/UnsafeCodeConstruction/UnsafeCodeConstruction.expected
+++ b/ruby/ql/test/query-tests/security/cwe-094/UnsafeCodeConstruction/UnsafeCodeConstruction.expected
@@ -0,0 +1,16 @@
+edges
+| impl/unsafeCode.rb:2:12:2:17 | target :  | impl/unsafeCode.rb:3:17:3:25 | #{...} |
+| impl/unsafeCode.rb:7:12:7:12 | x :  | impl/unsafeCode.rb:8:30:8:30 | x |
+| impl/unsafeCode.rb:12:12:12:12 | x :  | impl/unsafeCode.rb:13:33:13:33 | x |
+nodes
+| impl/unsafeCode.rb:2:12:2:17 | target :  | semmle.label | target :  |
+| impl/unsafeCode.rb:3:17:3:25 | #{...} | semmle.label | #{...} |
+| impl/unsafeCode.rb:7:12:7:12 | x :  | semmle.label | x :  |
+| impl/unsafeCode.rb:8:30:8:30 | x | semmle.label | x |
+| impl/unsafeCode.rb:12:12:12:12 | x :  | semmle.label | x :  |
+| impl/unsafeCode.rb:13:33:13:33 | x | semmle.label | x |
+subpaths
+#select
+| impl/unsafeCode.rb:3:17:3:25 | #{...} | impl/unsafeCode.rb:2:12:2:17 | target :  | impl/unsafeCode.rb:3:17:3:25 | #{...} | This string interpolation which depends on $@ is later $@. | impl/unsafeCode.rb:2:12:2:17 | target | library input | impl/unsafeCode.rb:3:5:3:27 | call to eval | interpreted as code |
+| impl/unsafeCode.rb:8:30:8:30 | x | impl/unsafeCode.rb:7:12:7:12 | x :  | impl/unsafeCode.rb:8:30:8:30 | x | This string format which depends on $@ is later $@. | impl/unsafeCode.rb:7:12:7:12 | x | library input | impl/unsafeCode.rb:8:5:8:32 | call to eval | interpreted as code |
+| impl/unsafeCode.rb:13:33:13:33 | x | impl/unsafeCode.rb:12:12:12:12 | x :  | impl/unsafeCode.rb:13:33:13:33 | x | This string format which depends on $@ is later $@. | impl/unsafeCode.rb:12:12:12:12 | x | library input | impl/unsafeCode.rb:13:5:13:35 | call to eval | interpreted as code |
--- a/ruby/ql/test/query-tests/security/cwe-094/UnsafeCodeConstruction/UnsafeCodeConstruction.qlref
+++ b/ruby/ql/test/query-tests/security/cwe-094/UnsafeCodeConstruction/UnsafeCodeConstruction.qlref
@@ -0,0 +1 @@
+queries/security/cwe-094/UnsafeCodeConstruction.ql
--- a/ruby/ql/test/query-tests/security/cwe-094/UnsafeCodeConstruction/impl/unsafeCode.rb
+++ b/ruby/ql/test/query-tests/security/cwe-094/UnsafeCodeConstruction/impl/unsafeCode.rb
@@ -0,0 +1,19 @@
+class Foobar
+  def foo1(target)
+    eval("foo = #{target}") # NOT OK
+  end
+
+  # Kernel#sprintf: the parameter is formatted into the evaluated string
+  def foo2(x) 
+    eval(sprintf("foo = %s", x)) # NOT OK
+  end
+
+  # String#% with a named reference: the parameter is formatted into the evaluated string
+  def foo3(x)
+    eval("foo = %{foo}" % {foo: x}) # NOT OK
+  end   
+
+  def indirect_eval(x)
+    eval(x) # OK - the parameter is evaluated directly; no code string is constructed.
+  end
+end
--- a/ruby/ql/test/query-tests/security/cwe-094/UnsafeCodeConstruction/unsafe-code.gemspec
+++ b/ruby/ql/test/query-tests/security/cwe-094/UnsafeCodeConstruction/unsafe-code.gemspec
@@ -0,0 +1,5 @@
+Gem::Specification.new do |s|
+    s.name = 'unsafe-code'
+    s.require_path = "impl"
+  end
+
				`@@ -0,0 +1 @@`
				`queries/security/cwe-094/UnsafeCodeConstruction.ql`