Add SystemCommandExecution concept

A SystemCommandExecution is a method call or builtin that executes a system command, either directly or via a subshell.
2026-04-26 17:25:19 +02:00 · 2021-08-25 15:26:29 +01:00
parent 1fd91ab9bd
commit a8f0bce1d1
6 changed files with 337 additions and 0 deletions
--- a/ql/lib/codeql/ruby/Concepts.qll
+++ b/ql/lib/codeql/ruby/Concepts.qll
@@ -9,6 +9,7 @@ private import codeql.ruby.CFG
 private import codeql.ruby.DataFlow
 private import codeql.ruby.Frameworks
 private import codeql.ruby.dataflow.RemoteFlowSources
+private import codeql.ruby.ApiGraphs

 /**
 * A data-flow node that executes SQL statements.
@@ -312,3 +313,36 @@ module HTTP {
    }
  }
 }
+
+/**
+ * A data flow node that executes an operating system command,
+ * for instance by spawning a new process.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `SystemCommandExecution::Range` instead.
+ */
+class SystemCommandExecution extends DataFlow::Node {
+  // The `Range` value backing this node; both member predicates below delegate to it.
+  SystemCommandExecution::Range range;
+
+  // This class contains exactly the nodes that are some `SystemCommandExecution::Range`.
+  SystemCommandExecution() { this = range }
+
+  /** Holds if a shell interprets `arg`. */
+  predicate isShellInterpreted(DataFlow::Node arg) { range.isShellInterpreted(arg) }
+
+  /** Gets an argument to this execution that specifies the command or an argument to it. */
+  DataFlow::Node getAnArgument() { result = range.getAnArgument() }
+}
+
+/** Provides a class for modeling new APIs that execute operating system commands. */
+module SystemCommandExecution {
+  /**
+   * A data flow node that executes an operating system command, for instance by spawning a new
+   * process.
+   *
+   * Extend this class to model new APIs. If you want to refine existing API models,
+   * extend `SystemCommandExecution` instead.
+   */
+  abstract class Range extends DataFlow::Node {
+    /** Gets an argument to this execution that specifies the command or an argument to it. */
+    abstract DataFlow::Node getAnArgument();
+
+    /**
+     * Holds if a shell interprets `arg`.
+     *
+     * The default implementation holds for no argument; subclasses override it
+     * for APIs that pass arguments through a shell.
+     */
+    predicate isShellInterpreted(DataFlow::Node arg) { none() }
+  }
+}
--- a/ql/lib/codeql/ruby/Frameworks.qll
+++ b/ql/lib/codeql/ruby/Frameworks.qll
@@ -5,3 +5,4 @@
 private import codeql.ruby.frameworks.ActionController
 private import codeql.ruby.frameworks.ActiveRecord
 private import codeql.ruby.frameworks.ActionView
+private import codeql.ruby.frameworks.StandardLibrary
--- a/ql/lib/codeql/ruby/frameworks/StandardLibrary.qll
+++ b/ql/lib/codeql/ruby/frameworks/StandardLibrary.qll
@@ -0,0 +1,169 @@
+private import codeql.ruby.AST
+private import codeql.ruby.Concepts
+private import codeql.ruby.DataFlow
+private import codeql.ruby.ApiGraphs
+private import codeql.ruby.dataflow.internal.DataFlowDispatch
+private import codeql.ruby.dataflow.internal.DataFlowImplCommon
+
+/**
+ * A system command executed via subshell literal syntax.
+ * E.g.
+ * ```ruby
+ * `cat foo.txt`
+ * %x(cat foo.txt)
+ * %x[cat foo.txt]
+ * %x{cat foo.txt}
+ * %x/cat foo.txt/
+ * ```
+ */
+class SubshellLiteralExecution extends SystemCommandExecution::Range {
+  // The subshell literal expression that this data-flow node corresponds to.
+  SubshellLiteral literal;
+
+  SubshellLiteralExecution() { this.asExpr().getExpr() = literal }
+
+  /** Gets a data-flow node for any component of the subshell literal. */
+  override DataFlow::Node getAnArgument() { result.asExpr().getExpr() = literal.getComponent(_) }
+
+  /** Holds if `arg` is any argument of this execution: every component is treated as shell-interpreted. */
+  override predicate isShellInterpreted(DataFlow::Node arg) { arg = getAnArgument() }
+}
+
+/**
+ * A system command executed via the `Kernel.system` method.
+ * `Kernel.system` accepts three argument forms:
+ * - A single string. If it contains no shell meta characters, keywords or builtins, it is executed directly in a subprocess.
+ *   Otherwise, it is executed in a subshell.
+ *   ```ruby
+ *   system("cat foo.txt | tail")
+ *   ```
+ * - A command and one or more arguments.
+ *   The command is executed in a subprocess.
+ *   ```ruby
+ *   system("cat", "foo.txt")
+ *   ```
+ * - An array containing the command name and argv[0], followed by zero or more arguments.
+ *   The command is executed in a subprocess.
+ *   ```ruby
+ *   system(["cat", "cat"], "foo.txt")
+ *   ```
+ * In addition, `Kernel.system` accepts an optional environment hash as the first argument and an optional options hash as the last argument.
+ * We don't yet distinguish between these arguments and the command arguments.
+ * ```ruby
+ * system({"FOO" => "BAR"}, "cat foo.txt | tail", {unsetenv_others: true})
+ * ```
+ * Ruby documentation: https://docs.ruby-lang.org/en/3.0.0/Kernel.html#method-i-system
+ */
+class KernelSystemCall extends SystemCommandExecution::Range {
+  // The `system` method call expression that this data-flow node corresponds to.
+  MethodCall methodCall;
+
+  KernelSystemCall() {
+    methodCall.getMethodName() = "system" and
+    this.asExpr().getExpr() = methodCall and
+    // `Kernel.system` can be reached via `Kernel.system` or just `system`
+    // (if there's no other method by the same name in scope).
+    (
+      this = API::getTopLevelMember("Kernel").getAMethodCall("system")
+      or
+      // we assume that if there's no obvious target for this method call, then it must refer to Kernel.system.
+      not exists(DataFlowCallable method, DataFlowCall call |
+        viableCallable(call) = method and call.getExpr() = methodCall
+      )
+    )
+  }
+
+  /** Gets a data-flow node for any argument of the call (env and options hashes are not excluded). */
+  override DataFlow::Node getAnArgument() { result.asExpr().getExpr() = methodCall.getAnArgument() }
+
+  /** Holds if `arg` is the sole argument of the call, in which case a shell may interpret it. */
+  override predicate isShellInterpreted(DataFlow::Node arg) {
+    // Kernel.system invokes a subshell if you provide a single string as argument
+    // NOTE(review): an env hash or options argument presumably counts towards
+    // `getNumberOfArguments()`, so e.g. `system({"FOO" => "BAR"}, "echo foo")` would
+    // not be matched here - confirm.
+    methodCall.getNumberOfArguments() = 1 and arg.asExpr().getExpr() = methodCall.getAnArgument()
+  }
+}
+
+/**
+ * A system command executed via the `Kernel.exec` method.
+ * `Kernel.exec` takes the same argument forms as `Kernel.system`. See `KernelSystemCall` for details.
+ * Ruby documentation: https://docs.ruby-lang.org/en/3.0.0/Kernel.html#method-i-exec
+ */
+class KernelExecCall extends SystemCommandExecution::Range {
+  // The `exec` method call expression that this data-flow node corresponds to.
+  MethodCall methodCall;
+
+  KernelExecCall() {
+    methodCall.getMethodName() = "exec" and
+    this.asExpr().getExpr() = methodCall and
+    // `Kernel.exec` can be reached via `Kernel.exec`, `Process.exec` or just `exec`
+    // (if there's no other method by the same name in scope).
+    (
+      this = API::getTopLevelMember("Kernel").getAMethodCall("exec")
+      or
+      this = API::getTopLevelMember("Process").getAMethodCall("exec")
+      or
+      // we assume that if there's no obvious target for this method call, then it must refer to Kernel.exec.
+      not exists(DataFlowCallable method, DataFlowCall call |
+        viableCallable(call) = method and call.getExpr() = methodCall
+      )
+    )
+  }
+
+  /** Gets a data-flow node for any argument of the call (env and options hashes are not excluded). */
+  override DataFlow::Node getAnArgument() { result.asExpr().getExpr() = methodCall.getAnArgument() }
+
+  /** Holds if `arg` is the sole argument of the call, in which case a shell may interpret it. */
+  override predicate isShellInterpreted(DataFlow::Node arg) {
+    // Kernel.exec invokes a subshell if you provide a single string as argument
+    // NOTE(review): an env hash or options argument presumably counts towards
+    // `getNumberOfArguments()`, so such calls would not be matched here - confirm.
+    methodCall.getNumberOfArguments() = 1 and arg.asExpr().getExpr() = methodCall.getAnArgument()
+  }
+}
+
+/**
+ * A system command executed via the `Kernel.spawn` method.
+ * `Kernel.spawn` takes the same argument forms as `Kernel.system`. See `KernelSystemCall` for details.
+ * Ruby documentation: https://docs.ruby-lang.org/en/3.0.0/Kernel.html#method-i-spawn
+ * TODO: document and handle the env and option arguments.
+ * ```
+ * spawn([env,] command... [,options]) → pid
+ * ```
+ */
+class KernelSpawnCall extends SystemCommandExecution::Range {
+  // The `spawn` method call expression that this data-flow node corresponds to.
+  MethodCall methodCall;
+
+  KernelSpawnCall() {
+    methodCall.getMethodName() = "spawn" and
+    this.asExpr().getExpr() = methodCall and
+    // `Kernel.spawn` can be reached via `Kernel.spawn`, `Process.spawn` or just `spawn`
+    // (if there's no other method by the same name in scope).
+    (
+      this = API::getTopLevelMember("Kernel").getAMethodCall("spawn")
+      or
+      this = API::getTopLevelMember("Process").getAMethodCall("spawn")
+      or
+      // we assume that if there's no obvious target for this method call, then it must refer to Kernel.spawn.
+      not exists(DataFlowCallable method, DataFlowCall call |
+        viableCallable(call) = method and call.getExpr() = methodCall
+      )
+    )
+  }
+
+  /** Gets a data-flow node for any argument of the call (env and options hashes are not excluded). */
+  override DataFlow::Node getAnArgument() { result.asExpr().getExpr() = methodCall.getAnArgument() }
+
+  /** Holds if `arg` is the sole argument of the call, in which case a shell may interpret it. */
+  override predicate isShellInterpreted(DataFlow::Node arg) {
+    // Kernel.spawn invokes a subshell if you provide a single string as argument
+    // NOTE(review): an env hash or options argument presumably counts towards
+    // `getNumberOfArguments()`, so such calls would not be matched here - confirm.
+    methodCall.getNumberOfArguments() = 1 and arg.asExpr().getExpr() = methodCall.getAnArgument()
+  }
+}
+
+/**
+ * A system command executed via one of the `Open3` methods `popen3`, `popen2`, `popen2e`,
+ * `capture3`, `capture2`, `capture2e`, `pipeline_rw`, `pipeline_r`, `pipeline_w`,
+ * `pipeline_start` or `pipeline`.
+ *
+ * The non-pipeline methods run a single command:
+ * ```ruby
+ * Open3.popen3("cat foo.txt | tail")
+ * ```
+ * The `pipeline*` methods take one command per argument, so a call may carry
+ * several command strings, each of which is handed to a shell:
+ * ```ruby
+ * Open3.pipeline("sort foo.txt", "uniq -c")
+ * ```
+ * As with the `Kernel` models in this file, env and options arguments are not yet
+ * distinguished from command arguments.
+ * Ruby documentation: https://docs.ruby-lang.org/en/3.0.0/Open3.html
+ */
+class Open3Call extends SystemCommandExecution::Range {
+  // The `Open3.<method>` call expression that this data-flow node corresponds to.
+  MethodCall methodCall;
+
+  Open3Call() {
+    this.asExpr().getExpr() = methodCall and
+    exists(string methodName |
+      methodName in [
+          "popen3", "popen2", "popen2e", "capture3", "capture2", "capture2e", "pipeline_rw",
+          "pipeline_r", "pipeline_w", "pipeline_start", "pipeline"
+        ] and
+      this = API::getTopLevelMember("Open3").getAMethodCall(methodName)
+    )
+  }
+
+  /** Holds if this is a call to one of the `Open3.pipeline*` methods. */
+  private predicate isPipelineCall() { methodCall.getMethodName().matches("pipeline%") }
+
+  /** Gets a data-flow node for any argument of the call. */
+  override DataFlow::Node getAnArgument() { result.asExpr().getExpr() = methodCall.getAnArgument() }
+
+  /**
+   * Holds if a shell interprets `arg`.
+   *
+   * This holds for the sole argument of any of the modeled methods and, for the
+   * `pipeline*` methods, additionally for every argument written as a string literal.
+   */
+  override predicate isShellInterpreted(DataFlow::Node arg) {
+    arg = this.getAnArgument() and
+    (
+      // These Open3 methods invoke a subshell if you provide a single string as argument
+      methodCall.getNumberOfArguments() = 1
+      or
+      // Each argument of a `pipeline*` call is a separate command, so the argument count
+      // says nothing about shell use; a command written as a string is passed to a shell.
+      this.isPipelineCall() and
+      arg.asExpr().getExpr() instanceof StringlikeLiteral
+    )
+  }
+}
--- a/ql/test/library-tests/frameworks/CommandExecution.rb
+++ b/ql/test/library-tests/frameworks/CommandExecution.rb
@@ -0,0 +1,65 @@
+`echo foo`
+%x(echo foo)
+%x{echo foo}
+%x[echo foo]
+%x/echo foo/
+
+system("echo foo")
+system("echo", "foo")
+system(["echo", "echo"], "foo")
+
+system({"FOO" => "BAR"}, "echo foo")
+system({"FOO" => "BAR"}, "echo", "foo")
+system({"FOO" => "BAR"}, ["echo", "echo"], "foo")
+
+system("echo foo", unsetenv_others: true)
+system("echo", "foo", unsetenv_others: true)
+system(["echo", "echo"], "foo", unsetenv_others: true)
+
+system({"FOO" => "BAR"}, "echo foo", unsetenv_others: true)
+system({"FOO" => "BAR"}, "echo", "foo", unsetenv_others: true)
+system({"FOO" => "BAR"}, ["echo", "echo"], "foo", unsetenv_others: true)
+
+exec("echo foo")
+exec("echo", "foo")
+exec(["echo", "echo"], "foo")
+
+exec({"FOO" => "BAR"}, "echo foo")
+exec({"FOO" => "BAR"}, "echo", "foo")
+exec({"FOO" => "BAR"}, ["echo", "echo"], "foo")
+
+exec("echo foo", unsetenv_others: true)
+exec("echo", "foo", unsetenv_others: true)
+exec(["echo", "echo"], "foo", unsetenv_others: true)
+
+exec({"FOO" => "BAR"}, "echo foo", unsetenv_others: true)
+exec({"FOO" => "BAR"}, "echo", "foo", unsetenv_others: true)
+exec({"FOO" => "BAR"}, ["echo", "echo"], "foo", unsetenv_others: true)
+
+spawn("echo foo")
+spawn("echo", "foo")
+spawn(["echo", "echo"], "foo")
+
+spawn({"FOO" => "BAR"}, "echo foo")
+spawn({"FOO" => "BAR"}, "echo", "foo")
+spawn({"FOO" => "BAR"}, ["echo", "echo"], "foo")
+
+spawn("echo foo", unsetenv_others: true)
+spawn("echo", "foo", unsetenv_others: true)
+spawn(["echo", "echo"], "foo", unsetenv_others: true)
+
+spawn({"FOO" => "BAR"}, "echo foo", unsetenv_others: true)
+spawn({"FOO" => "BAR"}, "echo", "foo", unsetenv_others: true)
+spawn({"FOO" => "BAR"}, ["echo", "echo"], "foo", unsetenv_others: true)
+
+Open3.popen3("echo foo")
+Open3.popen2("echo foo")
+Open3.popen2e("echo foo")
+Open3.capture3("echo foo")
+Open3.capture2("echo foo")
+Open3.capture2e("echo foo")
+Open3.pipeline_rw("echo foo")
+Open3.pipeline_r("echo foo")
+Open3.pipeline_w("echo foo")
+Open3.pipeline_start("echo foo")
+Open3.pipeline("echo foo")
--- a/ql/test/library-tests/frameworks/StandardLibrary.expected
+++ b/ql/test/library-tests/frameworks/StandardLibrary.expected
@@ -0,0 +1,57 @@
+subshellLiteralExecutions
+| CommandExecution.rb:1:1:1:10 | `echo foo` |
+| CommandExecution.rb:2:1:2:12 | `echo foo` |
+| CommandExecution.rb:3:1:3:12 | `echo foo` |
+| CommandExecution.rb:4:1:4:12 | `echo foo` |
+| CommandExecution.rb:5:1:5:12 | `echo foo` |
+kernelSystemCallExecutions
+| CommandExecution.rb:7:1:7:18 | call to system |
+| CommandExecution.rb:8:1:8:21 | call to system |
+| CommandExecution.rb:9:1:9:31 | call to system |
+| CommandExecution.rb:11:1:11:36 | call to system |
+| CommandExecution.rb:12:1:12:39 | call to system |
+| CommandExecution.rb:13:1:13:49 | call to system |
+| CommandExecution.rb:15:1:15:41 | call to system |
+| CommandExecution.rb:16:1:16:44 | call to system |
+| CommandExecution.rb:17:1:17:54 | call to system |
+| CommandExecution.rb:19:1:19:59 | call to system |
+| CommandExecution.rb:20:1:20:62 | call to system |
+| CommandExecution.rb:21:1:21:72 | call to system |
+kernelExecCallExecutions
+| CommandExecution.rb:23:1:23:16 | call to exec |
+| CommandExecution.rb:24:1:24:19 | call to exec |
+| CommandExecution.rb:25:1:25:29 | call to exec |
+| CommandExecution.rb:27:1:27:34 | call to exec |
+| CommandExecution.rb:28:1:28:37 | call to exec |
+| CommandExecution.rb:29:1:29:47 | call to exec |
+| CommandExecution.rb:31:1:31:39 | call to exec |
+| CommandExecution.rb:32:1:32:42 | call to exec |
+| CommandExecution.rb:33:1:33:52 | call to exec |
+| CommandExecution.rb:35:1:35:57 | call to exec |
+| CommandExecution.rb:36:1:36:60 | call to exec |
+| CommandExecution.rb:37:1:37:70 | call to exec |
+kernelSpawnCallExecutions
+| CommandExecution.rb:39:1:39:17 | call to spawn |
+| CommandExecution.rb:40:1:40:20 | call to spawn |
+| CommandExecution.rb:41:1:41:30 | call to spawn |
+| CommandExecution.rb:43:1:43:35 | call to spawn |
+| CommandExecution.rb:44:1:44:38 | call to spawn |
+| CommandExecution.rb:45:1:45:48 | call to spawn |
+| CommandExecution.rb:47:1:47:40 | call to spawn |
+| CommandExecution.rb:48:1:48:43 | call to spawn |
+| CommandExecution.rb:49:1:49:53 | call to spawn |
+| CommandExecution.rb:51:1:51:58 | call to spawn |
+| CommandExecution.rb:52:1:52:61 | call to spawn |
+| CommandExecution.rb:53:1:53:71 | call to spawn |
+open3CallExecutions
+| CommandExecution.rb:55:1:55:24 | call to popen3 |
+| CommandExecution.rb:56:1:56:24 | call to popen2 |
+| CommandExecution.rb:57:1:57:25 | call to popen2e |
+| CommandExecution.rb:58:1:58:26 | call to capture3 |
+| CommandExecution.rb:59:1:59:26 | call to capture2 |
+| CommandExecution.rb:60:1:60:27 | call to capture2e |
+| CommandExecution.rb:61:1:61:29 | call to pipeline_rw |
+| CommandExecution.rb:62:1:62:28 | call to pipeline_r |
+| CommandExecution.rb:63:1:63:28 | call to pipeline_w |
+| CommandExecution.rb:64:1:64:32 | call to pipeline_start |
+| CommandExecution.rb:65:1:65:26 | call to pipeline |
--- a/ql/test/library-tests/frameworks/StandardLibrary.ql
+++ b/ql/test/library-tests/frameworks/StandardLibrary.ql
@@ -0,0 +1,11 @@
+import codeql.ruby.frameworks.StandardLibrary
+
+// Each query predicate below selects every value of one class modeled in
+// `codeql.ruby.frameworks.StandardLibrary`; the `any()` body places no further restriction.
+query predicate subshellLiteralExecutions(SubshellLiteralExecution e) { any() }
+
+query predicate kernelSystemCallExecutions(KernelSystemCall c) { any() }
+
+query predicate kernelExecCallExecutions(KernelExecCall c) { any() }
+
+query predicate kernelSpawnCallExecutions(KernelSpawnCall c) { any() }
+
+query predicate open3CallExecutions(Open3Call c) { any() }