Merge pull request #12047 from erik-krogh/py-shell

Py: add unsafe-shell-command-construction
2026-02-05 09:41:10 +01:00 · 2023-03-14 07:48:38 +01:00
parent 5792b4d363 6a5d6eb5c2
commit 04f422ea5d
23 changed files with 547 additions and 30 deletions
--- a/python/ql/lib/semmle/python/Concepts.qll
+++ b/python/ql/lib/semmle/python/Concepts.qll
@@ -19,6 +19,9 @@ private import semmle.python.security.internal.EncryptionKeySizes
 * extend `SystemCommandExecution::Range` instead.
 */
 class SystemCommandExecution extends DataFlow::Node instanceof SystemCommandExecution::Range {
+  /** Holds if a shell interprets `arg`. */
+  predicate isShellInterpreted(DataFlow::Node arg) { super.isShellInterpreted(arg) }
+
  /** Gets the argument that specifies the command to be executed. */
  DataFlow::Node getCommand() { result = super.getCommand() }
 }
@@ -35,6 +38,9 @@ module SystemCommandExecution {
  abstract class Range extends DataFlow::Node {
    /** Gets the argument that specifies the command to be executed. */
    abstract DataFlow::Node getCommand();
+
+    /** Holds if a shell interprets `arg`. */
+    predicate isShellInterpreted(DataFlow::Node arg) { none() }
  }
 }

--- a/python/ql/lib/semmle/python/Frameworks.qll
+++ b/python/ql/lib/semmle/python/Frameworks.qll
@@ -51,6 +51,7 @@ private import semmle.python.frameworks.Simplejson
 private import semmle.python.frameworks.SqlAlchemy
 private import semmle.python.frameworks.Starlette
 private import semmle.python.frameworks.Stdlib
+private import semmle.python.frameworks.Setuptools
 private import semmle.python.frameworks.Toml
 private import semmle.python.frameworks.Tornado
 private import semmle.python.frameworks.Twisted
--- a/python/ql/lib/semmle/python/frameworks/Fabric.qll
+++ b/python/ql/lib/semmle/python/frameworks/Fabric.qll
@@ -43,14 +43,22 @@ private module FabricV1 {
       * - https://docs.fabfile.org/en/1.14/api/core/operations.html#fabric.operations.run
       * - https://docs.fabfile.org/en/1.14/api/core/operations.html#fabric.operations.sudo
       */
-      private class FabricApiLocalRunSudoCall extends SystemCommandExecution::Range,
-        DataFlow::CallCfgNode
-      {
+      private class FabricApiLocalRunSudoCall extends SystemCommandExecution::Range, API::CallNode {
        FabricApiLocalRunSudoCall() { this = api().getMember(["local", "run", "sudo"]).getACall() }

        override DataFlow::Node getCommand() {
          result = [this.getArg(0), this.getArgByName("command")]
        }
+
+        override predicate isShellInterpreted(DataFlow::Node arg) {
+          arg = this.getCommand() and
+          // defaults to running in a shell
+          not this.getParameter(1, "shell")
+              .getAValueReachingSink()
+              .asExpr()
+              .(ImmutableLiteral)
+              .booleanValue() = false
+        }
      }
    }
  }
@@ -163,6 +171,8 @@ private module FabricV2 {
      override DataFlow::Node getCommand() {
        result = [this.getArg(0), this.getArgByName("command")]
      }
+
+      override predicate isShellInterpreted(DataFlow::Node arg) { arg = this.getCommand() }
    }

    // -------------------------------------------------------------------------
@@ -246,6 +256,8 @@ private module FabricV2 {
        override DataFlow::Node getCommand() {
          result = [this.getArg(0), this.getArgByName("command")]
        }
+
+        override predicate isShellInterpreted(DataFlow::Node arg) { arg = this.getCommand() }
      }

      /**
--- a/python/ql/lib/semmle/python/frameworks/Invoke.qll
+++ b/python/ql/lib/semmle/python/frameworks/Invoke.qll
@@ -81,5 +81,7 @@ private module Invoke {
    override DataFlow::Node getCommand() {
      result in [this.getArg(0), this.getArgByName("command")]
    }
+
+    override predicate isShellInterpreted(DataFlow::Node arg) { arg = this.getCommand() }
  }
 }
--- a/python/ql/lib/semmle/python/frameworks/Setuptools.qll
+++ b/python/ql/lib/semmle/python/frameworks/Setuptools.qll
@@ -0,0 +1,74 @@
+/**
+ * Provides classes modeling package setup as defined by `setuptools`.
+ */
+
+private import python
+private import semmle.python.dataflow.new.DataFlow
+
+/** Provides models for the use of `setuptools` in setup scripts, and the APIs exported by the library defined using `setuptools`. */
+module Setuptools {
+  /**
+   * Gets a file that sets up a package using `setuptools` (or the deprecated `distutils`).
+   */
+  private File setupFile() {
+    // all of these might not be extracted, but the support is ready for when they are
+    result.getBaseName() = ["setup.py", "setup.cfg", "pyproject.toml"]
+  }
+
+  /**
+   * Gets a file or folder that is exported by a library.
+   */
+  private Container getALibraryExportedContainer() {
+    // a child folder of the root that has a setup.py file
+    result = setupFile().getParent().(Folder).getAFolder() and
+    // where the folder has __init__.py file
+    exists(result.(Folder).getFile("__init__.py")) and
+    // and is not a test folder
+    not result.(Folder).getBaseName() = ["test", "tests", "testing"]
+    or
+    // child of a library exported container
+    result = getALibraryExportedContainer().getAChildContainer() and
+    (
+      // either any file
+      not result instanceof Folder
+      or
+      // or a folder with an __init__.py file
+      exists(result.(Folder).getFile("__init__.py"))
+    )
+  }
+
+  /**
+   * Gets an AST node that is exported by a library.
+   */
+  private AstNode getAnExportedLibraryFeature() {
+    result.(Module).getFile() = getALibraryExportedContainer()
+    or
+    result = getAnExportedLibraryFeature().(Module).getAStmt()
+    or
+    result = getAnExportedLibraryFeature().(ClassDef).getDefinedClass().getAMethod()
+    or
+    result = getAnExportedLibraryFeature().(ClassDef).getDefinedClass().getInitMethod()
+    or
+    result = getAnExportedLibraryFeature().(FunctionDef).getDefinedFunction()
+  }
+
+  /**
+   * Gets a public function (or __init__) that is exported by a library.
+   */
+  private Function getAnExportedFunction() {
+    result = getAnExportedLibraryFeature() and
+    (
+      result.isPublic()
+      or
+      result.isInitMethod()
+    )
+  }
+
+  /**
+   * Gets a parameter to a public function that is exported by a library.
+   */
+  DataFlow::ParameterNode getALibraryInput() {
+    result.getParameter() = getAnExportedFunction().getAnArg() and
+    not result.getParameter().isSelf()
+  }
+}
--- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll
+++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll
@@ -1060,7 +1060,11 @@ private module StdlibPrivate {
  private class OsSystemCall extends SystemCommandExecution::Range, DataFlow::CallCfgNode {
    OsSystemCall() { this = os().getMember("system").getACall() }

-    override DataFlow::Node getCommand() { result = this.getArg(0) }
+    override DataFlow::Node getCommand() {
+      result in [this.getArg(0), this.getArgByName("command")]
+    }
+
+    override predicate isShellInterpreted(DataFlow::Node arg) { arg = this.getCommand() }
  }

  /**
@@ -1071,7 +1075,7 @@ private module StdlibPrivate {
   * Although deprecated since version 2.6, they still work in 2.7.
   * See https://docs.python.org/2.7/library/os.html#os.popen2
   */
-  private class OsPopenCall extends SystemCommandExecution::Range, DataFlow::CallCfgNode {
+  private class OsPopenCall extends SystemCommandExecution::Range, API::CallNode {
    string name;

    OsPopenCall() {
@@ -1085,6 +1089,8 @@ private module StdlibPrivate {
      not name = "popen" and
      result = this.getArgByName("cmd")
    }
+
+    override predicate isShellInterpreted(DataFlow::Node arg) { arg = this.getCommand() }
  }

  /**
@@ -1104,6 +1110,10 @@ private module StdlibPrivate {
    override DataFlow::Node getCommand() { result = this.getArg(0) }

    override DataFlow::Node getAPathArgument() { result = this.getCommand() }
+
+    override predicate isShellInterpreted(DataFlow::Node arg) {
+      none() // this is a safe API.
+    }
  }

  /**
@@ -1131,6 +1141,10 @@ private module StdlibPrivate {
    }

    override DataFlow::Node getAPathArgument() { result = this.getCommand() }
+
+    override predicate isShellInterpreted(DataFlow::Node arg) {
+      none() // this is a safe API.
+    }
  }

  /**
@@ -1145,6 +1159,10 @@ private module StdlibPrivate {
    override DataFlow::Node getCommand() { result in [this.getArg(0), this.getArgByName("path")] }

    override DataFlow::Node getAPathArgument() { result = this.getCommand() }
+
+    override predicate isShellInterpreted(DataFlow::Node arg) {
+      none() // this is a safe API.
+    }
  }

  /** An additional taint step for calls to `os.path.join` */
@@ -1170,7 +1188,7 @@ private module StdlibPrivate {
   * See https://docs.python.org/3.8/library/subprocess.html#subprocess.Popen
   * ref: https://docs.python.org/3/library/subprocess.html#legacy-shell-invocation-functions
   */
-  private class SubprocessPopenCall extends SystemCommandExecution::Range, DataFlow::CallCfgNode {
+  private class SubprocessPopenCall extends SystemCommandExecution::Range, API::CallNode {
    SubprocessPopenCall() {
      exists(string name |
        name in [
@@ -1180,43 +1198,33 @@ private module StdlibPrivate {
      )
    }

-    /** Gets the ControlFlowNode for the `args` argument, if any. */
-    private DataFlow::Node get_args_arg() { result in [this.getArg(0), this.getArgByName("args")] }
+    /** Gets the API-node for the `args` argument, if any. */
+    private API::Node get_args_arg() { result = this.getParameter(0, "args") }

-    /** Gets the ControlFlowNode for the `shell` argument, if any. */
-    private DataFlow::Node get_shell_arg() {
-      result in [this.getArg(8), this.getArgByName("shell")]
-    }
+    /** Gets the API-node for the `shell` argument, if any. */
+    private API::Node get_shell_arg() { result = this.getParameter(8, "shell") }

    private boolean get_shell_arg_value() {
      not exists(this.get_shell_arg()) and
      result = false
      or
-      exists(DataFlow::Node shell_arg | shell_arg = this.get_shell_arg() |
-        result = shell_arg.asCfgNode().getNode().(ImmutableLiteral).booleanValue()
-        or
-        // TODO: Track the "shell" argument to determine possible values
-        not shell_arg.asCfgNode().getNode() instanceof ImmutableLiteral and
-        (
-          result = true
-          or
-          result = false
-        )
-      )
+      result =
+        this.get_shell_arg().getAValueReachingSink().asExpr().(ImmutableLiteral).booleanValue()
+      or
+      not this.get_shell_arg().getAValueReachingSink().asExpr() instanceof ImmutableLiteral and
+      result = false // defaults to `False`
    }

-    /** Gets the ControlFlowNode for the `executable` argument, if any. */
-    private DataFlow::Node get_executable_arg() {
-      result in [this.getArg(2), this.getArgByName("executable")]
-    }
+    /** Gets the API-node for the `executable` argument, if any. */
+    private API::Node get_executable_arg() { result = this.getParameter(2, "executable") }

    override DataFlow::Node getCommand() {
      // TODO: Track arguments ("args" and "shell")
      // TODO: Handle using `args=["sh", "-c", <user-input>]`
-      result = this.get_executable_arg()
+      result = this.get_executable_arg().asSink()
      or
      exists(DataFlow::Node arg_args, boolean shell |
-        arg_args = this.get_args_arg() and
+        arg_args = this.get_args_arg().asSink() and
        shell = this.get_shell_arg_value()
      |
        // When "executable" argument is set, and "shell" argument is `False`, the
@@ -1242,6 +1250,11 @@ private module StdlibPrivate {
        )
      )
    }
+
+    override predicate isShellInterpreted(DataFlow::Node arg) {
+      arg = [this.get_executable_arg(), this.get_args_arg()].asSink() and
+      this.get_shell_arg_value() = true
+    }
  }

  // ---------------------------------------------------------------------------
@@ -1389,6 +1402,8 @@ private module StdlibPrivate {
    }

    override DataFlow::Node getCommand() { result in [this.getArg(0), this.getArgByName("cmd")] }
+
+    override predicate isShellInterpreted(DataFlow::Node arg) { arg = this.getCommand() }
  }

  // ---------------------------------------------------------------------------
@@ -1405,6 +1420,8 @@ private module StdlibPrivate {
    PlatformPopenCall() { this = platform().getMember("popen").getACall() }

    override DataFlow::Node getCommand() { result in [this.getArg(0), this.getArgByName("cmd")] }
+
+    override predicate isShellInterpreted(DataFlow::Node arg) { arg = this.getCommand() }
  }

  // ---------------------------------------------------------------------------
--- a/python/ql/lib/semmle/python/security/dataflow/UnsafeShellCommandConstructionCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/UnsafeShellCommandConstructionCustomizations.qll
@@ -0,0 +1,159 @@
+/**
+ * Provides default sources, sinks and sanitizers for reasoning about
+ * shell command constructed from library input vulnerabilities, as
+ * well as extension points for adding your own.
+ */
+
+private import python
+private import semmle.python.dataflow.new.DataFlow
+private import semmle.python.dataflow.new.TaintTracking
+private import CommandInjectionCustomizations::CommandInjection as CommandInjection
+private import semmle.python.Concepts as Concepts
+
+/**
+ * Module containing sources, sinks, and sanitizers for shell command constructed from library input.
+ */
+module UnsafeShellCommandConstruction {
+  /** A source for shell command constructed from library input vulnerabilities. */
+  abstract class Source extends DataFlow::Node { }
+
+  private import semmle.python.frameworks.Setuptools
+
+  /** An input parameter to a gem seen as a source. */
+  private class LibraryInputAsSource extends Source instanceof DataFlow::ParameterNode {
+    LibraryInputAsSource() {
+      this = Setuptools::getALibraryInput() and
+      not this.getParameter().getName().matches(["cmd%", "command%", "%_command", "%_cmd"])
+    }
+  }
+
+  /** A sink for shell command constructed from library input vulnerabilities. */
+  abstract class Sink extends DataFlow::Node {
+    Sink() { not this.asExpr() instanceof StrConst } // filter out string constants, makes testing easier
+
+    /** Gets a description of how the string in this sink was constructed. */
+    abstract string describe();
+
+    /** Gets the dataflow node where the string is constructed. */
+    DataFlow::Node getStringConstruction() { result = this }
+
+    /** Gets the dataflow node that executed the string as a shell command. */
+    abstract DataFlow::Node getCommandExecution();
+  }
+
+  /** Holds if the string constructed at `source` is executed at `shellExec` */
+  predicate isUsedAsShellCommand(DataFlow::Node source, Concepts::SystemCommandExecution shellExec) {
+    source = backtrackShellExec(TypeTracker::TypeBackTracker::end(), shellExec)
+  }
+
+  import semmle.python.dataflow.new.TypeTracker as TypeTracker
+
+  private DataFlow::LocalSourceNode backtrackShellExec(
+    TypeTracker::TypeBackTracker t, Concepts::SystemCommandExecution shellExec
+  ) {
+    t.start() and
+    result = any(DataFlow::Node n | shellExec.isShellInterpreted(n)).getALocalSource()
+    or
+    exists(TypeTracker::TypeBackTracker t2 |
+      result = backtrackShellExec(t2, shellExec).backtrack(t2, t)
+    )
+  }
+
+  /**
+   * A string constructed from a string-literal (e.g. `f'foo {sink}'`),
+   * where the resulting string ends up being executed as a shell command.
+   */
+  class StringInterpolationAsSink extends Sink {
+    Concepts::SystemCommandExecution s;
+    Fstring fstring;
+
+    StringInterpolationAsSink() {
+      isUsedAsShellCommand(DataFlow::exprNode(fstring), s) and
+      this.asExpr() = fstring.getASubExpression()
+    }
+
+    override string describe() { result = "f-string" }
+
+    override DataFlow::Node getCommandExecution() { result = s }
+
+    override DataFlow::Node getStringConstruction() { result.asExpr() = fstring }
+  }
+
+  /**
+   * A component of a string-concatenation (e.g. `"foo " + sink`),
+   * where the resulting string ends up being executed as a shell command.
+   */
+  class StringConcatAsSink extends Sink {
+    Concepts::SystemCommandExecution s;
+    BinaryExpr add;
+
+    StringConcatAsSink() {
+      add.getOp() instanceof Add and
+      isUsedAsShellCommand(any(DataFlow::Node n | n.asExpr() = add), s) and
+      this.asExpr() = add.getASubExpression()
+    }
+
+    override DataFlow::Node getCommandExecution() { result = s }
+
+    override string describe() { result = "string concatenation" }
+
+    override DataFlow::Node getStringConstruction() { result.asExpr() = add }
+  }
+
+  /**
+   * A string constructed using a `" ".join(...)` call, where the resulting string ends up being executed as a shell command.
+   */
+  class ArrayJoin extends Sink {
+    Concepts::SystemCommandExecution s;
+    DataFlow::MethodCallNode call;
+
+    ArrayJoin() {
+      call.getMethodName() = "join" and
+      unique( | | call.getArg(_)).asExpr().(Str).getText() = " " and
+      isUsedAsShellCommand(call, s) and
+      (
+        this = call.getArg(0) and
+        not call.getArg(0).asExpr() instanceof List
+        or
+        this.asExpr() = call.getArg(0).asExpr().(List).getASubExpression()
+      )
+    }
+
+    override string describe() { result = "array" }
+
+    override DataFlow::Node getCommandExecution() { result = s }
+
+    override DataFlow::Node getStringConstruction() { result = call }
+  }
+
+  /**
+   * A string constructed from a format call,
+   * where the resulting string ends up being executed as a shell command.
+   * Either a call to `.format(..)` or a string-interpolation with a `%` operator.
+   */
+  class TaintedFormatStringAsSink extends Sink {
+    Concepts::SystemCommandExecution s;
+    DataFlow::Node formatCall;
+
+    TaintedFormatStringAsSink() {
+      (
+        formatCall.asExpr().(BinaryExpr).getOp() instanceof Mod and
+        this.asExpr() = formatCall.asExpr().(BinaryExpr).getASubExpression()
+        or
+        formatCall.(DataFlow::MethodCallNode).getMethodName() = "format" and
+        this =
+          [
+            formatCall.(DataFlow::MethodCallNode).getArg(_),
+            formatCall.(DataFlow::MethodCallNode).getObject()
+          ]
+      ) and
+      isUsedAsShellCommand(formatCall, s)
+    }
+
+    override string describe() { result = "formatted string" }
+
+    override DataFlow::Node getCommandExecution() { result = s }
+
+    override DataFlow::Node getStringConstruction() { result = formatCall }
+  }
+}
--- a/python/ql/lib/semmle/python/security/dataflow/UnsafeShellCommandConstructionQuery.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/UnsafeShellCommandConstructionQuery.qll
@@ -0,0 +1,34 @@
+/**
+ * Provides a taint tracking configuration for reasoning about shell command
+ * constructed from library input vulnerabilities
+ *
+ * Note, for performance reasons: only import this file if `Configuration` is needed,
+ * otherwise `UnsafeShellCommandConstructionCustomizations` should be imported instead.
+ */
+
+import python
+import semmle.python.dataflow.new.DataFlow
+import UnsafeShellCommandConstructionCustomizations::UnsafeShellCommandConstruction
+private import semmle.python.dataflow.new.TaintTracking
+private import CommandInjectionCustomizations::CommandInjection as CommandInjection
+private import semmle.python.dataflow.new.BarrierGuards
+
+/**
+ * A taint-tracking configuration for detecting shell command constructed from library input vulnerabilities.
+ */
+class Configuration extends TaintTracking::Configuration {
+  Configuration() { this = "UnsafeShellCommandConstruction" }
+
+  override predicate isSource(DataFlow::Node source) { source instanceof Source }
+
+  override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
+
+  override predicate isSanitizer(DataFlow::Node node) {
+    node instanceof CommandInjection::Sanitizer // using all sanitizers from `rb/command-injection`
+  }
+
+  // override to require the path doesn't have unmatched return steps
+  override DataFlow::FlowFeature getAFeature() {
+    result instanceof DataFlow::FeatureHasSourceCallContext
+  }
+}
--- a/python/ql/src/Security/CWE-078/UnsafeShellCommandConstruction.qhelp
+++ b/python/ql/src/Security/CWE-078/UnsafeShellCommandConstruction.qhelp
@@ -0,0 +1,73 @@
+<!DOCTYPE qhelp PUBLIC
+"-//Semmle//qhelp//EN"
+"qhelp.dtd">
+<qhelp>
+<overview>
+	<p>
+		Dynamically constructing a shell command with inputs from library 
+		functions may inadvertently change the meaning of the shell command.
+		
+		Clients using the exported function may use inputs containing
+		characters that the shell interprets in a special way, for instance
+		quotes and spaces.
+
+		This can result in the shell command misbehaving, or even
+		allowing a malicious user to execute arbitrary commands on the system.
+	</p>
+
+
+</overview>
+<recommendation>
+
+	<p>
+		If possible, provide the dynamic arguments to the shell as an array 
+		to APIs such as <code>subprocess.run</code> to avoid interpretation by the shell.
+	</p>
+
+	<p>
+		Alternatively, if the shell command must be constructed
+		dynamically, then add code to ensure that special characters 
+		do not alter the shell command unexpectedly.
+	</p>
+
+</recommendation>
+<example>
+
+	<p>
+		The following example shows a dynamically constructed shell
+		command that downloads a file from a remote URL.
+	</p>
+
+	<sample src="examples/unsafe-shell-command-construction.py" />
+
+	<p>
+		The shell command will, however, fail to work as intended if the
+		input contains spaces or other special characters interpreted in a 
+		special way by the shell. 
+	</p>
+
+	<p>
+		Even worse, a client might pass in user-controlled
+		data, not knowing that the input is interpreted as a shell command. 
+		This could allow a malicious user to provide the input <code>http://example.org; cat /etc/passwd</code>
+		in order to execute the command <code>cat /etc/passwd</code>.
+	</p>
+
+	<p>
+		To avoid such potentially catastrophic behaviors, provide the
+		input from library functions as an argument that does not
+		get interpreted by a shell:
+	</p>
+
+	<sample src="examples/unsafe-shell-command-construction_fixed.py" />
+
+</example>
+<references>
+
+	<li>
+		OWASP:
+		<a href="https://www.owasp.org/index.php/Command_Injection">Command Injection</a>.
+	</li>
+
+</references>
+</qhelp>
--- a/python/ql/src/Security/CWE-078/UnsafeShellCommandConstruction.ql
+++ b/python/ql/src/Security/CWE-078/UnsafeShellCommandConstruction.ql
@@ -0,0 +1,27 @@
+/**
+ * @name Unsafe shell command constructed from library input
+ * @description Using externally controlled strings in a command line may allow a malicious
+ *              user to change the meaning of the command.
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 6.3
+ * @precision medium
+ * @id py/shell-command-constructed-from-input
+ * @tags correctness
+ *       security
+ *       external/cwe/cwe-078
+ *       external/cwe/cwe-088
+ *       external/cwe/cwe-073
+ */
+
+import python
+import semmle.python.security.dataflow.UnsafeShellCommandConstructionQuery
+import DataFlow::PathGraph
+
+from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, Sink sinkNode
+where
+  config.hasFlowPath(source, sink) and
+  sinkNode = sink.getNode()
+select sinkNode.getStringConstruction(), source, sink,
+  "This " + sinkNode.describe() + " which depends on $@ is later used in a $@.", source.getNode(),
+  "library input", sinkNode.getCommandExecution(), "shell command"
--- a/python/ql/src/Security/CWE-078/examples/unsafe-shell-command-construction.py
+++ b/python/ql/src/Security/CWE-078/examples/unsafe-shell-command-construction.py
@@ -0,0 +1,4 @@
+import os
+
+def download(path): 
+    os.system("wget " + path) # NOT OK
--- a/python/ql/src/Security/CWE-078/examples/unsafe-shell-command-construction_fixed.py
+++ b/python/ql/src/Security/CWE-078/examples/unsafe-shell-command-construction_fixed.py
@@ -0,0 +1,4 @@
+import subprocess
+
+def download(path): 
+    subprocess.run(["wget", path]) # OK
--- a/python/ql/src/change-notes/2023-02-03-unsafe-shell-command-construction.md
+++ b/python/ql/src/change-notes/2023-02-03-unsafe-shell-command-construction.md
@@ -0,0 +1,4 @@
+---
+category: newQuery
+---
+* Added a new query, `py/shell-command-constructed-from-input`, to detect libraries that unsafely construct shell commands from their inputs.
--- a/python/ql/test/library-tests/frameworks/stdlib/SystemCommandExecution.py
+++ b/python/ql/test/library-tests/frameworks/stdlib/SystemCommandExecution.py
@@ -140,7 +140,7 @@ subprocess.Popen(args)  # $getCommand=args
 args = "<progname>"
 use_shell = False
 exe = "executable"
-subprocess.Popen(args, shell=use_shell, executable=exe)  # $getCommand=exe SPURIOUS: getCommand=args
+subprocess.Popen(args, shell=use_shell, executable=exe)  # $getCommand=exe


 ################################################################################
--- a/python/ql/test/query-tests/Security/CWE-078-UnsafeShellCommandConstruction/DataflowQueryTest.expected
+++ b/python/ql/test/query-tests/Security/CWE-078-UnsafeShellCommandConstruction/DataflowQueryTest.expected
@@ -0,0 +1,2 @@
+missingAnnotationOnSink
+failures
--- a/python/ql/test/query-tests/Security/CWE-078-UnsafeShellCommandConstruction/DataflowQueryTest.ql
+++ b/python/ql/test/query-tests/Security/CWE-078-UnsafeShellCommandConstruction/DataflowQueryTest.ql
@@ -0,0 +1,3 @@
+import python
+import experimental.dataflow.TestUtil.DataflowQueryTest
+import semmle.python.security.dataflow.UnsafeShellCommandConstructionQuery
--- a/python/ql/test/query-tests/Security/CWE-078-UnsafeShellCommandConstruction/UnsafeShellCommandConstruction.expected
+++ b/python/ql/test/query-tests/Security/CWE-078-UnsafeShellCommandConstruction/UnsafeShellCommandConstruction.expected
@@ -0,0 +1,40 @@
+edges
+| src/unsafe_shell_test.py:4:22:4:25 | ControlFlowNode for name | src/unsafe_shell_test.py:5:25:5:28 | ControlFlowNode for name |
+| src/unsafe_shell_test.py:4:22:4:25 | ControlFlowNode for name | src/unsafe_shell_test.py:8:23:8:26 | ControlFlowNode for name |
+| src/unsafe_shell_test.py:4:22:4:25 | ControlFlowNode for name | src/unsafe_shell_test.py:11:25:11:38 | ControlFlowNode for Attribute() |
+| src/unsafe_shell_test.py:4:22:4:25 | ControlFlowNode for name | src/unsafe_shell_test.py:14:34:14:39 | ControlFlowNode for List |
+| src/unsafe_shell_test.py:4:22:4:25 | ControlFlowNode for name | src/unsafe_shell_test.py:17:32:17:35 | ControlFlowNode for name |
+| src/unsafe_shell_test.py:4:22:4:25 | ControlFlowNode for name | src/unsafe_shell_test.py:20:27:20:30 | ControlFlowNode for name |
+| src/unsafe_shell_test.py:14:34:14:39 | ControlFlowNode for List | src/unsafe_shell_test.py:14:25:14:40 | ControlFlowNode for Attribute() |
+| src/unsafe_shell_test.py:26:20:26:23 | ControlFlowNode for name | src/unsafe_shell_test.py:29:30:29:33 | ControlFlowNode for name |
+| src/unsafe_shell_test.py:36:22:36:25 | ControlFlowNode for name | src/unsafe_shell_test.py:39:30:39:33 | ControlFlowNode for name |
+| src/unsafe_shell_test.py:36:22:36:25 | ControlFlowNode for name | src/unsafe_shell_test.py:44:20:44:23 | ControlFlowNode for name |
+| src/unsafe_shell_test.py:41:24:41:24 | ControlFlowNode for x | src/unsafe_shell_test.py:42:34:42:34 | ControlFlowNode for x |
+| src/unsafe_shell_test.py:44:20:44:23 | ControlFlowNode for name | src/unsafe_shell_test.py:41:24:41:24 | ControlFlowNode for x |
+nodes
+| src/unsafe_shell_test.py:4:22:4:25 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
+| src/unsafe_shell_test.py:5:25:5:28 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
+| src/unsafe_shell_test.py:8:23:8:26 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
+| src/unsafe_shell_test.py:11:25:11:38 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
+| src/unsafe_shell_test.py:14:25:14:40 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
+| src/unsafe_shell_test.py:14:34:14:39 | ControlFlowNode for List | semmle.label | ControlFlowNode for List |
+| src/unsafe_shell_test.py:17:32:17:35 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
+| src/unsafe_shell_test.py:20:27:20:30 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
+| src/unsafe_shell_test.py:26:20:26:23 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
+| src/unsafe_shell_test.py:29:30:29:33 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
+| src/unsafe_shell_test.py:36:22:36:25 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
+| src/unsafe_shell_test.py:39:30:39:33 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
+| src/unsafe_shell_test.py:41:24:41:24 | ControlFlowNode for x | semmle.label | ControlFlowNode for x |
+| src/unsafe_shell_test.py:42:34:42:34 | ControlFlowNode for x | semmle.label | ControlFlowNode for x |
+| src/unsafe_shell_test.py:44:20:44:23 | ControlFlowNode for name | semmle.label | ControlFlowNode for name |
+subpaths
+#select
+| src/unsafe_shell_test.py:5:15:5:28 | ControlFlowNode for BinaryExpr | src/unsafe_shell_test.py:4:22:4:25 | ControlFlowNode for name | src/unsafe_shell_test.py:5:25:5:28 | ControlFlowNode for name | This string concatenation which depends on $@ is later used in a $@. | src/unsafe_shell_test.py:4:22:4:25 | ControlFlowNode for name | library input | src/unsafe_shell_test.py:5:5:5:29 | ControlFlowNode for Attribute() | shell command |
+| src/unsafe_shell_test.py:8:15:8:28 | ControlFlowNode for Fstring | src/unsafe_shell_test.py:4:22:4:25 | ControlFlowNode for name | src/unsafe_shell_test.py:8:23:8:26 | ControlFlowNode for name | This f-string which depends on $@ is later used in a $@. | src/unsafe_shell_test.py:4:22:4:25 | ControlFlowNode for name | library input | src/unsafe_shell_test.py:8:5:8:29 | ControlFlowNode for Attribute() | shell command |
+| src/unsafe_shell_test.py:11:15:11:38 | ControlFlowNode for BinaryExpr | src/unsafe_shell_test.py:4:22:4:25 | ControlFlowNode for name | src/unsafe_shell_test.py:11:25:11:38 | ControlFlowNode for Attribute() | This string concatenation which depends on $@ is later used in a $@. | src/unsafe_shell_test.py:4:22:4:25 | ControlFlowNode for name | library input | src/unsafe_shell_test.py:11:5:11:39 | ControlFlowNode for Attribute() | shell command |
+| src/unsafe_shell_test.py:14:15:14:40 | ControlFlowNode for BinaryExpr | src/unsafe_shell_test.py:4:22:4:25 | ControlFlowNode for name | src/unsafe_shell_test.py:14:25:14:40 | ControlFlowNode for Attribute() | This string concatenation which depends on $@ is later used in a $@. | src/unsafe_shell_test.py:4:22:4:25 | ControlFlowNode for name | library input | src/unsafe_shell_test.py:14:5:14:41 | ControlFlowNode for Attribute() | shell command |
+| src/unsafe_shell_test.py:17:15:17:36 | ControlFlowNode for Attribute() | src/unsafe_shell_test.py:4:22:4:25 | ControlFlowNode for name | src/unsafe_shell_test.py:17:32:17:35 | ControlFlowNode for name | This formatted string which depends on $@ is later used in a $@. | src/unsafe_shell_test.py:4:22:4:25 | ControlFlowNode for name | library input | src/unsafe_shell_test.py:17:5:17:37 | ControlFlowNode for Attribute() | shell command |
+| src/unsafe_shell_test.py:20:15:20:30 | ControlFlowNode for BinaryExpr | src/unsafe_shell_test.py:4:22:4:25 | ControlFlowNode for name | src/unsafe_shell_test.py:20:27:20:30 | ControlFlowNode for name | This formatted string which depends on $@ is later used in a $@. | src/unsafe_shell_test.py:4:22:4:25 | ControlFlowNode for name | library input | src/unsafe_shell_test.py:20:5:20:31 | ControlFlowNode for Attribute() | shell command |
+| src/unsafe_shell_test.py:29:20:29:33 | ControlFlowNode for BinaryExpr | src/unsafe_shell_test.py:26:20:26:23 | ControlFlowNode for name | src/unsafe_shell_test.py:29:30:29:33 | ControlFlowNode for name | This string concatenation which depends on $@ is later used in a $@. | src/unsafe_shell_test.py:26:20:26:23 | ControlFlowNode for name | library input | src/unsafe_shell_test.py:29:5:29:46 | ControlFlowNode for Attribute() | shell command |
+| src/unsafe_shell_test.py:39:20:39:33 | ControlFlowNode for BinaryExpr | src/unsafe_shell_test.py:36:22:36:25 | ControlFlowNode for name | src/unsafe_shell_test.py:39:30:39:33 | ControlFlowNode for name | This string concatenation which depends on $@ is later used in a $@. | src/unsafe_shell_test.py:36:22:36:25 | ControlFlowNode for name | library input | src/unsafe_shell_test.py:39:5:39:46 | ControlFlowNode for Attribute() | shell command |
+| src/unsafe_shell_test.py:42:24:42:34 | ControlFlowNode for BinaryExpr | src/unsafe_shell_test.py:36:22:36:25 | ControlFlowNode for name | src/unsafe_shell_test.py:42:34:42:34 | ControlFlowNode for x | This string concatenation which depends on $@ is later used in a $@. | src/unsafe_shell_test.py:36:22:36:25 | ControlFlowNode for name | library input | src/unsafe_shell_test.py:42:9:42:47 | ControlFlowNode for Attribute() | shell command |
--- a/python/ql/test/query-tests/Security/CWE-078-UnsafeShellCommandConstruction/UnsafeShellCommandConstruction.qlref
+++ b/python/ql/test/query-tests/Security/CWE-078-UnsafeShellCommandConstruction/UnsafeShellCommandConstruction.qlref
@@ -0,0 +1 @@
+Security/CWE-078/UnsafeShellCommandConstruction.ql
--- a/python/ql/test/query-tests/Security/CWE-078-UnsafeShellCommandConstruction/options
+++ b/python/ql/test/query-tests/Security/CWE-078-UnsafeShellCommandConstruction/options
@@ -0,0 +1 @@
+semmle-extractor-options: --lang=3 --max-import-depth=0 -r src
--- a/python/ql/test/query-tests/Security/CWE-078-UnsafeShellCommandConstruction/setup.py
+++ b/python/ql/test/query-tests/Security/CWE-078-UnsafeShellCommandConstruction/setup.py
--- a/python/ql/test/query-tests/Security/CWE-078-UnsafeShellCommandConstruction/setup_posix.py
+++ b/python/ql/test/query-tests/Security/CWE-078-UnsafeShellCommandConstruction/setup_posix.py
@@ -0,0 +1,4 @@
+import os 
+
+def unsafe_setup(name):
+    os.system("ping " + name) # $result=OK - this is inside a setyp script, so it's fine.
--- a/python/ql/test/query-tests/Security/CWE-078-UnsafeShellCommandConstruction/src/init.py
+++ b/python/ql/test/query-tests/Security/CWE-078-UnsafeShellCommandConstruction/src/init.py
--- a/python/ql/test/query-tests/Security/CWE-078-UnsafeShellCommandConstruction/src/unsafe_shell_test.py
+++ b/python/ql/test/query-tests/Security/CWE-078-UnsafeShellCommandConstruction/src/unsafe_shell_test.py
@@ -0,0 +1,49 @@
+import os
+import subprocess
+
+def unsafe_shell_one(name):
+    os.system("ping " + name) # $result=BAD
+
+    # f-strings
+    os.system(f"ping {name}") # $result=BAD
+
+    # array.join
+    os.system("ping " + " ".join(name)) # $result=BAD
+
+    # array.join, with a list
+    os.system("ping " + " ".join([name])) # $result=BAD
+
+    # format, using .format
+    os.system("ping {}".format(name)) # $result=BAD
+
+    # format, using %
+    os.system("ping %s" % name) # $result=BAD
+
+    os.system(name) # OK - seems intentional.
+
+import fabric
+
+def facbric_stuff (name): 
+    fabric.api.run("ping " + name, shell=False) # OK
+
+    fabric.api.run("ping " + name, shell=True) # $result=BAD
+
+    def indirect(flag): 
+        fabric.api.run("ping " + name, shell=flag) # OK
+
+    indirect(False)
+
+def subprocess_flag (name): 
+    subprocess.run("ping " + name, shell=False) # OK - and nonsensical
+
+    subprocess.run("ping " + name, shell=True) # $result=BAD
+
+    def indirect(flag, x): 
+        subprocess.run("ping " + x, shell=flag) # $result=BAD
+
+    indirect(True, name)
+
+    subprocess.Popen("ping " + name, shell=unknownValue) # OK - shell assumed to be False
+
+def intentional(command): 
+    os.system("fish -ic " + command) # $result=OK - intentional
				`@@ -0,0 +1 @@`
				`Security/CWE-078/UnsafeShellCommandConstruction.ql`
				`@@ -0,0 +1 @@`
				`semmle-extractor-options: --lang=3 --max-import-depth=0 -r src`