add py/shell-command-constructed-from-input, but without a source.

It's a very direct port from Ruby, with only minor adjustments to fit the Python APIs
2026-04-29 18:55:14 +02:00 · 2023-01-31 14:51:38 +01:00
parent 187cfd7be7
commit 7fcc548665
6 changed files with 301 additions and 0 deletions
--- a/python/ql/lib/semmle/python/security/dataflow/UnsafeShellCommandConstructionCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/UnsafeShellCommandConstructionCustomizations.qll
@@ -0,0 +1,158 @@
+/**
+ * Provides default sources, sinks and sanitizers for reasoning about
+ * shell command constructed from library input vulnerabilities, as
+ * well as extension points for adding your own.
+ */
+
+private import python
+import semmle.python.dataflow.new.DataFlow
+import semmle.python.dataflow.new.TaintTracking
+import CommandInjectionCustomizations::CommandInjection as CommandInjection
+private import semmle.python.Concepts as Concepts
+
+/**
+ * Module containing sources, sinks, and sanitizers for shell command constructed from library input.
+ */
+module UnsafeShellCommandConstruction {
+  /** A source for shell command constructed from library input vulnerabilities. */
+  abstract class Source extends DataFlow::Node { }
+
+  /** An input parameter to a library function, seen as a source. */
+  private class LibraryInputAsSource extends Source instanceof DataFlow::ParameterNode {
+    LibraryInputAsSource() {
+      none() // TODO: Placeholder that matches nothing. Do something here, put it in a shared library.
+    }
+  }
+
+  /** A sink for shell command constructed from library input vulnerabilities. */
+  abstract class Sink extends DataFlow::Node {
+    /** Gets a description of how the string in this sink was constructed. */
+    abstract string describe();
+
+    /** Gets the dataflow node where the string is constructed. Defaults to this sink itself. */
+    DataFlow::Node getStringConstruction() { result = this }
+
+    /** Gets the dataflow node that executes the string as a shell command. */
+    abstract DataFlow::Node getCommandExecution();
+  }
+
+  /** Holds if the string constructed at `source` is executed as a shell command by `shellExec`. */
+  predicate isUsedAsShellCommand(DataFlow::Node source, Concepts::SystemCommandExecution shellExec) {
+    source = backtrackShellExec(TypeTracker::TypeBackTracker::end(), shellExec)
+  }
+
+  import semmle.python.dataflow.new.TypeTracker as TypeTracker
+
+  private DataFlow::LocalSourceNode backtrackShellExec(
+    TypeTracker::TypeBackTracker t, Concepts::SystemCommandExecution shellExec
+  ) {
+    t.start() and
+    exists(DataFlow::Node arg | shellExec.isShellInterpreted(arg) | result = arg.getALocalSource())
+    or
+    exists(TypeTracker::TypeBackTracker prev |
+      result = backtrackShellExec(prev, shellExec).backtrack(prev, t)
+    )
+  }
+
+  /**
+   * A component of an f-string (e.g. `sink` in `f'foo {sink}'`),
+   * where the resulting string ends up being executed as a shell command.
+   */
+  class StringInterpolationAsSink extends Sink {
+    // TODO: Add test.
+    Concepts::SystemCommandExecution s;
+    Fstring fstring;
+
+    StringInterpolationAsSink() {
+      isUsedAsShellCommand(any(DataFlow::Node n | n.asExpr() = fstring), s) and
+      this.asExpr() = fstring.getASubExpression()
+    }
+
+    override string describe() { result = "string construction" }
+
+    override DataFlow::Node getCommandExecution() { result = s }
+
+    override DataFlow::Node getStringConstruction() { result.asExpr() = fstring }
+  }
+
+  /**
+   * An operand of a string concatenation (e.g. `sink` in `"foo " + sink`),
+   * where the concatenated string ends up being executed as a shell command.
+   */
+  class StringConcatAsSink extends Sink {
+    // TODO: Add test.
+    Concepts::SystemCommandExecution s;
+    BinaryExpr add;
+
+    StringConcatAsSink() {
+      this.asExpr() = add.getASubExpression() and
+      add.getOp() instanceof Add and
+      exists(DataFlow::Node concat | concat.asExpr() = add | isUsedAsShellCommand(concat, s))
+    }
+
+    override string describe() { result = "string concatenation" }
+
+    override DataFlow::Node getStringConstruction() { result.asExpr() = add }
+
+    override DataFlow::Node getCommandExecution() { result = s }
+  }
+
+  /**
+   * The iterable (or an element of a list literal) passed to a `" ".join(..)` call, where the joined string ends up being executed as a shell command.
+   */
+  class ArrayJoin extends Sink {
+    // TODO: Add test.
+    Concepts::SystemCommandExecution s;
+    DataFlow::MethodCallNode call;
+
+    ArrayJoin() {
+      call.getMethodName() = "join" and
+      call.getObject().asExpr().(Str).getText() = " " and // in Python the separator is the receiver
+      isUsedAsShellCommand(call, s) and
+      (
+        this = call.getArg(0) and // the joined iterable is the sole positional argument
+        not call.getArg(0).asExpr() instanceof List
+        or
+        this.asExpr() = call.getArg(0).asExpr().(List).getASubExpression()
+      )
+    }
+
+    override string describe() { result = "array" }
+
+    override DataFlow::Node getCommandExecution() { result = s }
+
+    override DataFlow::Node getStringConstruction() { result = call }
+  }
+
+  /**
+   * A component of a string-formatting operation, where the resulting string
+   * ends up being executed as a shell command. The operation is either a call
+   * to `.format(..)` (receiver or argument) or a `%` expression (either operand).
+   */
+  class TaintedFormatStringAsSink extends Sink {
+    // TODO: Test
+    Concepts::SystemCommandExecution s;
+    DataFlow::Node formatCall;
+
+    TaintedFormatStringAsSink() {
+      (
+        formatCall.asExpr().(BinaryExpr).getOp() instanceof Mod and
+        this.asExpr() = formatCall.asExpr().(BinaryExpr).getASubExpression()
+        or
+        formatCall.(DataFlow::MethodCallNode).getMethodName() = "format" and
+        this =
+          [
+            formatCall.(DataFlow::MethodCallNode).getArg(_),
+            formatCall.(DataFlow::MethodCallNode).getObject()
+          ]
+      ) and
+      isUsedAsShellCommand(formatCall, s)
+    }
+
+    override string describe() { result = "formatted string" }
+
+    override DataFlow::Node getCommandExecution() { result = s }
+
+    override DataFlow::Node getStringConstruction() { result = formatCall }
+  }
+}
--- a/python/ql/lib/semmle/python/security/dataflow/UnsafeShellCommandConstructionQuery.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/UnsafeShellCommandConstructionQuery.qll
@@ -0,0 +1,35 @@
+/**
+ * Provides a taint tracking configuration for reasoning about shell command
+ * constructed from library input vulnerabilities.
+ *
+ * Note, for performance reasons: only import this file if `Configuration` is needed,
+ * otherwise `UnsafeShellCommandConstructionCustomizations` should be imported instead.
+ */
+
+import python
+import UnsafeShellCommandConstructionCustomizations::UnsafeShellCommandConstruction
+import semmle.python.dataflow.new.DataFlow
+import semmle.python.dataflow.new.TaintTracking
+import CommandInjectionCustomizations::CommandInjection as CommandInjection
+import semmle.python.dataflow.new.BarrierGuards
+
+/**
+ * A taint-tracking configuration for detecting shell command constructed from library input vulnerabilities.
+ */
+class Configuration extends TaintTracking::Configuration {
+  Configuration() { this = "UnsafeShellCommandConstruction" }
+
+  override predicate isSource(DataFlow::Node source) { source instanceof Source }
+
+  override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
+
+  override predicate isSanitizer(DataFlow::Node node) {
+    node instanceof CommandInjection::Sanitizer or // using all sanitizers from `py/command-injection`
+    node instanceof StringConstCompareBarrier
+  }
+
+  // override to require the path doesn't have unmatched return steps
+  override DataFlow::FlowFeature getAFeature() {
+    result instanceof DataFlow::FeatureHasSourceCallContext
+  }
+}
--- a/python/ql/src/Security/CWE-078/UnsafeShellCommandConstruction.qhelp
+++ b/python/ql/src/Security/CWE-078/UnsafeShellCommandConstruction.qhelp
@@ -0,0 +1,73 @@
+<!DOCTYPE qhelp PUBLIC
+"-//Semmle//qhelp//EN"
+"qhelp.dtd">
+<qhelp>
+<overview>
+	<p>
+		Dynamically constructing a shell command with inputs from exported 
+		functions may inadvertently change the meaning of the shell command.
+		
+		Clients using the exported function may use inputs containing
+		characters that the shell interprets in a special way, for instance
+		quotes and spaces.
+
+		This can result in the shell command misbehaving, or even
+		allowing a malicious user to execute arbitrary commands on the system.
+	</p>
+
+
+</overview>
+<recommendation>
+
+	<p>
+		If possible, provide the dynamic arguments to the shell as a list
+		to APIs such as <code>subprocess.run(..)</code> to avoid interpretation by the shell.
+	</p>
+
+	<p>
+		Alternatively, if the shell command must be constructed
+		dynamically, then add code to ensure that special characters 
+		do not alter the shell command unexpectedly.
+	</p>
+
+</recommendation>
+<example>
+
+	<p>
+		The following example shows a dynamically constructed shell
+		command that downloads a file from a remote URL.
+	</p>
+
+	<sample src="examples/unsafe-shell-command-construction.py" />
+
+	<p>
+		The shell command will, however, fail to work as intended if the
+		input contains spaces or other special characters interpreted in a 
+		special way by the shell. 
+	</p>
+
+	<p>
+		Even worse, a client might pass in user-controlled
+		data, not knowing that the input is interpreted as a shell command. 
+		This could allow a malicious user to provide the input <code>http://example.org; cat /etc/passwd</code>
+		in order to execute the command <code>cat /etc/passwd</code>.
+	</p>
+
+	<p>
+		To avoid such potentially catastrophic behaviors, provide the
+		input from exported functions as an argument that does not
+		get interpreted by a shell:
+	</p>
+
+	<sample src="examples/unsafe-shell-command-construction_fixed.py" />
+
+</example>
+<references>
+
+	<li>
+		OWASP:
+		<a href="https://www.owasp.org/index.php/Command_Injection">Command Injection</a>.
+	</li>
+
+</references>
+</qhelp>
--- a/python/ql/src/Security/CWE-078/UnsafeShellCommandConstruction.ql
+++ b/python/ql/src/Security/CWE-078/UnsafeShellCommandConstruction.ql
@@ -0,0 +1,27 @@
+/**
+ * @name Unsafe shell command constructed from library input
+ * @description Using externally controlled strings in a command line may allow a malicious
+ *              user to change the meaning of the command.
+ * @kind path-problem
+ * @problem.severity error
+ * @security-severity 6.3
+ * @precision high
+ * @id py/shell-command-constructed-from-input
+ * @tags correctness
+ *       security
+ *       external/cwe/cwe-078
+ *       external/cwe/cwe-088
+ *       external/cwe/cwe-073
+ */
+
+import python
+import semmle.python.security.dataflow.UnsafeShellCommandConstructionQuery
+import DataFlow::PathGraph
+
+from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, Sink sinkNode
+where
+  config.hasFlowPath(source, sink) and
+  sinkNode = sink.getNode()
+select sinkNode.getStringConstruction(), source, sink,
+  "This " + sinkNode.describe() + " which depends on $@ is later used in a $@.", source.getNode(),
+  "library input", sinkNode.getCommandExecution(), "shell command"
--- a/python/ql/src/Security/CWE-078/examples/unsafe-shell-command-construction.py
+++ b/python/ql/src/Security/CWE-078/examples/unsafe-shell-command-construction.py
@@ -0,0 +1,4 @@
+import os
+
+def download (path): 
+    os.system("wget " + path) # NOT OK
--- a/python/ql/src/Security/CWE-078/examples/unsafe-shell-command-construction_fixed.py
+++ b/python/ql/src/Security/CWE-078/examples/unsafe-shell-command-construction_fixed.py
@@ -0,0 +1,4 @@
+import subprocess
+
+def download (path): 
+    subprocess.run(["wget", path]) # OK