JavaScript: Add new query DoubleEscaping.

2025-12-17 01:03:14 +01:00 · 2018-11-26 17:17:50 +00:00
parent 1c5322274a
commit 10166be535
12 changed files with 332 additions and 3 deletions
--- a/change-notes/1.20/analysis-javascript.md
+++ b/change-notes/1.20/analysis-javascript.md
@@ -4,9 +4,10 @@

 ## New queries

-| **Query** | **Tags** | **Purpose** |
-|-----------|----------|-------------|
-|           |          |             |
+| **Query**                                     | **Tags**                                             | **Purpose**                                                                                                                                                                 |
+|-----------------------------------------------|------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| Double escaping or unescaping (`js/double-escaping`) | correctness, security, external/cwe/cwe-116 | Highlights potential double escaping or unescaping of special characters, indicating a possible violation of [CWE-116](https://cwe.mitre.org/data/definitions/116.html). Results are shown on LGTM by default. |
+

 ## Changes to existing queries

--- a/javascript/config/suites/javascript/security
+++ b/javascript/config/suites/javascript/security
@@ -8,6 +8,7 @@
 + semmlecode-javascript-queries/Security/CWE-089/SqlInjection.ql: /Security/CWE/CWE-089
 + semmlecode-javascript-queries/Security/CWE-094/CodeInjection.ql: /Security/CWE/CWE-094
 + semmlecode-javascript-queries/Security/CWE-116/IncompleteSanitization.ql: /Security/CWE/CWE-116
+ semmlecode-javascript-queries/Security/CWE-116/DoubleEscaping.ql: /Security/CWE/CWE-116
 + semmlecode-javascript-queries/Security/CWE-134/TaintedFormatString.ql: /Security/CWE/CWE-134
 + semmlecode-javascript-queries/Security/CWE-209/StackTraceExposure.ql: /Security/CWE/CWE-209
 + semmlecode-javascript-queries/Security/CWE-312/CleartextStorage.ql: /Security/CWE/CWE-312
--- a/javascript/ql/src/Security/CWE-116/DoubleEscaping.qhelp
+++ b/javascript/ql/src/Security/CWE-116/DoubleEscaping.qhelp
@@ -0,0 +1,77 @@
+<!DOCTYPE qhelp PUBLIC
+  "-//Semmle//qhelp//EN"
+  "qhelp.dtd">
+<qhelp>
+
+<overview>
+<p>
+Escaping meta-characters in untrusted input is an important technique for preventing injection
+attacks such as cross-site scripting. One particular example of this is HTML entity encoding,
+where HTML special characters are replaced by HTML character entities to prevent them from being
+interpreted as HTML markup. For example, the less-than character is encoded as <code>&amp;lt;</code>
+and the double-quote character as <code>&amp;quot;</code>.
+Other examples include backslash-escaping for including untrusted data in string literals and
+percent-encoding for URI components.
+</p>
+<p>
+The reverse process of replacing escape sequences with the characters they represent is known as
+unescaping.
+</p>
+<p>
+Note that the escape characters themselves (such as ampersand in the case of HTML encoding) play
+a special role during escaping and unescaping: they are themselves escaped, but also form part
+of the escaped representations of other characters. Hence care must be taken to avoid double escaping
+and unescaping: when escaping, the escape character must be escaped first; when unescaping, it has
+to be unescaped last.
+</p>
+<p>
+If used in the context of sanitization, double unescaping may render the sanitization ineffective.
+Even if it is not used in a security-critical context, it may still result in confusing
+or garbled output.
+</p>
+</overview>
+
+<recommendation>
+<p>
+Use a (well-tested) sanitization library if at all possible. These libraries are much more
+likely to handle corner cases correctly than a custom implementation. For URI encoding,
+you can use the standard <code>encodeURIComponent</code> and <code>decodeURIComponent</code> functions.
+</p>
+<p>
+Otherwise, make sure to always escape the escape character first, and unescape it last.
+</p>
+</recommendation>
+
+<example>
+<p>
+The following example shows a pair of hand-written HTML encoding and decoding functions:
+</p>
+
+<sample src="examples/DoubleEscaping.js" />
+
+<p>
+The encoding function correctly handles ampersand before the other characters. For example,
+the string <code>me &amp; "you"</code> is encoded as <code>me &amp;amp; &amp;quot;you&amp;quot;</code>,
+and the string <code>&amp;quot;</code> is encoded as <code>&amp;amp;quot;</code>.
+</p>
+
+<p>
+The decoding function, however, incorrectly decodes <code>&amp;amp;</code> into <code>&amp;</code>
+before handling the other characters. So while it correctly decodes the first example above,
+it decodes the second example (<code>&amp;amp;quot;</code>) to <code>&quot;</code> (a single double quote),
+which is not correct.
+</p>
+
+<p>
+Instead, the decoding function should decode the ampersand last:
+</p>
+
+<sample src="examples/DoubleEscapingGood.js" />
+</example>
+
+<references>
+<li>OWASP Top 10: <a href="https://www.owasp.org/index.php/Top_10-2017_A1-Injection">A1 Injection</a>.</li>
+<li>npm: <a href="https://www.npmjs.com/package/html-entities">html-entities</a> package.</li>
+<li>npm: <a href="https://www.npmjs.com/package/js-string-escape">js-string-escape</a> package.</li>
+</references>
+</qhelp>
--- a/javascript/ql/src/Security/CWE-116/DoubleEscaping.ql
+++ b/javascript/ql/src/Security/CWE-116/DoubleEscaping.ql
@@ -0,0 +1,159 @@
+/**
+ * @name Double escaping or unescaping
+ * @description When escaping special characters using a meta-character like backslash or
+ *              ampersand, the meta-character has to be escaped first to avoid double-escaping,
+ *              and conversely it has to be unescaped last to avoid double-unescaping.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id js/double-escaping
+ * @tags correctness
+ *       security
+ *       external/cwe/cwe-116
+ *       external/cwe/cwe-20
+ */
+
+import javascript
+
+/**
+ * Holds if `rl` is a simple constant, which is bound to the result of the predicate.
+ *
+ * For example, `/a/g` has string value `"a"` and `/abc/` has string value `"abc"`,
+ * while `/ab?/` and `/a(?=b)/` do not have a string value.
+ *
+ * Flags are ignored, so `/a/i` is still considered to have string value `"a"`,
+ * even though it also matches `"A"`.
+ *
+ * Note the somewhat subtle use of monotonic aggregate semantics, which makes the
+ * `strictconcat` fail if one of the children of the root is not a constant (legacy
+ * semantics would simply skip such children).
+ */
+language[monotonicAggregates]
+string getStringValue(RegExpLiteral rl) {
+  exists (RegExpTerm root | root = rl.getRoot() |
+    result = root.(RegExpConstant).getValue()
+    or
+    result = strictconcat(RegExpTerm ch, int i |
+      ch = root.(RegExpSequence).getChild(i) |
+      ch.(RegExpConstant).getValue() order by i
+    )
+  )
+}
+
+/**
+ * Gets a predecessor of `nd`, provided that `nd` itself is not an SSA phi node.
+ */
+DataFlow::Node getASimplePredecessor(DataFlow::Node nd) {
+  result = nd.getAPredecessor() and
+  not nd.(DataFlow::SsaDefinitionNode).getSsaVariable().getDefinition() instanceof SsaPhiNode
+}
+
+/**
+ * Holds if `metachar` is a meta-character that is used to escape special characters
+ * into a form described by regular expression `regex`.
+ */
+predicate escapingScheme(string metachar, string regex) {
+  metachar = "&" and regex = "&.*;"
+  or
+  metachar = "%" and regex = "%.*"
+  or
+  metachar = "\\" and regex = "\\\\.*"
+}
+
+/**
+ * A call to `String.prototype.replace` that replaces all instances of a pattern.
+ */
+class Replacement extends DataFlow::Node {
+  RegExpLiteral pattern;
+
+  Replacement() {
+    exists (DataFlow::MethodCallNode mcn | this = mcn |
+      mcn.getMethodName() = "replace" and
+      mcn.getArgument(0).asExpr() = pattern and
+      mcn.getNumArgument() = 2 and
+      pattern.isGlobal()
+    )
+  }
+
+  /**
+   * Holds if this replacement replaces the string `input` with `output`.
+   */
+  predicate replaces(string input, string output) {
+    exists (DataFlow::MethodCallNode mcn |
+      mcn = this and
+      input = getStringValue(pattern) and
+      output = mcn.getArgument(1).asExpr().getStringValue()
+    )
+  }
+
+  /**
+   * Holds if this replacement escapes `char` using `metachar`.
+   *
+   * For example, during HTML entity escaping `<` is escaped (to `&lt;`)
+   * using `&`.
+   */
+  predicate escapes(string char, string metachar) {
+    exists (string regexp, string repl |
+      escapingScheme(metachar, regexp) and
+      replaces(char, repl) and
+      repl.regexpMatch(regexp)
+    )
+  }
+
+  /**
+   * Holds if this replacement unescapes `char` using `metachar`.
+   *
+   * For example, during HTML entity unescaping `<` is unescaped (from
+   * `&lt;`) using `&`.
+   */
+  predicate unescapes(string metachar, string char) {
+    exists (string regexp, string orig |
+      escapingScheme(metachar, regexp) and
+      replaces(orig, char) and
+      orig.regexpMatch(regexp)
+    )
+  }
+
+  /**
+   * Gets the previous replacement in this chain of replacements.
+   */
+  Replacement getPreviousReplacement() {
+    result = getASimplePredecessor*(this.(DataFlow::MethodCallNode).getReceiver())
+  }
+
+  /**
+   * Gets an earlier replacement in this chain of replacements that
+   * performs an escaping.
+   */
+  Replacement getAnEarlierEscaping(string metachar) {
+    exists (Replacement pred | pred = this.getPreviousReplacement() |
+      if pred.escapes(_, metachar) then
+        result = pred
+      else
+        result = pred.getAnEarlierEscaping(metachar)
+    )
+  }
+
+  /**
+   * Gets a later replacement in this chain of replacements that
+   * performs an unescaping.
+   */
+  Replacement getALaterUnescaping(string metachar) {
+    exists (Replacement succ | this = succ.getPreviousReplacement() |
+      if succ.unescapes(metachar, _) then
+        result = succ
+      else
+        result = succ.getALaterUnescaping(metachar)
+    )
+  }
+}
+
+from Replacement primary, Replacement supplementary, string message, string metachar
+where primary.escapes(metachar, _) and
+      supplementary = primary.getAnEarlierEscaping(metachar) and
+      message = "may double-escape '" + metachar + "' characters from $@"
+      or
+      primary.unescapes(_, metachar) and
+      supplementary = primary.getALaterUnescaping(metachar) and
+      message = "may produce '" + metachar + "' characters that are double-unescaped $@"
+select primary, "This replacement " + message + ".", supplementary, "here"
--- a/javascript/ql/src/Security/CWE-116/examples/DoubleEscaping.js
+++ b/javascript/ql/src/Security/CWE-116/examples/DoubleEscaping.js
@@ -0,0 +1,11 @@
+module.exports.encode = function(s) {
+  return s.replace(/&/g, "&amp;")
+          .replace(/"/g, "&quot;")
+          .replace(/'/g, "&apos;");
+};
+
+module.exports.decode = function(s) {
+  return s.replace(/&amp;/g, "&")
+          .replace(/&quot;/g, "\"")
+          .replace(/&apos;/g, "'");
+};
--- a/javascript/ql/src/Security/CWE-116/examples/DoubleEscapingGood.js
+++ b/javascript/ql/src/Security/CWE-116/examples/DoubleEscapingGood.js
@@ -0,0 +1,11 @@
+module.exports.encode = function(s) {
+  return s.replace(/&/g, "&amp;")
+          .replace(/"/g, "&quot;")
+          .replace(/'/g, "&apos;");
+};
+
+module.exports.decode = function(s) {
+  return s.replace(/&quot;/g, "\"")
+          .replace(/&apos;/g, "'")
+          .replace(/&amp;/g, "&");
+};
--- a/javascript/ql/test/query-tests/Security/CWE-116/DoubleEscaping/DoubleEscaping.expected
+++ b/javascript/ql/test/query-tests/Security/CWE-116/DoubleEscaping/DoubleEscaping.expected
@@ -0,0 +1,6 @@
+| tst.js:2:10:4:33 | s.repla ... &amp;") | This replacement may double-escape '&' characters from $@. | tst.js:2:10:3:34 | s.repla ... apos;") | here |
+| tst.js:20:10:20:33 | s.repla ... g, "&") | This replacement may produce '&' characters that are double-unescaped $@. | tst.js:20:10:21:35 | s.repla ... , "\\"") | here |
+| tst.js:30:10:30:33 | s.repla ... g, "&") | This replacement may produce '&' characters that are double-unescaped $@. | tst.js:30:10:32:34 | s.repla ... g, "'") | here |
+| tst.js:47:7:47:30 | s.repla ... g, "&") | This replacement may produce '&' characters that are double-unescaped $@. | tst.js:48:7:48:32 | s.repla ... , "\\"") | here |
+| tst.js:53:10:53:33 | s.repla ... , '\\\\') | This replacement may produce '\\' characters that are double-unescaped $@. | tst.js:53:10:54:33 | s.repla ... , '\\'') | here |
+| tst.js:60:7:60:28 | s.repla ...  '%25') | This replacement may double-escape '%' characters from $@. | tst.js:59:7:59:28 | s.repla ...  '%26') | here |
--- a/javascript/ql/test/query-tests/Security/CWE-116/DoubleEscaping/DoubleEscaping.qlref
+++ b/javascript/ql/test/query-tests/Security/CWE-116/DoubleEscaping/DoubleEscaping.qlref
@@ -0,0 +1 @@
+Security/CWE-116/DoubleEscaping.ql
--- a/javascript/ql/test/query-tests/Security/CWE-116/DoubleEscaping/tst.js
+++ b/javascript/ql/test/query-tests/Security/CWE-116/DoubleEscaping/tst.js
@@ -0,0 +1,62 @@
+function badEncode(s) {
+  return s.replace(/"/g, "&quot;")
+          .replace(/'/g, "&apos;")
+          .replace(/&/g, "&amp;");
+}
+
+function goodEncode(s) {
+  return s.replace(/&/g, "&amp;")
+          .replace(/"/g, "&quot;")
+          .replace(/'/g, "&apos;");
+}
+
+function goodDecode(s) {
+  return s.replace(/&quot;/g, "\"")
+          .replace(/&apos;/g, "'")
+          .replace(/&amp;/g, "&");
+}
+
+function badDecode(s) {
+  return s.replace(/&amp;/g, "&")
+          .replace(/&quot;/g, "\"")
+          .replace(/&apos;/g, "'");
+}
+
+function cleverEncode(code) {
+    return code.replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/&(?![\w\#]+;)/g, '&amp;');
+}
+
+function badDecode2(s) {
+  return s.replace(/&amp;/g, "&")
+          .replace(/s?ome|thin*g/g, "else")
+          .replace(/&apos;/g, "'");
+}
+
+function goodDecodeInLoop(ss) {
+  var res = [];
+  for (var s of ss) {
+    s = s.replace(/&quot;/g, "\"")
+         .replace(/&apos;/g, "'")
+         .replace(/&amp;/g, "&");
+    res.push(s);
+  }
+  return res;
+}
+
+function badDecode3(s) {
+  s = s.replace(/&amp;/g, "&");
+  s = s.replace(/&quot;/g, "\"");
+  return s.replace(/&apos;/g, "'");
+}
+
+function badUnescape(s) {
+  return s.replace(/\\\\/g, '\\')
+           .replace(/\\'/g, '\'')
+           .replace(/\\"/g, '\"');
+}
+
+function badPercentEscape(s) {
+  s = s.replace(/&/g, '%26');
+  s = s.replace(/%/g, '%25');
+  return s;
+}
--- a/javascript/ql/test/query-tests/Security/CWE-116/IncompleteSanitization/IncompleteSanitization.expected
+++ b/javascript/ql/test/query-tests/Security/CWE-116/IncompleteSanitization/IncompleteSanitization.expected
--- a/javascript/ql/test/query-tests/Security/CWE-116/IncompleteSanitization/IncompleteSanitization.qlref
+++ b/javascript/ql/test/query-tests/Security/CWE-116/IncompleteSanitization/IncompleteSanitization.qlref
--- a/javascript/ql/test/query-tests/Security/CWE-116/IncompleteSanitization/tst.js
+++ b/javascript/ql/test/query-tests/Security/CWE-116/IncompleteSanitization/tst.js