JavaScript: Add new query DoubleEscaping.

This commit is contained in:
Max Schaefer
2018-11-26 17:17:50 +00:00
parent 1c5322274a
commit 10166be535
12 changed files with 332 additions and 3 deletions

View File

@@ -4,9 +4,10 @@
## New queries
| **Query** | **Tags** | **Purpose** |
|-----------|----------|-------------|
| | | |
| **Query** | **Tags** | **Purpose** |
|-----------------------------------------------|------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Double escaping or unescaping (`js/double-escaping`) | correctness, security, external/cwe/cwe-116 | Highlights potential double escaping or unescaping of special characters, indicating a possible violation of [CWE-116](https://cwe.mitre.org/data/definitions/116.html). Results are shown on LGTM by default. |
## Changes to existing queries

View File

@@ -8,6 +8,7 @@
+ semmlecode-javascript-queries/Security/CWE-089/SqlInjection.ql: /Security/CWE/CWE-089
+ semmlecode-javascript-queries/Security/CWE-094/CodeInjection.ql: /Security/CWE/CWE-094
+ semmlecode-javascript-queries/Security/CWE-116/IncompleteSanitization.ql: /Security/CWE/CWE-116
+ semmlecode-javascript-queries/Security/CWE-116/DoubleEscaping.ql: /Security/CWE/CWE-116
+ semmlecode-javascript-queries/Security/CWE-134/TaintedFormatString.ql: /Security/CWE/CWE-134
+ semmlecode-javascript-queries/Security/CWE-209/StackTraceExposure.ql: /Security/CWE/CWE-209
+ semmlecode-javascript-queries/Security/CWE-312/CleartextStorage.ql: /Security/CWE/CWE-312

View File

@@ -0,0 +1,77 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Escaping meta-characters in untrusted input is an important technique for preventing injection
attacks such as cross-site scripting. One particular example of this is HTML entity encoding,
where HTML special characters are replaced by HTML character entities to prevent them from being
interpreted as HTML markup. For example, the less-than character is encoded as <code>&amp;lt;</code>
and the double-quote character as <code>&amp;quot;</code>.
Other examples include backslash-escaping for including untrusted data in string literals and
percent-encoding for URI components.
</p>
<p>
The reverse process of replacing escape sequences with the characters they represent is known as
unescaping.
</p>
<p>
Note that the escape characters themselves (such as ampersand in the case of HTML encoding) play
a special role during escaping and unescaping: they are themselves escaped, but also form part
of the escaped representations of other characters. Hence care must be taken to avoid double escaping
and unescaping: when escaping, the escape character must be escaped first; when unescaping, it has
to be unescaped last.
</p>
<p>
If used in the context of sanitization, double unescaping may render the sanitization ineffective.
Even if it is not used in a security-critical context, it may still result in confusing
or garbled output.
</p>
</overview>
<recommendation>
<p>
Use a (well-tested) sanitization library if at all possible. These libraries are much more
likely to handle corner cases correctly than a custom implementation. For URI encoding,
you can use the standard <code>encodeURIComponent</code> and <code>decodeURIComponent</code> functions.
</p>
<p>
Otherwise, make sure to always escape the escape character first, and unescape it last.
</p>
</recommendation>
<example>
<p>
The following example shows a pair of hand-written HTML encoding and decoding functions:
</p>
<sample src="examples/DoubleEscaping.js" />
<p>
The encoding function correctly handles ampersand before the other characters. For example,
the string <code>me &amp; "you"</code> is encoded as <code>me &amp;amp; &amp;quot;you&amp;quot;</code>,
and the string <code>&amp;quot;</code> is encoded as <code>&amp;amp;quot;</code>.
</p>
<p>
The decoding function, however, incorrectly decodes <code>&amp;amp;</code> into <code>&amp;</code>
before handling the other characters. So while it correctly decodes the first example above,
it decodes the second example (<code>&amp;amp;quot;</code>) to <code>&quot;</code> (a single double quote),
which is not correct: the expected result is the original string <code>&amp;quot;</code>.
</p>
<p>
Instead, the decoding function should decode the ampersand last:
</p>
<sample src="examples/DoubleEscapingGood.js" />
</example>
<references>
<li>OWASP Top 10: <a href="https://www.owasp.org/index.php/Top_10-2017_A1-Injection">A1 Injection</a>.</li>
<li>npm: <a href="https://www.npmjs.com/package/html-entities">html-entities</a> package.</li>
<li>npm: <a href="https://www.npmjs.com/package/js-string-escape">js-string-escape</a> package.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,159 @@
/**
* @name Double escaping or unescaping
* @description When escaping special characters using a meta-character like backslash or
* ampersand, the meta-character has to be escaped first to avoid double-escaping,
* and conversely it has to be unescaped last to avoid double-unescaping.
* @kind problem
* @problem.severity warning
* @precision high
* @id js/double-escaping
* @tags correctness
* security
* external/cwe/cwe-116
* external/cwe/cwe-20
*/
import javascript
/**
* Gets the constant string matched by `rl`, if `rl` is a simple constant pattern.
*
* The root term of `rl` must either be a single `RegExpConstant`, or a
* `RegExpSequence`; for a sequence, the result is the concatenation of the
* values of its children, in child order.
*
* For example, `/a/g` has string value `"a"` and `/abc/` has string value `"abc"`,
* while `/ab?/` and `/a(?=b)/` do not have a string value.
*
* Flags are ignored, so `/a/i` is still considered to have string value `"a"`,
* even though it also matches `"A"`.
*
* Note the somewhat subtle use of monotonic aggregate semantics, which makes the
* `strictconcat` fail if one of the children of the root is not a constant (legacy
* semantics would simply skip such children).
*/
language[monotonicAggregates]
string getStringValue(RegExpLiteral rl) {
exists (RegExpTerm root | root = rl.getRoot() |
// case 1: the whole pattern is a single constant term
result = root.(RegExpConstant).getValue()
or
// case 2: the pattern is a sequence; concatenate the children's values in order
result = strictconcat(RegExpTerm ch, int i |
ch = root.(RegExpSequence).getChild(i) |
ch.(RegExpConstant).getValue() order by i
)
)
}
/**
 * Gets a data-flow predecessor of `nd`, provided that `nd` is not itself the
 * data-flow node of an SSA phi definition.
 *
 * In other words, no backwards step is ever taken out of a phi node.
 */
DataFlow::Node getASimplePredecessor(DataFlow::Node nd) {
  not (nd.(DataFlow::SsaDefinitionNode).getSsaVariable().getDefinition() instanceof SsaPhiNode) and
  result = nd.getAPredecessor()
}
/**
 * Holds if escaping with the meta-character `metachar` produces escape sequences
 * of the shape described by the regular expression `regex`.
 *
 * Three schemes are covered: HTML entities (`&...;`), percent-encoding (`%...`)
 * and backslash-escaping (`\...`).
 */
predicate escapingScheme(string metachar, string regex) {
  regex = "&.*;" and metachar = "&"
  or
  regex = "%.*" and metachar = "%"
  or
  regex = "\\\\.*" and metachar = "\\"
}
/**
* A call to `String.prototype.replace` that replaces all instances of a pattern.
*
* Only method calls named `replace` with exactly two arguments are included,
* and the first argument must be a regular expression literal with the
* global (`g`) flag.
*/
class Replacement extends DataFlow::Node {
// the regular expression literal passed as the first argument
RegExpLiteral pattern;
Replacement() {
exists (DataFlow::MethodCallNode mcn | this = mcn |
mcn.getMethodName() = "replace" and
mcn.getArgument(0).asExpr() = pattern and
mcn.getNumArgument() = 2 and
pattern.isGlobal()
)
}
/**
* Holds if this replacement replaces the string `input` with `output`.
*
* `input` is the constant string value of the pattern (see `getStringValue`),
* and `output` is the string value of the second argument; the predicate
* does not hold if either of them is not a constant string.
*/
predicate replaces(string input, string output) {
exists (DataFlow::MethodCallNode mcn |
mcn = this and
input = getStringValue(pattern) and
output = mcn.getArgument(1).asExpr().getStringValue()
)
}
/**
* Holds if this replacement escapes `char` using `metachar`.
*
* For example, during HTML entity escaping `<` is escaped (to `&lt;`)
* using `&`.
*
* That is, the replacement text matches the escape-sequence shape that
* `escapingScheme` associates with `metachar`.
*/
predicate escapes(string char, string metachar) {
exists (string regexp, string repl |
escapingScheme(metachar, regexp) and
replaces(char, repl) and
repl.regexpMatch(regexp)
)
}
/**
* Holds if this replacement unescapes `char` using `metachar`.
*
* For example, during HTML entity unescaping `<` is unescaped (from
* `&lt;`) using `&`.
*
* That is, the replaced text matches the escape-sequence shape that
* `escapingScheme` associates with `metachar`. Note that the parameter
* order is the reverse of `escapes`: the meta-character comes first.
*/
predicate unescapes(string metachar, string char) {
exists (string regexp, string orig |
escapingScheme(metachar, regexp) and
replaces(orig, char) and
orig.regexpMatch(regexp)
)
}
/**
* Gets the previous replacement in this chain of replacements.
*
* It is found by following zero or more simple predecessor steps
* (see `getASimplePredecessor`) backwards from the receiver of this call.
*/
Replacement getPreviousReplacement() {
result = getASimplePredecessor*(this.(DataFlow::MethodCallNode).getReceiver())
}
/**
* Gets an earlier replacement in this chain of replacements that
* performs an escaping.
*
* The search walks backwards one previous replacement at a time and stops
* at the first one that escapes some character using `metachar`.
*/
Replacement getAnEarlierEscaping(string metachar) {
exists (Replacement pred | pred = this.getPreviousReplacement() |
if pred.escapes(_, metachar) then
result = pred
else
result = pred.getAnEarlierEscaping(metachar)
)
}
/**
* Gets a later replacement in this chain of replacements that
* performs an unescaping.
*
* The search walks forwards one replacement at a time and stops at the
* first one that unescapes some character using `metachar`.
*/
Replacement getALaterUnescaping(string metachar) {
exists (Replacement succ | this = succ.getPreviousReplacement() |
if succ.unescapes(metachar, _) then
result = succ
else
result = succ.getALaterUnescaping(metachar)
)
}
}
from Replacement primary, Replacement supplementary, string message, string metachar
where
  (
    // double escaping: `primary` escapes the meta-character itself, although an
    // earlier replacement has already introduced that meta-character
    primary.escapes(metachar, _) and
    supplementary = primary.getAnEarlierEscaping(metachar) and
    message = "may double-escape '" + metachar + "' characters from $@"
  )
  or
  (
    // double unescaping: `primary` produces the meta-character, and a later
    // replacement unescapes sequences introduced by that meta-character
    primary.unescapes(_, metachar) and
    supplementary = primary.getALaterUnescaping(metachar) and
    message = "may produce '" + metachar + "' characters that are double-unescaped $@"
  )
select primary, "This replacement " + message + ".", supplementary, "here"

View File

@@ -0,0 +1,11 @@
/**
 * HTML-encodes ampersands, double quotes and single quotes in `s`.
 *
 * The ampersand is replaced first, so the `&` of the entities inserted by the
 * later steps is not escaped a second time.
 */
module.exports.encode = function(s) {
  const ampersandsEscaped = s.replace(/&/g, "&amp;");
  const doubleQuotesEscaped = ampersandsEscaped.replace(/"/g, "&quot;");
  return doubleQuotesEscaped.replace(/'/g, "&apos;");
};
/**
 * Decodes the `&amp;`, `&quot;` and `&apos;` entities in `s`.
 *
 * BAD (kept on purpose as the negative example): `&amp;` is decoded first, so
 * the `&` it yields can join the following text and be decoded again; for
 * instance `&amp;quot;` ends up as a double quote.
 */
module.exports.decode = function(s) {
  const ampersandsDecoded = s.replace(/&amp;/g, "&");
  const doubleQuotesDecoded = ampersandsDecoded.replace(/&quot;/g, "\"");
  return doubleQuotesDecoded.replace(/&apos;/g, "'");
};

View File

@@ -0,0 +1,11 @@
/**
 * HTML-encodes ampersands, double quotes and single quotes in `s`.
 *
 * The escape character `&` is handled before any other character.
 */
module.exports.encode = function(s) {
  const withAmp = s.replace(/&/g, "&amp;");
  const withQuot = withAmp.replace(/"/g, "&quot;");
  const withApos = withQuot.replace(/'/g, "&apos;");
  return withApos;
};
/**
 * Decodes the `&quot;`, `&apos;` and `&amp;` entities in `s`.
 *
 * GOOD: `&amp;` is decoded last, so the `&` it yields is never treated as the
 * start of another entity.
 */
module.exports.decode = function(s) {
  const withoutQuot = s.replace(/&quot;/g, "\"");
  const withoutApos = withoutQuot.replace(/&apos;/g, "'");
  const withoutAmp = withoutApos.replace(/&amp;/g, "&");
  return withoutAmp;
};

View File

@@ -0,0 +1,6 @@
| tst.js:2:10:4:33 | s.repla ... &amp;") | This replacement may double-escape '&' characters from $@. | tst.js:2:10:3:34 | s.repla ... apos;") | here |
| tst.js:20:10:20:33 | s.repla ... g, "&") | This replacement may produce '&' characters that are double-unescaped $@. | tst.js:20:10:21:35 | s.repla ... , "\\"") | here |
| tst.js:30:10:30:33 | s.repla ... g, "&") | This replacement may produce '&' characters that are double-unescaped $@. | tst.js:30:10:32:34 | s.repla ... g, "'") | here |
| tst.js:47:7:47:30 | s.repla ... g, "&") | This replacement may produce '&' characters that are double-unescaped $@. | tst.js:48:7:48:32 | s.repla ... , "\\"") | here |
| tst.js:53:10:53:33 | s.repla ... , '\\\\') | This replacement may produce '\\' characters that are double-unescaped $@. | tst.js:53:10:54:33 | s.repla ... , '\\'') | here |
| tst.js:60:7:60:28 | s.repla ... '%25') | This replacement may double-escape '%' characters from $@. | tst.js:59:7:59:28 | s.repla ... '%26') | here |

View File

@@ -0,0 +1 @@
Security/CWE-116/DoubleEscaping.ql

View File

@@ -0,0 +1,62 @@
function badEncode(s) { // BAD: `&` is escaped last, re-escaping the `&` of the entities produced by the earlier steps
return s.replace(/"/g, "&quot;")
.replace(/'/g, "&apos;")
.replace(/&/g, "&amp;");
}
function goodEncode(s) { // GOOD: `&` is escaped first
return s.replace(/&/g, "&amp;")
.replace(/"/g, "&quot;")
.replace(/'/g, "&apos;");
}
function goodDecode(s) { // GOOD: `&amp;` is unescaped last
return s.replace(/&quot;/g, "\"")
.replace(/&apos;/g, "'")
.replace(/&amp;/g, "&");
}
function badDecode(s) { // BAD: `&amp;` is unescaped first, so its `&` can be unescaped again by the later steps
return s.replace(/&amp;/g, "&")
.replace(/&quot;/g, "\"")
.replace(/&apos;/g, "'");
}
function cleverEncode(code) { // OK: `&` is escaped last, but the negative lookahead skips any `&` that already starts an entity
return code.replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/&(?![\w\#]+;)/g, '&amp;');
}
function badDecode2(s) { // BAD: `&amp;` is unescaped first; the non-constant pattern in between is unrelated to escaping
return s.replace(/&amp;/g, "&")
.replace(/s?ome|thin*g/g, "else")
.replace(/&apos;/g, "'");
}
function goodDecodeInLoop(ss) { // GOOD: within each iteration `&amp;` is unescaped last
var res = [];
for (var s of ss) {
s = s.replace(/&quot;/g, "\"")
.replace(/&apos;/g, "'")
.replace(/&amp;/g, "&");
res.push(s);
}
return res;
}
function badDecode3(s) { // BAD: `&amp;` is unescaped first; the steps are linked by reassigning `s` instead of chaining
s = s.replace(/&amp;/g, "&");
s = s.replace(/&quot;/g, "\"");
return s.replace(/&apos;/g, "'");
}
function badUnescape(s) { // BAD: the escaped backslash is unescaped first, before the escaped quotes
return s.replace(/\\\\/g, '\\')
.replace(/\\'/g, '\'')
.replace(/\\"/g, '\"');
}
function badPercentEscape(s) { // BAD: `%` is escaped after `&` was already turned into `%26`, so that `%` is escaped again
s = s.replace(/&/g, '%26');
s = s.replace(/%/g, '%25');
return s;
}