JS: add query js/useless-regexp-character-escape

This commit is contained in:
Esben Sparre Andreasen
2019-09-17 16:32:24 +02:00
parent 25130f200b
commit e1d7434be4
14 changed files with 530 additions and 11 deletions

View File

@@ -18,7 +18,7 @@
| Loop bound injection (`js/loop-bound-injection`) | security, external/cwe/cwe-834 | Highlights loops where a user-controlled object with an arbitrary .length value can trick the server to loop indefinitely. Results are not shown on LGTM by default. |
| Suspicious method name (`js/suspicious-method-name-declaration`) | correctness, typescript, methods | Highlights suspiciously named methods where the developer likely meant to write a constructor or function. Results are shown on LGTM by default. |
| Use of returnless function (`js/use-of-returnless-function`) | maintainability, correctness | Highlights calls where the return value is used, but the callee never returns a value. Results are shown on LGTM by default. |
| Useless regular expression character escape (`js/useless-regexp-character-escape`) | correctness, security, external/cwe/cwe-20 | Highlights regular expression strings with useless character escapes, indicating a possible violation of [CWE-20](https://cwe.mitre.org/data/definitions/20.html). Results are shown on LGTM by default. |
## Changes to existing queries

View File

@@ -4,6 +4,7 @@
+ semmlecode-javascript-queries/Security/CWE-020/IncompleteUrlSubstringSanitization.ql: /Security/CWE/CWE-020
+ semmlecode-javascript-queries/Security/CWE-020/IncorrectSuffixCheck.ql: /Security/CWE/CWE-020
+ semmlecode-javascript-queries/Security/CWE-020/MissingRegExpAnchor.ql: /Security/CWE/CWE-020
+ semmlecode-javascript-queries/Security/CWE-020/UselessRegExpCharacterEscape.ql: /Security/CWE/CWE-020
+ semmlecode-javascript-queries/Security/CWE-022/TaintedPath.ql: /Security/CWE/CWE-022
+ semmlecode-javascript-queries/Security/CWE-022/ZipSlip.ql: /Security/CWE/CWE-022
+ semmlecode-javascript-queries/Security/CWE-078/CommandInjection.ql: /Security/CWE/CWE-078

View File

@@ -11,6 +11,7 @@
*/
import javascript
import semmle.javascript.CharacterEscapes
/**
* Holds if `pattern` is a regular expression pattern for URLs with a host matched by `hostPart`,
@@ -40,7 +41,9 @@ where
)
) and
// ignore patterns with capture groups after the TLD
not pattern.regexpMatch("(?i).*[.](" + RegExpPatterns::commonTLD() + ").*[(][?]:.*[)].*")
not pattern.regexpMatch("(?i).*[.](" + RegExpPatterns::commonTLD() + ").*[(][?]:.*[)].*") and
// avoid double reporting
not CharacterEscapes::hasALikelyRegExpPatternMistake(re)
select re,
"This " + kind + " has an unescaped '.' before '" + hostPart +
"', so it might match more hosts than expected.", aux, "here"

View File

@@ -0,0 +1,86 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
When a character in a string literal or regular expression
literal is preceded by a backslash, it is interpreted as part of an
escape sequence. For example, the escape sequence <code>\n</code> in a
string literal corresponds to a single <code>newline</code> character,
and not the <code>\</code> and <code>n</code> characters.
However, not all characters change meaning when used in an
escape sequence. In this case, the backslash just makes the character
appear to mean something else, and the backslash actually has no
effect. For example, the escape sequence <code>\k</code> in a string
literal just means <code>k</code>.
Such superfluous escape sequences are usually benign, and
do not change the behavior of the program.
</p>
<p>
The set of characters that change meaning when in escape
sequences is different for regular expression literals and string
literals.
This can be problematic when a regular expression literal
is turned into a regular expression that is built from one or more
string literals. The problem occurs when a regular expression escape
sequence loses its special meaning in a string literal.
</p>
</overview>
<recommendation>
<p>
Ensure that the right amount of backslashes is used when
escaping characters in strings, template literals and regular
expressions.
Pay special attention to the number of backslashes when
rewriting a regular expression as a string literal.
</p>
</recommendation>
<example>
<p>
The following example code checks that a string is
<code>"my-marker"</code>, possibly surrounded by white space:
</p>
<sample src="examples/UselessRegExpCharacterEscape_bad_1.js"/>
<p>
However, the check does not work properly for white space
as the two <code>\s</code> occurrences are semantically equivalent to
just <code>s</code>, meaning that the check will succeed for strings
like <code>"smy-markers"</code> instead of <code>" my-marker
"</code>.
Address these shortcomings by either using a regular
expression literal (<code>/(^\s*)my-marker(\s*$)/</code>), or by
adding extra backslashes
(<code>'(^\\s*)my-marker(\\s*$)'</code>).
</p>
</example>
<references>
<li>MDN: <a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#Escaping">Regular expression escape notation</a></li>
<li>MDN: <a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String#Escape_notation">String escape notation</a></li>
</references>
</qhelp>

View File

@@ -0,0 +1,156 @@
/**
* @name Useless regular-expression character escape
* @description Prepending a backslash to an ordinary character in a string
* does not have any effect, and may make regular expressions constructed from this string
* behave unexpectedly.
* @kind problem
* @problem.severity error
* @precision high
* @id js/useless-regexp-character-escape
* @tags correctness
* security
* external/cwe/cwe-20
*/
import javascript
import semmle.javascript.CharacterEscapes::CharacterEscapes
newtype TRegExpPatternMistake =
/**
* A character escape mistake in regular expression string `src`
* for the character `char` at `index` in `rawStringNode`, explained
* by `mistake`.
*/
TIdentityEscapeInStringMistake(
RegExpPatternSource src, string char, string mistake, ASTNode rawStringNode, int index
) {
char = getALikelyRegExpPatternMistake(src, mistake, rawStringNode, index)
} or
/**
* A backslash-escaped 'b' at `index` of `rawStringNode` in the
* regular expression string `src`, indicating intent to use the
* word-boundary assertion '\b'.
*/
TBackspaceInStringMistake(RegExpPatternSource src, ASTNode rawStringNode, int index) {
exists(string raw, string cooked |
exists(StringLiteral lit | lit = rawStringNode |
rawStringNode = src.asExpr() and
raw = lit.getRawValue() and
cooked = lit.getStringValue()
)
or
exists(TemplateElement elem | elem = rawStringNode |
rawStringNode = src.asExpr().(TemplateLiteral).getAnElement() and
raw = elem.getRawValue() and
cooked = elem.getStringValue()
)
|
"b" = getAnEscapedCharacter(raw, index) and
// except if the string is exactly \b
cooked.length() != 1
)
}
/**
* A character escape mistake in a regular expression string.
*
* Implementation note: the main purpose of this class is to associate an
* exact character location with an alert message, in the name of
* user-friendly alerts. The implementation can be simplified
* significantly by only using the enclosing string location as the alert
* location.
*/
class RegExpPatternMistake extends TRegExpPatternMistake {
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
exists(int srcStartcolumn, int srcEndcolumn, int index |
index = getIndex() and
getRawStringNode()
.getLocation()
.hasLocationInfo(filepath, startline, srcStartcolumn, endline, srcEndcolumn)
|
(
if startline = endline
then startcolumn = srcStartcolumn + index - 1 and endcolumn = srcStartcolumn + index
else (
startcolumn = srcStartcolumn and endcolumn = srcEndcolumn
)
)
)
}
/** Gets a textual representation of this element. */
string toString() { result = getMessage() }
abstract ASTNode getRawStringNode();
abstract RegExpPatternSource getSrc();
abstract int getIndex();
abstract string getMessage();
}
/**
* An identity-escaped character that indicates programmer intent to
* do something special in a regular expression.
*/
class IdentityEscapeInStringMistake extends RegExpPatternMistake, TIdentityEscapeInStringMistake {
RegExpPatternSource src;
string char;
string mistake;
int index;
ASTNode rawStringNode;
IdentityEscapeInStringMistake() {
this = TIdentityEscapeInStringMistake(src, char, mistake, rawStringNode, index)
}
override string getMessage() {
result = "'\\" + char + "' is equivalent to just '" + char + "', so the sequence " + mistake
}
override int getIndex() { result = index }
override RegExpPatternSource getSrc() { result = src }
override ASTNode getRawStringNode() { result = rawStringNode }
}
/**
* A backspace as '\b' in a regular expression string, indicating
* programmer intent to use the word-boundary assertion '\b'.
*/
class BackspaceInStringMistake extends RegExpPatternMistake, TBackspaceInStringMistake {
RegExpPatternSource src;
int index;
ASTNode rawStringNode;
BackspaceInStringMistake() { this = TBackspaceInStringMistake(src, rawStringNode, index) }
override string getMessage() { result = "'\\b' is a backspace, and not a word-boundary assertion" }
override int getIndex() { result = index }
override RegExpPatternSource getSrc() { result = src }
override ASTNode getRawStringNode() { result = rawStringNode }
}
from RegExpPatternMistake mistake
select mistake, "The escape sequence " + mistake.getMessage() + " when it is used in a $@.",
mistake.getSrc().getAParse(), "regular expression"

View File

@@ -0,0 +1,2 @@
let regex = new RegExp('(^\s*)my-marker(\s*$)'),
isMyMarkerText = regex.test(text);

View File

@@ -0,0 +1,97 @@
/**
* Provides classes for reasoning about character escapes in literals.
*/
import javascript
module CharacterEscapes {
/**
* Provides sets of characters (as strings) with specific string/regexp characteristics.
*/
private module Sets {
string sharedEscapeChars() { result = "fnrtvux0\\" }
string regexpAssertionChars() { result = "bB" }
string regexpCharClassChars() { result = "cdDpPsSwW" }
string regexpBackreferenceChars() { result = "123456789k" }
string regexpMetaChars() { result = "^$*+?.()|{}[]-" }
}
/**
* Gets the `i`th character of `raw`, which is preceded by an uneven number of backslashes.
*/
bindingset[raw]
string getAnEscapedCharacter(string raw, int i) {
result = raw.regexpFind("(?<=(^|[^\\\\])\\\\(\\\\{2}){0,10}).", _, i)
}
/**
* Holds if `n` is delimited by `delim` and contains `rawStringNode` with the raw string value `raw`.
*/
private predicate hasRawStringAndQuote(
DataFlow::ValueNode n, string delim, ASTNode rawStringNode, string raw
) {
rawStringNode = n.asExpr() and
raw = rawStringNode.(StringLiteral).getRawValue() and
delim = raw.charAt(0)
or
rawStringNode = n.asExpr().(TemplateLiteral).getAnElement() and
raw = rawStringNode.(TemplateElement).getRawValue() and
delim = "`"
or
rawStringNode = n.asExpr() and
raw = rawStringNode.(RegExpLiteral).getRawValue() and
delim = "/"
}
/**
* Gets a character in `n` that is preceded by a single useless backslash.
*
* The character is the `i`th character of `rawStringNode`'s raw string value.
*/
string getAnIdentityEscapedCharacter(DataFlow::Node n, ASTNode rawStringNode, int i) {
exists(string delim, string raw, string additionalEscapeChars |
hasRawStringAndQuote(n, delim, rawStringNode, raw) and
if rawStringNode instanceof RegExpLiteral
then
additionalEscapeChars = Sets::regexpMetaChars() + Sets::regexpAssertionChars() + Sets::regexpCharClassChars() +
Sets::regexpBackreferenceChars()
else additionalEscapeChars = "b"
|
result = getAnEscapedCharacter(raw, i) and
not result = (Sets::sharedEscapeChars() + delim + additionalEscapeChars).charAt(_)
)
}
/**
* Holds if `src` likely contains a regular expression mistake, to be reported by `js/useless-regexp-character-escape`.
*/
predicate hasALikelyRegExpPatternMistake(RegExpPatternSource src) {
exists(getALikelyRegExpPatternMistake(src, _, _, _))
}
/**
* Gets a character in `n` that is preceded by a single useless backslash, resulting in a likely regular expression mistake explained by `mistake`.
*
* The character is the `i`th character of the raw string value of `rawStringNode`.
*/
string getALikelyRegExpPatternMistake(
RegExpPatternSource src, string mistake, ASTNode rawStringNode, int i
) {
result = getAnIdentityEscapedCharacter(src, rawStringNode, i) and
(
result = Sets::regexpAssertionChars().charAt(_) and mistake = "is not an assertion"
or
result = Sets::regexpCharClassChars().charAt(_) and mistake = "is not a character class"
or
result = Sets::regexpBackreferenceChars().charAt(_) and mistake = "is not a backreference"
or
// conservative formulation: we do not know in general if the sequence is enclosed in a character class `[...]`
result = Sets::regexpMetaChars().charAt(_) and
mistake = "may still represent a meta-character"
)
}
}

View File

@@ -14,15 +14,11 @@
| tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | here |
| tst-IncompleteHostnameRegExp.js:39:2:39:33 | /^(http ... om\\/)/g | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:39:2:39:33 | /^(http ... om\\/)/g | here |
| tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | here |
| tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | here |
| tst-IncompleteHostnameRegExp.js:41:41:41:68 | '^https ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | here |
| tst-IncompleteHostnameRegExp.js:42:13:42:62 | '^http[ ... \\/(.+)' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:42:13:42:62 | '^http[ ... \\/(.+)' | here |
| tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | here |
| tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | This regular expression has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here |
| tst-IncompleteHostnameRegExp.js:46:2:46:29 | /^(exam ... e.com)/ | This regular expression has an unescaped '.' before 'dev\|example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:46:2:46:29 | /^(exam ... e.com)/ | here |
| tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here |
| tst-IncompleteHostnameRegExp.js:48:41:48:68 | '^https ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here |
| tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... \\\\.com' | This regular expression has an unescaped '.' before 'example\\.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... \\\\.com' | here |
| tst-IncompleteHostnameRegExp.js:48:41:48:68 | '^https ... \\\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example\\.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... \\\\.com' | here |
| tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | here |
| tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | This regular expression has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | here |
| tst-SemiAnchoredRegExp.js:66:13:66:34 | '^good. ... er.com' | This regular expression has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:66:13:66:34 | '^good. ... er.com' | here |
| tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | This regular expression has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | here |

View File

@@ -0,0 +1,58 @@
| tst-IncompleteHostnameRegExp.js:42:13:42:65 | '^http[ ... \\/(.+)' | The escape sequence '\\/' is equivalent to just '/'. |
| tst-SemiAnchoredRegExp.js:107:2:107:45 | /^((\\+\| ... ?\\d\\d)/ | The escape sequence '\\:' is equivalent to just ':'. |
| tst-escapes.js:19:8:19:11 | "\\ " | The escape sequence '\\ ' is equivalent to just ' '. |
| tst-escapes.js:20:1:20:54 | /\\a\\b\\c ... x\\y\\z"/ | The escape sequence '\\a' is equivalent to just 'a'. |
| tst-escapes.js:20:1:20:54 | /\\a\\b\\c ... x\\y\\z"/ | The escape sequence '\\e' is equivalent to just 'e'. |
| tst-escapes.js:20:1:20:54 | /\\a\\b\\c ... x\\y\\z"/ | The escape sequence '\\g' is equivalent to just 'g'. |
| tst-escapes.js:20:1:20:54 | /\\a\\b\\c ... x\\y\\z"/ | The escape sequence '\\h' is equivalent to just 'h'. |
| tst-escapes.js:20:1:20:54 | /\\a\\b\\c ... x\\y\\z"/ | The escape sequence '\\i' is equivalent to just 'i'. |
| tst-escapes.js:20:1:20:54 | /\\a\\b\\c ... x\\y\\z"/ | The escape sequence '\\j' is equivalent to just 'j'. |
| tst-escapes.js:20:1:20:54 | /\\a\\b\\c ... x\\y\\z"/ | The escape sequence '\\l' is equivalent to just 'l'. |
| tst-escapes.js:20:1:20:54 | /\\a\\b\\c ... x\\y\\z"/ | The escape sequence '\\m' is equivalent to just 'm'. |
| tst-escapes.js:20:1:20:54 | /\\a\\b\\c ... x\\y\\z"/ | The escape sequence '\\o' is equivalent to just 'o'. |
| tst-escapes.js:20:1:20:54 | /\\a\\b\\c ... x\\y\\z"/ | The escape sequence '\\q' is equivalent to just 'q'. |
| tst-escapes.js:20:1:20:54 | /\\a\\b\\c ... x\\y\\z"/ | The escape sequence '\\y' is equivalent to just 'y'. |
| tst-escapes.js:20:1:20:54 | /\\a\\b\\c ... x\\y\\z"/ | The escape sequence '\\z' is equivalent to just 'z'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\A' is equivalent to just 'A'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\C' is equivalent to just 'C'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\E' is equivalent to just 'E'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\F' is equivalent to just 'F'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\G' is equivalent to just 'G'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\H' is equivalent to just 'H'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\I' is equivalent to just 'I'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\J' is equivalent to just 'J'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\K' is equivalent to just 'K'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\L' is equivalent to just 'L'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\M' is equivalent to just 'M'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\N' is equivalent to just 'N'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\O' is equivalent to just 'O'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\Q' is equivalent to just 'Q'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\R' is equivalent to just 'R'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\T' is equivalent to just 'T'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\U' is equivalent to just 'U'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\V' is equivalent to just 'V'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\X' is equivalent to just 'X'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\Y' is equivalent to just 'Y'. |
| tst-escapes.js:21:1:21:52 | /\\A\\B\\C ... \\X\\Y\\Z/ | The escape sequence '\\Z' is equivalent to just 'Z'. |
| tst-escapes.js:22:1:22:28 | /\\`\\1\\2 ... \\0\\-\\=/ | The escape sequence '\\=' is equivalent to just '='. |
| tst-escapes.js:22:1:22:28 | /\\`\\1\\2 ... \\0\\-\\=/ | The escape sequence '\\`' is equivalent to just '`'. |
| tst-escapes.js:23:1:23:28 | /\\~\\!\\@ ... \\)\\_\\+/ | The escape sequence '\\!' is equivalent to just '!'. |
| tst-escapes.js:23:1:23:28 | /\\~\\!\\@ ... \\)\\_\\+/ | The escape sequence '\\#' is equivalent to just '#'. |
| tst-escapes.js:23:1:23:28 | /\\~\\!\\@ ... \\)\\_\\+/ | The escape sequence '\\%' is equivalent to just '%'. |
| tst-escapes.js:23:1:23:28 | /\\~\\!\\@ ... \\)\\_\\+/ | The escape sequence '\\&' is equivalent to just '&'. |
| tst-escapes.js:23:1:23:28 | /\\~\\!\\@ ... \\)\\_\\+/ | The escape sequence '\\@' is equivalent to just '@'. |
| tst-escapes.js:23:1:23:28 | /\\~\\!\\@ ... \\)\\_\\+/ | The escape sequence '\\_' is equivalent to just '_'. |
| tst-escapes.js:23:1:23:28 | /\\~\\!\\@ ... \\)\\_\\+/ | The escape sequence '\\~' is equivalent to just '~'. |
| tst-escapes.js:24:1:24:15 | /\\[\\]\\'\\\\,\\.\\// | The escape sequence '\\'' is equivalent to just '''. |
| tst-escapes.js:25:1:25:16 | /\\{\\}\\"\\\|\\<\\>\\?/ | The escape sequence '\\"' is equivalent to just '"'. |
| tst-escapes.js:25:1:25:16 | /\\{\\}\\"\\\|\\<\\>\\?/ | The escape sequence '\\<' is equivalent to just '<'. |
| tst-escapes.js:25:1:25:16 | /\\{\\}\\"\\\|\\<\\>\\?/ | The escape sequence '\\>' is equivalent to just '>'. |
| tst-escapes.js:26:1:26:4 | /\\ / | The escape sequence '\\ ' is equivalent to just ' '. |
| tst-escapes.js:29:8:29:11 | "\\a" | The escape sequence '\\a' is equivalent to just 'a'. |
| tst-escapes.js:31:8:31:13 | "\\\\\\a" | The escape sequence '\\a' is equivalent to just 'a'. |
| tst-escapes.js:33:8:33:15 | "\\\\\\\\\\a" | The escape sequence '\\a' is equivalent to just 'a'. |
| tst-escapes.js:35:8:35:17 | "\\\\\\\\\\\\\\a" | The escape sequence '\\a' is equivalent to just 'a'. |
| tst-escapes.js:37:8:37:19 | "\\\\\\\\\\\\\\\\\\a" | The escape sequence '\\a' is equivalent to just 'a'. |
| tst-escapes.js:42:1:42:4 | "\\." | The escape sequence '\\.' is equivalent to just '.'. |
| tst-escapes.js:48:8:48:15 | "'\\'\\\\'" | The escape sequence '\\'' is equivalent to just '''. |
| tst-escapes.js:50:8:50:15 | '"\\"\\\\"' | The escape sequence '\\"' is equivalent to just '"'. |

View File

@@ -0,0 +1,11 @@
import javascript
import semmle.javascript.CharacterEscapes::CharacterEscapes
from DataFlow::Node n, string char
where
char = getAnIdentityEscapedCharacter(n, _, _) and
not hasALikelyRegExpPatternMistake(n) and
not char = "\n" // ignore escaped newlines in multiline strings
select n,
"The escape sequence '\\" + char + "' is equivalent to just '" +
char + "'."

View File

@@ -0,0 +1,45 @@
| tst-IncompleteHostnameRegExp.js:55:26:55:27 | '\\.' is equivalent to just '.', so the sequence may still represent a meta-character | The escape sequence '\\.' is equivalent to just '.', so the sequence may still represent a meta-character when it is used in a $@. | tst-IncompleteHostnameRegExp.js:55:13:55:39 | '^http: ... le.com' | regular expression |
| tst-SemiAnchoredRegExp.js:67:19:67:20 | '\\.' is equivalent to just '.', so the sequence may still represent a meta-character | The escape sequence '\\.' is equivalent to just '.', so the sequence may still represent a meta-character when it is used in a $@. | tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | regular expression |
| tst-SemiAnchoredRegExp.js:67:31:67:32 | '\\.' is equivalent to just '.', so the sequence may still represent a meta-character | The escape sequence '\\.' is equivalent to just '.', so the sequence may still represent a meta-character when it is used in a $@. | tst-SemiAnchoredRegExp.js:67:13:67:36 | '^good\\ ... r\\.com' | regular expression |
| tst-SemiAnchoredRegExp.js:69:21:69:22 | '\\.' is equivalent to just '.', so the sequence may still represent a meta-character | The escape sequence '\\.' is equivalent to just '.', so the sequence may still represent a meta-character when it is used in a $@. | tst-SemiAnchoredRegExp.js:69:13:69:40 | '^good\\ ... \\\\.com' | regular expression |
| tst-SemiAnchoredRegExp.js:69:35:69:36 | '\\.' is equivalent to just '.', so the sequence may still represent a meta-character | The escape sequence '\\.' is equivalent to just '.', so the sequence may still represent a meta-character when it is used in a $@. | tst-SemiAnchoredRegExp.js:69:13:69:40 | '^good\\ ... \\\\.com' | regular expression |
| tst-escapes.js:13:11:13:12 | '\\b' is a backspace, and not a word-boundary assertion | The escape sequence '\\b' is a backspace, and not a word-boundary assertion when it is used in a $@. | tst-escapes.js:13:8:13:61 | "\\a\\b\\c ... \\x\\y\\z" | regular expression |
| tst-escapes.js:13:13:13:14 | '\\c' is equivalent to just 'c', so the sequence is not a character class | The escape sequence '\\c' is equivalent to just 'c', so the sequence is not a character class when it is used in a $@. | tst-escapes.js:13:8:13:61 | "\\a\\b\\c ... \\x\\y\\z" | regular expression |
| tst-escapes.js:13:15:13:16 | '\\d' is equivalent to just 'd', so the sequence is not a character class | The escape sequence '\\d' is equivalent to just 'd', so the sequence is not a character class when it is used in a $@. | tst-escapes.js:13:8:13:61 | "\\a\\b\\c ... \\x\\y\\z" | regular expression |
| tst-escapes.js:13:29:13:30 | '\\k' is equivalent to just 'k', so the sequence is not a backreference | The escape sequence '\\k' is equivalent to just 'k', so the sequence is not a backreference when it is used in a $@. | tst-escapes.js:13:8:13:61 | "\\a\\b\\c ... \\x\\y\\z" | regular expression |
| tst-escapes.js:13:39:13:40 | '\\p' is equivalent to just 'p', so the sequence is not a character class | The escape sequence '\\p' is equivalent to just 'p', so the sequence is not a character class when it is used in a $@. | tst-escapes.js:13:8:13:61 | "\\a\\b\\c ... \\x\\y\\z" | regular expression |
| tst-escapes.js:13:45:13:46 | '\\s' is equivalent to just 's', so the sequence is not a character class | The escape sequence '\\s' is equivalent to just 's', so the sequence is not a character class when it is used in a $@. | tst-escapes.js:13:8:13:61 | "\\a\\b\\c ... \\x\\y\\z" | regular expression |
| tst-escapes.js:14:11:14:12 | '\\B' is equivalent to just 'B', so the sequence is not an assertion | The escape sequence '\\B' is equivalent to just 'B', so the sequence is not an assertion when it is used in a $@. | tst-escapes.js:14:8:14:59 | "\\A\\B\\C ... \\X\\Y\\Z" | regular expression |
| tst-escapes.js:14:15:14:16 | '\\D' is equivalent to just 'D', so the sequence is not a character class | The escape sequence '\\D' is equivalent to just 'D', so the sequence is not a character class when it is used in a $@. | tst-escapes.js:14:8:14:59 | "\\A\\B\\C ... \\X\\Y\\Z" | regular expression |
| tst-escapes.js:14:39:14:40 | '\\P' is equivalent to just 'P', so the sequence is not a character class | The escape sequence '\\P' is equivalent to just 'P', so the sequence is not a character class when it is used in a $@. | tst-escapes.js:14:8:14:59 | "\\A\\B\\C ... \\X\\Y\\Z" | regular expression |
| tst-escapes.js:14:45:14:46 | '\\S' is equivalent to just 'S', so the sequence is not a character class | The escape sequence '\\S' is equivalent to just 'S', so the sequence is not a character class when it is used in a $@. | tst-escapes.js:14:8:14:59 | "\\A\\B\\C ... \\X\\Y\\Z" | regular expression |
| tst-escapes.js:15:11:15:12 | '\\1' is equivalent to just '1', so the sequence is not a backreference | The escape sequence '\\1' is equivalent to just '1', so the sequence is not a backreference when it is used in a $@. | tst-escapes.js:15:8:15:35 | "\\`\\1\\2 ... \\0\\-\\=" | regular expression |
| tst-escapes.js:15:13:15:14 | '\\2' is equivalent to just '2', so the sequence is not a backreference | The escape sequence '\\2' is equivalent to just '2', so the sequence is not a backreference when it is used in a $@. | tst-escapes.js:15:8:15:35 | "\\`\\1\\2 ... \\0\\-\\=" | regular expression |
| tst-escapes.js:15:15:15:16 | '\\3' is equivalent to just '3', so the sequence is not a backreference | The escape sequence '\\3' is equivalent to just '3', so the sequence is not a backreference when it is used in a $@. | tst-escapes.js:15:8:15:35 | "\\`\\1\\2 ... \\0\\-\\=" | regular expression |
| tst-escapes.js:15:17:15:18 | '\\4' is equivalent to just '4', so the sequence is not a backreference | The escape sequence '\\4' is equivalent to just '4', so the sequence is not a backreference when it is used in a $@. | tst-escapes.js:15:8:15:35 | "\\`\\1\\2 ... \\0\\-\\=" | regular expression |
| tst-escapes.js:15:19:15:20 | '\\5' is equivalent to just '5', so the sequence is not a backreference | The escape sequence '\\5' is equivalent to just '5', so the sequence is not a backreference when it is used in a $@. | tst-escapes.js:15:8:15:35 | "\\`\\1\\2 ... \\0\\-\\=" | regular expression |
| tst-escapes.js:15:21:15:22 | '\\6' is equivalent to just '6', so the sequence is not a backreference | The escape sequence '\\6' is equivalent to just '6', so the sequence is not a backreference when it is used in a $@. | tst-escapes.js:15:8:15:35 | "\\`\\1\\2 ... \\0\\-\\=" | regular expression |
| tst-escapes.js:15:23:15:24 | '\\7' is equivalent to just '7', so the sequence is not a backreference | The escape sequence '\\7' is equivalent to just '7', so the sequence is not a backreference when it is used in a $@. | tst-escapes.js:15:8:15:35 | "\\`\\1\\2 ... \\0\\-\\=" | regular expression |
| tst-escapes.js:15:25:15:26 | '\\8' is equivalent to just '8', so the sequence is not a backreference | The escape sequence '\\8' is equivalent to just '8', so the sequence is not a backreference when it is used in a $@. | tst-escapes.js:15:8:15:35 | "\\`\\1\\2 ... \\0\\-\\=" | regular expression |
| tst-escapes.js:15:27:15:28 | '\\9' is equivalent to just '9', so the sequence is not a backreference | The escape sequence '\\9' is equivalent to just '9', so the sequence is not a backreference when it is used in a $@. | tst-escapes.js:15:8:15:35 | "\\`\\1\\2 ... \\0\\-\\=" | regular expression |
| tst-escapes.js:15:31:15:32 | '\\-' is equivalent to just '-', so the sequence may still represent a meta-character | The escape sequence '\\-' is equivalent to just '-', so the sequence may still represent a meta-character when it is used in a $@. | tst-escapes.js:15:8:15:35 | "\\`\\1\\2 ... \\0\\-\\=" | regular expression |
| tst-escapes.js:16:17:16:18 | '\\$' is equivalent to just '$', so the sequence may still represent a meta-character | The escape sequence '\\$' is equivalent to just '$', so the sequence may still represent a meta-character when it is used in a $@. | tst-escapes.js:16:8:16:35 | "\\~\\!\\@ ... \\)\\_\\+" | regular expression |
| tst-escapes.js:16:21:16:22 | '\\^' is equivalent to just '^', so the sequence may still represent a meta-character | The escape sequence '\\^' is equivalent to just '^', so the sequence may still represent a meta-character when it is used in a $@. | tst-escapes.js:16:8:16:35 | "\\~\\!\\@ ... \\)\\_\\+" | regular expression |
| tst-escapes.js:16:25:16:26 | '\\*' is equivalent to just '*', so the sequence may still represent a meta-character | The escape sequence '\\*' is equivalent to just '*', so the sequence may still represent a meta-character when it is used in a $@. | tst-escapes.js:16:8:16:35 | "\\~\\!\\@ ... \\)\\_\\+" | regular expression |
| tst-escapes.js:16:27:16:28 | '\\(' is equivalent to just '(', so the sequence may still represent a meta-character | The escape sequence '\\(' is equivalent to just '(', so the sequence may still represent a meta-character when it is used in a $@. | tst-escapes.js:16:8:16:35 | "\\~\\!\\@ ... \\)\\_\\+" | regular expression |
| tst-escapes.js:16:29:16:30 | '\\)' is equivalent to just ')', so the sequence may still represent a meta-character | The escape sequence '\\)' is equivalent to just ')', so the sequence may still represent a meta-character when it is used in a $@. | tst-escapes.js:16:8:16:35 | "\\~\\!\\@ ... \\)\\_\\+" | regular expression |
| tst-escapes.js:16:33:16:34 | '\\+' is equivalent to just '+', so the sequence may still represent a meta-character | The escape sequence '\\+' is equivalent to just '+', so the sequence may still represent a meta-character when it is used in a $@. | tst-escapes.js:16:8:16:35 | "\\~\\!\\@ ... \\)\\_\\+" | regular expression |
| tst-escapes.js:17:9:17:10 | '\\[' is equivalent to just '[', so the sequence may still represent a meta-character | The escape sequence '\\[' is equivalent to just '[', so the sequence may still represent a meta-character when it is used in a $@. | tst-escapes.js:17:8:17:22 | "\\[\\]\\'\\\\,\\.\\/" | regular expression |
| tst-escapes.js:17:11:17:12 | '\\]' is equivalent to just ']', so the sequence may still represent a meta-character | The escape sequence '\\]' is equivalent to just ']', so the sequence may still represent a meta-character when it is used in a $@. | tst-escapes.js:17:8:17:22 | "\\[\\]\\'\\\\,\\.\\/" | regular expression |
| tst-escapes.js:17:18:17:19 | '\\.' is equivalent to just '.', so the sequence may still represent a meta-character | The escape sequence '\\.' is equivalent to just '.', so the sequence may still represent a meta-character when it is used in a $@. | tst-escapes.js:17:8:17:22 | "\\[\\]\\'\\\\,\\.\\/" | regular expression |
| tst-escapes.js:18:9:18:10 | '\\{' is equivalent to just '{', so the sequence may still represent a meta-character | The escape sequence '\\{' is equivalent to just '{', so the sequence may still represent a meta-character when it is used in a $@. | tst-escapes.js:18:8:18:25 | "\\{\\}\\\\\\"\\\|\\<\\>\\?" | regular expression |
| tst-escapes.js:18:11:18:12 | '\\}' is equivalent to just '}', so the sequence may still represent a meta-character | The escape sequence '\\}' is equivalent to just '}', so the sequence may still represent a meta-character when it is used in a $@. | tst-escapes.js:18:8:18:25 | "\\{\\}\\\\\\"\\\|\\<\\>\\?" | regular expression |
| tst-escapes.js:18:17:18:18 | '\\\|' is equivalent to just '\|', so the sequence may still represent a meta-character | The escape sequence '\\\|' is equivalent to just '\|', so the sequence may still represent a meta-character when it is used in a $@. | tst-escapes.js:18:8:18:25 | "\\{\\}\\\\\\"\\\|\\<\\>\\?" | regular expression |
| tst-escapes.js:18:23:18:24 | '\\?' is equivalent to just '?', so the sequence may still represent a meta-character | The escape sequence '\\?' is equivalent to just '?', so the sequence may still represent a meta-character when it is used in a $@. | tst-escapes.js:18:8:18:25 | "\\{\\}\\\\\\"\\\|\\<\\>\\?" | regular expression |
| tst-escapes.js:41:9:41:10 | '\\.' is equivalent to just '.', so the sequence may still represent a meta-character | The escape sequence '\\.' is equivalent to just '.', so the sequence may still represent a meta-character when it is used in a $@. | tst-escapes.js:41:8:41:11 | "\\." | regular expression |
| tst-escapes.js:56:10:56:11 | '\\.' is equivalent to just '.', so the sequence may still represent a meta-character | The escape sequence '\\.' is equivalent to just '.', so the sequence may still represent a meta-character when it is used in a $@. | tst-escapes.js:56:8:56:13 | "[\\.]" | regular expression |
| tst-escapes.js:57:12:57:13 | '\\.' is equivalent to just '.', so the sequence may still represent a meta-character | The escape sequence '\\.' is equivalent to just '.', so the sequence may still represent a meta-character when it is used in a $@. | tst-escapes.js:57:8:57:17 | "a[b\\.c]d" | regular expression |
| tst-escapes.js:60:9:60:10 | '\\k' is equivalent to just 'k', so the sequence is not a backreference | The escape sequence '\\k' is equivalent to just 'k', so the sequence is not a backreference when it is used in a $@. | tst-escapes.js:60:8:60:19 | `\\k\\\\k\\d\\\\d` | regular expression |
| tst-escapes.js:60:14:60:15 | '\\d' is equivalent to just 'd', so the sequence is not a character class | The escape sequence '\\d' is equivalent to just 'd', so the sequence is not a character class when it is used in a $@. | tst-escapes.js:60:8:60:19 | `\\k\\\\k\\d\\\\d` | regular expression |
| tst-escapes.js:61:9:61:10 | '\\k' is equivalent to just 'k', so the sequence is not a backreference | The escape sequence '\\k' is equivalent to just 'k', so the sequence is not a backreference when it is used in a $@. | tst-escapes.js:61:8:61:25 | `\\k\\\\k${foo}\\d\\\\d` | regular expression |
| tst-escapes.js:61:20:61:21 | '\\d' is equivalent to just 'd', so the sequence is not a character class | The escape sequence '\\d' is equivalent to just 'd', so the sequence is not a character class when it is used in a $@. | tst-escapes.js:61:8:61:25 | `\\k\\\\k${foo}\\d\\\\d` | regular expression |

View File

@@ -0,0 +1 @@
Security/CWE-020/UselessRegExpCharacterEscape.ql

View File

@@ -38,17 +38,19 @@
/^(http|https):\/\/www.example.com\/p\/f\//; // NOT OK
/^(http:\/\/sub.example.com\/)/g; // NOT OK
/^https?:\/\/api.example.com/; // NOT OK
new RegExp('^http://localhost:8000|' + '^https?://.+\.example\.com'); // NOT OK
new RegExp('^http[s]?:\/\/?sub1\.sub2\.example\.com\/f\/(.+)'); // NOT OK
new RegExp('^http://localhost:8000|' + '^https?://.+\\.example\\.com'); // NOT OK
new RegExp('^http[s]?:\/\/?sub1\\.sub2\\.example\\.com\/f\/(.+)'); // NOT OK
/^https:\/\/[a-z]*.example.com$/; // NOT OK
RegExp('^protos?://(localhost|.+.example.net|.+.example-a.com|.+.example-b.com|.+.example.internal)'); // NOT OK
/^(example.dev|example.com)/; // OK, but still flagged
new RegExp('^http://localhost:8000|' + '^https?://.+\.example\.com'); // NOT OK
new RegExp('^http://localhost:8000|' + '^https?://.+.example\\.com'); // NOT OK
var primary = 'example.com$';
new RegExp('test.' + primary); // NOT OK, but not detected
new RegExp('test.' + 'example.com$'); // NOT OK
new RegExp('^http://test\.example.com'); // NOT OK, but flagged by js/useless-regexp-character-escape
});

View File

@@ -0,0 +1,61 @@
// (the lines of this file are not annotated with alert expectations)
// no backslashes
RegExp("abcdefghijklmnopqrstuvxyz");
RegExp("ABCDEFGHIJKLMNOPQRSTUVXYZ");
RegExp("`1234567890-=");
RegExp("~!@#$%^&*()_+");
RegExp("[]'\\,./");
RegExp("{}\"|<>?");
RegExp(" ");
// backslashes
RegExp("\a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\\u\v\\x\y\z");
RegExp("\A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\X\Y\Z");
RegExp("\`\1\2\3\4\5\6\7\8\9\0\-\=");
RegExp("\~\!\@\#\$\%\^\&\*\(\)\_\+");
RegExp("\[\]\'\\,\.\/");
RegExp("\{\}\\\"\|\<\>\?");
RegExp("\ ");
/\a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\\x\y\z"/;
/\A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\X\Y\Z/;
/\`\1\2\3\4\5\6\7\8\9\0\-\=/;
/\~\!\@\#\$\%\^\&\*\(\)\_\+/;
/\[\]\'\\,\.\//;
/\{\}\"\|\<\>\?/;
/\ /;
// many backslashes
RegExp("\a");
RegExp("\\a");
RegExp("\\\a");
RegExp("\\\\a");
RegExp("\\\\\a");
RegExp("\\\\\\a");
RegExp("\\\\\\\a");
RegExp("\\\\\\\\a");
RegExp("\\\\\\\\\a");
RegExp("\\\\\\\\\\a");
// string vs regexp
RegExp("\.")
"\.";
// other
/\/\\\/\\\\\//;
RegExp("\uaaaa\uAAAA\uFFFF\u1000");
RegExp("\xaaaa\xAAAA\xFFFF\x1000");
RegExp("'\'\\'");
RegExp("\"\\\"");
RegExp('"\"\\"'),
RegExp('\'\\\''),
RegExp("^\\\\Q\\\\E$");
RegExp("/\\*");
RegExp("/\
");
RegExp("[\.]");
RegExp("a[b\.c]d");
RegExp("\b");
RegExp(`\b`);
RegExp(`\k\\k\d\\d`)
RegExp(`\k\\k${foo}\d\\d`)