From 09d41952dc5d6baaa628cee3acc094b08d318a4a Mon Sep 17 00:00:00 2001 From: Sauyon Lee Date: Sun, 8 Nov 2020 09:20:07 -0800 Subject: [PATCH] SuspiciousCharacterInRegexp: Add fix for raw string literals --- ...9-suspicious-character-in-regexp-improvement.md | 2 ++ .../CWE-020/SuspiciousCharacterInRegexp.go | 1 + .../CWE-020/SuspiciousCharacterInRegexp.ql | 9 +++------ .../CWE-020/SuspiciousCharacterInRegexpGood.go | 3 ++- .../SuspiciousCharacterInRegexp.expected | 2 ++ .../SuspiciousCharacterInRegexp.go | 14 ++++++++++++++ .../SuspiciousCharacterInRegexpGood.go | 14 ++++++++++++++ .../CWE-020/SuspiciousCharacterInRegexp/test.go | 7 ++++--- 8 files changed, 42 insertions(+), 10 deletions(-) create mode 100644 change-notes/2020-11-09-suspicious-character-in-regexp-improvement.md create mode 100644 ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/SuspiciousCharacterInRegexp.go create mode 100644 ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/SuspiciousCharacterInRegexpGood.go diff --git a/change-notes/2020-11-09-suspicious-character-in-regexp-improvement.md b/change-notes/2020-11-09-suspicious-character-in-regexp-improvement.md new file mode 100644 index 00000000000..3e5c9fce574 --- /dev/null +++ b/change-notes/2020-11-09-suspicious-character-in-regexp-improvement.md @@ -0,0 +1,2 @@ +lgtm,codescanning +* The query "Suspicious characters in a regular expression" has been improved to recognize raw string literals, which should lead to fewer false positives. diff --git a/ql/src/Security/CWE-020/SuspiciousCharacterInRegexp.go b/ql/src/Security/CWE-020/SuspiciousCharacterInRegexp.go index 0a4b8a90794..d9f2199fd52 100644 --- a/ql/src/Security/CWE-020/SuspiciousCharacterInRegexp.go +++ b/ql/src/Security/CWE-020/SuspiciousCharacterInRegexp.go @@ -9,5 +9,6 @@ func broken(hostNames []byte) string { } else { // This will be reached even if hostNames is exactly "forbidden.host.org", // because the literal backspace is not matched + return "" } } diff --git a/ql/src/Security/CWE-020/SuspiciousCharacterInRegexp.ql b/ql/src/Security/CWE-020/SuspiciousCharacterInRegexp.ql index d39d667c22a..72afa79b758 100644 --- a/ql/src/Security/CWE-020/SuspiciousCharacterInRegexp.ql +++ b/ql/src/Security/CWE-020/SuspiciousCharacterInRegexp.ql @@ -20,13 +20,10 @@ import DataFlow::PathGraph */ predicate containsEscapedCharacter(DataFlow::Node source, string character) { character in ["a", "b"] and - exists( + exists(StringLit s | s = source.asExpr() | // Search for `character` preceded by an odd number of backslashes: - source - .asExpr() - .(BasicLit) - .getText() - .regexpFind("(?<=(^|[^\\\\])\\\\(\\\\{2}){0,10})" + character, _, _) + exists(s.getText().regexpFind("(?<=(^|[^\\\\])\\\\(\\\\{2}){0,10})" + character, _, _)) and + not s.isRaw() ) } diff --git a/ql/src/Security/CWE-020/SuspiciousCharacterInRegexpGood.go b/ql/src/Security/CWE-020/SuspiciousCharacterInRegexpGood.go index a311e66af8d..9af991d1666 100644 --- a/ql/src/Security/CWE-020/SuspiciousCharacterInRegexpGood.go +++ b/ql/src/Security/CWE-020/SuspiciousCharacterInRegexpGood.go @@ -3,11 +3,12 @@ package main import "regexp" func fixed(hostNames []byte) string { - var hostRe = regexp.MustCompile("\\bforbidden.host.org") + var hostRe = regexp.MustCompile(`\bforbidden.host.org`) if hostRe.Match(hostNames) { return "Must not target forbidden.host.org" } else { // hostNames definitely doesn't contain a word "forbidden.host.org", as "\\b" // is the start-of-word anchor, not a literal backspace. + return "" } } diff --git a/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/SuspiciousCharacterInRegexp.expected b/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/SuspiciousCharacterInRegexp.expected index 81f5d768b0a..0d6d90255ee 100644 --- a/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/SuspiciousCharacterInRegexp.expected +++ b/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/SuspiciousCharacterInRegexp.expected @@ -1,10 +1,12 @@ edges nodes +| SuspiciousCharacterInRegexp.go:6:34:6:55 | "\\bforbidden.host.org" | semmle.label | "\\bforbidden.host.org" | | test.go:8:21:8:34 | "hello\\aworld" | semmle.label | "hello\\aworld" | | test.go:9:21:9:36 | "hello\\\\\\aworld" | semmle.label | "hello\\\\\\aworld" | | test.go:10:21:10:34 | "hello\\bworld" | semmle.label | "hello\\bworld" | | test.go:11:21:11:36 | "hello\\\\\\bworld" | semmle.label | "hello\\\\\\bworld" | #select +| SuspiciousCharacterInRegexp.go:6:34:6:55 | "\\bforbidden.host.org" | SuspiciousCharacterInRegexp.go:6:34:6:55 | "\\bforbidden.host.org" | SuspiciousCharacterInRegexp.go:6:34:6:55 | "\\bforbidden.host.org" | $@ used $@ contains a literal backspace \\b; did you mean \\\\b, a word boundary? | SuspiciousCharacterInRegexp.go:6:34:6:55 | "\\bforbidden.host.org" | A regular expression | SuspiciousCharacterInRegexp.go:6:34:6:55 | "\\bforbidden.host.org" | here | | test.go:8:21:8:34 | "hello\\aworld" | test.go:8:21:8:34 | "hello\\aworld" | test.go:8:21:8:34 | "hello\\aworld" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:8:21:8:34 | "hello\\aworld" | A regular expression | test.go:8:21:8:34 | "hello\\aworld" | here | | test.go:9:21:9:36 | "hello\\\\\\aworld" | test.go:9:21:9:36 | "hello\\\\\\aworld" | test.go:9:21:9:36 | "hello\\\\\\aworld" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:9:21:9:36 | "hello\\\\\\aworld" | A regular expression | test.go:9:21:9:36 | "hello\\\\\\aworld" | here | | test.go:10:21:10:34 | "hello\\bworld" | test.go:10:21:10:34 | "hello\\bworld" | test.go:10:21:10:34 | "hello\\bworld" | $@ used $@ contains a literal backspace \\b; did you mean \\\\b, a word boundary? | test.go:10:21:10:34 | "hello\\bworld" | A regular expression | test.go:10:21:10:34 | "hello\\bworld" | here | diff --git a/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/SuspiciousCharacterInRegexp.go b/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/SuspiciousCharacterInRegexp.go new file mode 100644 index 00000000000..d9f2199fd52 --- /dev/null +++ b/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/SuspiciousCharacterInRegexp.go @@ -0,0 +1,14 @@ +package main + +import "regexp" + +func broken(hostNames []byte) string { + var hostRe = regexp.MustCompile("\bforbidden.host.org") + if hostRe.Match(hostNames) { + return "Must not target forbidden.host.org" + } else { + // This will be reached even if hostNames is exactly "forbidden.host.org", + // because the literal backspace is not matched + return "" + } +} diff --git a/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/SuspiciousCharacterInRegexpGood.go b/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/SuspiciousCharacterInRegexpGood.go new file mode 100644 index 00000000000..9af991d1666 --- /dev/null +++ b/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/SuspiciousCharacterInRegexpGood.go @@ -0,0 +1,14 @@ +package main + +import "regexp" + +func fixed(hostNames []byte) string { + var hostRe = regexp.MustCompile(`\bforbidden.host.org`) + if hostRe.Match(hostNames) { + return "Must not target forbidden.host.org" + } else { + // hostNames definitely doesn't contain a word "forbidden.host.org", as "\\b" + // is the start-of-word anchor, not a literal backspace. + return "" + } +} diff --git a/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/test.go b/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/test.go index 23eec481396..5e8d8c1576a 100644 --- a/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/test.go +++ b/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/test.go @@ -1,8 +1,8 @@ -package test +package main import "regexp" -func test() { +func main() { // BAD: probably a mistake: regexp.MustCompile("hello\aworld") @@ -20,5 +20,6 @@ func test() { regexp.MustCompile("hello\010world") regexp.MustCompile("hello\u0008world") regexp.MustCompile("hello\U00000008world") - + // GOOD: use of a raw string literal + regexp.MustCompile(`hello\b\sworld`) }