Merge pull request #416 from smowton/smowton/admin/cherrypick-suspicious-char-fix

Cherry-pick #395 (suspicious-char-in-regex FP fix) onto rc/1.26
2026-03-04 06:36:46 +01:00 · 2020-12-01 11:45:14 +00:00
parent ed6804859a b2ae6550ec
commit 63079b3e9d
9 changed files with 76 additions and 18 deletions
--- a/change-notes/2020-11-09-suspicious-character-in-regexp-improvement.md
+++ b/change-notes/2020-11-09-suspicious-character-in-regexp-improvement.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* The query "Suspicious characters in a regular expression" has been improved to recognize raw string literals, which should lead to fewer false positives.
--- a/ql/src/Security/CWE-020/SuspiciousCharacterInRegexp.go
+++ b/ql/src/Security/CWE-020/SuspiciousCharacterInRegexp.go
@@ -9,5 +9,6 @@ func broken(hostNames []byte) string {
 	} else {
 		// This will be reached even if hostNames is exactly "forbidden.host.org",
 		// because the literal backspace is not matched
+		return ""
 	}
 }
--- a/ql/src/Security/CWE-020/SuspiciousCharacterInRegexp.ql
+++ b/ql/src/Security/CWE-020/SuspiciousCharacterInRegexp.ql
@@ -20,13 +20,10 @@ import DataFlow::PathGraph
 */
 predicate containsEscapedCharacter(DataFlow::Node source, string character) {
  character in ["a", "b"] and
-  exists(
+  exists(StringLit s | s = source.asExpr() |
    // Search for `character` preceded by an odd number of backslashes:
-    source
-        .asExpr()
-        .(BasicLit)
-        .getText()
-        .regexpFind("(?<=(^|[^\\\\])\\\\(\\\\{2}){0,10})" + character, _, _)
+    exists(s.getText().regexpFind("(?<=(^|[^\\\\])\\\\(\\\\{2}){0,10})" + character, _, _)) and
+    not s.isRaw()
  )
 }

--- a/ql/src/Security/CWE-020/SuspiciousCharacterInRegexpGood.go
+++ b/ql/src/Security/CWE-020/SuspiciousCharacterInRegexpGood.go
@@ -3,11 +3,12 @@ package main
 import "regexp"

 func fixed(hostNames []byte) string {
-	var hostRe = regexp.MustCompile("\\bforbidden.host.org")
+	var hostRe = regexp.MustCompile(`\bforbidden.host.org`)
 	if hostRe.Match(hostNames) {
 		return "Must not target forbidden.host.org"
 	} else {
 		// hostNames definitely doesn't contain a word "forbidden.host.org", as "\\b"
 		// is the start-of-word anchor, not a literal backspace.
+		return ""
 	}
 }
--- a/ql/src/semmle/go/Expr.qll
+++ b/ql/src/semmle/go/Expr.qll
@@ -343,6 +343,8 @@ class RuneLit = CharLit;
 */
 class StringLit extends @stringlit, BasicLit {
  override string getAPrimaryQlClass() { result = "StringLit" }
+
+  predicate isRaw() { this.getText().matches("`%`") }
 }

 /**
--- a/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/SuspiciousCharacterInRegexp.expected
+++ b/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/SuspiciousCharacterInRegexp.expected
@@ -1,11 +1,25 @@
 edges
 nodes
-| test.go:8:21:8:34 | "hello\\aworld" | semmle.label | "hello\\aworld" |
-| test.go:9:21:9:36 | "hello\\\\\\aworld" | semmle.label | "hello\\\\\\aworld" |
-| test.go:10:21:10:34 | "hello\\bworld" | semmle.label | "hello\\bworld" |
-| test.go:11:21:11:36 | "hello\\\\\\bworld" | semmle.label | "hello\\\\\\bworld" |
+| SuspiciousCharacterInRegexp.go:6:34:6:55 | "\\bforbidden.host.org" | semmle.label | "\\bforbidden.host.org" |
+| test.go:7:21:7:24 | "\\a" | semmle.label | "\\a" |
+| test.go:9:21:9:26 | "\\\\\\a" | semmle.label | "\\\\\\a" |
+| test.go:10:21:10:27 | "x\\\\\\a" | semmle.label | "x\\\\\\a" |
+| test.go:12:21:12:28 | "\\\\\\\\\\a" | semmle.label | "\\\\\\\\\\a" |
+| test.go:14:21:14:30 | "\\\\\\\\\\\\\\a" | semmle.label | "\\\\\\\\\\\\\\a" |
+| test.go:16:21:16:32 | "\\\\\\\\\\\\\\\\\\a" | semmle.label | "\\\\\\\\\\\\\\\\\\a" |
+| test.go:20:21:20:34 | "hello\\aworld" | semmle.label | "hello\\aworld" |
+| test.go:21:21:21:36 | "hello\\\\\\aworld" | semmle.label | "hello\\\\\\aworld" |
+| test.go:22:21:22:34 | "hello\\bworld" | semmle.label | "hello\\bworld" |
+| test.go:23:21:23:36 | "hello\\\\\\bworld" | semmle.label | "hello\\\\\\bworld" |
 #select
-| test.go:8:21:8:34 | "hello\\aworld" | test.go:8:21:8:34 | "hello\\aworld" | test.go:8:21:8:34 | "hello\\aworld" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:8:21:8:34 | "hello\\aworld" | A regular expression | test.go:8:21:8:34 | "hello\\aworld" | here |
-| test.go:9:21:9:36 | "hello\\\\\\aworld" | test.go:9:21:9:36 | "hello\\\\\\aworld" | test.go:9:21:9:36 | "hello\\\\\\aworld" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:9:21:9:36 | "hello\\\\\\aworld" | A regular expression | test.go:9:21:9:36 | "hello\\\\\\aworld" | here |
-| test.go:10:21:10:34 | "hello\\bworld" | test.go:10:21:10:34 | "hello\\bworld" | test.go:10:21:10:34 | "hello\\bworld" | $@ used $@ contains a literal backspace \\b; did you mean \\\\b, a word boundary? | test.go:10:21:10:34 | "hello\\bworld" | A regular expression | test.go:10:21:10:34 | "hello\\bworld" | here |
-| test.go:11:21:11:36 | "hello\\\\\\bworld" | test.go:11:21:11:36 | "hello\\\\\\bworld" | test.go:11:21:11:36 | "hello\\\\\\bworld" | $@ used $@ contains a literal backspace \\b; did you mean \\\\b, a word boundary? | test.go:11:21:11:36 | "hello\\\\\\bworld" | A regular expression | test.go:11:21:11:36 | "hello\\\\\\bworld" | here |
+| SuspiciousCharacterInRegexp.go:6:34:6:55 | "\\bforbidden.host.org" | SuspiciousCharacterInRegexp.go:6:34:6:55 | "\\bforbidden.host.org" | SuspiciousCharacterInRegexp.go:6:34:6:55 | "\\bforbidden.host.org" | $@ used $@ contains a literal backspace \\b; did you mean \\\\b, a word boundary? | SuspiciousCharacterInRegexp.go:6:34:6:55 | "\\bforbidden.host.org" | A regular expression | SuspiciousCharacterInRegexp.go:6:34:6:55 | "\\bforbidden.host.org" | here |
+| test.go:7:21:7:24 | "\\a" | test.go:7:21:7:24 | "\\a" | test.go:7:21:7:24 | "\\a" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:7:21:7:24 | "\\a" | A regular expression | test.go:7:21:7:24 | "\\a" | here |
+| test.go:9:21:9:26 | "\\\\\\a" | test.go:9:21:9:26 | "\\\\\\a" | test.go:9:21:9:26 | "\\\\\\a" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:9:21:9:26 | "\\\\\\a" | A regular expression | test.go:9:21:9:26 | "\\\\\\a" | here |
+| test.go:10:21:10:27 | "x\\\\\\a" | test.go:10:21:10:27 | "x\\\\\\a" | test.go:10:21:10:27 | "x\\\\\\a" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:10:21:10:27 | "x\\\\\\a" | A regular expression | test.go:10:21:10:27 | "x\\\\\\a" | here |
+| test.go:12:21:12:28 | "\\\\\\\\\\a" | test.go:12:21:12:28 | "\\\\\\\\\\a" | test.go:12:21:12:28 | "\\\\\\\\\\a" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:12:21:12:28 | "\\\\\\\\\\a" | A regular expression | test.go:12:21:12:28 | "\\\\\\\\\\a" | here |
+| test.go:14:21:14:30 | "\\\\\\\\\\\\\\a" | test.go:14:21:14:30 | "\\\\\\\\\\\\\\a" | test.go:14:21:14:30 | "\\\\\\\\\\\\\\a" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:14:21:14:30 | "\\\\\\\\\\\\\\a" | A regular expression | test.go:14:21:14:30 | "\\\\\\\\\\\\\\a" | here |
+| test.go:16:21:16:32 | "\\\\\\\\\\\\\\\\\\a" | test.go:16:21:16:32 | "\\\\\\\\\\\\\\\\\\a" | test.go:16:21:16:32 | "\\\\\\\\\\\\\\\\\\a" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:16:21:16:32 | "\\\\\\\\\\\\\\\\\\a" | A regular expression | test.go:16:21:16:32 | "\\\\\\\\\\\\\\\\\\a" | here |
+| test.go:20:21:20:34 | "hello\\aworld" | test.go:20:21:20:34 | "hello\\aworld" | test.go:20:21:20:34 | "hello\\aworld" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:20:21:20:34 | "hello\\aworld" | A regular expression | test.go:20:21:20:34 | "hello\\aworld" | here |
+| test.go:21:21:21:36 | "hello\\\\\\aworld" | test.go:21:21:21:36 | "hello\\\\\\aworld" | test.go:21:21:21:36 | "hello\\\\\\aworld" | $@ used $@ contains the bell character \\a; did you mean \\\\a, the Vim alphabetic character class (use [[:alpha:]] instead) or \\\\A, the beginning of text? | test.go:21:21:21:36 | "hello\\\\\\aworld" | A regular expression | test.go:21:21:21:36 | "hello\\\\\\aworld" | here |
+| test.go:22:21:22:34 | "hello\\bworld" | test.go:22:21:22:34 | "hello\\bworld" | test.go:22:21:22:34 | "hello\\bworld" | $@ used $@ contains a literal backspace \\b; did you mean \\\\b, a word boundary? | test.go:22:21:22:34 | "hello\\bworld" | A regular expression | test.go:22:21:22:34 | "hello\\bworld" | here |
+| test.go:23:21:23:36 | "hello\\\\\\bworld" | test.go:23:21:23:36 | "hello\\\\\\bworld" | test.go:23:21:23:36 | "hello\\\\\\bworld" | $@ used $@ contains a literal backspace \\b; did you mean \\\\b, a word boundary? | test.go:23:21:23:36 | "hello\\\\\\bworld" | A regular expression | test.go:23:21:23:36 | "hello\\\\\\bworld" | here |
--- a/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/SuspiciousCharacterInRegexp.go
+++ b/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/SuspiciousCharacterInRegexp.go
@@ -0,0 +1,14 @@
+package main
+
+import "regexp"
+
+func broken(hostNames []byte) string {
+	var hostRe = regexp.MustCompile("\bforbidden.host.org")
+	if hostRe.Match(hostNames) {
+		return "Must not target forbidden.host.org"
+	} else {
+		// This will be reached even if hostNames is exactly "forbidden.host.org",
+		// because "\b" in an interpreted string is a literal backspace, which the input lacks
+		return ""
+	}
+}
--- a/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/SuspiciousCharacterInRegexpGood.go
+++ b/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/SuspiciousCharacterInRegexpGood.go
@@ -0,0 +1,14 @@
+package main
+
+import "regexp"
+
+func fixed(hostNames []byte) string {
+	var hostRe = regexp.MustCompile(`\bforbidden.host.org`)
+	if hostRe.Match(hostNames) {
+		return "Must not target forbidden.host.org"
+	} else {
+		// hostNames definitely doesn't contain a word "forbidden.host.org": in a raw string
+		// literal, `\b` reaches the regexp engine as a word boundary, not a literal backspace.
+		return ""
+	}
+}
--- a/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/test.go
+++ b/ql/test/query-tests/Security/CWE-020/SuspiciousCharacterInRegexp/test.go
@@ -1,8 +1,20 @@
-package test
+package main

 import "regexp"

-func test() {
+func main() {
+	// many backslashes
+	regexp.MustCompile("\a") // BAD
+	regexp.MustCompile("\\a")
+	regexp.MustCompile("\\\a")  // BAD
+	regexp.MustCompile("x\\\a") // BAD
+	regexp.MustCompile("\\\\a")
+	regexp.MustCompile("\\\\\a") // BAD
+	regexp.MustCompile("\\\\\\a")
+	regexp.MustCompile("\\\\\\\a") // BAD
+	regexp.MustCompile("\\\\\\\\a")
+	regexp.MustCompile("\\\\\\\\\a") // BAD
+	regexp.MustCompile("\\\\\\\\\\a")

 	// BAD: probably a mistake:
 	regexp.MustCompile("hello\aworld")
@@ -20,5 +32,6 @@ func test() {
 	regexp.MustCompile("hello\010world")
 	regexp.MustCompile("hello\u0008world")
 	regexp.MustCompile("hello\U00000008world")
-
+	// GOOD: use of a raw string literal
+	regexp.MustCompile(`hello\b\sworld`)
 }