Support more regexp anchors

This commit is contained in:
Tom Payne
2020-12-23 13:52:12 +01:00
parent 5647a47bd4
commit 9bbdf86487
3 changed files with 8 additions and 1 deletion

View File

@@ -53,7 +53,7 @@ predicate isInterestingUnanchoredRegexpString(string re, string msg) {
// a substring sequence of a protocol and subdomains, perhaps with some regex characters mixed in, followed by a known TLD
re.regexpMatch("(?i)[():|?a-z0-9-\\\\./]+[.]" + commonTLD() + "([/#?():]\\S*)?") and
// without any anchors (`^`, `$`, `\A` or `\z`)
re.regexpMatch("[^$^]+") and
not re.regexpMatch(".*(\\$|\\^|\\\\A|\\\\z).*") and
msg =
"When this is used as a regular expression on a URL, it may match anywhere, and arbitrary " +
"hosts may come before or after it."

View File

@@ -8,3 +8,4 @@
| main.go:31:15:31:22 | `(a)\|b$` | Misleading operator precedence. The subexpression 'b$' is anchored, but the other parts of this regular expression are not. |
| main.go:33:15:33:24 | `(a)\|(b)$` | Misleading operator precedence. The subexpression '(b)$' is anchored, but the other parts of this regular expression are not. |
| main.go:35:15:35:33 | `https?://good.com` | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| main.go:38:15:38:33 | `www\\.example\\.com` | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |

View File

@@ -34,4 +34,10 @@ func main() {
regexp.Match(`https?://good.com`, []byte("http://evil.com/?http://good.com")) // NOT OK
regexp.Match(`^https?://good.com`, []byte("http://evil.com/?http://good.com")) // OK
regexp.Match(`www\.example\.com`, []byte("")) // NOT OK
regexp.Match(`^www\.example\.com`, []byte("")) // OK
regexp.Match(`\Awww\.example\.com`, []byte("")) // OK
regexp.Match(`www\.example\.com$`, []byte("")) // OK
regexp.Match(`www\.example\.com\z`, []byte("")) // OK
}