From 52ca696ff440b4904ce756844d8c0d493e7dad4e Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Wed, 5 Dec 2018 14:22:34 +0100 Subject: [PATCH 01/11] JS: add query js/incomplete-url-regexp --- javascript/config/suites/javascript/security | 1 + .../CWE-020/IncompleteUrlRegExp.qhelp | 69 ++++++++++++++++++ .../Security/CWE-020/IncompleteUrlRegExp.ql | 70 +++++++++++++++++++ .../CWE-020/examples/IncompleteUrlRegExp.js | 9 +++ .../CWE-020/IncompleteUrlRegExp.expected | 24 +++++++ .../CWE-020/IncompleteUrlRegExp.qlref | 1 + .../CWE-020/tst-IncompleteUrlRegExp.js | 47 +++++++++++++ 7 files changed, 221 insertions(+) create mode 100644 javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.qhelp create mode 100644 javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql create mode 100644 javascript/ql/src/Security/CWE-020/examples/IncompleteUrlRegExp.js create mode 100644 javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlRegExp.expected create mode 100644 javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlRegExp.qlref create mode 100644 javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteUrlRegExp.js diff --git a/javascript/config/suites/javascript/security b/javascript/config/suites/javascript/security index 2cf8792ddae..a0c4fef0514 100644 --- a/javascript/config/suites/javascript/security +++ b/javascript/config/suites/javascript/security @@ -1,5 +1,6 @@ + semmlecode-javascript-queries/DOM/TargetBlank.ql: /Security/CWE/CWE-200 + semmlecode-javascript-queries/Electron/EnablingNodeIntegration.ql: /Security/CWE/CWE-094 ++ semmlecode-javascript-queries/Security/CWE-020/IncompleteUrlRegExp.ql: /Security/CWE/CWE-020 + semmlecode-javascript-queries/Security/CWE-020/IncompleteUrlSubstringSanitization.ql: /Security/CWE/CWE-020 + semmlecode-javascript-queries/Security/CWE-020/IncorrectSuffixCheck.ql: /Security/CWE/CWE-020 + semmlecode-javascript-queries/Security/CWE-022/TaintedPath.ql: /Security/CWE/CWE-022 diff --git a/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.qhelp b/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.qhelp new file mode 100644 index 00000000000..9324e8a0bdf --- /dev/null +++ b/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.qhelp @@ -0,0 +1,69 @@ + + + + +

+ + Sanitizing untrusted URLs is an important technique for + preventing attacks such as request forgeries and malicious + redirections. Usually, this is done by checking that the host of a URL + is in a set of allowed hosts. + +

+ +

+ + If a regular expression implements such a check, it is + easy to accidentally make the check too permissive by not escaping the + . meta-characters appropriately. + + Even if the check is not used in a security-critical + context, the incomplete check may still cause undesirable behaviors + when the check succeeds accidentally. + +

+
+ + +

+ + Escape all meta-characters appropriately when constructing + regular expressions for security checks, pay special attention to the + . meta-character. + +

+
+ + + +

+ + The following example code checks that a URL redirection + will reach the example.com domain, or one of its + subdomains. + +

+ + + +

+ + The check is however easy to bypass because the unescaped + . allows for any character before + example.com, effectively allowing the redirect to go to + an attacker-controlled domain such as wwwXexample.com. + + Address this vulnerability by escaping . + appropriately: let regex =/(www|beta|)\.example\.com/. + +

+ +
+ + +
  • OWASP: SSRF
  • +
  • OWASP: XSS Unvalidated Redirects and Forwards Cheat Sheet.
  • +
    +
    diff --git a/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql b/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql new file mode 100644 index 00000000000..06cb41df0d5 --- /dev/null +++ b/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql @@ -0,0 +1,70 @@ +/** + * @name Incomplete URL regular expression + * @description Security checks on URLs using regular expressions are sometimes vulnerable to bypassing. + * @kind problem + * @problem.severity error + * @precision high + * @id js/incomplete-url-regexp + * @tags correctness + * security + * external/cwe/cwe-20 + */ + +import javascript +import semmle.javascript.security.dataflow.RegExpInjection + +module IncompleteUrlRegExpTracking { + + /** + * A taint tracking configuration for incomplete URL regular expressions sources. + */ + class Configuration extends TaintTracking::Configuration { + Configuration() { this = "IncompleteUrlRegExpTracking" } + + override + predicate isSource(DataFlow::Node source) { + isIncompleteHostNameRegExpPattern(source.asExpr().(ConstantString).getStringValue(), _) + } + + override + predicate isSink(DataFlow::Node sink) { + sink instanceof RegExpInjection::Sink + } + + } + +} + +/** + * Holds if `pattern` is a regular expression pattern for URLs with a host matched by `hostPart`, + * and `pattern` contains a subtle mistake that allows it to match unexpected hosts. + */ +bindingset[pattern] +predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) { + hostPart = pattern.regexpCapture( + "(?i).*" + + // Either: + // - an unescaped and repeated `.`, followed by anything + // - a unescaped single `.` + "(?:(? convert(d)); + + /(.+\.(?:example-a|example-b)\.com)/; // NOT OK + /^(https?:)?\/\/((service|www).)?example.com(?=$|\/)/; // NOT OK + /^(http|https):\/\/www.example.com\/p\/f\//; // NOT OK + /\(http:\/\/sub.example.com\/\)/g; // NOT OK + /https?:\/\/api.example.com/; // NOT OK + new RegExp('^http://localhost:8000|' + '^https?://.+\.example\.com'); // NOT OK + new RegExp('http[s]?:\/\/?sub1\.sub2\.example\.com\/f\/(.+)'); // NOT OK + /^https:\/\/[a-z]*.example.com$/; // NOT OK + RegExp('protos?://(localhost|.+.example.net|.+.example-a.com|.+.example-b.com|.+.example.internal)'); // NOT OK + + /example.dev|example.com/; // OK, but still flagged +}); From c65c7e700efb12892fc6ad44f7724eeb617e7296 Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Thu, 6 Dec 2018 14:07:32 +0100 Subject: [PATCH 02/11] JS: change notes for js/incomplete-url-regexp --- change-notes/1.20/analysis-javascript.md | 1 + 1 file changed, 1 insertion(+) diff --git a/change-notes/1.20/analysis-javascript.md b/change-notes/1.20/analysis-javascript.md index 14a4104c7dd..d1c83a5e9ac 100644 --- a/change-notes/1.20/analysis-javascript.md +++ b/change-notes/1.20/analysis-javascript.md @@ -11,6 +11,7 @@ | **Query** | **Tags** | **Purpose** | |-----------------------------------------------|------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | Double escaping or unescaping (`js/double-escaping`) | correctness, security, external/cwe/cwe-116 | Highlights potential double escaping or unescaping of special characters, indicating a possible violation of [CWE-116](https://cwe.mitre.org/data/definitions/116.html). Results are shown on LGTM by default. | +| Incomplete URL regular expression | correctness, security, external/cwe/cwe-020 | Highlights URL sanitizers that are likely to be incomplete, indicating a violation of [CWE-020](https://cwe.mitre.org/data/definitions/20.html). Results shown on LGTM by default.| | Incomplete URL substring sanitization | correctness, security, external/cwe/cwe-020 | Highlights URL sanitizers that are likely to be incomplete, indicating a violation of [CWE-020](https://cwe.mitre.org/data/definitions/20.html). Results shown on LGTM by default. | | Incorrect suffix check (`js/incorrect-suffix-check`) | correctness, security, external/cwe/cwe-020 | Highlights error-prone suffix checks based on `indexOf`, indicating a potential violation of [CWE-20](https://cwe.mitre.org/data/definitions/20.html). Results are shown on LGTM by default. | | Useless comparison test (`js/useless-comparison-test`) | correctness | Highlights code that is unreachable due to a numeric comparison that is always true or always false. Results are shown on LGTM by default. | From d4e4bc6a0b5a5dafad62c2e13da6af2d07749314 Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Thu, 6 Dec 2018 21:51:31 +0100 Subject: [PATCH 03/11] JS: sharpen js/incomplete-url-regexp by not matching `.*` or `.+` --- javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql | 8 +++----- .../Security/CWE-020/IncompleteUrlRegExp.expected | 3 --- .../Security/CWE-020/tst-IncompleteUrlRegExp.js | 6 +++--- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql b/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql index 06cb41df0d5..51449509795 100644 --- a/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql +++ b/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql @@ -43,11 +43,9 @@ bindingset[pattern] predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) { hostPart = pattern.regexpCapture( "(?i).*" + - // Either: - // - an unescaped and repeated `.`, followed by anything - // - a unescaped single `.` - "(?:(? convert(d)); - /(.+\.(?:example-a|example-b)\.com)/; // NOT OK + /(.+\.(?:example-a|example-b)\.com)/; // NOT OK, but not yet supported with enough precision /^(https?:)?\/\/((service|www).)?example.com(?=$|\/)/; // NOT OK /^(http|https):\/\/www.example.com\/p\/f\//; // NOT OK /\(http:\/\/sub.example.com\/\)/g; // NOT OK From 994fe1bea58f6e9cd28b0802dd69d0f82c02a19b Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Thu, 6 Dec 2018 22:10:59 +0100 Subject: [PATCH 04/11] JS: address non-semantic review comments --- .../CWE-020/IncompleteUrlRegExp.qhelp | 20 ++++----- .../Security/CWE-020/IncompleteUrlRegExp.ql | 10 ++--- .../CWE-020/IncompleteUrlRegExp.expected | 42 +++++++++---------- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.qhelp b/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.qhelp index 9324e8a0bdf..20e9e67588e 100644 --- a/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.qhelp +++ b/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.qhelp @@ -11,17 +11,17 @@ redirections. Usually, this is done by checking that the host of a URL is in a set of allowed hosts. -

    +

    -

    +

    - If a regular expression implements such a check, it is - easy to accidentally make the check too permissive by not escaping the - . meta-characters appropriately. + If a regular expression implements such a check, it is + easy to accidentally make the check too permissive by not escaping the + . meta-characters appropriately. - Even if the check is not used in a security-critical - context, the incomplete check may still cause undesirable behaviors - when the check succeeds accidentally. + Even if the check is not used in a security-critical + context, the incomplete check may still cause undesirable behaviors + when the check succeeds accidentally.

    @@ -63,7 +63,7 @@ -
  • OWASP: SSRF
  • -
  • OWASP: XSS Unvalidated Redirects and Forwards Cheat Sheet.
  • +
  • OWASP: SSRF
  • +
  • OWASP: XSS Unvalidated Redirects and Forwards Cheat Sheet.
  • diff --git a/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql b/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql index 51449509795..204bc7ce354 100644 --- a/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql +++ b/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql @@ -1,6 +1,6 @@ /** * @name Incomplete URL regular expression - * @description Security checks on URLs using regular expressions are sometimes vulnerable to bypassing. + * @description Using a regular expression that contains an 'any character' may match more URLs than expected. * @kind problem * @problem.severity error * @precision high @@ -23,7 +23,7 @@ module IncompleteUrlRegExpTracking { override predicate isSource(DataFlow::Node source) { - isIncompleteHostNameRegExpPattern(source.asExpr().(ConstantString).getStringValue(), _) + isIncompleteHostNameRegExpPattern(source.asExpr().getStringValue(), _) } override @@ -50,7 +50,7 @@ predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) { ".*", 1) } -from Expr e, string pattern, string intendedHost +from Expr e, string pattern, string hostPart where ( e.(RegExpLiteral).getValue() = pattern or @@ -59,10 +59,10 @@ where e.mayHaveStringValue(pattern) ) ) and - isIncompleteHostNameRegExpPattern(pattern, intendedHost) + isIncompleteHostNameRegExpPattern(pattern, hostPart) and // ignore patterns with capture groups after the TLD not pattern.regexpMatch("(?i).*[.](com|org|edu|gov|uk|net).*[(][?]:.*[)].*") -select e, "This regular expression has an unescaped '.', which means that '" + intendedHost + "' might not match the intended host of a matched URL." +select e, "This regular expression has an unescaped '.' before '" + hostPart + "', so it might match more hosts than expected." diff --git a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlRegExp.expected b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlRegExp.expected index b2a5b8f7dea..685baad4165 100644 --- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlRegExp.expected +++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlRegExp.expected @@ -1,21 +1,21 @@ -| tst-IncompleteUrlRegExp.js:3:2:3:28 | /http:\\ ... le.com/ | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:5:2:5:28 | /http:\\ ... le.net/ | This regular expression has an unescaped '.', which means that 'example.net' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:6:2:6:42 | /http:\\ ... b).com/ | This regular expression has an unescaped '.', which means that '(example-a\|example-b).com' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:11:13:11:37 | "http:/ ... le.com" | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:12:10:12:34 | "http:/ ... le.com" | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:15:22:15:46 | "http:/ ... le.com" | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:17:13:17:31 | `test.example.com$` | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:17:14:17:30 | test.example.com$ | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:19:17:19:34 | 'test.example.com' | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:22:27:22:44 | 'test.example.com' | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:28:22:28:39 | 'test.example.com' | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:37:2:37:54 | /^(http ... =$\|\\/)/ | This regular expression has an unescaped '.', which means that ')?example.com' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:39:2:39:34 | /\\(http ... m\\/\\)/g | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:40:2:40:29 | /https? ... le.com/ | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:41:13:41:68 | '^http: ... e\\.com' | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:41:41:41:68 | '^https ... e\\.com' | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:42:13:42:61 | 'http[s ... \\/(.+)' | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:43:2:43:33 | /^https ... e.com$/ | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:44:9:44:100 | 'protos ... ernal)' | This regular expression has an unescaped '.', which means that 'example-b.com' might not match the intended host of a matched URL. | -| tst-IncompleteUrlRegExp.js:46:2:46:26 | /exampl ... le.com/ | This regular expression has an unescaped '.', which means that 'dev\|example.com' might not match the intended host of a matched URL. | +| tst-IncompleteUrlRegExp.js:3:2:3:28 | /http:\\ ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:5:2:5:28 | /http:\\ ... le.net/ | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:6:2:6:42 | /http:\\ ... b).com/ | This regular expression has an unescaped '.' before '(example-a\|example-b).com', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:11:13:11:37 | "http:/ ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:12:10:12:34 | "http:/ ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:15:22:15:46 | "http:/ ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:17:13:17:31 | `test.example.com$` | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:17:14:17:30 | test.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:19:17:19:34 | 'test.example.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:22:27:22:44 | 'test.example.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:28:22:28:39 | 'test.example.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:37:2:37:54 | /^(http ... =$\|\\/)/ | This regular expression has an unescaped '.' before ')?example.com', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:39:2:39:34 | /\\(http ... m\\/\\)/g | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:40:2:40:29 | /https? ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:41:13:41:68 | '^http: ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:41:41:41:68 | '^https ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:42:13:42:61 | 'http[s ... \\/(.+)' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:43:2:43:33 | /^https ... e.com$/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:44:9:44:100 | 'protos ... ernal)' | This regular expression has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | +| tst-IncompleteUrlRegExp.js:46:2:46:26 | /exampl ... le.com/ | This regular expression has an unescaped '.' before 'dev\|example.com', so it might match more hosts than expected. | From 7c6e28d9174aadd78fd8b02a6aa17ed7d5d61b6d Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Thu, 6 Dec 2018 22:35:57 +0100 Subject: [PATCH 05/11] JS: introduce near-empty RegularExpressions.qll --- .../Security/CWE-020/IncompleteUrlRegExp.ql | 3 +- .../ql/src/semmle/javascript/Regexp.qll | 29 +++++++++++++++-- .../security/dataflow/RegExpInjection.qll | 31 +++---------------- 3 files changed, 32 insertions(+), 31 deletions(-) diff --git a/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql b/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql index 204bc7ce354..262df1e199c 100644 --- a/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql +++ b/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql @@ -11,7 +11,6 @@ */ import javascript -import semmle.javascript.security.dataflow.RegExpInjection module IncompleteUrlRegExpTracking { @@ -28,7 +27,7 @@ module IncompleteUrlRegExpTracking { override predicate isSink(DataFlow::Node sink) { - sink instanceof RegExpInjection::Sink + isInterpretedAsRegExp(sink) } } diff --git a/javascript/ql/src/semmle/javascript/Regexp.qll b/javascript/ql/src/semmle/javascript/Regexp.qll index a4b473ce4fd..b8a5ab6eeea 100644 --- a/javascript/ql/src/semmle/javascript/Regexp.qll +++ b/javascript/ql/src/semmle/javascript/Regexp.qll @@ -1,11 +1,12 @@ /** - * Provides classes for working with regular expression literals. + * Provides classes for working with regular expressions. * - * Regular expressions are represented as an abstract syntax tree of regular expression + * Regular expression literals are represented as an abstract syntax tree of regular expression * terms. */ import javascript +private import semmle.javascript.dataflow.InferredTypes /** * An element containing a regular expression term, that is, either @@ -484,3 +485,27 @@ class RegExpParseError extends Error, @regexp_parse_error { result = getMessage() } } + +/** + * Holds if `source` may be interpreted as a regular expression. + */ +predicate isInterpretedAsRegExp(DataFlow::Node source) { + // The first argument to an invocation of `RegExp` (with or without `new`). + source = DataFlow::globalVarRef("RegExp").getAnInvocation().getArgument(0) + or + // The argument of a call that coerces the argument to a regular expression. + exists(MethodCallExpr mce, string methodName | + mce.getReceiver().analyze().getAType() = TTString() and + mce.getMethodName() = methodName + | + (methodName = "match" and source.asExpr() = mce.getArgument(0) and mce.getNumArgument() = 1) + or + ( + methodName = "search" and + source.asExpr() = mce.getArgument(0) and + mce.getNumArgument() = 1 and + // `String.prototype.search` returns a number, so exclude chained accesses + not exists(PropAccess p | p.getBase() = mce) + ) + ) +} diff --git a/javascript/ql/src/semmle/javascript/security/dataflow/RegExpInjection.qll b/javascript/ql/src/semmle/javascript/security/dataflow/RegExpInjection.qll index ce940810a7e..e771f2a14ef 100644 --- a/javascript/ql/src/semmle/javascript/security/dataflow/RegExpInjection.qll +++ b/javascript/ql/src/semmle/javascript/security/dataflow/RegExpInjection.qll @@ -4,7 +4,6 @@ */ import javascript -private import semmle.javascript.dataflow.InferredTypes module RegExpInjection { /** @@ -51,36 +50,14 @@ module RegExpInjection { } /** - * The first argument to an invocation of `RegExp` (with or without `new`). + * The source string of a regular expression. */ - class RegExpObjectCreationSink extends Sink, DataFlow::ValueNode { - RegExpObjectCreationSink() { - this = DataFlow::globalVarRef("RegExp").getAnInvocation().getArgument(0) + class RegularExpressionSourceAsSink extends Sink { + RegularExpressionSourceAsSink() { + isInterpretedAsRegExp(this) } } - /** - * The argument of a call that coerces the argument to a regular expression. - */ - class RegExpObjectCoercionSink extends Sink { - - RegExpObjectCoercionSink() { - exists (MethodCallExpr mce, string methodName | - mce.getReceiver().analyze().getAType() = TTString() and - mce.getMethodName() = methodName | - (methodName = "match" and this.asExpr() = mce.getArgument(0) and mce.getNumArgument() = 1) or - ( - methodName = "search" and - this.asExpr() = mce.getArgument(0) and - mce.getNumArgument() = 1 and - // `String.prototype.search` returns a number, so exclude chained accesses - not exists(PropAccess p | p.getBase() = mce) - ) - ) - } - - } - /** * A call to a function whose name suggests that it escapes regular * expression meta-characters. From ab519d4abf6792a6edebf2856f6c72d796d1f2ff Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Sun, 9 Dec 2018 22:08:10 +0100 Subject: [PATCH 06/11] JS: rename query "Incomplete URL regular expression" -> "Incomplete regular expression for hostnames". --- javascript/config/suites/javascript/security | 2 +- ...p.qhelp => IncompleteHostnameRegExp.qhelp} | 2 +- ...lRegExp.ql => IncompleteHostnameRegExp.ql} | 16 +++++++------- ...lRegExp.js => IncompleteHostnameRegExp.js} | 0 .../CWE-020/IncompleteHostnameRegExp.expected | 21 +++++++++++++++++++ .../CWE-020/IncompleteHostnameRegExp.qlref | 1 + .../CWE-020/IncompleteUrlRegExp.expected | 21 ------------------- .../CWE-020/IncompleteUrlRegExp.qlref | 1 - ...Exp.js => tst-IncompleteHostnameRegExp.js} | 0 9 files changed, 32 insertions(+), 32 deletions(-) rename javascript/ql/src/Security/CWE-020/{IncompleteUrlRegExp.qhelp => IncompleteHostnameRegExp.qhelp} (97%) rename javascript/ql/src/Security/CWE-020/{IncompleteUrlRegExp.ql => IncompleteHostnameRegExp.ql} (74%) rename javascript/ql/src/Security/CWE-020/examples/{IncompleteUrlRegExp.js => IncompleteHostnameRegExp.js} (100%) create mode 100644 javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected create mode 100644 javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.qlref delete mode 100644 javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlRegExp.expected delete mode 100644 javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlRegExp.qlref rename javascript/ql/test/query-tests/Security/CWE-020/{tst-IncompleteUrlRegExp.js => tst-IncompleteHostnameRegExp.js} (100%) diff --git a/javascript/config/suites/javascript/security b/javascript/config/suites/javascript/security index a0c4fef0514..7c5f4a8fec5 100644 --- a/javascript/config/suites/javascript/security +++ b/javascript/config/suites/javascript/security @@ -1,6 +1,6 @@ + semmlecode-javascript-queries/DOM/TargetBlank.ql: /Security/CWE/CWE-200 + semmlecode-javascript-queries/Electron/EnablingNodeIntegration.ql: /Security/CWE/CWE-094 -+ semmlecode-javascript-queries/Security/CWE-020/IncompleteUrlRegExp.ql: /Security/CWE/CWE-020 ++ semmlecode-javascript-queries/Security/CWE-020/IncompleteHostnameRegExp.ql: /Security/CWE/CWE-020 + semmlecode-javascript-queries/Security/CWE-020/IncompleteUrlSubstringSanitization.ql: /Security/CWE/CWE-020 + semmlecode-javascript-queries/Security/CWE-020/IncorrectSuffixCheck.ql: /Security/CWE/CWE-020 + semmlecode-javascript-queries/Security/CWE-022/TaintedPath.ql: /Security/CWE/CWE-022 diff --git a/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.qhelp b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp similarity index 97% rename from javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.qhelp rename to javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp index 20e9e67588e..d6b4f279728 100644 --- a/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.qhelp +++ b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp @@ -46,7 +46,7 @@

    - +

    diff --git a/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql similarity index 74% rename from javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql rename to javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql index 262df1e199c..49344ee8fec 100644 --- a/javascript/ql/src/Security/CWE-020/IncompleteUrlRegExp.ql +++ b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql @@ -1,10 +1,10 @@ /** - * @name Incomplete URL regular expression - * @description Using a regular expression that contains an 'any character' may match more URLs than expected. + * @name Incomplete regular expression for hostnames + * @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more than expected. * @kind problem - * @problem.severity error + * @problem.severity warning * @precision high - * @id js/incomplete-url-regexp + * @id js/incomplete-hostname-regexp * @tags correctness * security * external/cwe/cwe-20 @@ -12,13 +12,13 @@ import javascript -module IncompleteUrlRegExpTracking { +module IncompleteHostnameRegExpTracking { /** - * A taint tracking configuration for incomplete URL regular expressions sources. + * A taint tracking configuration for incomplete hostname regular expressions sources. */ class Configuration extends TaintTracking::Configuration { - Configuration() { this = "IncompleteUrlRegExpTracking" } + Configuration() { this = "IncompleteHostnameRegExpTracking" } override predicate isSource(DataFlow::Node source) { @@ -53,7 +53,7 @@ from Expr e, string pattern, string hostPart where ( e.(RegExpLiteral).getValue() = pattern or - exists (IncompleteUrlRegExpTracking::Configuration cfg | + exists (IncompleteHostnameRegExpTracking::Configuration cfg | cfg.hasFlow(e.flow(), _) and e.mayHaveStringValue(pattern) ) diff --git a/javascript/ql/src/Security/CWE-020/examples/IncompleteUrlRegExp.js b/javascript/ql/src/Security/CWE-020/examples/IncompleteHostnameRegExp.js similarity index 100% rename from javascript/ql/src/Security/CWE-020/examples/IncompleteUrlRegExp.js rename to javascript/ql/src/Security/CWE-020/examples/IncompleteHostnameRegExp.js diff --git a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected new file mode 100644 index 00000000000..cbb655c77ba --- /dev/null +++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected @@ -0,0 +1,21 @@ +| tst-IncompleteHostnameRegExp.js:3:2:3:28 | /http:\\ ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:5:2:5:28 | /http:\\ ... le.net/ | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:6:2:6:42 | /http:\\ ... b).com/ | This regular expression has an unescaped '.' before '(example-a\|example-b).com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:11:13:11:37 | "http:/ ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:12:10:12:34 | "http:/ ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:15:22:15:46 | "http:/ ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:17:14:17:30 | test.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:19:17:19:34 | 'test.example.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:22:27:22:44 | 'test.example.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:28:22:28:39 | 'test.example.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:37:2:37:54 | /^(http ... =$\|\\/)/ | This regular expression has an unescaped '.' before ')?example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:39:2:39:34 | /\\(http ... m\\/\\)/g | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:40:2:40:29 | /https? ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:41:41:41:68 | '^https ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:42:13:42:61 | 'http[s ... \\/(.+)' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:44:9:44:100 | 'protos ... ernal)' | This regular expression has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:46:2:46:26 | /exampl ... le.com/ | This regular expression has an unescaped '.' before 'dev\|example.com', so it might match more hosts than expected. | diff --git a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.qlref b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.qlref new file mode 100644 index 00000000000..e818d947252 --- /dev/null +++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.qlref @@ -0,0 +1 @@ +Security/CWE-020/IncompleteHostnameRegExp.ql \ No newline at end of file diff --git a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlRegExp.expected b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlRegExp.expected deleted file mode 100644 index 685baad4165..00000000000 --- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlRegExp.expected +++ /dev/null @@ -1,21 +0,0 @@ -| tst-IncompleteUrlRegExp.js:3:2:3:28 | /http:\\ ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:5:2:5:28 | /http:\\ ... le.net/ | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:6:2:6:42 | /http:\\ ... b).com/ | This regular expression has an unescaped '.' before '(example-a\|example-b).com', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:11:13:11:37 | "http:/ ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:12:10:12:34 | "http:/ ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:15:22:15:46 | "http:/ ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:17:13:17:31 | `test.example.com$` | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:17:14:17:30 | test.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:19:17:19:34 | 'test.example.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:22:27:22:44 | 'test.example.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:28:22:28:39 | 'test.example.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:37:2:37:54 | /^(http ... =$\|\\/)/ | This regular expression has an unescaped '.' before ')?example.com', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:39:2:39:34 | /\\(http ... m\\/\\)/g | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:40:2:40:29 | /https? ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:41:13:41:68 | '^http: ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:41:41:41:68 | '^https ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:42:13:42:61 | 'http[s ... \\/(.+)' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:43:2:43:33 | /^https ... e.com$/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:44:9:44:100 | 'protos ... ernal)' | This regular expression has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | -| tst-IncompleteUrlRegExp.js:46:2:46:26 | /exampl ... le.com/ | This regular expression has an unescaped '.' before 'dev\|example.com', so it might match more hosts than expected. | diff --git a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlRegExp.qlref b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlRegExp.qlref deleted file mode 100644 index 8a9d0df5efb..00000000000 --- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlRegExp.qlref +++ /dev/null @@ -1 +0,0 @@ -Security/CWE-020/IncompleteUrlRegExp.ql \ No newline at end of file diff --git a/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteUrlRegExp.js b/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js similarity index 100% rename from javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteUrlRegExp.js rename to javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js From 09e7124bb1a57cbe3058931fe69bad7a06c5d938 Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Sun, 9 Dec 2018 22:20:37 +0100 Subject: [PATCH 07/11] JS: update change notes for renamed query --- change-notes/1.20/analysis-javascript.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/change-notes/1.20/analysis-javascript.md b/change-notes/1.20/analysis-javascript.md index d1c83a5e9ac..17930ffc192 100644 --- a/change-notes/1.20/analysis-javascript.md +++ b/change-notes/1.20/analysis-javascript.md @@ -11,7 +11,7 @@ | **Query** | **Tags** | **Purpose** | |-----------------------------------------------|------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | Double escaping or unescaping (`js/double-escaping`) | correctness, security, external/cwe/cwe-116 | Highlights potential double escaping or unescaping of special characters, indicating a possible violation of [CWE-116](https://cwe.mitre.org/data/definitions/116.html). Results are shown on LGTM by default. | -| Incomplete URL regular expression | correctness, security, external/cwe/cwe-020 | Highlights URL sanitizers that are likely to be incomplete, indicating a violation of [CWE-020](https://cwe.mitre.org/data/definitions/20.html). Results shown on LGTM by default.| +| Incomplete regular expression for hostnames (`js/incomplete-hostname-regexp`) | correctness, security, external/cwe/cwe-020 | Highlights hostname sanitizers that are likely to be incomplete, indicating a violation of [CWE-020](https://cwe.mitre.org/data/definitions/20.html). Results shown on LGTM by default.| | Incomplete URL substring sanitization | correctness, security, external/cwe/cwe-020 | Highlights URL sanitizers that are likely to be incomplete, indicating a violation of [CWE-020](https://cwe.mitre.org/data/definitions/20.html). Results shown on LGTM by default. | | Incorrect suffix check (`js/incorrect-suffix-check`) | correctness, security, external/cwe/cwe-020 | Highlights error-prone suffix checks based on `indexOf`, indicating a potential violation of [CWE-20](https://cwe.mitre.org/data/definitions/20.html). Results are shown on LGTM by default. | | Useless comparison test (`js/useless-comparison-test`) | correctness | Highlights code that is unreachable due to a numeric comparison that is always true or always false. Results are shown on LGTM by default. | From 1bc73ab59244b7e560456b73ab3f94d42862f96d Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Tue, 11 Dec 2018 12:58:21 +0100 Subject: [PATCH 08/11] JS: address review comments --- .../CWE-020/IncompleteHostnameRegExp.qhelp | 4 +-- .../CWE-020/IncompleteHostnameRegExp.ql | 35 +++++++++---------- .../IncompleteUrlSubstringSanitization.ql | 2 +- .../ql/src/semmle/javascript/Regexp.qll | 16 ++++++++- 4 files changed, 34 insertions(+), 23 deletions(-) diff --git a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp index d6b4f279728..0b2b7369bc1 100644 --- a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp +++ b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp @@ -8,7 +8,7 @@ Sanitizing untrusted URLs is an important technique for preventing attacks such as request forgeries and malicious - redirections. Usually, this is done by checking that the host of a URL + redirections. Often, this is done by checking that the host of a URL is in a set of allowed hosts.

    @@ -56,7 +56,7 @@ an attacker-controlled domain such as wwwXexample.com. Address this vulnerability by escaping . - appropriately: let regex =/(www|beta|)\.example\.com/. + appropriately: let regex = /(www|beta|)\.example\.com/.

    diff --git a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql index 49344ee8fec..abe6d76d6f0 100644 --- a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql +++ b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql @@ -12,28 +12,25 @@ import javascript -module IncompleteHostnameRegExpTracking { +/** + * A taint tracking configuration for incomplete hostname regular expressions sources. + */ +class Configuration extends TaintTracking::Configuration { + Configuration() { this = "IncompleteHostnameRegExpTracking" } - /** - * A taint tracking configuration for incomplete hostname regular expressions sources. - */ - class Configuration extends TaintTracking::Configuration { - Configuration() { this = "IncompleteHostnameRegExpTracking" } - - override - predicate isSource(DataFlow::Node source) { - isIncompleteHostNameRegExpPattern(source.asExpr().getStringValue(), _) - } - - override - predicate isSink(DataFlow::Node sink) { - isInterpretedAsRegExp(sink) - } + override + predicate isSource(DataFlow::Node source) { + isIncompleteHostNameRegExpPattern(source.asExpr().getStringValue(), _) + } + override + predicate isSink(DataFlow::Node sink) { + isInterpretedAsRegExp(sink) } } + /** * Holds if `pattern` is a regular expression pattern for URLs with a host matched by `hostPart`, * and `pattern` contains a subtle mistake that allows it to match unexpected hosts. @@ -45,7 +42,7 @@ predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) { // an unescaped single `.` "(?> + result = "com|org|edu|gov|uk|net|io" + } +} \ No newline at end of file From bb3e3a541d69a6f01abb1cea906a67270abb05ce Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen Date: Fri, 14 Dec 2018 10:24:30 +0100 Subject: [PATCH 09/11] JS: address doc review comments --- change-notes/1.20/analysis-javascript.md | 2 +- .../ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp | 5 ++++- .../ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/change-notes/1.20/analysis-javascript.md b/change-notes/1.20/analysis-javascript.md index 17930ffc192..d268c3a39d0 100644 --- a/change-notes/1.20/analysis-javascript.md +++ b/change-notes/1.20/analysis-javascript.md @@ -11,7 +11,7 @@ | **Query** | **Tags** | **Purpose** | |-----------------------------------------------|------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | Double escaping or unescaping (`js/double-escaping`) | correctness, security, external/cwe/cwe-116 | Highlights potential double escaping or unescaping of special characters, indicating a possible violation of [CWE-116](https://cwe.mitre.org/data/definitions/116.html). Results are shown on LGTM by default. | -| Incomplete regular expression for hostnames (`js/incomplete-hostname-regexp`) | correctness, security, external/cwe/cwe-020 | Highlights hostname sanitizers that are likely to be incomplete, indicating a violation of [CWE-020](https://cwe.mitre.org/data/definitions/20.html). Results shown on LGTM by default.| +| Incomplete regular expression for hostnames (`js/incomplete-hostname-regexp`) | correctness, security, external/cwe/cwe-020 | Highlights hostname sanitizers that are likely to be incomplete, indicating a violation of [CWE-020](https://cwe.mitre.org/data/definitions/20.html). Results are shown on LGTM by default.| | Incomplete URL substring sanitization | correctness, security, external/cwe/cwe-020 | Highlights URL sanitizers that are likely to be incomplete, indicating a violation of [CWE-020](https://cwe.mitre.org/data/definitions/20.html). Results shown on LGTM by default. | | Incorrect suffix check (`js/incorrect-suffix-check`) | correctness, security, external/cwe/cwe-020 | Highlights error-prone suffix checks based on `indexOf`, indicating a potential violation of [CWE-20](https://cwe.mitre.org/data/definitions/20.html). Results are shown on LGTM by default. | | Useless comparison test (`js/useless-comparison-test`) | correctness | Highlights code that is unreachable due to a numeric comparison that is always true or always false. Results are shown on LGTM by default. | diff --git a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp index 0b2b7369bc1..08756cf9cd5 100644 --- a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp +++ b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp @@ -21,7 +21,7 @@ Even if the check is not used in a security-critical context, the incomplete check may still cause undesirable behaviors - when the check succeeds accidentally. + when it accidentally succeeds.

    @@ -55,6 +55,9 @@ example.com, effectively allowing the redirect to go to an attacker-controlled domain such as wwwXexample.com. +

    +

    + Address this vulnerability by escaping . appropriately: let regex = /(www|beta|)\.example\.com/. diff --git a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql index abe6d76d6f0..01335fbbe0a 100644 --- a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql +++ b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql @@ -1,6 +1,6 @@ /** * @name Incomplete regular expression for hostnames - * @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more than expected. + * @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more hostnames than expected. * @kind problem * @problem.severity warning * @precision high From 23a2bf17566a9adb9f338547374939bd1c275e51 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Fri, 14 Dec 2018 10:57:39 +0000 Subject: [PATCH 10/11] C++: Delete dead code with warnings in it --- .../src/semmle/code/cpp/security/CommandExecution.qll | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/cpp/ql/src/semmle/code/cpp/security/CommandExecution.qll b/cpp/ql/src/semmle/code/cpp/security/CommandExecution.qll index f240e687a6d..d80486b32e3 100644 --- a/cpp/ql/src/semmle/code/cpp/security/CommandExecution.qll +++ b/cpp/ql/src/semmle/code/cpp/security/CommandExecution.qll @@ -159,17 +159,6 @@ predicate shellCommandPreface(string cmd, string flag) { ) } -/** - * An array element. This supports multiple kinds of array syntax. - */ -private predicate arrayElement(Expr arrayLit, int idx, Expr element) { - exists (ArrayLiteral lit | lit = arrayLit | - lit.getElement(idx) = element) - or exists (MessageExpr arrayWithObjects | arrayWithObjects = arrayLit | - arrayWithObjects.getStaticTarget().getQualifiedName().matches("NSArray%::+arrayWithObjects:") and - arrayWithObjects.getArgument(idx) = element) -} - /** * A command that is used as a command, or component of a command, * that will be executed by a general-purpose command interpreter From 487b8c52c65efba4b537530effbe1a6753040061 Mon Sep 17 00:00:00 2001 From: Esben Sparre Andreasen <42067045+esben-semmle@users.noreply.github.com> Date: Fri, 14 Dec 2018 13:04:10 +0100 Subject: [PATCH 11/11] JS: fix

    issue --- .../ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp index 08756cf9cd5..71fefb488fa 100644 --- a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp +++ b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp @@ -55,8 +55,8 @@ example.com, effectively allowing the redirect to go to an attacker-controlled domain such as wwwXexample.com. -

    +

    Address this vulnerability by escaping . appropriately: let regex = /(www|beta|)\.example\.com/.