diff --git a/change-notes/1.20/analysis-javascript.md b/change-notes/1.20/analysis-javascript.md index 69f6613c26c..af8bb984ae8 100644 --- a/change-notes/1.20/analysis-javascript.md +++ b/change-notes/1.20/analysis-javascript.md @@ -15,6 +15,7 @@ | **Query** | **Tags** | **Purpose** | |-----------------------------------------------|------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | Double escaping or unescaping (`js/double-escaping`) | correctness, security, external/cwe/cwe-116 | Highlights potential double escaping or unescaping of special characters, indicating a possible violation of [CWE-116](https://cwe.mitre.org/data/definitions/116.html). Results are shown on LGTM by default. | +| Incomplete regular expression for hostnames (`js/incomplete-hostname-regexp`) | correctness, security, external/cwe/cwe-020 | Highlights hostname sanitizers that are likely to be incomplete, indicating a violation of [CWE-020](https://cwe.mitre.org/data/definitions/20.html). Results are shown on LGTM by default.| | Incomplete URL substring sanitization | correctness, security, external/cwe/cwe-020 | Highlights URL sanitizers that are likely to be incomplete, indicating a violation of [CWE-020](https://cwe.mitre.org/data/definitions/20.html). Results shown on LGTM by default. | | Incorrect suffix check (`js/incorrect-suffix-check`) | correctness, security, external/cwe/cwe-020 | Highlights error-prone suffix checks based on `indexOf`, indicating a potential violation of [CWE-20](https://cwe.mitre.org/data/definitions/20.html). Results are shown on LGTM by default. | | Useless comparison test (`js/useless-comparison-test`) | correctness | Highlights code that is unreachable due to a numeric comparison that is always true or always false. Results are shown on LGTM by default. | diff --git a/cpp/ql/src/semmle/code/cpp/security/CommandExecution.qll b/cpp/ql/src/semmle/code/cpp/security/CommandExecution.qll index f240e687a6d..d80486b32e3 100644 --- a/cpp/ql/src/semmle/code/cpp/security/CommandExecution.qll +++ b/cpp/ql/src/semmle/code/cpp/security/CommandExecution.qll @@ -159,17 +159,6 @@ predicate shellCommandPreface(string cmd, string flag) { ) } -/** - * An array element. This supports multiple kinds of array syntax. - */ -private predicate arrayElement(Expr arrayLit, int idx, Expr element) { - exists (ArrayLiteral lit | lit = arrayLit | - lit.getElement(idx) = element) - or exists (MessageExpr arrayWithObjects | arrayWithObjects = arrayLit | - arrayWithObjects.getStaticTarget().getQualifiedName().matches("NSArray%::+arrayWithObjects:") and - arrayWithObjects.getArgument(idx) = element) -} - /** * A command that is used as a command, or component of a command, * that will be executed by a general-purpose command interpreter diff --git a/javascript/config/suites/javascript/security b/javascript/config/suites/javascript/security index 0f8e6686c62..2e010b5abd8 100644 --- a/javascript/config/suites/javascript/security +++ b/javascript/config/suites/javascript/security @@ -1,5 +1,6 @@ + semmlecode-javascript-queries/DOM/TargetBlank.ql: /Security/CWE/CWE-200 + semmlecode-javascript-queries/Electron/EnablingNodeIntegration.ql: /Security/CWE/CWE-094 ++ semmlecode-javascript-queries/Security/CWE-020/IncompleteHostnameRegExp.ql: /Security/CWE/CWE-020 + semmlecode-javascript-queries/Security/CWE-020/IncompleteUrlSubstringSanitization.ql: /Security/CWE/CWE-020 + semmlecode-javascript-queries/Security/CWE-020/IncorrectSuffixCheck.ql: /Security/CWE/CWE-020 + semmlecode-javascript-queries/Security/CWE-022/TaintedPath.ql: /Security/CWE/CWE-022 diff --git a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp new file mode 100644 index 00000000000..71fefb488fa --- /dev/null +++ b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp @@ -0,0 +1,72 @@ + + + + +

+ + Sanitizing untrusted URLs is an important technique for + preventing attacks such as request forgeries and malicious + redirections. Often, this is done by checking that the host of a URL + is in a set of allowed hosts. + +

+ +

+ + If a regular expression implements such a check, it is + easy to accidentally make the check too permissive by not escaping the + . meta-characters appropriately. + + Even if the check is not used in a security-critical + context, the incomplete check may still cause undesirable behaviors + when it accidentally succeeds. + +

+
+ + +

+ + Escape all meta-characters appropriately when constructing + regular expressions for security checks, pay special attention to the + . meta-character. + +

+
+ + + +

+ + The following example code checks that a URL redirection + will reach the example.com domain, or one of its + subdomains. + +

+ + + +

+ + The check is however easy to bypass because the unescaped + . allows for any character before + example.com, effectively allowing the redirect to go to + an attacker-controlled domain such as wwwXexample.com. + +

+

+ + Address this vulnerability by escaping . + appropriately: let regex = /(www|beta|)\.example\.com/. + +

+ +
+ + +
  • OWASP: SSRF
  • +
  • OWASP: XSS Unvalidated Redirects and Forwards Cheat Sheet.
  • +
    +
    diff --git a/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql new file mode 100644 index 00000000000..01335fbbe0a --- /dev/null +++ b/javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql @@ -0,0 +1,64 @@ +/** + * @name Incomplete regular expression for hostnames + * @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more hostnames than expected. + * @kind problem + * @problem.severity warning + * @precision high + * @id js/incomplete-hostname-regexp + * @tags correctness + * security + * external/cwe/cwe-20 + */ + +import javascript + +/** + * A taint tracking configuration for incomplete hostname regular expressions sources. + */ +class Configuration extends TaintTracking::Configuration { + Configuration() { this = "IncompleteHostnameRegExpTracking" } + + override + predicate isSource(DataFlow::Node source) { + isIncompleteHostNameRegExpPattern(source.asExpr().getStringValue(), _) + } + + override + predicate isSink(DataFlow::Node sink) { + isInterpretedAsRegExp(sink) + } + +} + + +/** + * Holds if `pattern` is a regular expression pattern for URLs with a host matched by `hostPart`, + * and `pattern` contains a subtle mistake that allows it to match unexpected hosts. + */ +bindingset[pattern] +predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) { + hostPart = pattern.regexpCapture( + "(?i).*" + + // an unescaped single `.` + "(?> + result = "com|org|edu|gov|uk|net|io" + } +} \ No newline at end of file diff --git a/javascript/ql/src/semmle/javascript/security/dataflow/RegExpInjection.qll b/javascript/ql/src/semmle/javascript/security/dataflow/RegExpInjection.qll index ce940810a7e..e771f2a14ef 100644 --- a/javascript/ql/src/semmle/javascript/security/dataflow/RegExpInjection.qll +++ b/javascript/ql/src/semmle/javascript/security/dataflow/RegExpInjection.qll @@ -4,7 +4,6 @@ */ import javascript -private import semmle.javascript.dataflow.InferredTypes module RegExpInjection { /** @@ -51,36 +50,14 @@ module RegExpInjection { } /** - * The first argument to an invocation of `RegExp` (with or without `new`). + * The source string of a regular expression. */ - class RegExpObjectCreationSink extends Sink, DataFlow::ValueNode { - RegExpObjectCreationSink() { - this = DataFlow::globalVarRef("RegExp").getAnInvocation().getArgument(0) + class RegularExpressionSourceAsSink extends Sink { + RegularExpressionSourceAsSink() { + isInterpretedAsRegExp(this) } } - /** - * The argument of a call that coerces the argument to a regular expression. - */ - class RegExpObjectCoercionSink extends Sink { - - RegExpObjectCoercionSink() { - exists (MethodCallExpr mce, string methodName | - mce.getReceiver().analyze().getAType() = TTString() and - mce.getMethodName() = methodName | - (methodName = "match" and this.asExpr() = mce.getArgument(0) and mce.getNumArgument() = 1) or - ( - methodName = "search" and - this.asExpr() = mce.getArgument(0) and - mce.getNumArgument() = 1 and - // `String.prototype.search` returns a number, so exclude chained accesses - not exists(PropAccess p | p.getBase() = mce) - ) - ) - } - - } - /** * A call to a function whose name suggests that it escapes regular * expression meta-characters. diff --git a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected new file mode 100644 index 00000000000..cbb655c77ba --- /dev/null +++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected @@ -0,0 +1,21 @@ +| tst-IncompleteHostnameRegExp.js:3:2:3:28 | /http:\\ ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:5:2:5:28 | /http:\\ ... le.net/ | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:6:2:6:42 | /http:\\ ... b).com/ | This regular expression has an unescaped '.' before '(example-a\|example-b).com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:11:13:11:37 | "http:/ ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:12:10:12:34 | "http:/ ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:15:22:15:46 | "http:/ ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:17:14:17:30 | test.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:19:17:19:34 | 'test.example.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:22:27:22:44 | 'test.example.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:28:22:28:39 | 'test.example.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:37:2:37:54 | /^(http ... =$\|\\/)/ | This regular expression has an unescaped '.' before ')?example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:39:2:39:34 | /\\(http ... m\\/\\)/g | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:40:2:40:29 | /https? ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:41:41:41:68 | '^https ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:42:13:42:61 | 'http[s ... \\/(.+)' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:44:9:44:100 | 'protos ... ernal)' | This regular expression has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | +| tst-IncompleteHostnameRegExp.js:46:2:46:26 | /exampl ... le.com/ | This regular expression has an unescaped '.' before 'dev\|example.com', so it might match more hosts than expected. | diff --git a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.qlref b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.qlref new file mode 100644 index 00000000000..e818d947252 --- /dev/null +++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.qlref @@ -0,0 +1 @@ +Security/CWE-020/IncompleteHostnameRegExp.ql \ No newline at end of file diff --git a/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js b/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js new file mode 100644 index 00000000000..45e6476a22e --- /dev/null +++ b/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js @@ -0,0 +1,47 @@ +(function() { + /http:\/\/example.com/; // OK + /http:\/\/test.example.com/; // NOT OK + /http:\/\/test\\.example.com/; // OK + /http:\/\/test.example.net/; // NOT OK + /http:\/\/test.(example-a|example-b).com/; // NOT OK + /http:\/\/(.+)\\.example.com/; // NOT OK, but not yet supported with enough precision + /http:\/\/(\\.+)\\.example.com/; // OK + /http:\/\/(?:.+)\\.test\\.example.com/; // NOT OK, but not yet supported with enough precision + /http:\/\/test.example.com\/(?:.*)/; // OK + new RegExp("http://test.example.com"); // NOT OK + s.match("http://test.example.com"); // NOT OK + + function id(e) { return e; } + new RegExp(id(id(id("http://test.example.com")))); // NOT OK + + new RegExp(`test.example.com$`); // NOT OK + + let hostname = 'test.example.com'; // NOT OK + new RegExp(`${hostname}$`); + + let domain = { hostname: 'test.example.com' }; + new RegExp(domain.hostname); + + function convert(domain) { + return new RegExp(domain.hostname); + } + convert({ hostname: 'test.example.com' }); // NOT OK + + let domains = [ { hostname: 'test.example.com' } ]; // NOT OK, but not yet supported + function convert(domain) { + return new RegExp(domain.hostname); + } + domains.map(d => convert(d)); + + /(.+\.(?:example-a|example-b)\.com)/; // NOT OK, but not yet supported with enough precision + /^(https?:)?\/\/((service|www).)?example.com(?=$|\/)/; // NOT OK + /^(http|https):\/\/www.example.com\/p\/f\//; // NOT OK + /\(http:\/\/sub.example.com\/\)/g; // NOT OK + /https?:\/\/api.example.com/; // NOT OK + new RegExp('^http://localhost:8000|' + '^https?://.+\.example\.com'); // NOT OK + new RegExp('http[s]?:\/\/?sub1\.sub2\.example\.com\/f\/(.+)'); // NOT OK + /^https:\/\/[a-z]*.example.com$/; // NOT OK + RegExp('protos?://(localhost|.+.example.net|.+.example-a.com|.+.example-b.com|.+.example.internal)'); // NOT OK + + /example.dev|example.com/; // OK, but still flagged +});