Merge pull request #15187 from github/max-schaefer/py-url-redirection

Python: Add support for more URL redirect sanitisers.
This commit is contained in:
Taus
2024-01-22 23:19:36 +01:00
committed by GitHub
11 changed files with 358 additions and 32 deletions

View File

@@ -2928,5 +2928,10 @@ module PrivateDjango {
DjangoAllowedUrl() {
this = DataFlow::BarrierGuard<djangoUrlHasAllowedHostAndScheme/3>::getABarrierNode()
}
override predicate sanitizes(UrlRedirect::FlowState state) {
// sanitize all flow states
any()
}
}
}

View File

@@ -10,6 +10,7 @@
private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.security.dataflow.UrlRedirectCustomizations
/**
* Provides models for the `urllib` module, part of
@@ -70,4 +71,55 @@ private module Urllib {
}
}
}
/**
* Provides models for the `urllib.parse` extension library.
*/
module Parse {
/**
* A call to `urllib.parse.urlparse`.
*/
private DataFlow::CallCfgNode getUrlParseCall() {
result = API::moduleImport("urllib").getMember("parse").getMember("urlparse").getACall()
}
/**
* A read of the `netloc` attribute of a parsed URL as returned by `urllib.parse.urlparse`,
* which is being checked in a way that is relevant for URL redirection vulnerabilities.
*/
private predicate netlocCheck(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
exists(DataFlow::CallCfgNode urlParseCall, DataFlow::AttrRead netlocRead |
urlParseCall = getUrlParseCall() and
netlocRead = urlParseCall.getAnAttributeRead("netloc") and
node = urlParseCall.getArg(0).asCfgNode()
|
// either a simple check of the netloc attribute
g = netlocRead.asCfgNode() and
branch = false
or
// or a comparison (we don't care against what)
exists(Compare cmp, string op |
cmp = g.getNode() and
op = unique(Cmpop opp | opp = cmp.getAnOp()).getSymbol() and
cmp.getASubExpression() = netlocRead.asExpr()
|
op in ["==", "is", "in"] and branch = true
or
op in ["!=", "is not", "not in"] and branch = false
)
)
}
/**
* A check of `urllib.parse.urlparse().netloc`, considered as a sanitizer-guard for URL redirection.
*/
private class NetlocCheck extends UrlRedirect::Sanitizer {
NetlocCheck() { this = DataFlow::BarrierGuard<netlocCheck/3>::getABarrierNode() }
override predicate sanitizes(UrlRedirect::FlowState state) {
// `urlparse` does not handle backslashes
state instanceof UrlRedirect::NoBackslashes
}
}
}
}

View File

@@ -10,6 +10,7 @@ private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.Multidict
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.security.dataflow.UrlRedirectCustomizations
/**
* INTERNAL: Do not use.
@@ -108,5 +109,32 @@ module Yarl {
this.(DataFlow::AttrRead).getAttributeName() = "query"
}
}
private predicate yarlUrlIsAbsoluteCall(
DataFlow::GuardNode g, ControlFlowNode node, boolean branch
) {
exists(ClassInstantiation instance, DataFlow::MethodCallNode call |
call.calls(instance, "is_absolute") and
g = call.asCfgNode() and
node = instance.getArg(0).asCfgNode() and
branch = false
)
}
/**
* A call to `yarl.URL.is_absolute`, considered as a sanitizer-guard for URL redirection.
*
* See https://yarl.aio-libs.org/en/latest/api/#absolute-and-relative-urls.
*/
private class YarlIsAbsoluteUrl extends UrlRedirect::Sanitizer {
YarlIsAbsoluteUrl() {
this = DataFlow::BarrierGuard<yarlUrlIsAbsoluteCall/3>::getABarrierNode()
}
override predicate sanitizes(UrlRedirect::FlowState state) {
// `is_absolute` does not handle backslashes
state instanceof UrlRedirect::NoBackslashes
}
}
}
}

View File

@@ -16,6 +16,29 @@ private import semmle.python.dataflow.new.BarrierGuards
* vulnerabilities, as well as extension points for adding your own.
*/
module UrlRedirect {
/**
* A state value to track whether the untrusted data may contain backslashes.
*/
abstract class FlowState extends string {
bindingset[this]
FlowState() { any() }
}
/**
* A state value signifying that the untrusted data may contain backslashes.
*/
class MayContainBackslashes extends UrlRedirect::FlowState {
MayContainBackslashes() { this = "MayContainBackslashes" }
}
/**
* A state value signifying that any backslashes in the untrusted data have
* been eliminated, but no other sanitization has happened.
*/
class NoBackslashes extends UrlRedirect::FlowState {
NoBackslashes() { this = "NoBackslashes" }
}
/**
* A data flow source for "URL redirection" vulnerabilities.
*/
@@ -29,7 +52,28 @@ module UrlRedirect {
/**
* A sanitizer for "URL redirection" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
abstract class Sanitizer extends DataFlow::Node {
/**
* Holds if this sanitizer sanitizes flow in the given state.
*/
abstract predicate sanitizes(FlowState state);
}
/**
* An additional flow step for "URL redirection" vulnerabilities.
*/
abstract class AdditionalFlowStep extends DataFlow::Node {
/**
* Holds if there should be an additional flow step from `nodeFrom` in `stateFrom`
* to `nodeTo` in `stateTo`.
*
* For example, a call to `replace` that replaces backslashes with forward slashes
* takes flow from `MayContainBackslashes` to `NoBackslashes`.
*/
abstract predicate step(
DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
);
}
/**
* A source of remote user input, considered as a flow source.
@@ -57,10 +101,46 @@ module UrlRedirect {
string_concat.getRight() = this.asCfgNode()
)
}
override predicate sanitizes(FlowState state) {
// sanitize all flow states
any()
}
}
/**
* A call that replaces backslashes with forward slashes or eliminates them
* altogether, considered as a partial sanitizer, as well as an additional
* flow step.
*/
class ReplaceBackslashesSanitizer extends Sanitizer, AdditionalFlowStep, DataFlow::MethodCallNode {
DataFlow::Node receiver;
ReplaceBackslashesSanitizer() {
this.calls(receiver, "replace") and
this.getArg(0).asExpr().(StrConst).getText() = "\\" and
this.getArg(1).asExpr().(StrConst).getText() in ["/", ""]
}
override predicate sanitizes(FlowState state) { state instanceof MayContainBackslashes }
override predicate step(
DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
) {
nodeFrom = receiver and
stateFrom instanceof MayContainBackslashes and
nodeTo = this and
stateTo instanceof NoBackslashes
}
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/
class StringConstCompareAsSanitizerGuard extends Sanitizer, StringConstCompareBarrier { }
class StringConstCompareAsSanitizerGuard extends Sanitizer, StringConstCompareBarrier {
override predicate sanitizes(FlowState state) {
// sanitize all flow states
any()
}
}
}

View File

@@ -9,7 +9,7 @@
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import UrlRedirectCustomizations::UrlRedirect
import UrlRedirectCustomizations::UrlRedirect as UrlRedirect
/**
* DEPRECATED: Use `UrlRedirectFlow` module instead.
@@ -19,20 +19,48 @@ import UrlRedirectCustomizations::UrlRedirect
deprecated class Configuration extends TaintTracking::Configuration {
Configuration() { this = "UrlRedirect" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
source instanceof UrlRedirect::Source and state instanceof UrlRedirect::MayContainBackslashes
}
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
sink instanceof UrlRedirect::Sink and state instanceof UrlRedirect::FlowState
}
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) {
node.(UrlRedirect::Sanitizer).sanitizes(state)
}
override predicate isAdditionalTaintStep(
DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
DataFlow::FlowState stateTo
) {
any(UrlRedirect::AdditionalFlowStep a).step(nodeFrom, stateFrom, nodeTo, stateTo)
}
}
private module UrlRedirectConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof Source }
private module UrlRedirectConfig implements DataFlow::StateConfigSig {
class FlowState = UrlRedirect::FlowState;
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
predicate isSource(DataFlow::Node source, FlowState state) {
source instanceof UrlRedirect::Source and state instanceof UrlRedirect::MayContainBackslashes
}
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
predicate isSink(DataFlow::Node sink, FlowState state) {
sink instanceof UrlRedirect::Sink and
exists(state)
}
predicate isBarrier(DataFlow::Node node, FlowState state) {
node.(UrlRedirect::Sanitizer).sanitizes(state)
}
predicate isAdditionalFlowStep(
DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
) {
any(UrlRedirect::AdditionalFlowStep a).step(nodeFrom, stateFrom, nodeTo, stateTo)
}
}
/** Global taint-tracking for detecting "URL redirection" vulnerabilities. */
module UrlRedirectFlow = TaintTracking::Global<UrlRedirectConfig>;
module UrlRedirectFlow = TaintTracking::GlobalWithState<UrlRedirectConfig>;

View File

@@ -39,19 +39,30 @@ and check that the user input is in that list:
<p>
Often this is not possible, so an alternative is to check that the target URL does not
specify an explicit host name. For example, the Django framework provides a
function <code>url_has_allowed_host_and_scheme</code> that can be used to check that a
URL is safe to redirect to, as shown in the following example:
specify an explicit host name. For example, you can use the <code>urlparse</code> function
from the Python standard library to parse the URL and check that the <code>netloc</code>
attribute is empty.
</p>
<p>
Note, however, that many browsers accept backslash characters (<code>\</code>) as equivalent
to forward slash characters (<code>/</code>) in URLs, but the <code>urlparse</code> function
does not. To account for this, you can first replace all backslashes with forward slashes,
as shown in the following example:
</p>
<sample src="examples/redirect_good2.py"/>
<p>
Note that many browsers accept backslash characters (<code>\</code>) as equivalent to
forward slash characters (<code>/</code>) in URLs, so it is important to account for this
when validating the URL, for example by first replacing all backslashes with forward
slashes. Django's <code>url_has_allowed_host_and_scheme</code> function
does this automatically, but other libraries may not.
For Django application, you can use the function <code>url_has_allowed_host_and_scheme</code>
to check that a URL is safe to redirect to, as shown in the following example:
</p>
<sample src="examples/redirect_good3.py"/>
<p>
Note that <code>url_has_allowed_host_and_scheme</code> handles backslashes correctly, so no
additional processing is required.
</p>
</example>
@@ -59,6 +70,8 @@ does this automatically, but other libraries may not.
<references>
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Unvalidated_Redirects_and_Forwards_Cheat_Sheet.html">
XSS Unvalidated Redirects and Forwards Cheat Sheet</a>.</li>
<li>Python standard library: <a href="https://docs.python.org/3/library/urllib.parse.html">
urllib.parse</a>.</li>
</references>
</qhelp>

View File

@@ -1,13 +1,14 @@
from django.http import HttpResponseRedirect
from django.shortcuts import redirect
from django.utils.http import url_has_allowed_host_and_scheme
from django.views import View
from flask import Flask, request, redirect
from urllib.parse import urlparse
class RedirectView(View):
def get(self, request, *args, **kwargs):
target = request.GET.get('target', '')
if url_has_allowed_host_and_scheme(target, allowed_hosts=None):
return HttpResponseRedirect(target)
else:
# ignore the target and redirect to the home page
return redirect('/')
app = Flask(__name__)
@app.route('/')
def hello():
target = request.args.get('target', '')
target = target.replace('\\', '')
if not urlparse(target).netloc:
# relative path, safe to redirect
return redirect(target, code=302)
# ignore the target and redirect to the home page
return redirect('/', code=302)

View File

@@ -0,0 +1,13 @@
from django.http import HttpResponseRedirect
from django.shortcuts import redirect
from django.utils.http import url_has_allowed_host_and_scheme
from django.views import View
class RedirectView(View):
def get(self, request, *args, **kwargs):
target = request.GET.get('target', '')
if url_has_allowed_host_and_scheme(target, allowed_hosts=None):
return HttpResponseRedirect(target)
else:
# ignore the target and redirect to the home page
return redirect('/')

View File

@@ -0,0 +1,5 @@
---
category: minorAnalysis
---
- Added modeling of YARL's `is_absolute` method and checks of the `netloc` of a parsed URL as sanitizers for the `py/url-redirection` query, leading to fewer false positives.

View File

@@ -9,6 +9,9 @@ edges
| test.py:1:26:1:32 | ControlFlowNode for request | test.py:74:17:74:23 | ControlFlowNode for request |
| test.py:1:26:1:32 | ControlFlowNode for request | test.py:81:17:81:23 | ControlFlowNode for request |
| test.py:1:26:1:32 | ControlFlowNode for request | test.py:90:17:90:23 | ControlFlowNode for request |
| test.py:1:26:1:32 | ControlFlowNode for request | test.py:111:17:111:23 | ControlFlowNode for request |
| test.py:1:26:1:32 | ControlFlowNode for request | test.py:137:17:137:23 | ControlFlowNode for request |
| test.py:1:26:1:32 | ControlFlowNode for request | test.py:145:17:145:23 | ControlFlowNode for request |
| test.py:7:5:7:10 | ControlFlowNode for target | test.py:8:21:8:26 | ControlFlowNode for target |
| test.py:7:14:7:20 | ControlFlowNode for request | test.py:7:14:7:25 | ControlFlowNode for Attribute |
| test.py:7:14:7:25 | ControlFlowNode for Attribute | test.py:7:14:7:43 | ControlFlowNode for Attribute() |
@@ -52,6 +55,18 @@ edges
| test.py:90:17:90:23 | ControlFlowNode for request | test.py:90:17:90:28 | ControlFlowNode for Attribute |
| test.py:90:17:90:28 | ControlFlowNode for Attribute | test.py:90:17:90:46 | ControlFlowNode for Attribute() |
| test.py:90:17:90:46 | ControlFlowNode for Attribute() | test.py:90:5:90:13 | ControlFlowNode for untrusted |
| test.py:111:5:111:13 | ControlFlowNode for untrusted | test.py:114:25:114:33 | ControlFlowNode for untrusted |
| test.py:111:17:111:23 | ControlFlowNode for request | test.py:111:17:111:28 | ControlFlowNode for Attribute |
| test.py:111:17:111:28 | ControlFlowNode for Attribute | test.py:111:17:111:46 | ControlFlowNode for Attribute() |
| test.py:111:17:111:46 | ControlFlowNode for Attribute() | test.py:111:5:111:13 | ControlFlowNode for untrusted |
| test.py:137:5:137:13 | ControlFlowNode for untrusted | test.py:140:25:140:33 | ControlFlowNode for untrusted |
| test.py:137:17:137:23 | ControlFlowNode for request | test.py:137:17:137:28 | ControlFlowNode for Attribute |
| test.py:137:17:137:28 | ControlFlowNode for Attribute | test.py:137:17:137:46 | ControlFlowNode for Attribute() |
| test.py:137:17:137:46 | ControlFlowNode for Attribute() | test.py:137:5:137:13 | ControlFlowNode for untrusted |
| test.py:145:5:145:13 | ControlFlowNode for untrusted | test.py:148:25:148:33 | ControlFlowNode for untrusted |
| test.py:145:17:145:23 | ControlFlowNode for request | test.py:145:17:145:28 | ControlFlowNode for Attribute |
| test.py:145:17:145:28 | ControlFlowNode for Attribute | test.py:145:17:145:46 | ControlFlowNode for Attribute() |
| test.py:145:17:145:46 | ControlFlowNode for Attribute() | test.py:145:5:145:13 | ControlFlowNode for untrusted |
nodes
| test.py:1:26:1:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
| test.py:1:26:1:32 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
@@ -107,6 +122,21 @@ nodes
| test.py:90:17:90:28 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:90:17:90:46 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:93:18:93:26 | ControlFlowNode for untrusted | semmle.label | ControlFlowNode for untrusted |
| test.py:111:5:111:13 | ControlFlowNode for untrusted | semmle.label | ControlFlowNode for untrusted |
| test.py:111:17:111:23 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:111:17:111:28 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:111:17:111:46 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:114:25:114:33 | ControlFlowNode for untrusted | semmle.label | ControlFlowNode for untrusted |
| test.py:137:5:137:13 | ControlFlowNode for untrusted | semmle.label | ControlFlowNode for untrusted |
| test.py:137:17:137:23 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:137:17:137:28 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:137:17:137:46 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:140:25:140:33 | ControlFlowNode for untrusted | semmle.label | ControlFlowNode for untrusted |
| test.py:145:5:145:13 | ControlFlowNode for untrusted | semmle.label | ControlFlowNode for untrusted |
| test.py:145:17:145:23 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:145:17:145:28 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:145:17:145:46 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:148:25:148:33 | ControlFlowNode for untrusted | semmle.label | ControlFlowNode for untrusted |
subpaths
#select
| test.py:8:21:8:26 | ControlFlowNode for target | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:8:21:8:26 | ControlFlowNode for target | Untrusted URL redirection depends on a $@. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
@@ -118,3 +148,6 @@ subpaths
| test.py:76:21:76:26 | ControlFlowNode for unsafe | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:76:21:76:26 | ControlFlowNode for unsafe | Untrusted URL redirection depends on a $@. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:83:21:83:26 | ControlFlowNode for unsafe | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:83:21:83:26 | ControlFlowNode for unsafe | Untrusted URL redirection depends on a $@. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:93:18:93:26 | ControlFlowNode for untrusted | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:93:18:93:26 | ControlFlowNode for untrusted | Untrusted URL redirection depends on a $@. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:114:25:114:33 | ControlFlowNode for untrusted | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:114:25:114:33 | ControlFlowNode for untrusted | Untrusted URL redirection depends on a $@. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:140:25:140:33 | ControlFlowNode for untrusted | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:140:25:140:33 | ControlFlowNode for untrusted | Untrusted URL redirection depends on a $@. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:148:25:148:33 | ControlFlowNode for untrusted | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:148:25:148:33 | ControlFlowNode for untrusted | Untrusted URL redirection depends on a $@. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |

View File

@@ -94,4 +94,72 @@ def ok6():
if url_has_allowed_host_and_scheme(untrusted, allowed_hosts=None):
return redirect(untrusted, code=302) # OK
return redirect("https://example.com", code=302) # OK
return redirect("https://example.com", code=302) # OK
import yarl
@app.route('/ok7')
def ok7():
untrusted = request.args.get('target', '')
untrusted = untrusted.replace("\\", "/")
if not yarl.URL(untrusted).is_absolute():
return redirect(untrusted, code=302) # OK
return redirect("/", code=302)
@app.route('/not_ok5')
def not_ok5():
untrusted = request.args.get('target', '')
# no backslash replace
if not yarl.URL(untrusted).is_absolute():
return redirect(untrusted, code=302) # NOT OK
return redirect("/", code=302)
from urllib.parse import urlparse
@app.route('/ok8')
def ok8():
untrusted = request.args.get('target', '')
untrusted = untrusted.replace("\\", "/")
if not urlparse(untrusted).netloc:
return redirect(untrusted, code=302) # OK
return redirect("/", code=302)
@app.route('/ok9')
def ok9():
untrusted = request.args.get('target', '')
untrusted = untrusted.replace("\\", "/")
if urlparse(untrusted).netloc == "":
return redirect(untrusted, code=302) # OK
return redirect("/", code=302)
@app.route('/not_ok6')
def not_ok6():
untrusted = request.args.get('target', '')
# no backslash replace
if not urlparse(untrusted).netloc:
return redirect(untrusted, code=302) # NOT OK
return redirect("/", code=302)
@app.route('/not_ok7')
def not_ok7():
untrusted = request.args.get('target', '')
# wrong check
if urlparse(untrusted).netloc != "":
return redirect(untrusted, code=302) # NOT OK
return redirect("/", code=302)
@app.route('/ok10')
def ok10():
untrusted = request.args.get('target', '')
untrusted = untrusted.replace("\\", "/")
if urlparse(untrusted).netloc in ["", request.host]:
return redirect(untrusted, code=302) # OK
return redirect("/", code=302)
@app.route('/ok11')
def ok11():
untrusted = request.args.get('target', '')
untrusted = untrusted.replace("\\", "/")
if urlparse(untrusted).netloc not in ["", request.host]:
return redirect("/", code=302) # OK
return redirect(untrusted, code=302)