Python: Untrusted data used in external APIs

A port of the one for Java that was added in https://github.com/github/codeql/pull/3938
This commit is contained in:
Rasmus Wriedt Larsen
2020-11-26 11:34:39 +01:00
parent 7e3dbb0149
commit 9e4910f863
12 changed files with 435 additions and 0 deletions

View File

@@ -0,0 +1,7 @@
from flask import Flask, request, make_response
app = Flask(__name__)
@app.route("/xss")
def xss():
username = request.args.get("username")
return make_response("Hello {}".format(username))

View File

@@ -0,0 +1,12 @@
import base64
import pickle
from flask import Flask, request, make_response
app = Flask(__name__)
@app.route("/example")
def profile():
raw_data = request.args.get("data").encode('utf-8')
data = base64.decodebytes(raw_data)
obj = pickle.loads(data)
...

View File

@@ -0,0 +1,196 @@
/**
* Definitions for reasoning about untrusted data used in APIs defined outside the
* database.
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.python.dataflow.new.internal.TaintTrackingPrivate as TaintTrackingPrivate
private import semmle.python.types.Builtins
private import semmle.python.objects.ObjectInternal
// IMPLEMENTATION NOTES:
//
// This query uses *both* the new data-flow library, and points-to. Why? To get this
// finished quickly, so it can provide value for our field team and ourselves.
//
// In the long run, it should not need to use points-to for anything. Possibly this can
// even be helpful in figuring out what we need from TypeTrackers and the new data-flow
// library to be fully operational.
//
// At least it will allow us to provide a baseline comparison against a solution that
// doesn't use points-to at all
//
// There is a few dirty things we do here:
// 1. DataFlowPrivate: since `DataFlowCall` and `DataFlowCallable` are not exposed
// publicly, but we really want access to them.
// 2. points-to: we kinda need to do this since this is what powers `DataFlowCall` and
// `DataFlowCallable`
// 3. ObjectInternal: to provide better names for built-in functions and methods. If we
// really wanted to polish our points-to implementation, we could move this
// functionality into `BuiltinFunctionValue` and `BuiltinMethodValue`, but will
// probably require some more work: for this query, it's totally ok to use
// `builtins.open` for the code `open(f)`, but well, it requires a bit of thinking to
// figure out if that is desireable in general. I simply skipped a corner here!
// 4. TaintTrackingPrivate: Nothing else gives us access to `defaultAdditionalTaintStep` :(
/**
* A callable that is considered a "safe" external API from a security perspective.
*/
class SafeExternalAPI extends Unit {
abstract DataFlowPrivate::DataFlowCallable getSafeCallable();
}
/** The default set of "safe" external APIs. */
private class DefaultSafeExternalAPI extends SafeExternalAPI {
override DataFlowPrivate::DataFlowCallable getSafeCallable() {
exists(CallableValue cv | cv = result.getCallableValue() |
cv = Value::named(["len", "isinstance", "getattr", "hasattr"])
or
exists(ClassValue cls, string attr |
cls = Value::named("dict") and attr in ["__getitem__", "__setitem__"]
|
cls.lookup(attr) = cv
)
)
}
}
/** A node representing data being passed to an external API through a call. */
class ExternalAPIDataNode extends DataFlow::Node {
DataFlowPrivate::DataFlowCall call;
DataFlowPrivate::DataFlowCallable callable;
int i;
ExternalAPIDataNode() {
exists(call.getLocation().getFile().getRelativePath()) and
callable = call.getCallable() and
not any(SafeExternalAPI safe).getSafeCallable() = callable and
exists(Value cv | cv = callable.getCallableValue() |
cv.isAbsent()
or
cv.isBuiltin()
or
cv.(CallableValue).getScope().getLocation().getFile().inStdlib()
or
not exists(cv.(CallableValue).getScope().getLocation().getFile().getRelativePath())
) and
// TODO: this ignores some complexity of keyword arguments (especially keyword-only args)
this = call.getArg(i) and
// Not already modeled as a taint step
not exists(DataFlow::Node next | TaintTrackingPrivate::defaultAdditionalTaintStep(this, next)) and
// for `list.append(x)`, we have a additional taint step from x -> [post] list.
// Since we have modeled this explicitly, I don't see any cases where we would want to report this.
not exists(DataFlow::Node prev, DataFlow::PostUpdateNode post |
post.getPreUpdateNode() = this and
TaintTrackingPrivate::defaultAdditionalTaintStep(prev, post)
)
}
/** Gets the index for the parameter that will receive this untrusted data */
int getIndex() { result = i }
/** Gets the callable to which this argument is passed. */
DataFlowPrivate::DataFlowCallable getCallable() { result = callable }
}
/** A configuration for tracking flow from `RemoteFlowSource`s to `ExternalAPIDataNode`s. */
class UntrustedDataToExternalAPIConfig extends TaintTracking::Configuration {
UntrustedDataToExternalAPIConfig() { this = "UntrustedDataToExternalAPIConfig" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) { sink instanceof ExternalAPIDataNode }
}
/** A node representing untrusted data being passed to an external API. */
class UntrustedExternalAPIDataNode extends ExternalAPIDataNode {
UntrustedExternalAPIDataNode() { any(UntrustedDataToExternalAPIConfig c).hasFlow(_, this) }
/** Gets a source of untrusted data which is passed to this external API data node. */
DataFlow::Node getAnUntrustedSource() {
any(UntrustedDataToExternalAPIConfig c).hasFlow(result, this)
}
}
private newtype TExternalAPI =
TExternalAPIParameter(DataFlowPrivate::DataFlowCallable callable, int index) {
exists(UntrustedExternalAPIDataNode n |
callable = n.getCallable() and
index = n.getIndex()
)
}
/** An external API which is used with untrusted data. */
class ExternalAPIUsedWithUntrustedData extends TExternalAPI {
/** Gets a possibly untrusted use of this external API. */
UntrustedExternalAPIDataNode getUntrustedDataNode() {
this = TExternalAPIParameter(result.getCallable(), result.getIndex())
}
/** Gets the number of untrusted sources used with this external API. */
int getNumberOfUntrustedSources() {
result = count(getUntrustedDataNode().getAnUntrustedSource())
}
/** Gets a textual representation of this element. */
string toString() {
exists(
DataFlowPrivate::DataFlowCallable callable, int index, string callableString,
string indexString
|
this = TExternalAPIParameter(callable, index) and
indexString = "param " + index and
exists(CallableValue cv | cv = callable.getCallableValue() |
callableString =
cv.getScope().getEnclosingModule().getName() + "." + cv.getScope().getQualifiedName()
or
not exists(cv.getScope()) and
(
cv instanceof BuiltinFunctionValue and
callableString = pretty_builtin_function_value(cv)
or
cv instanceof BuiltinMethodValue and
callableString = pretty_builtin_method_value(cv)
or
not cv instanceof BuiltinFunctionValue and
not cv instanceof BuiltinMethodValue and
callableString = cv.toString()
)
) and
result = callableString + " [" + indexString + "]"
)
}
}
/** Gets the fully qualified name for the `BuiltinFunctionValue` bfv. */
private string pretty_builtin_function_value(BuiltinFunctionValue bfv) {
exists(Builtin b | b = bfv.(BuiltinFunctionObjectInternal).getBuiltin() |
result = prefix_with_module_if_found(b)
)
}
/** Gets the fully qualified name for the `BuiltinMethodValue` bmv. */
private string pretty_builtin_method_value(BuiltinMethodValue bmv) {
exists(Builtin b | b = bmv.(BuiltinMethodObjectInternal).getBuiltin() |
exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b |
result = prefix_with_module_if_found(cls) + "." + b.getName()
)
or
not exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b) and
result = b.getName()
)
}
/** Helper predicate that tries to adds module qualifier to `b`. Will succeed even if module not found. */
private string prefix_with_module_if_found(Builtin b) {
exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b |
result = mod.getName() + "." + b.getName()
)
or
not exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b) and
result = b.getName()
}

View File

@@ -0,0 +1,61 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Using unsanitized untrusted data in an external API can cause a variety of security
issues. This query reports external APIs that are used with untrusted data, along
with how frequently the API is used, and how many unique sources of untrusted data flow
to this API. This query is designed primarily to help identify which APIs may be
relevant for security analysis of this application.</p>
<p>An external API is defined as a call to a method that is not defined in the source
code, and is not modeled as a taint step in the default taint library. External APIs may
be from the Python standard library or dependencies. The query will report the fully qualified name,
along with <code>[param x]</code>, where <code>x</code> indicates the position of
the parameter receiving the untrusted data. Note that for methods and
<code>classmethod</code>s, parameter 0 represents the class instance or class itself
respectively.</p>
<p>Note that an excepted sink might not be included in the results, if it also defines a
taint step. This is the case for <code>pickle.loads</code> which is a sink for the
Unsafe Deserialization query, but is also a taint step for other queries.</p>
<p>Note: Compared to the Java version of this query, we currently do not give special
care to methods that are overridden in the source code.</p>
<p>Note: Currently this query will only report results for external packages that are extracted.</p>
</overview>
<recommendation>
<p>For each result:</p>
<ul>
<li>If the result highlights a known sink, no action is required.</li>
<li>If the result highlights an unknown sink for a problem, then add modeling for the sink to the relevant query.</li>
<li>If the result represents a call to an external API which transfers taint, add the appropriate modeling, and
re-run the query to determine what new results have appeared due to this additional modeling.</li>
</ul>
<p>Otherwise, the result is likely uninteresting. Custom versions of this query can extend the <code>SafeExternalAPI</code>
class and specify <code>getSafeCallable</code> to exclude known safe external APIs from future analysis.</p>
</recommendation>
<example>
<p>If the query were to return the API <code>flask.make_response [param 0]</code>
then we should first consider whether this a security relevant sink. In this case, this is making a HTTP response, so we should
consider whether this is an XSS sink. If it is, we should confirm that it is handled by the XSS query.</p>
<p>If the query were to return the API <code>base64.decodebytes [param 0]</code>, then this should be
reviewed as a possible taint step, because tainted data would flow from the 0th argument to the result of the call.</p>
<p>Note that both examples are correctly handled by the standard taint tracking library and XSS query.</p>
</example>
<references>
</references>
</qhelp>
Mention something about `pickle.loads(user_input)`

View File

@@ -0,0 +1,17 @@
/**
* @name Frequency counts for external APIs that are used with untrusted data
* @description This reports the external APIs that are used with untrusted data, along with how
* frequently the API is called, and how many unique sources of untrusted data flow
* to it.
* @id python/count-untrusted-data-external-api
* @kind table
* @tags security external/cwe/cwe-20
*/
import python
import ExternalAPIs
from ExternalAPIUsedWithUntrustedData externalAPI
select externalAPI, count(externalAPI.getUntrustedDataNode()) as numberOfUses,
externalAPI.getNumberOfUntrustedSources() as numberOfUntrustedSources order by
numberOfUntrustedSources desc

View File

@@ -0,0 +1,74 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Using unsanitized untrusted data in an external API can cause a variety of security
issues. This query reports external APIs that use untrusted data. The results are not
filtered so that you can audit all examples. The query provides data for security
reviews of the application and you can also use it to identify external APIs that should
be modeled as either taint steps, or sinks for specific problems.</p>
<p>An external API is defined as a call to a method that is not defined in the source
code, and is not modeled as a taint step in the default taint library. External APIs may
be from the Python standard library or dependencies. The query will report the fully qualified name,
along with <code>[param x]</code>, where <code>x</code> indicates the position of
the parameter receiving the untrusted data. Note that for methods and
<code>classmethod</code>s, parameter 0 represents the class instance or class itself
respectively.</p>
<p>Note that an excepted sink might not be included in the results, if it also defines a
taint step. This is the case for <code>pickle.loads</code> which is a sink for the
Unsafe Deserialization query, but is also a taint step for other queries.</p>
<p>Note: Compared to the Java version of this query, we currently do not give special
care to methods that are overridden in the source code.</p>
<p>Note: Currently this query will only report results for external packages that are extracted.</p>
</overview>
<recommendation>
<p>For each result:</p>
<ul>
<li>If the result highlights a known sink, confirm that the result is reported by the relevant query, or
that the result is a false positive because this data is sanitized.</li>
<li>If the result highlights an unknown sink for a problem, then add modeling for the sink to the relevant query,
and confirm that the result is either found, or is safe due to appropriate sanitization.</li>
<li>If the result represents a call to an external API that transfers taint, add the appropriate modeling, and
re-run the query to determine what new results have appeared due to this additional modeling.</li>
</ul>
<p>Otherwise, the result is likely uninteresting. Custom versions of this query can extend the <code>SafeExternalAPI</code>
class and specify <code>getSafeCallable</code> to exclude known safe external APIs from future analysis.</p>
</recommendation>
<example>
<p>In this first example, a request parameter is read from the Flask <code>request</code> and then ultimately used in a call to the
<code>flask.make_response</code> external API:</p>
<sample src="ExternalAPISinkExample.py" />
<p>This is an XSS sink. The XSS query should therefore be reviewed to confirm that this sink is appropriately modeled,
and if it is, to confirm that the query reports this particular result, or that the result is a false positive due to
some existing sanitization.</p>
<p>In this second example, again a request parameter is read from the Flask <code>request</code>.</p>
<sample src="ExternalAPITaintStepExample.py" />
<p>If the query reported the call to <code>base64.decodebytes</code> on line 10, this
would suggest that this external API is not currently modeled as a taint step in the
taint tracking library. The next step would be to model this as a taint step, then
re-run the query to determine what additional results might be found. In this example,
the result of the Base64 decoding is pickled, which can result in remote code execution due
to unsafe deserialization.
<p>Note that both examples are correctly handled by the standard taint tracking library and Unsafe Deserialization query.</p>
</example>
<references>
</references>
</qhelp>

View File

@@ -0,0 +1,23 @@
/**
* @name Untrusted data passed to external API
* @description Data provided remotely is used in this external API without sanitization, which could be a security risk.
* @id python/untrusted-data-to-external-api
* @kind path-problem
* @precision low
* @problem.severity error
* @tags security external/cwe/cwe-20
*/
import python
import ExternalAPIs
import DataFlow::PathGraph
from
UntrustedDataToExternalAPIConfig config, DataFlow::PathNode source, DataFlow::PathNode sink,
ExternalAPIUsedWithUntrustedData externalAPI
where
sink.getNode() = externalAPI.getUntrustedDataNode() and
config.hasFlowPath(source, sink)
select sink.getNode(), source, sink,
"Call to " + externalAPI.toString() + " with untrusted data from $@.", source.getNode(),
source.toString()