Python: Fix ExternalAPIs queries

The output might end up being slightly more noisy since we don't
collapse positional and keyword arguments when the external target
function is included in the database, but this aligns with our long-term
goal of not doing that anymore, so I think it's fine.
This commit is contained in:
Rasmus Wriedt Larsen
2022-08-16 14:56:19 +02:00
parent 2e2cee06c3
commit 7648462f98
4 changed files with 159 additions and 141 deletions

View File

@@ -1,48 +1,36 @@
/** /**
* Definitions for reasoning about untrusted data used in APIs defined outside the * Definitions for reasoning about untrusted data used in APIs defined outside the
* database. * user-written code.
*/ */
import python private import python
import semmle.python.dataflow.new.DataFlow import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking private import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts private import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.python.dataflow.new.internal.TaintTrackingPrivate as TaintTrackingPrivate private import semmle.python.dataflow.new.internal.TaintTrackingPrivate as TaintTrackingPrivate
private import semmle.python.types.Builtins
private import semmle.python.objects.ObjectInternal
// IMPLEMENTATION NOTES:
//
// This query uses *both* the new data-flow library, and points-to. Why? To get this
// finished quickly, so it can provide value for our field team and ourselves.
//
// In the long run, it should not need to use points-to for anything. Possibly this can
// even be helpful in figuring out what we need from TypeTrackers and the new data-flow
// library to be fully operational.
//
// At least it will allow us to provide a baseline comparison against a solution that
// doesn't use points-to at all
//
// There is a few dirty things we do here:
// 1. DataFlowPrivate: since `DataFlowCall` and `DataFlowCallable` are not exposed
// publicly, but we really want access to them.
// 2. points-to: we kinda need to do this since this is what powers `DataFlowCall` and
// `DataFlowCallable`
// 3. ObjectInternal: to provide better names for built-in functions and methods. If we
// really wanted to polish our points-to implementation, we could move this
// functionality into `BuiltinFunctionValue` and `BuiltinMethodValue`, but will
// probably require some more work: for this query, it's totally ok to use
// `builtins.open` for the code `open(f)`, but well, it requires a bit of thinking to
// figure out if that is desirable in general. I simply skipped a corner here!
// 4. TaintTrackingPrivate: Nothing else gives us access to `defaultAdditionalTaintStep` :(
/** /**
* A callable that is considered a "safe" external API from a security perspective. * An external API that is considered a "safe" from a security perspective.
*/ */
class SafeExternalApi extends Unit { class SafeExternalApi extends Unit {
/** Gets a callable that is considered a "safe" external API from a security perspective. */ /**
abstract DataFlowPrivate::DataFlowCallable getSafeCallable(); * Gets a call that is considered "safe" from a security perspective. You can use API
* graphs to find calls to functions you know are safe.
*
* Which works even when the external library isn't extracted.
*/
abstract DataFlow::CallCfgNode getSafeCall();
/**
* Gets a callable that is considered a "safe" external API from a security
* perspective.
*
* You probably want to define this as `none()` and use `getSafeCall` instead, since
* that can handle the external library not being extracted.
*/
DataFlowPrivate::DataFlowCallable getSafeCallable() { none() }
} }
/** DEPRECATED: Alias for SafeExternalApi */ /** DEPRECATED: Alias for SafeExternalApi */
@@ -50,57 +38,112 @@ deprecated class SafeExternalAPI = SafeExternalApi;
/** The default set of "safe" external APIs. */ /** The default set of "safe" external APIs. */
private class DefaultSafeExternalApi extends SafeExternalApi { private class DefaultSafeExternalApi extends SafeExternalApi {
override DataFlowPrivate::DataFlowCallable getSafeCallable() { override DataFlow::CallCfgNode getSafeCall() {
exists(CallableValue cv | cv = result.getCallableValue() | result = API::builtin(["len", "isinstance", "getattr", "hasattr"]).getACall()
cv = Value::named(["len", "isinstance", "getattr", "hasattr"]) }
or }
exists(ClassValue cls, string attr |
cls = Value::named("dict") and attr in ["__getitem__", "__setitem__"] /** Gets a human readable representation of `node`. */
| string apiNodeToStringRepr(API::Node node) {
cls.lookup(attr) = cv node = API::builtin(result)
) or
node = API::moduleImport(result)
or
exists(API::Node base, string basename |
base.getDepth() < node.getDepth() and
basename = apiNodeToStringRepr(base)
|
exists(string m | node = base.getMember(m) | result = basename + "." + m)
or
node = base.getReturn() and
result = basename + "()"
or
node = base.getAwaited() and
result = basename
)
}
newtype TInterestingExternalApiCall =
TUnresolvedCall(DataFlow::CallCfgNode call) {
exists(call.getLocation().getFile().getRelativePath()) and
not exists(DataFlowPrivate::DataFlowCall dfCall | dfCall.getNode() = call.getNode()) and
not call = any(SafeExternalApi safe).getSafeCall()
} or
TResolvedCall(DataFlowPrivate::DataFlowCall call) {
exists(call.getLocation().getFile().getRelativePath()) and
not call.getCallable() = any(SafeExternalApi safe).getSafeCallable() and
not exists(call.getCallable().getLocation().getFile().getRelativePath())
}
abstract class InterestingExternalApiCall extends TInterestingExternalApiCall {
/** Gets the argument at position `apos`, if any */
abstract DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos);
/** Gets a textual representation of this element. */
abstract string toString();
/**
* Gets a human-readable name for the external API.
*/
abstract string getApiName();
}
class ResolvedCall extends InterestingExternalApiCall, TResolvedCall {
DataFlowPrivate::DataFlowCall dfCall;
ResolvedCall() { this = TResolvedCall(dfCall) }
override DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos) {
result = dfCall.getArgument(apos)
}
override string toString() { result = "ExternalAPI:ResolvedCall" }
override string getApiName() {
exists(DataFlow::CallCfgNode call, API::Node apiNode | dfCall.getNode() = call.getNode() |
result = apiNodeToStringRepr(apiNode) and
apiNode.getACall() = call
)
}
}
class UnresolvedCall extends InterestingExternalApiCall, TUnresolvedCall {
DataFlow::CallCfgNode call;
UnresolvedCall() { this = TUnresolvedCall(call) }
override DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos) {
exists(int i | apos.isPositional(i) | result = call.getArg(i))
or
exists(string name | apos.isKeyword(name) | result = call.getArgByName(name))
}
override string toString() { result = "ExternalAPI:UnresolvedCall" }
override string getApiName() {
exists(API::Node apiNode |
result = apiNodeToStringRepr(apiNode) and
apiNode.getACall() = call
) )
} }
} }
/** A node representing data being passed to an external API through a call. */ /** A node representing data being passed to an external API through a call. */
class ExternalApiDataNode extends DataFlow::Node { class ExternalApiDataNode extends DataFlow::Node {
DataFlowPrivate::DataFlowCallable callable; InterestingExternalApiCall call;
int i; DataFlowPrivate::ArgumentPosition apos;
ExternalApiDataNode() { ExternalApiDataNode() {
exists(DataFlowPrivate::DataFlowCall call | this = call.getArgument(apos) and
exists(call.getLocation().getFile().getRelativePath())
|
callable = call.getCallable() and
// TODO: this ignores some complexity of keyword arguments (especially keyword-only args)
this = call.getArg(i)
) and
not any(SafeExternalApi safe).getSafeCallable() = callable and
exists(Value cv | cv = callable.getCallableValue() |
cv.isAbsent()
or
cv.isBuiltin()
or
cv.(CallableValue).getScope().getLocation().getFile().inStdlib()
or
not exists(cv.(CallableValue).getScope().getLocation().getFile().getRelativePath())
) and
// Not already modeled as a taint step // Not already modeled as a taint step
not exists(DataFlow::Node next | TaintTrackingPrivate::defaultAdditionalTaintStep(this, next)) and not exists(DataFlow::Node next | TaintTrackingPrivate::defaultAdditionalTaintStep(this, next)) and
// for `list.append(x)`, we have a additional taint step from x -> [post] list. // for `list.append(x)`, we have a additional taint step from x -> [post] list.
// Since we have modeled this explicitly, I don't see any cases where we would want to report this. // Since we have modeled this explicitly, I don't see any cases where we would want to report this.
not exists(DataFlow::Node prev, DataFlow::PostUpdateNode post | not exists(DataFlow::PostUpdateNode post |
post.getPreUpdateNode() = this and post.getPreUpdateNode() = this and
TaintTrackingPrivate::defaultAdditionalTaintStep(prev, post) TaintTrackingPrivate::defaultAdditionalTaintStep(_, post)
) )
} }
/** Gets the index for the parameter that will receive this untrusted data */
int getIndex() { result = i }
/** Gets the callable to which this argument is passed. */
DataFlowPrivate::DataFlowCallable getCallable() { result = callable }
} }
/** DEPRECATED: Alias for ExternalApiDataNode */ /** DEPRECATED: Alias for ExternalApiDataNode */
@@ -133,19 +176,26 @@ deprecated class UntrustedExternalAPIDataNode = UntrustedExternalApiDataNode;
/** An external API which is used with untrusted data. */ /** An external API which is used with untrusted data. */
private newtype TExternalApi = private newtype TExternalApi =
/** An untrusted API method `m` where untrusted data is passed at `index`. */ MkExternalApi(string repr, DataFlowPrivate::ArgumentPosition apos) {
TExternalApiParameter(DataFlowPrivate::DataFlowCallable callable, int index) { exists(UntrustedExternalApiDataNode ex, InterestingExternalApiCall call |
exists(UntrustedExternalApiDataNode n | ex = call.getArgument(apos) and
callable = n.getCallable() and repr = call.getApiName()
index = n.getIndex()
) )
} }
/** An external API which is used with untrusted data. */ /** A argument of an external API which is used with untrusted data. */
class ExternalApiUsedWithUntrustedData extends TExternalApi { class ExternalApiUsedWithUntrustedData extends MkExternalApi {
string repr;
DataFlowPrivate::ArgumentPosition apos;
ExternalApiUsedWithUntrustedData() { this = MkExternalApi(repr, apos) }
/** Gets a possibly untrusted use of this external API. */ /** Gets a possibly untrusted use of this external API. */
UntrustedExternalApiDataNode getUntrustedDataNode() { UntrustedExternalApiDataNode getUntrustedDataNode() {
this = TExternalApiParameter(result.getCallable(), result.getIndex()) exists(InterestingExternalApiCall call |
result = call.getArgument(apos) and
call.getApiName() = repr
)
} }
/** Gets the number of untrusted sources used with this external API. */ /** Gets the number of untrusted sources used with this external API. */
@@ -154,63 +204,8 @@ class ExternalApiUsedWithUntrustedData extends TExternalApi {
} }
/** Gets a textual representation of this element. */ /** Gets a textual representation of this element. */
string toString() { string toString() { result = repr + " [" + apos + "]" }
exists(
DataFlowPrivate::DataFlowCallable callable, int index, string callableString,
string indexString
|
this = TExternalApiParameter(callable, index) and
indexString = "param " + index and
exists(CallableValue cv | cv = callable.getCallableValue() |
callableString =
cv.getScope().getEnclosingModule().getName() + "." + cv.getScope().getQualifiedName()
or
not exists(cv.getScope()) and
(
cv instanceof BuiltinFunctionValue and
callableString = pretty_builtin_function_value(cv)
or
cv instanceof BuiltinMethodValue and
callableString = pretty_builtin_method_value(cv)
or
not cv instanceof BuiltinFunctionValue and
not cv instanceof BuiltinMethodValue and
callableString = cv.toString()
)
) and
result = callableString + " [" + indexString + "]"
)
}
} }
/** DEPRECATED: Alias for ExternalApiUsedWithUntrustedData */ /** DEPRECATED: Alias for ExternalApiUsedWithUntrustedData */
deprecated class ExternalAPIUsedWithUntrustedData = ExternalApiUsedWithUntrustedData; deprecated class ExternalAPIUsedWithUntrustedData = ExternalApiUsedWithUntrustedData;
/** Gets the fully qualified name for the `BuiltinFunctionValue` bfv. */
private string pretty_builtin_function_value(BuiltinFunctionValue bfv) {
exists(Builtin b | b = bfv.(BuiltinFunctionObjectInternal).getBuiltin() |
result = prefix_with_module_if_found(b)
)
}
/** Gets the fully qualified name for the `BuiltinMethodValue` bmv. */
private string pretty_builtin_method_value(BuiltinMethodValue bmv) {
exists(Builtin b | b = bmv.(BuiltinMethodObjectInternal).getBuiltin() |
exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b |
result = prefix_with_module_if_found(cls) + "." + b.getName()
)
or
not exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b) and
result = b.getName()
)
}
/** Helper predicate that tries to adds module qualifier to `b`. Will succeed even if module not found. */
private string prefix_with_module_if_found(Builtin b) {
exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b |
result = mod.getName() + "." + b.getName()
)
or
not exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b) and
result = b.getName()
}

View File

@@ -1 +1,4 @@
| hmac.new [param 1] | 2 | 1 | | hmac.new [keyword msg] | 1 | 1 |
| hmac.new [position 1] | 1 | 1 |
| unknown.lib.func [keyword kw] | 2 | 1 |
| unknown.lib.func [position 0] | 2 | 1 |

View File

@@ -1,12 +1,20 @@
edges edges
| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:13:16:13:22 | ControlFlowNode for request | | test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:13:16:13:22 | ControlFlowNode for request |
| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:23:16:23:22 | ControlFlowNode for request | | test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:23:16:23:22 | ControlFlowNode for request |
| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:34:12:34:18 | ControlFlowNode for request |
| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:42:12:42:18 | ControlFlowNode for request |
| test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:5:26:5:32 | GSSA Variable request | | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:5:26:5:32 | GSSA Variable request |
| test.py:5:26:5:32 | GSSA Variable request | test.py:0:0:0:0 | ModuleVariableNode for test.request | | test.py:5:26:5:32 | GSSA Variable request | test.py:0:0:0:0 | ModuleVariableNode for test.request |
| test.py:13:16:13:22 | ControlFlowNode for request | test.py:13:16:13:27 | ControlFlowNode for Attribute | | test.py:13:16:13:22 | ControlFlowNode for request | test.py:13:16:13:27 | ControlFlowNode for Attribute |
| test.py:13:16:13:27 | ControlFlowNode for Attribute | test.py:15:36:15:39 | ControlFlowNode for data | | test.py:13:16:13:27 | ControlFlowNode for Attribute | test.py:15:36:15:39 | ControlFlowNode for data |
| test.py:23:16:23:22 | ControlFlowNode for request | test.py:23:16:23:27 | ControlFlowNode for Attribute | | test.py:23:16:23:22 | ControlFlowNode for request | test.py:23:16:23:27 | ControlFlowNode for Attribute |
| test.py:23:16:23:27 | ControlFlowNode for Attribute | test.py:25:44:25:47 | ControlFlowNode for data | | test.py:23:16:23:27 | ControlFlowNode for Attribute | test.py:25:44:25:47 | ControlFlowNode for data |
| test.py:34:12:34:18 | ControlFlowNode for request | test.py:34:12:34:23 | ControlFlowNode for Attribute |
| test.py:34:12:34:23 | ControlFlowNode for Attribute | test.py:35:10:35:13 | ControlFlowNode for data |
| test.py:34:12:34:23 | ControlFlowNode for Attribute | test.py:36:13:36:16 | ControlFlowNode for data |
| test.py:42:12:42:18 | ControlFlowNode for request | test.py:42:12:42:23 | ControlFlowNode for Attribute |
| test.py:42:12:42:23 | ControlFlowNode for Attribute | test.py:43:22:43:25 | ControlFlowNode for data |
| test.py:42:12:42:23 | ControlFlowNode for Attribute | test.py:44:25:44:28 | ControlFlowNode for data |
nodes nodes
| test.py:0:0:0:0 | ModuleVariableNode for test.request | semmle.label | ModuleVariableNode for test.request | | test.py:0:0:0:0 | ModuleVariableNode for test.request | semmle.label | ModuleVariableNode for test.request |
| test.py:5:26:5:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember | | test.py:5:26:5:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
@@ -17,7 +25,19 @@ nodes
| test.py:23:16:23:22 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | | test.py:23:16:23:22 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:23:16:23:27 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | test.py:23:16:23:27 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:25:44:25:47 | ControlFlowNode for data | semmle.label | ControlFlowNode for data | | test.py:25:44:25:47 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
| test.py:34:12:34:18 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:34:12:34:23 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:35:10:35:13 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
| test.py:36:13:36:16 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
| test.py:42:12:42:18 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:42:12:42:23 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:43:22:43:25 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
| test.py:44:25:44:28 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
subpaths subpaths
#select #select
| test.py:15:36:15:39 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:15:36:15:39 | ControlFlowNode for data | Call to hmac.new [param 1] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember | | test.py:15:36:15:39 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:15:36:15:39 | ControlFlowNode for data | Call to hmac.new [position 1] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
| test.py:25:44:25:47 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:25:44:25:47 | ControlFlowNode for data | Call to hmac.new [param 1] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember | | test.py:25:44:25:47 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:25:44:25:47 | ControlFlowNode for data | Call to hmac.new [keyword msg] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
| test.py:35:10:35:13 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:35:10:35:13 | ControlFlowNode for data | Call to unknown.lib.func [position 0] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
| test.py:36:13:36:16 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:36:13:36:16 | ControlFlowNode for data | Call to unknown.lib.func [keyword kw] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
| test.py:43:22:43:25 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:43:22:43:25 | ControlFlowNode for data | Call to unknown.lib.func [position 0] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
| test.py:44:25:44:28 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:44:25:44:28 | ControlFlowNode for data | Call to unknown.lib.func [keyword kw] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |

View File

@@ -32,16 +32,16 @@ def hmac_example2():
def unknown_lib_1(): def unknown_lib_1():
from unknown.lib import func from unknown.lib import func
data = request.args.get("data") data = request.args.get("data")
func(data) # TODO: currently not recognized func(data)
func(kw=data) # TODO: currently not recognized func(kw=data)
@app.route("/unknown-lib-2") @app.route("/unknown-lib-2")
def unknown_lib_2(): def unknown_lib_2():
import unknown.lib import unknown.lib
data = request.args.get("data") data = request.args.get("data")
unknown.lib.func(data) # TODO: currently not recognized unknown.lib.func(data)
unknown.lib.func(kw=data) # TODO: currently not recognized unknown.lib.func(kw=data)
if __name__ == "__main__": if __name__ == "__main__":