Python: Add basic taint support for urlsplit/urlparse

This commit is contained in:
Rasmus Wriedt Larsen
2020-02-19 16:22:09 +01:00
parent 74345b1c05
commit fd270cc02c
8 changed files with 206 additions and 0 deletions

View File

@@ -18,6 +18,10 @@ abstract class ExternalStringKind extends StringKind {
json_load(fromnode, tonode) and result.(ExternalJsonKind).getValue() = this
or
tonode.(DictNode).getAValue() = fromnode and result.(ExternalStringDictKind).getValue() = this
or
urlsplit(fromnode, tonode) and result.(ExternalUrlSplitResult).getItem() = this
or
urlparse(fromnode, tonode) and result.(ExternalUrlParseResult).getItem() = this
}
}
@@ -65,6 +69,65 @@ class ExternalStringSequenceDictKind extends DictKind {
ExternalStringSequenceDictKind() { this.getValue() instanceof ExternalStringSequenceKind }
}
/** TaintKind for the result of `urlsplit(tainted_string)` */
class ExternalUrlSplitResult extends ExternalStringSequenceKind {
// https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlsplit
override TaintKind getTaintOfAttribute(string name) {
result = super.getTaintOfAttribute(name)
or
(
// namedtuple field names
name = "scheme" or
name = "netloc" or
name = "path" or
name = "query" or
name = "fragment" or
// class methods
name = "username" or
name = "password" or
name = "hostname"
) and
result instanceof ExternalStringKind
}
override TaintKind getTaintOfMethodResult(string name) {
result = super.getTaintOfMethodResult(name)
or
name = "geturl" and
result instanceof ExternalStringKind
}
}
/** TaintKind for the result of `urlparse(tainted_string)` */
class ExternalUrlParseResult extends ExternalStringSequenceKind {
// https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse
override TaintKind getTaintOfAttribute(string name) {
result = super.getTaintOfAttribute(name)
or
(
// namedtuple field names
name = "scheme" or
name = "netloc" or
name = "path" or
name = "params" or
name = "query" or
name = "fragment" or
// class methods
name = "username" or
name = "password" or
name = "hostname"
) and
result instanceof ExternalStringKind
}
override TaintKind getTaintOfMethodResult(string name) {
result = super.getTaintOfMethodResult(name)
or
name = "geturl" and
result instanceof ExternalStringKind
}
}
/* Helper for getTaintForStep() */
pragma[noinline]
private predicate json_subscript_taint(
@@ -83,6 +146,44 @@ private predicate json_load(ControlFlowNode fromnode, CallNode tonode) {
)
}
private predicate urlsplit(ControlFlowNode fromnode, CallNode tonode) {
// This could be implemented as `exists(FunctionValue` without the explicit six part,
// but then our tests will need to import +100 modules, so for now this slightly
// altered version gets to live on.
exists(Value urlsplit |
(
urlsplit = Value::named("six.moves.urllib.parse.urlsplit")
or
// Python 2
urlsplit = Value::named("urlparse.urlsplit")
or
// Python 3
urlsplit = Value::named("urllib.parse.urlsplit")
) and
tonode = urlsplit.getACall() and
tonode.getArg(0) = fromnode
)
}
private predicate urlparse(ControlFlowNode fromnode, CallNode tonode) {
// This could be implemented as `exists(FunctionValue` without the explicit six part,
// but then our tests will need to import +100 modules, so for now this slightly
// altered version gets to live on.
exists(Value urlparse |
(
urlparse = Value::named("six.moves.urllib.parse.urlparse")
or
// Python 2
urlparse = Value::named("urlparse.urlparse")
or
// Python 3
urlparse = Value::named("urllib.parse.urlparse")
) and
tonode = urlparse.getACall() and
tonode.getArg(0) = fromnode
)
}
/** A kind of "taint", representing an open file-like object from an external source. */
class ExternalFileObject extends TaintKind {
ExternalFileObject() { this = "file[" + any(ExternalStringKind key) + "]" }

View File

@@ -0,0 +1,27 @@
import python
import semmle.python.security.TaintTracking
import semmle.python.security.strings.Untrusted
class SimpleSource extends TaintSource {
SimpleSource() { this.(NameNode).getId() = "TAINTED_STRING" }
override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringKind }
override string toString() { result = "taint source" }
}
class ListSource extends TaintSource {
ListSource() { this.(NameNode).getId() = "TAINTED_LIST" }
override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringSequenceKind }
override string toString() { result = "list taint source" }
}
class DictSource extends TaintSource {
DictSource() { this.(NameNode).getId() = "TAINTED_DICT" }
override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringDictKind }
override string toString() { result = "dict taint source" }
}

View File

@@ -0,0 +1,11 @@
| test.py:13 | test_basic | a | externally controlled string |
| test.py:13 | test_basic | b | externally controlled string |
| test.py:13 | test_basic | c | externally controlled string |
| test.py:13 | test_basic | d | externally controlled string |
| test.py:13 | test_basic | urlsplit_res | [externally controlled string] |
| test.py:20 | test_sanitizer | Attribute | externally controlled string |
| test.py:23 | test_sanitizer | Subscript | externally controlled string |
| test.py:33 | test_namedtuple | a | NO TAINT |
| test.py:33 | test_namedtuple | b | NO TAINT |
| test.py:33 | test_namedtuple | c | NO TAINT |
| test.py:33 | test_namedtuple | d | NO TAINT |

View File

@@ -0,0 +1,18 @@
import python
import semmle.python.security.TaintTracking
import Taint
from Call call, Expr arg, string taint_string
where
call.getLocation().getFile().getShortName() = "test.py" and
call.getFunc().(Name).getId() = "test" and
arg = call.getAnArg() and
(
not exists(TaintedNode tainted | tainted.getAstNode() = arg) and
taint_string = "NO TAINT"
or
exists(TaintedNode tainted | tainted.getAstNode() = arg |
taint_string = tainted.getTaintKind().toString()
)
)
select arg.getLocation().toString(), call.getScope().(Function).getName(), arg.toString(), taint_string

View File

@@ -0,0 +1,33 @@
from six.moves.urllib.parse import urlsplit
# Currently we don't have support for namedtuples in general, but do have special support
# for `urlsplit` (and `urlparse`)
def test_basic():
tainted_string = TAINTED_STRING
urlsplit_res = urlsplit(tainted_string)
a = urlsplit_res.netloc # field access
b = urlsplit_res.hostname # property
c = urlsplit_res[3] # indexing
_, _, d, _, _ = urlsplit(tainted_string) # unpacking
test(a, b, c, d, urlsplit_res)
def test_sanitizer():
tainted_string = TAINTED_STRING
urlsplit_res = urlsplit(tainted_string)
if urlsplit_res.netloc == "OK":
test(urlsplit_res.netloc) # TODO: FP, should not be tainted here
if urlsplit_res[2] == "OK":
test(urlsplit_res[0]) # TODO: FP, should not be tainted here
def test_namedtuple():
tainted_string = TAINTED_STRING
Point = namedtuple('Point', ['x', 'y'])
p = Point('safe', tainted_string)
a = p.x
b = p.y
c = p[0]
d = p[1]
test(a, b, c, d) # TODO: FN, at least p.y and p[1] should be tainted

View File

@@ -1,3 +1,5 @@
| Taint [externally controlled string] | test.py:67 | test.py:67:20:67:43 | urlsplit() | | --> | Taint [externally controlled string] | test.py:69 | test.py:69:10:69:21 | urlsplit_res | |
| Taint [externally controlled string] | test.py:68 | test.py:68:20:68:43 | urlparse() | | --> | Taint [externally controlled string] | test.py:69 | test.py:69:24:69:35 | urlparse_res | |
| Taint exception.info | test.py:44 | test.py:44:22:44:26 | taint | p1 = exception.info | --> | Taint exception.info | test.py:45 | test.py:45:17:45:21 | taint | p1 = exception.info |
| Taint exception.info | test.py:45 | test.py:45:17:45:21 | taint | p1 = exception.info | --> | Taint exception.info | test.py:45 | test.py:45:12:45:22 | func() | p1 = exception.info |
| Taint exception.info | test.py:45 | test.py:45:17:45:21 | taint | p1 = exception.info | --> | Taint exception.info | test.py:52 | test.py:52:19:52:21 | arg | p0 = exception.info |
@@ -56,6 +58,10 @@
| Taint externally controlled string | test.py:57 | test.py:57:11:57:41 | cross_over() | | --> | Taint externally controlled string | test.py:58 | test.py:58:10:58:12 | res | |
| Taint externally controlled string | test.py:57 | test.py:57:38:57:40 | ext | | --> | Taint externally controlled string | test.py:44 | test.py:44:22:44:26 | taint | p1 = externally controlled string |
| Taint externally controlled string | test.py:57 | test.py:57:38:57:40 | ext | | --> | Taint externally controlled string | test.py:57 | test.py:57:11:57:41 | cross_over() | |
| Taint externally controlled string | test.py:66 | test.py:66:22:66:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:67 | test.py:67:29:67:42 | tainted_string | |
| Taint externally controlled string | test.py:66 | test.py:66:22:66:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:68 | test.py:68:29:68:42 | tainted_string | |
| Taint externally controlled string | test.py:67 | test.py:67:29:67:42 | tainted_string | | --> | Taint [externally controlled string] | test.py:67 | test.py:67:20:67:43 | urlsplit() | |
| Taint externally controlled string | test.py:68 | test.py:68:29:68:42 | tainted_string | | --> | Taint [externally controlled string] | test.py:68 | test.py:68:20:68:43 | urlparse() | |
| Taint json[externally controlled string] | test.py:6 | test.py:6:20:6:45 | Attribute() | | --> | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | |
| Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | --> | Taint externally controlled string | test.py:7 | test.py:7:9:7:25 | Subscript | |
| Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | --> | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:25 | Subscript | |

View File

@@ -20,3 +20,5 @@
| test.py:42 | test_str2 | c | externally controlled string |
| test.py:50 | test_exc_info | res | exception.info |
| test.py:58 | test_untrusted | res | externally controlled string |
| test.py:69 | test_urlsplit_urlparse | urlparse_res | [externally controlled string] |
| test.py:69 | test_urlsplit_urlparse | urlsplit_res | [externally controlled string] |

View File

@@ -59,3 +59,11 @@ def test_untrusted():
def exc_untrusted_call(arg):
return arg
from six.moves.urllib.parse import urlsplit, urlparse
def test_urlsplit_urlparse():
tainted_string = TAINTED_STRING
urlsplit_res = urlsplit(tainted_string)
urlparse_res = urlparse(tainted_string)
test(urlsplit_res, urlparse_res)