mirror of
https://github.com/github/codeql.git
synced 2026-04-29 10:45:15 +02:00
Merge pull request #4590 from RasmusWL/python-model-base64
Python: Model encoding/decoding with base64 module
This commit is contained in:
@@ -6,8 +6,9 @@
|
||||
|
||||
import python
|
||||
private import experimental.dataflow.DataFlow
|
||||
private import experimental.semmle.python.Frameworks
|
||||
private import experimental.dataflow.RemoteFlowSources
|
||||
private import experimental.dataflow.TaintTracking
|
||||
private import experimental.semmle.python.Frameworks
|
||||
|
||||
/**
|
||||
* A data-flow node that executes an operating system command,
|
||||
@@ -113,8 +114,9 @@ module Path {
|
||||
* is intended to include deserialization, unmarshalling, decoding, unpickling,
|
||||
* decompressing, decrypting, parsing etc.
|
||||
*
|
||||
* Doing so should normally preserve taint, but it can also be a problem
|
||||
* in itself, e.g. if it allows code execution or could result in denial-of-service.
|
||||
* A decoding (automatically) preserves taint from input to output. However, it can
|
||||
* also be a problem in itself, for example if it allows code execution or could result
|
||||
* in denial-of-service.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `Decoding::Range` instead.
|
||||
@@ -144,8 +146,9 @@ module Decoding {
|
||||
* is intended to include deserialization, unmarshalling, decoding, unpickling,
|
||||
* decompressing, decrypting, parsing etc.
|
||||
*
|
||||
* Doing so should normally preserve taint, but it can also be a problem
|
||||
* in itself, e.g. if it allows code execution or could result in denial-of-service.
|
||||
* A decoding (automatically) preserves taint from input to output. However, it can
|
||||
* also be a problem in itself, for example if it allows code execution or could result
|
||||
* in denial-of-service.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `Decoding` instead.
|
||||
@@ -165,6 +168,73 @@ module Decoding {
|
||||
}
|
||||
}
|
||||
|
||||
/** An additional taint step from each input of a `Decoding` node to that node's output. */
private class DecodingAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
|
||||
// Holds if `nodeFrom` is an input and `nodeTo` is the output of the same `Decoding`.
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
exists(Decoding decoding |
|
||||
nodeFrom = decoding.getAnInput() and
|
||||
nodeTo = decoding.getOutput()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that encodes data to a binary or textual format. This
|
||||
* is intended to include serialization, marshalling, encoding, pickling,
|
||||
* compressing, encrypting, etc.
|
||||
*
|
||||
* An encoding (automatically) preserves taint from input to output.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `Encoding::Range` instead.
|
||||
*/
|
||||
class Encoding extends DataFlow::Node {
|
||||
// The `Encoding::Range` value that `this` is equal to; every member predicate delegates to it.
Encoding::Range range;
|
||||
|
||||
Encoding() { this = range }
|
||||
|
||||
/** Gets an input that is encoded by this function. */
|
||||
DataFlow::Node getAnInput() { result = range.getAnInput() }
|
||||
|
||||
/** Gets the output that contains the encoded data produced by this function. */
|
||||
DataFlow::Node getOutput() { result = range.getOutput() }
|
||||
|
||||
/** Gets an identifier for the format this function encodes to, such as "JSON". */
|
||||
string getFormat() { result = range.getFormat() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new encoding mechanisms. */
|
||||
module Encoding {
|
||||
/**
|
||||
* A data-flow node that encodes data to a binary or textual format. This
|
||||
* is intended to include serialization, marshalling, encoding, pickling,
|
||||
* compressing, encrypting, etc.
|
||||
*
|
||||
* An encoding (automatically) preserves taint from input to output.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `Encoding` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets an input that is encoded by this function. */
|
||||
abstract DataFlow::Node getAnInput();
|
||||
|
||||
/** Gets the output that contains the encoded data produced by this function. */
|
||||
abstract DataFlow::Node getOutput();
|
||||
|
||||
/** Gets an identifier for the format this function encodes to, such as "JSON". */
|
||||
abstract string getFormat();
|
||||
}
|
||||
}
|
||||
|
||||
/** An additional taint step from each input of an `Encoding` node to that node's output. */
private class EncodingAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
|
||||
// Holds if `nodeFrom` is an input and `nodeTo` is the output of the same `Encoding`.
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
exists(Encoding encoding |
|
||||
nodeFrom = encoding.getAnInput() and
|
||||
nodeTo = encoding.getOutput()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that dynamically executes Python code.
|
||||
*
|
||||
|
||||
@@ -753,6 +753,131 @@ private class OpenCall extends FileSystemAccess::Range, DataFlow::CfgNode {
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// base64
|
||||
// ---------------------------------------------------------------------------
|
||||
/** Gets a reference to the `base64` module, found with type tracker `t`. */
|
||||
private DataFlow::Node base64(DataFlow::TypeTracker t) {
|
||||
// Base case: the tracker is at its start and the node is the import of `base64`.
t.start() and
|
||||
result = DataFlow::importNode("base64")
|
||||
or
|
||||
// Recursive case: extend a reference found with tracker `t2` via `track(t2, t)`.
exists(DataFlow::TypeTracker t2 | result = base64(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/**
 * Gets a reference to the `base64` module, that is, a result of the
 * type-tracker variant of `base64` for `DataFlow::TypeTracker::end()`.
 */
|
||||
DataFlow::Node base64() { result = base64(DataFlow::TypeTracker::end()) }
|
||||
|
||||
/**
|
||||
* Gets a reference to the attribute `attr_name` of the `base64` module, found with type tracker `t`.
|
||||
* WARNING: Only holds for a few predefined attributes.
|
||||
*/
|
||||
private DataFlow::Node base64_attr(DataFlow::TypeTracker t, string attr_name) {
|
||||
// The start cases below are restricted to this fixed list of encode/decode function names.
attr_name in ["b64encode", "b64decode", "standard_b64encode", "standard_b64decode",
|
||||
"urlsafe_b64encode", "urlsafe_b64decode", "b32encode", "b32decode", "b16encode",
|
||||
"b16decode", "encodestring", "decodestring", "a85encode", "a85decode", "b85encode",
|
||||
"b85decode", "encodebytes", "decodebytes"] and
|
||||
(
|
||||
// Start case 1: the import node for the qualified name `base64.<attr_name>`.
t.start() and
|
||||
result = DataFlow::importNode("base64" + "." + attr_name)
|
||||
or
|
||||
// Start case 2: a reference to the `base64` module, with the tracker started in attribute `attr_name`.
t.startInAttr(attr_name) and
|
||||
result = base64()
|
||||
)
|
||||
or
|
||||
// Due to bad performance when using normal setup with `base64_attr(t2, attr_name).track(t2, t)`
|
||||
// we have inlined that code and forced a join
|
||||
exists(DataFlow::TypeTracker t2 |
|
||||
exists(DataFlow::StepSummary summary |
|
||||
base64_attr_first_join(t2, attr_name, result, summary) and
|
||||
t = t2.append(summary)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Holds if `res` is reached from a node in `base64_attr(t2, attr_name)` by a single
 * step described by `summary`.
 *
 * Helper for the recursive case of `base64_attr`; kept as a separate `pragma[nomagic]`
 * predicate so that this join is evaluated on its own (see the performance note there).
 */
pragma[nomagic]
|
||||
private predicate base64_attr_first_join(
|
||||
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
|
||||
) {
|
||||
DataFlow::StepSummary::step(base64_attr(t2, attr_name), res, summary)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to the attribute `attr_name` of the `base64` module (end of type tracking).
|
||||
* WARNING: Only holds for a few predefined attributes.
|
||||
*/
|
||||
private DataFlow::Node base64_attr(string attr_name) {
|
||||
// Only results of the tracked variant for the end type tracker are returned.
result = base64_attr(DataFlow::TypeTracker::end(), attr_name)
|
||||
}
|
||||
|
||||
/** A call to any of the encode functions in the `base64` module. */
|
||||
private class Base64EncodeCall extends Encoding::Range, DataFlow::CfgNode {
|
||||
override CallNode node;
|
||||
|
||||
// Matches calls whose callee is a tracked reference to one of the listed encode functions.
Base64EncodeCall() {
|
||||
exists(string name |
|
||||
name in ["b64encode", "standard_b64encode", "urlsafe_b64encode", "b32encode", "b16encode",
|
||||
"encodestring", "a85encode", "b85encode", "encodebytes"] and
|
||||
node.getFunction() = base64_attr(name).asCfgNode()
|
||||
)
|
||||
}
|
||||
|
||||
// The input is the first positional argument of the call.
// NOTE(review): a payload passed by keyword is not matched here - confirm that this is intended.
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
|
||||
|
||||
// The call node itself is the output.
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
// Maps the name of the called function to a format label.
override string getFormat() {
|
||||
exists(string name | node.getFunction() = base64_attr(name).asCfgNode() |
|
||||
name in ["b64encode", "standard_b64encode", "urlsafe_b64encode", "encodestring", "encodebytes"] and
|
||||
result = "Base64"
|
||||
or
|
||||
name = "b32encode" and result = "Base32"
|
||||
or
|
||||
name = "b16encode" and result = "Base16"
|
||||
or
|
||||
name = "a85encode" and result = "Ascii85"
|
||||
or
|
||||
name = "b85encode" and result = "Base85"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** A call to any of the decode functions in the `base64` module. */
|
||||
private class Base64DecodeCall extends Decoding::Range, DataFlow::CfgNode {
|
||||
override CallNode node;
|
||||
|
||||
// Matches calls whose callee is a tracked reference to one of the listed decode functions.
Base64DecodeCall() {
|
||||
exists(string name |
|
||||
name in ["b64decode", "standard_b64decode", "urlsafe_b64decode", "b32decode", "b16decode",
|
||||
"decodestring", "a85decode", "b85decode", "decodebytes"] and
|
||||
node.getFunction() = base64_attr(name).asCfgNode()
|
||||
)
|
||||
}
|
||||
|
||||
// Never holds: these decode calls are not modeled as executing their input.
override predicate mayExecuteInput() { none() }
|
||||
|
||||
// The input is the first positional argument of the call.
// NOTE(review): a payload passed by keyword is not matched here - confirm that this is intended.
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
|
||||
|
||||
// The call node itself is the output.
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
// Maps the name of the called function to a format label.
override string getFormat() {
|
||||
exists(string name | node.getFunction() = base64_attr(name).asCfgNode() |
|
||||
name in ["b64decode", "standard_b64decode", "urlsafe_b64decode", "decodestring", "decodebytes"] and
|
||||
result = "Base64"
|
||||
or
|
||||
name = "b32decode" and result = "Base32"
|
||||
or
|
||||
name = "b16decode" and result = "Base16"
|
||||
or
|
||||
name = "a85decode" and result = "Ascii85"
|
||||
or
|
||||
name = "b85decode" and result = "Base85"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// OTHER
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
|
||||
* A call to the `startswith` method on a string.
|
||||
* See https://docs.python.org/3.9/library/stdtypes.html#str.startswith
|
||||
|
||||
@@ -16,12 +16,12 @@
|
||||
| test_string.py:17 | ok | str_methods | ts.casefold() |
|
||||
| test_string.py:19 | ok | str_methods | ts.format_map(..) |
|
||||
| test_string.py:20 | ok | str_methods | "{unsafe}".format_map(..) |
|
||||
| test_string.py:31 | fail | binary_decode_encode | base64.a85encode(..) |
|
||||
| test_string.py:32 | fail | binary_decode_encode | base64.a85decode(..) |
|
||||
| test_string.py:35 | fail | binary_decode_encode | base64.b85encode(..) |
|
||||
| test_string.py:36 | fail | binary_decode_encode | base64.b85decode(..) |
|
||||
| test_string.py:39 | fail | binary_decode_encode | base64.encodebytes(..) |
|
||||
| test_string.py:40 | fail | binary_decode_encode | base64.decodebytes(..) |
|
||||
| test_string.py:31 | ok | binary_decode_encode | base64.a85encode(..) |
|
||||
| test_string.py:32 | ok | binary_decode_encode | base64.a85decode(..) |
|
||||
| test_string.py:35 | ok | binary_decode_encode | base64.b85encode(..) |
|
||||
| test_string.py:36 | ok | binary_decode_encode | base64.b85decode(..) |
|
||||
| test_string.py:39 | ok | binary_decode_encode | base64.encodebytes(..) |
|
||||
| test_string.py:40 | ok | binary_decode_encode | base64.decodebytes(..) |
|
||||
| test_string.py:48 | ok | f_strings | Fstring |
|
||||
| test_unpacking.py:18 | ok | extended_unpacking | first |
|
||||
| test_unpacking.py:18 | ok | extended_unpacking | last |
|
||||
|
||||
@@ -123,18 +123,18 @@
|
||||
| test_string.py:114 | ok | percent_fmt | BinaryExpr |
|
||||
| test_string.py:115 | ok | percent_fmt | BinaryExpr |
|
||||
| test_string.py:116 | ok | percent_fmt | BinaryExpr |
|
||||
| test_string.py:126 | fail | binary_decode_encode | base64.b64encode(..) |
|
||||
| test_string.py:127 | fail | binary_decode_encode | base64.b64decode(..) |
|
||||
| test_string.py:129 | fail | binary_decode_encode | base64.standard_b64encode(..) |
|
||||
| test_string.py:130 | fail | binary_decode_encode | base64.standard_b64decode(..) |
|
||||
| test_string.py:132 | fail | binary_decode_encode | base64.urlsafe_b64encode(..) |
|
||||
| test_string.py:133 | fail | binary_decode_encode | base64.urlsafe_b64decode(..) |
|
||||
| test_string.py:135 | fail | binary_decode_encode | base64.b32encode(..) |
|
||||
| test_string.py:136 | fail | binary_decode_encode | base64.b32decode(..) |
|
||||
| test_string.py:138 | fail | binary_decode_encode | base64.b16encode(..) |
|
||||
| test_string.py:139 | fail | binary_decode_encode | base64.b16decode(..) |
|
||||
| test_string.py:142 | fail | binary_decode_encode | base64.encodestring(..) |
|
||||
| test_string.py:143 | fail | binary_decode_encode | base64.decodestring(..) |
|
||||
| test_string.py:126 | ok | binary_decode_encode | base64.b64encode(..) |
|
||||
| test_string.py:127 | ok | binary_decode_encode | base64.b64decode(..) |
|
||||
| test_string.py:129 | ok | binary_decode_encode | base64.standard_b64encode(..) |
|
||||
| test_string.py:130 | ok | binary_decode_encode | base64.standard_b64decode(..) |
|
||||
| test_string.py:132 | ok | binary_decode_encode | base64.urlsafe_b64encode(..) |
|
||||
| test_string.py:133 | ok | binary_decode_encode | base64.urlsafe_b64decode(..) |
|
||||
| test_string.py:135 | ok | binary_decode_encode | base64.b32encode(..) |
|
||||
| test_string.py:136 | ok | binary_decode_encode | base64.b32decode(..) |
|
||||
| test_string.py:138 | ok | binary_decode_encode | base64.b16encode(..) |
|
||||
| test_string.py:139 | ok | binary_decode_encode | base64.b16decode(..) |
|
||||
| test_string.py:142 | ok | binary_decode_encode | base64.encodestring(..) |
|
||||
| test_string.py:143 | ok | binary_decode_encode | base64.decodestring(..) |
|
||||
| test_string.py:148 | fail | binary_decode_encode | quopri.encodestring(..) |
|
||||
| test_string.py:149 | fail | binary_decode_encode | quopri.decodestring(..) |
|
||||
| test_string.py:159 | ok | test_os_path_join | os.path.join(..) |
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
import base64
|
||||
|
||||
# TODO: These tests should be merged with python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep-py3/test_string.py
|
||||
base64.a85decode(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=Ascii85
|
||||
base64.b85decode(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=Base85
|
||||
base64.decodebytes(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=Base64
|
||||
@@ -0,0 +1,6 @@
|
||||
import base64
|
||||
|
||||
# TODO: These tests should be merged with python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep-py3/test_string.py
|
||||
base64.a85encode(bs) # $encodeInput=bs $encodeOutput=Attribute() $encodeFormat=Ascii85
|
||||
base64.b85encode(bs)# $encodeInput=bs $encodeOutput=Attribute() $encodeFormat=Base85
|
||||
base64.encodebytes(bs)# $encodeInput=bs $encodeOutput=Attribute() $encodeFormat=Base64
|
||||
@@ -1,5 +1,15 @@
|
||||
import pickle
|
||||
import marshal
|
||||
import base64
|
||||
|
||||
pickle.loads(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=pickle $decodeMayExecuteInput
|
||||
marshal.loads(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=marshal $decodeMayExecuteInput
|
||||
|
||||
# TODO: These tests should be merged with python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py
|
||||
base64.b64decode(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=Base64
|
||||
base64.standard_b64decode(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=Base64
|
||||
base64.urlsafe_b64decode(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=Base64
|
||||
base64.b32decode(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=Base32
|
||||
base64.b16decode(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=Base16
|
||||
# deprecated since Python 3.1 (removed in Python 3.9), but still works on earlier versions
|
||||
base64.decodestring(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=Base64
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
import pickle
|
||||
import marshal
|
||||
import base64
|
||||
|
||||
pickle.dumps(obj) # $f-:encodeInput=obj $f-:encodeOutput=Attribute() $f-:encodeFormat=pickle $f-:encodeMayExecuteInput
|
||||
marshal.dumps(obj) # $f-:encodeInput=obj $f-:encodeOutput=Attribute() $f-:encodeFormat=marshal $f-:encodeMayExecuteInput
|
||||
|
||||
# TODO: These tests should be merged with python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py
|
||||
base64.b64encode(bs) # $encodeInput=bs $encodeOutput=Attribute() $encodeFormat=Base64
|
||||
base64.standard_b64encode(bs) # $encodeInput=bs $encodeOutput=Attribute() $encodeFormat=Base64
|
||||
base64.urlsafe_b64encode(bs) # $encodeInput=bs $encodeOutput=Attribute() $encodeFormat=Base64
|
||||
base64.b32encode(bs) # $encodeInput=bs $encodeOutput=Attribute() $encodeFormat=Base32
|
||||
base64.b16encode(bs) # $encodeInput=bs $encodeOutput=Attribute() $encodeFormat=Base16
|
||||
# deprecated since Python 3.1 (removed in Python 3.9), but still works on earlier versions
|
||||
base64.encodestring(bs) # $encodeInput=bs $encodeOutput=Attribute() $encodeFormat=Base64
|
||||
@@ -73,6 +73,38 @@ class DecodingTest extends InlineExpectationsTest {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * An inline-expectations test that reports, for every `Encoding` node, its inputs
 * (`encodeInput`), its output (`encodeOutput`) and its format (`encodeFormat`).
 */
class EncodingTest extends InlineExpectationsTest {
|
||||
EncodingTest() { this = "EncodingTest" }
|
||||
|
||||
override string getARelevantTag() { result in ["encodeInput", "encodeOutput", "encodeFormat"] }
|
||||
|
||||
override predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
// Only report results at locations whose file has a relative path.
exists(location.getFile().getRelativePath()) and
|
||||
exists(Encoding e |
|
||||
// Input/output results: reported at the data node, with the value derived from its expression.
exists(DataFlow::Node data |
|
||||
location = data.getLocation() and
|
||||
element = data.toString() and
|
||||
value = value_from_expr(data.asExpr()) and
|
||||
(
|
||||
data = e.getAnInput() and
|
||||
tag = "encodeInput"
|
||||
or
|
||||
data = e.getOutput() and
|
||||
tag = "encodeOutput"
|
||||
)
|
||||
)
|
||||
or
|
||||
// Format result: reported at the encoding node itself, with the format string as the value.
exists(string format |
|
||||
location = e.getLocation() and
|
||||
element = format and
|
||||
value = format and
|
||||
format = e.getFormat() and
|
||||
tag = "encodeFormat"
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
class CodeExecutionTest extends InlineExpectationsTest {
|
||||
CodeExecutionTest() { this = "CodeExecutionTest" }
|
||||
|
||||
|
||||
Reference in New Issue
Block a user