Merge pull request #4590 from RasmusWL/python-model-base64

Python: Model encoding/decoding with base64 module
This commit is contained in:
Taus
2020-11-02 17:00:21 +01:00
committed by GitHub
9 changed files with 287 additions and 23 deletions

View File

@@ -6,8 +6,9 @@
import python
private import experimental.dataflow.DataFlow
private import experimental.semmle.python.Frameworks
private import experimental.dataflow.RemoteFlowSources
private import experimental.dataflow.TaintTracking
private import experimental.semmle.python.Frameworks
/**
* A data-flow node that executes an operating system command,
@@ -113,8 +114,9 @@ module Path {
* is intended to include deserialization, unmarshalling, decoding, unpickling,
* decompressing, decrypting, parsing etc.
*
* Doing so should normally preserve taint, but it can also be a problem
* in itself, e.g. if it allows code execution or could result in denial-of-service.
* A decoding (automatically) preserves taint from input to output. However, it can
* also be a problem in itself, for example if it allows code execution or could result
* in denial-of-service.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `Decoding::Range` instead.
@@ -144,8 +146,9 @@ module Decoding {
* is intended to include deserialization, unmarshalling, decoding, unpickling,
* decompressing, decrypting, parsing etc.
*
* Doing so should normally preserve taint, but it can also be a problem
* in itself, e.g. if it allows code execution or could result in denial-of-service.
* A decoding (automatically) preserves taint from input to output. However, it can
* also be a problem in itself, for example if it allows code execution or could result
* in denial-of-service.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `Decoding` instead.
@@ -165,6 +168,73 @@ module Decoding {
}
}
/**
 * An additional taint step that carries taint from every input of a
 * `Decoding` node to the output of that same node.
 */
private class DecodingAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    exists(Decoding d |
      d.getAnInput() = nodeFrom and
      d.getOutput() = nodeTo
    )
  }
}
/**
* A data-flow node that encodes data to a binary or textual format. This
* is intended to include serialization, marshalling, encoding, pickling,
* compressing, encrypting, etc.
*
* An encoding (automatically) preserves taint from input to output.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `Encoding::Range` instead.
*/
class Encoding extends DataFlow::Node {
// The `Encoding::Range` value this node is equal to; every member predicate
// below simply delegates to it.
Encoding::Range range;
Encoding() { this = range }
/** Gets an input that is encoded by this function. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
/** Gets the output that contains the encoded data produced by this function. */
DataFlow::Node getOutput() { result = range.getOutput() }
/** Gets an identifier for the format this function encodes to, such as "JSON". */
string getFormat() { result = range.getFormat() }
}
/** Provides a class for modeling new encoding mechanisms. */
module Encoding {
/**
* A data-flow node that encodes data to a binary or textual format. This
* is intended to include serialization, marshalling, encoding, pickling,
* compressing, encrypting, etc.
*
* An encoding (automatically) preserves taint from input to output.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `Encoding` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets an input that is encoded by this function. */
abstract DataFlow::Node getAnInput();
/** Gets the output that contains the encoded data produced by this function. */
abstract DataFlow::Node getOutput();
/** Gets an identifier for the format this function encodes to, such as "JSON". */
abstract string getFormat();
}
}
/**
 * An additional taint step that carries taint from every input of an
 * `Encoding` node to the output of that same node.
 */
private class EncodingAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    exists(Encoding e |
      e.getAnInput() = nodeFrom and
      e.getOutput() = nodeTo
    )
  }
}
/**
* A data-flow node that dynamically executes Python code.
*

View File

@@ -753,6 +753,131 @@ private class OpenCall extends FileSystemAccess::Range, DataFlow::CfgNode {
}
}
// ---------------------------------------------------------------------------
// base64
// ---------------------------------------------------------------------------
/**
 * Gets a node that refers to the `base64` module, where `t` is the type
 * tracker describing how that reference was reached.
 */
private DataFlow::Node base64(DataFlow::TypeTracker t) {
  // Base case: the import of the module itself.
  result = DataFlow::importNode("base64") and
  t.start()
  or
  // Recursive case: follow a previously found reference via `track`.
  exists(DataFlow::TypeTracker prev | result = base64(prev).track(prev, t))
}
/**
 * Gets a reference to the `base64` module, that is, a node for which the
 * type-tracked variant of this predicate holds with a finished type tracker.
 */
DataFlow::Node base64() { base64(DataFlow::TypeTracker::end()) = result }
/**
* Gets a reference to the attribute `attr_name` of the `base64` module.
* WARNING: Only holds for a few predefined attributes.
*
* `t` is the type tracker describing how the returned node was reached from
* one of the two starting points in the body.
*/
private DataFlow::Node base64_attr(DataFlow::TypeTracker t, string attr_name) {
// The starting points below only hold for this fixed list of function names
// (hence the WARNING above).
attr_name in ["b64encode", "b64decode", "standard_b64encode", "standard_b64decode",
"urlsafe_b64encode", "urlsafe_b64decode", "b32encode", "b32decode", "b16encode",
"b16decode", "encodestring", "decodestring", "a85encode", "a85decode", "b85encode",
"b85decode", "encodebytes", "decodebytes"] and
(
// Start 1: an import node for the dotted name `base64.<attr_name>`.
t.start() and
result = DataFlow::importNode("base64" + "." + attr_name)
or
// Start 2: any reference to the `base64` module, with the tracker started
// "in attribute" `attr_name`.
// NOTE(review): presumably this means the tracked value lives in that
// attribute of the module - confirm against `TypeTracker::startInAttr`.
t.startInAttr(attr_name) and
result = base64()
)
or
// Due to bad performance when using normal setup with `base64_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
// The step itself is computed in the `pragma[nomagic]` helper; here we only
// extend the previous tracker `t2` with the step's summary.
base64_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
/**
* Holds if `DataFlow::StepSummary::step` relates a node given by
* `base64_attr(t2, attr_name)` to `res`, with step summary `summary`.
*
* Helper for the recursive case of `base64_attr`; the comment there states
* that this was split out to force a join for performance reasons.
* NOTE(review): `pragma[nomagic]` is presumably what keeps the optimizer from
* restructuring this join - confirm before removing it.
*/
pragma[nomagic]
private predicate base64_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(base64_attr(t2, attr_name), res, summary)
}
/**
 * Gets a node referring to the attribute `attr_name` of the `base64` module,
 * that is, a node for which the type-tracked variant of this predicate holds
 * with a finished type tracker.
 *
 * WARNING: Only holds for a few predefined attributes.
 */
private DataFlow::Node base64_attr(string attr_name) {
  base64_attr(DataFlow::TypeTracker::end(), attr_name) = result
}
/**
 * A call to one of the encoding functions of the `base64` module
 * (the Base64, Base32, Base16, Ascii85 and Base85 variants).
 */
private class Base64EncodeCall extends Encoding::Range, DataFlow::CfgNode {
  override CallNode node;

  Base64EncodeCall() {
    exists(string fn |
      node.getFunction() = base64_attr(fn).asCfgNode() and
      fn in ["b64encode", "standard_b64encode", "urlsafe_b64encode", "b32encode", "b16encode",
            "encodestring", "a85encode", "b85encode", "encodebytes"]
    )
  }

  /** Gets the first positional argument of the call. */
  override DataFlow::Node getAnInput() { node.getArg(0) = result.asCfgNode() }

  /** Gets the call node itself. */
  override DataFlow::Node getOutput() { this = result }

  /** Gets the name of the encoding scheme, derived from the called function's name. */
  override string getFormat() {
    exists(string fn | base64_attr(fn).asCfgNode() = node.getFunction() |
      result = "Base64" and
      fn in ["b64encode", "standard_b64encode", "urlsafe_b64encode", "encodestring", "encodebytes"]
      or
      result = "Base32" and fn = "b32encode"
      or
      result = "Base16" and fn = "b16encode"
      or
      result = "Ascii85" and fn = "a85encode"
      or
      result = "Base85" and fn = "b85encode"
    )
  }
}
/**
 * A call to one of the decoding functions of the `base64` module
 * (the Base64, Base32, Base16, Ascii85 and Base85 variants).
 */
private class Base64DecodeCall extends Decoding::Range, DataFlow::CfgNode {
  override CallNode node;

  Base64DecodeCall() {
    exists(string fn |
      node.getFunction() = base64_attr(fn).asCfgNode() and
      fn in ["b64decode", "standard_b64decode", "urlsafe_b64decode", "b32decode", "b16decode",
            "decodestring", "a85decode", "b85decode", "decodebytes"]
    )
  }

  /** Never holds for these calls. */
  override predicate mayExecuteInput() { none() }

  /** Gets the first positional argument of the call. */
  override DataFlow::Node getAnInput() { node.getArg(0) = result.asCfgNode() }

  /** Gets the call node itself. */
  override DataFlow::Node getOutput() { this = result }

  /** Gets the name of the encoding scheme, derived from the called function's name. */
  override string getFormat() {
    exists(string fn | base64_attr(fn).asCfgNode() = node.getFunction() |
      result = "Base64" and
      fn in ["b64decode", "standard_b64decode", "urlsafe_b64decode", "decodestring", "decodebytes"]
      or
      result = "Base32" and fn = "b32decode"
      or
      result = "Base16" and fn = "b16decode"
      or
      result = "Ascii85" and fn = "a85decode"
      or
      result = "Base85" and fn = "b85decode"
    )
  }
}
// ---------------------------------------------------------------------------
// OTHER
// ---------------------------------------------------------------------------
/**
* A call to the `startswith` method on a string.
* See https://docs.python.org/3.9/library/stdtypes.html#str.startswith