mirror of
https://github.com/github/codeql.git
synced 2026-05-02 12:15:17 +02:00
Merge pull request #1848 from markshannon/python-rationalize-taint-tracking
Python: Move TaintTracking.qll
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
import python
|
||||
import semmle.python.security.TaintTracking
|
||||
import semmle.python.dataflow.TaintTracking
|
||||
private import semmle.python.objects.ObjectInternal
|
||||
private import semmle.python.pointsto.Filters as Filters
|
||||
import semmle.python.dataflow.Legacy
|
||||
|
||||
@@ -1,3 +1,858 @@
|
||||
/* For compatibility with other language implementations */
|
||||
/**
|
||||
* # Python Taint Tracking Library
|
||||
*
|
||||
* The taint tracking library is described in three parts.
|
||||
*
|
||||
* 1. Specification of kinds, sources, sinks and flows.
|
||||
* 2. The high level query API
|
||||
* 3. The implementation.
|
||||
*
|
||||
*
|
||||
* ## Specification
|
||||
*
|
||||
* There are four parts to the specification of a taint tracking query.
|
||||
* These are:
|
||||
*
|
||||
* 1. Kinds
|
||||
*
|
||||
* The Python taint tracking library supports arbitrary kinds of taint.
|
||||
* This is useful where you want to track something related to "taint", but that is in itself not dangerous.
|
||||
* For example, we might want to track the flow of request objects.
|
||||
* Request objects are not in themselves tainted, but they do contain tainted data.
|
||||
* For example, the length or timestamp of a request may not pose a risk, but the GET or POST string probably do.
|
||||
* So, we would want to track request objects distinctly from the request data in the GET or POST field.
|
||||
*
|
||||
* Kinds can also specify additional flow steps, but we recommend using the `DataFlowExtension` module,
|
||||
* which is less likely to cause issues with unwanted recursion.
|
||||
*
|
||||
* 2. Sources
|
||||
*
|
||||
* Sources of taint can be added by importing a predefined sub-type of `TaintSource`, or by defining new ones.
|
||||
*
|
||||
* 3. Sinks (or vulnerabilities)
|
||||
*
|
||||
* Sinks can be added by importing a predefined sub-type of `TaintSink`, or by defining new ones.
|
||||
*
|
||||
* 4. Flow extensions
|
||||
*
|
||||
* Additional flow can be added by importing predefined sub-types of `DataFlowExtension::DataFlowNode`
|
||||
* or `DataFlowExtension::DataFlowVariable` or by defining new ones.
|
||||
*
|
||||
*
|
||||
* ## The high-level query API
|
||||
*
|
||||
* The `TaintedNode` fully describes the taint flow graph.
|
||||
* The full graph can be expressed as:
|
||||
*
|
||||
* ```ql
|
||||
* from TaintedNode n, TaintedNode s
|
||||
* where s = n.getASuccessor()
|
||||
* select n, s
|
||||
* ```
|
||||
*
|
||||
* The source -> sink relation can be expressed either using `TaintedNode`:
|
||||
* ```ql
|
||||
* from TaintedNode src, TaintedNode sink
|
||||
* where src.isSource() and sink.isSink() and src.getASuccessor*() = sink
|
||||
* select src, sink
|
||||
* ```
|
||||
* or, using the specification API:
|
||||
* ```ql
|
||||
* from TaintSource src, TaintSink sink
|
||||
* where src.flowsToSink(sink)
|
||||
* select src, sink
|
||||
* ```
|
||||
*
|
||||
* ## The implementation
|
||||
*
|
||||
* The data-flow graph used by the taint-tracking library is the one created by the points-to analysis,
|
||||
* and consists of the base data-flow graph defined in `semmle/python/essa/Essa.qll`
|
||||
* enhanced with precise variable flows, call graph and type information.
|
||||
* This graph is then enhanced with additional flows as specified above.
|
||||
* Since the call graph and points-to information is context sensitive, the taint graph must also be context sensitive.
|
||||
*
|
||||
* The taint graph is a directed graph where each node consists of a
|
||||
* `(CFG node, context, taint)` triple although it could be thought of more naturally
|
||||
* as a number of distinct graphs, one for each input taint-kind consisting of data flow nodes,
|
||||
* `(CFG node, context)` pairs, labelled with their `taint`.
|
||||
*
|
||||
* The `TrackedValue` used in the implementation is not the taint kind specified by the user,
|
||||
* but describes both the kind of taint and how that taint relates to any object referred to by a data-flow graph node or edge.
|
||||
* Currently, only two types of `taint` are supported: simple taint, where the object is actually tainted;
|
||||
* and attribute taint where a named attribute of the referred object is tainted.
|
||||
*
|
||||
* Support for tainted members (both specific members of tuples and the like,
|
||||
* and generic members for mutable collections) are likely to be added in the near future and other forms are possible.
|
||||
* The types of taints are hard-wired with no user-visible extension method at the moment.
|
||||
*/
|
||||
|
||||
import python
|
||||
private import semmle.python.pointsto.Filters as Filters
|
||||
private import semmle.python.objects.ObjectInternal
|
||||
private import semmle.python.dataflow.Implementation
|
||||
import semmle.python.dataflow.Configuration
|
||||
|
||||
/** A 'kind' of taint. This may be almost anything,
|
||||
* but it is typically something like a "user-defined string".
|
||||
* Examples include, data from a http request object,
|
||||
* data from an SMS or other mobile data source,
|
||||
* or, for a super secure system, environment variables or
|
||||
* the local file system.
|
||||
*/
|
||||
abstract class TaintKind extends string {
|
||||
|
||||
bindingset[this]
|
||||
TaintKind() { any() }
|
||||
|
||||
/** Gets the kind of taint that the named attribute will have if an object is tainted with this taint.
|
||||
* In other words, if `x` has this kind of taint then it implies that `x.name`
|
||||
* has `result` kind of taint.
|
||||
*/
|
||||
TaintKind getTaintOfAttribute(string name) { none() }
|
||||
|
||||
/** Gets the kind of taint results from calling the named method if an object is tainted with this taint.
|
||||
* In other words, if `x` has this kind of taint then it implies that `x.name()`
|
||||
* has `result` kind of taint.
|
||||
*/
|
||||
TaintKind getTaintOfMethodResult(string name) { none() }
|
||||
|
||||
/** Gets the taint resulting from the flow step `fromnode` -> `tonode`.
|
||||
*/
|
||||
TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) { none() }
|
||||
|
||||
/** Gets the taint resulting from the flow step `fromnode` -> `tonode`, with `edgeLabel`
|
||||
*/
|
||||
TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode, string edgeLabel) {
|
||||
result = this.getTaintForFlowStep(fromnode, tonode) and edgeLabel = "custom taint flow step for " + this
|
||||
}
|
||||
|
||||
/** DEPRECATED -- Use `TaintFlow.additionalFlowStepVar(EssaVariable fromvar, EssaVariable tovar, TaintKind kind)` instead.
|
||||
*
|
||||
* Holds if this kind of taint passes from variable `fromvar` to variable `tovar`
|
||||
* This predicate is present for completeness. It is unlikely that any `TaintKind`
|
||||
* implementation will ever need to override it.
|
||||
*/
|
||||
predicate additionalFlowStepVar(EssaVariable fromvar, EssaVariable tovar) { none() }
|
||||
|
||||
/** Holds if this kind of taint "taints" `expr`.
|
||||
*/
|
||||
final predicate taints(ControlFlowNode expr) {
|
||||
exists(TaintedNode n |
|
||||
n.getTaintKind() = this and n.getCfgNode() = expr
|
||||
)
|
||||
}
|
||||
|
||||
/** DEPRECATED -- Use getType() instead */
|
||||
ClassObject getClass() {
|
||||
none()
|
||||
}
|
||||
|
||||
/** Gets the class of this kind of taint.
|
||||
* For example, if this were a kind of string taint
|
||||
* the `result` would be `theStrType()`.
|
||||
*/
|
||||
ClassValue getType() {
|
||||
result.(ClassObjectInternal).getSource() = this.getClass()
|
||||
}
|
||||
|
||||
/** Gets the boolean values (may be one, neither, or both) that
|
||||
* may result from the Python expression `bool(this)`
|
||||
*/
|
||||
boolean booleanValue() {
|
||||
/* Default to true as the vast majority of taint is strings and
|
||||
* the empty string is almost always benign.
|
||||
*/
|
||||
result = true
|
||||
}
|
||||
|
||||
string repr() { result = this }
|
||||
|
||||
/** Gets the taint resulting from iterating over this kind of taint.
|
||||
* For example iterating over a text file produces lines. So iterating
|
||||
* over a tainted file would result in tainted strings
|
||||
*/
|
||||
TaintKind getTaintForIteration() { none() }
|
||||
|
||||
predicate flowStep(DataFlow::Node fromnode, DataFlow::Node tonode, string edgeLabel) {
|
||||
this.additionalFlowStepVar(fromnode.asVariable(), tonode.asVariable()) and
|
||||
edgeLabel = "custom taint variable step"
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Alias of `TaintKind`, so the two types can be used interchangeably.
|
||||
*/
|
||||
class FlowLabel = TaintKind;
|
||||
|
||||
/** Taint kinds representing collections of other taint kind.
|
||||
* We use `{kind}` to represent a mapping of string to `kind` and
|
||||
* `[kind]` to represent a flat collection of `kind`.
|
||||
* The use of `{` and `[` is chosen to reflect dict and list literals
|
||||
* in Python. We choose a single character prefix and suffix for simplicity
|
||||
* and ease of preventing infinite recursion.
|
||||
*/
|
||||
abstract class CollectionKind extends TaintKind {
|
||||
|
||||
bindingset[this]
|
||||
CollectionKind() {
|
||||
(this.charAt(0) = "[" or this.charAt(0) = "{") and
|
||||
/* Prevent any collection kinds more than 2 deep */
|
||||
not this.charAt(2) = "[" and not this.charAt(2) = "{"
|
||||
}
|
||||
|
||||
abstract TaintKind getMember();
|
||||
|
||||
abstract predicate flowFromMember(DataFlow::Node fromnode, DataFlow::Node tonode);
|
||||
|
||||
abstract predicate flowToMember(DataFlow::Node fromnode, DataFlow::Node tonode);
|
||||
}
|
||||
|
||||
/** A taint kind representing a flat collections of kinds.
|
||||
* Typically a sequence, but can include sets.
|
||||
*/
|
||||
class SequenceKind extends CollectionKind {
|
||||
|
||||
TaintKind itemKind;
|
||||
|
||||
SequenceKind() {
|
||||
this = "[" + itemKind + "]"
|
||||
}
|
||||
|
||||
TaintKind getItem() {
|
||||
result = itemKind
|
||||
}
|
||||
|
||||
override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
|
||||
exists(BinaryExprNode mod |
|
||||
mod = tonode and
|
||||
mod.getOp() instanceof Mod and
|
||||
mod.getAnOperand() = fromnode and
|
||||
result = this.getItem() and
|
||||
result.getType() = ObjectInternal::builtin("str")
|
||||
)
|
||||
}
|
||||
|
||||
override TaintKind getTaintOfMethodResult(string name) {
|
||||
name = "pop" and result = this.getItem()
|
||||
}
|
||||
|
||||
override string repr() {
|
||||
result = "sequence of " + itemKind
|
||||
}
|
||||
|
||||
override TaintKind getTaintForIteration() {
|
||||
result = itemKind
|
||||
}
|
||||
|
||||
override TaintKind getMember() {
|
||||
result = itemKind
|
||||
}
|
||||
|
||||
override predicate flowFromMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
|
||||
sequence_construct(fromnode.asCfgNode(), tonode.asCfgNode())
|
||||
}
|
||||
|
||||
override predicate flowToMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
|
||||
SequenceKind::itemFlowStep(fromnode.asCfgNode(), tonode.asCfgNode())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
module SequenceKind {
|
||||
|
||||
predicate flowStep(ControlFlowNode fromnode, ControlFlowNode tonode, string edgeLabel) {
|
||||
tonode.(BinaryExprNode).getAnOperand() = fromnode and edgeLabel = "binary operation"
|
||||
or
|
||||
Implementation::copyCall(fromnode, tonode) and
|
||||
edgeLabel = "dict copy"
|
||||
or
|
||||
sequence_call(fromnode, tonode) and edgeLabel = "sequence construction"
|
||||
or
|
||||
subscript_slice(fromnode, tonode) and edgeLabel = "slicing"
|
||||
}
|
||||
|
||||
predicate itemFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
|
||||
subscript_index(fromnode, tonode)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
module DictKind {
|
||||
predicate flowStep(ControlFlowNode fromnode, ControlFlowNode tonode, string edgeLabel) {
|
||||
Implementation::copyCall(fromnode, tonode) and
|
||||
edgeLabel = "dict copy"
|
||||
or
|
||||
tonode.(CallNode).getFunction().pointsTo(ObjectInternal::builtin("dict")) and
|
||||
tonode.(CallNode).getArg(0) = fromnode and
|
||||
edgeLabel = "dict() call"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Helper for sequence flow steps */
|
||||
pragma [noinline]
|
||||
private predicate subscript_index(ControlFlowNode obj, SubscriptNode sub) {
|
||||
sub.isLoad() and
|
||||
sub.getValue() = obj and
|
||||
not sub.getNode().getIndex() instanceof Slice
|
||||
}
|
||||
|
||||
pragma [noinline]
|
||||
private predicate subscript_slice(ControlFlowNode obj, SubscriptNode sub) {
|
||||
sub.isLoad() and
|
||||
sub.getValue() = obj and
|
||||
sub.getNode().getIndex() instanceof Slice
|
||||
}
|
||||
|
||||
|
||||
/** A taint kind representing a mapping of objects to kinds.
|
||||
* Typically a dict, but can include other mappings.
|
||||
*/
|
||||
class DictKind extends CollectionKind {
|
||||
|
||||
TaintKind valueKind;
|
||||
|
||||
DictKind() {
|
||||
this = "{" + valueKind + "}"
|
||||
}
|
||||
|
||||
TaintKind getValue() {
|
||||
result = valueKind
|
||||
}
|
||||
|
||||
override TaintKind getTaintOfMethodResult(string name) {
|
||||
name = "get" and result = valueKind
|
||||
or
|
||||
name = "values" and result.(SequenceKind).getItem() = valueKind
|
||||
or
|
||||
name = "itervalues" and result.(SequenceKind).getItem() = valueKind
|
||||
}
|
||||
|
||||
override string repr() {
|
||||
result = "dict of " + valueKind
|
||||
}
|
||||
|
||||
override TaintKind getMember() {
|
||||
result = valueKind
|
||||
}
|
||||
|
||||
override predicate flowFromMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
|
||||
dict_construct(fromnode.asCfgNode(), tonode.asCfgNode())
|
||||
}
|
||||
|
||||
override predicate flowToMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
|
||||
subscript_index(fromnode.asCfgNode(), tonode.asCfgNode())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/** A type of sanitizer of untrusted data.
|
||||
* Examples include sanitizers for http responses, for DB access or for shell commands.
|
||||
* Usually a sanitizer can only sanitize data for one particular use.
|
||||
* For example, a sanitizer for DB commands would not be safe to use for http responses.
|
||||
*/
|
||||
abstract class Sanitizer extends string {
|
||||
|
||||
bindingset[this]
|
||||
Sanitizer() { any() }
|
||||
|
||||
/** Holds if `taint` cannot flow through `node`. */
|
||||
predicate sanitizingNode(TaintKind taint, ControlFlowNode node) { none() }
|
||||
|
||||
/** Holds if `call` removes removes the `taint` */
|
||||
predicate sanitizingCall(TaintKind taint, FunctionObject callee) { none() }
|
||||
|
||||
/** Holds if `test` shows value to be untainted with `taint` */
|
||||
predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) { none() }
|
||||
|
||||
/** Holds if `test` shows value to be untainted with `taint` */
|
||||
predicate sanitizingSingleEdge(TaintKind taint, SingleSuccessorGuard test) { none() }
|
||||
|
||||
/** Holds if `def` shows value to be untainted with `taint` */
|
||||
predicate sanitizingDefinition(TaintKind taint, EssaDefinition def) { none() }
|
||||
|
||||
}
|
||||
|
||||
/** DEPRECATED -- Use DataFlowExtension instead.
|
||||
* An extension to taint-flow. For adding library or framework specific flows.
|
||||
* Examples include flow from a request to untrusted part of that request or
|
||||
* from a socket to data from that socket.
|
||||
*/
|
||||
abstract class TaintFlow extends string {
|
||||
|
||||
bindingset[this]
|
||||
TaintFlow() { any() }
|
||||
|
||||
/** Holds if `fromnode` being tainted with `fromkind` will result in `tonode` being tainted with `tokind`.
|
||||
* Extensions to `TaintFlow` should override this to provide additional taint steps.
|
||||
*/
|
||||
predicate additionalFlowStep(ControlFlowNode fromnode, TaintKind fromkind, ControlFlowNode tonode, TaintKind tokind) { none() }
|
||||
|
||||
/** Holds if the given `kind` of taint passes from variable `fromvar` to variable `tovar`.
|
||||
* This predicate is present for completeness. Most `TaintFlow` implementations will not need to override it.
|
||||
*/
|
||||
predicate additionalFlowStepVar(EssaVariable fromvar, EssaVariable tovar, TaintKind kind) { none() }
|
||||
|
||||
/** Holds if the given `kind` of taint cannot pass from variable `fromvar` to variable `tovar`.
|
||||
* This predicate is present for completeness. Most `TaintFlow` implementations will not need to override it.
|
||||
*/
|
||||
predicate prunedFlowStepVar(EssaVariable fromvar, EssaVariable tovar, TaintKind kind) { none() }
|
||||
|
||||
}
|
||||
|
||||
/** A source of taintedness.
|
||||
* Users of the taint tracking library should override this
|
||||
* class to provide their own sources.
|
||||
*/
|
||||
abstract class TaintSource extends @py_flow_node {
|
||||
|
||||
string toString() { result = "Taint source" }
|
||||
|
||||
/**
|
||||
* Holds if `this` is a source of taint kind `kind`
|
||||
*
|
||||
* This must be overridden by subclasses to specify sources of taint.
|
||||
*
|
||||
* The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
|
||||
*/
|
||||
abstract predicate isSourceOf(TaintKind kind);
|
||||
|
||||
/**
|
||||
* Holds if `this` is a source of taint kind `kind` for the given context.
|
||||
* Generally, this should not need to be overridden; overriding `isSourceOf(kind)` should be sufficient.
|
||||
*
|
||||
* The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
|
||||
*/
|
||||
predicate isSourceOf(TaintKind kind, TaintTrackingContext context) {
|
||||
context.isTop() and this.isSourceOf(kind)
|
||||
}
|
||||
|
||||
Location getLocation() {
|
||||
result = this.(ControlFlowNode).getLocation()
|
||||
}
|
||||
|
||||
predicate hasLocationInfo(string fp, int bl, int bc, int el, int ec) {
|
||||
this.getLocation().hasLocationInfo(fp, bl, bc, el, ec)
|
||||
}
|
||||
|
||||
/** Gets a TaintedNode for this taint source */
|
||||
TaintedNode getATaintNode() {
|
||||
result.getCfgNode() = this and
|
||||
this.isSourceOf(result.getTaintKind(), result.getContext()) and
|
||||
result.getPath().noAttribute()
|
||||
}
|
||||
|
||||
/** Holds if taint can flow from this source to sink `sink` */
|
||||
final predicate flowsToSink(TaintKind srckind, TaintSink sink) {
|
||||
exists(TaintedNode src, TaintedNode tsink |
|
||||
src = this.getATaintNode() and
|
||||
src.getTaintKind() = srckind and
|
||||
src.flowsTo(tsink) and
|
||||
this.isSourceOf(srckind, _) and
|
||||
sink = tsink.getCfgNode() and
|
||||
sink.sinks(tsink.getTaintKind()) and
|
||||
tsink.getPath().noAttribute() and
|
||||
tsink.isSink()
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if taint can flow from this source to taint sink `sink` */
|
||||
final predicate flowsToSink(TaintSink sink) {
|
||||
this.flowsToSink(_, sink)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Warning: Advanced feature. Users are strongly recommended to use `TaintSource` instead.
|
||||
* A source of taintedness on the ESSA data-flow graph.
|
||||
* Users of the taint tracking library can override this
|
||||
* class to provide their own sources on the ESSA graph.
|
||||
*/
|
||||
abstract class TaintedDefinition extends EssaNodeDefinition {
|
||||
|
||||
/**
|
||||
* Holds if `this` is a source of taint kind `kind`
|
||||
*
|
||||
* This should be overridden by subclasses to specify sources of taint.
|
||||
*
|
||||
* The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
|
||||
*/
|
||||
abstract predicate isSourceOf(TaintKind kind);
|
||||
|
||||
/**
|
||||
* Holds if `this` is a source of taint kind `kind` for the given context.
|
||||
* Generally, this should not need to be overridden; overriding `isSourceOf(kind)` should be sufficient.
|
||||
*
|
||||
* The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
|
||||
*/
|
||||
predicate isSourceOf(TaintKind kind, TaintTrackingContext context) {
|
||||
context.isTop() and this.isSourceOf(kind)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private class DictUpdate extends DataFlowExtension::DataFlowNode {
|
||||
|
||||
MethodCallsiteRefinement call;
|
||||
|
||||
DictUpdate() {
|
||||
exists(CallNode c |
|
||||
c = call.getCall()
|
||||
|
|
||||
c.getFunction().(AttrNode).getName() = "update" and
|
||||
c.getArg(0) = this
|
||||
)
|
||||
}
|
||||
|
||||
override EssaVariable getASuccessorVariable() {
|
||||
call.getVariable() = result
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private class SequenceExtends extends DataFlowExtension::DataFlowNode {
|
||||
|
||||
MethodCallsiteRefinement call;
|
||||
|
||||
SequenceExtends() {
|
||||
exists(CallNode c |
|
||||
c = call.getCall()
|
||||
|
|
||||
c.getFunction().(AttrNode).getName() = "extend" and
|
||||
c.getArg(0) = this
|
||||
)
|
||||
}
|
||||
|
||||
override EssaVariable getASuccessorVariable() {
|
||||
call.getVariable() = result
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** A node that is vulnerable to one or more types of taint.
|
||||
* These nodes provide the sinks when computing the taint flow graph.
|
||||
* An example would be an argument to a write to a http response object,
|
||||
* such an argument would be vulnerable to unsanitized user-input (XSS).
|
||||
*
|
||||
* Users of the taint tracking library should extend this
|
||||
* class to provide their own sink nodes.
|
||||
*/
|
||||
abstract class TaintSink extends @py_flow_node {
|
||||
|
||||
string toString() { result = "Taint sink" }
|
||||
|
||||
/**
|
||||
* Holds if `this` "sinks" taint kind `kind`
|
||||
* Typically this means that `this` is vulnerable to taint kind `kind`.
|
||||
*
|
||||
* This must be overridden by subclasses to specify vulnerabilities or other sinks of taint.
|
||||
*/
|
||||
abstract predicate sinks(TaintKind taint);
|
||||
|
||||
Location getLocation() {
|
||||
result = this.(ControlFlowNode).getLocation()
|
||||
}
|
||||
|
||||
predicate hasLocationInfo(string fp, int bl, int bc, int el, int ec) {
|
||||
this.getLocation().hasLocationInfo(fp, bl, bc, el, ec)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** Extension for data-flow, to help express data-flow paths that are
|
||||
* library or framework specific and cannot be inferred by the general
|
||||
* data-flow machinery.
|
||||
*/
|
||||
module DataFlowExtension {
|
||||
|
||||
/** A control flow node that modifies the basic data-flow. */
|
||||
abstract class DataFlowNode extends @py_flow_node {
|
||||
|
||||
string toString() {
|
||||
result = "Dataflow extension node"
|
||||
}
|
||||
|
||||
/** Gets a successor node for data-flow.
|
||||
* Data (all forms) is assumed to flow from `this` to `result`
|
||||
*/
|
||||
ControlFlowNode getASuccessorNode() { none() }
|
||||
|
||||
/** Gets a successor variable for data-flow.
|
||||
* Data (all forms) is assumed to flow from `this` to `result`.
|
||||
* Note: This is an unlikely form of flow. See `DataFlowVariable.getASuccessorVariable()`
|
||||
*/
|
||||
EssaVariable getASuccessorVariable() { none() }
|
||||
|
||||
/** Holds if data cannot flow from `this` to `succ`,
|
||||
* even though it would normally do so.
|
||||
*/
|
||||
predicate prunedSuccessor(ControlFlowNode succ) { none() }
|
||||
|
||||
/** Gets a successor node, where the successor node will be tainted with `tokind`
|
||||
* when `this` is tainted with `fromkind`.
|
||||
* Extensions to `DataFlowNode` should override this to provide additional taint steps.
|
||||
*/
|
||||
ControlFlowNode getASuccessorNode(TaintKind fromkind, TaintKind tokind) { none() }
|
||||
|
||||
/** Gets a successor node for data-flow with a change of context from callee to caller
|
||||
* (going *up* the call-stack) across call-site `call`.
|
||||
* Data (all forms) is assumed to flow from `this` to `result`
|
||||
* Extensions to `DataFlowNode` should override this to provide additional taint steps.
|
||||
*/
|
||||
ControlFlowNode getAReturnSuccessorNode(CallNode call) { none() }
|
||||
|
||||
/** Gets a successor node for data-flow with a change of context from caller to callee
|
||||
* (going *down* the call-stack) across call-site `call`.
|
||||
* Data (all forms) is assumed to flow from `this` to `result`
|
||||
* Extensions to `DataFlowNode` should override this to provide additional taint steps.
|
||||
*/
|
||||
ControlFlowNode getACalleeSuccessorNode(CallNode call) { none() }
|
||||
|
||||
}
|
||||
|
||||
/** Data flow variable that modifies the basic data-flow. */
|
||||
class DataFlowVariable extends EssaVariable {
|
||||
|
||||
/** Gets a successor node for data-flow.
|
||||
* Data (all forms) is assumed to flow from `this` to `result`
|
||||
* Note: This is an unlikely form of flow. See `DataFlowNode.getASuccessorNode()`
|
||||
*/
|
||||
ControlFlowNode getASuccessorNode() { none() }
|
||||
|
||||
/** Gets a successor variable for data-flow.
|
||||
* Data (all forms) is assumed to flow from `this` to `result`.
|
||||
*/
|
||||
EssaVariable getASuccessorVariable() { none() }
|
||||
|
||||
/** Holds if data cannot flow from `this` to `succ`,
|
||||
* even though it would normally do so.
|
||||
*/
|
||||
predicate prunedSuccessor(EssaVariable succ) { none() }
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
class TaintedPathSource extends TaintTrackingNode {
|
||||
|
||||
TaintedPathSource() {
|
||||
this.isSource()
|
||||
}
|
||||
|
||||
DataFlow::Node getSource() {
|
||||
result = this.getNode()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class TaintedPathSink extends TaintTrackingNode {
|
||||
|
||||
TaintedPathSink() {
|
||||
this.isSink()
|
||||
}
|
||||
|
||||
DataFlow::Node getSink() {
|
||||
result = this.getNode()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Backwards compatible name */
|
||||
class TaintedNode = TaintTrackingNode;
|
||||
|
||||
/* Helpers for Validating classes */
|
||||
|
||||
private import semmle.python.pointsto.PointsTo
|
||||
|
||||
|
||||
/** Data flow module providing an interface compatible with
|
||||
* the other language implementations.
|
||||
*/
|
||||
module DataFlow {
|
||||
|
||||
/** Generic taint kind, source and sink classes for convenience and
|
||||
* compatibility with other language libraries
|
||||
*/
|
||||
|
||||
class Extension = DataFlowExtension::DataFlowNode;
|
||||
|
||||
deprecated abstract class Configuration extends string {
|
||||
|
||||
bindingset[this]
|
||||
Configuration() { this = this }
|
||||
|
||||
abstract predicate isSource(ControlFlowNode source);
|
||||
|
||||
abstract predicate isSink(ControlFlowNode sink);
|
||||
|
||||
private predicate hasFlowPath(TaintedNode source, TaintedNode sink) {
|
||||
source.getConfiguration() = this and
|
||||
this.isSource(source.getCfgNode()) and
|
||||
this.isSink(sink.getCfgNode()) and
|
||||
source.flowsTo(sink)
|
||||
}
|
||||
|
||||
predicate hasFlow(ControlFlowNode source, ControlFlowNode sink) {
|
||||
exists(TaintedNode psource, TaintedNode psink |
|
||||
psource.getCfgNode() = source and
|
||||
psink.getCfgNode() = sink and
|
||||
this.isSource(source) and
|
||||
this.isSink(sink) and
|
||||
this.hasFlowPath(psource, psink)
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private class ConfigurationAdapter extends TaintTracking::Configuration {
|
||||
|
||||
ConfigurationAdapter() {
|
||||
this instanceof Configuration
|
||||
}
|
||||
|
||||
override predicate isSource(DataFlow::Node node, TaintKind kind) {
|
||||
this.(Configuration).isSource(node.asCfgNode()) and
|
||||
kind instanceof DataFlowType
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node node, TaintKind kind) {
|
||||
this.(Configuration).isSink(node.asCfgNode()) and
|
||||
kind instanceof DataFlowType
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private newtype TDataFlowNode =
|
||||
TEssaNode(EssaVariable var)
|
||||
or
|
||||
TCfgNode(ControlFlowNode node)
|
||||
|
||||
abstract class Node extends TDataFlowNode {
|
||||
|
||||
abstract ControlFlowNode asCfgNode();
|
||||
|
||||
abstract EssaVariable asVariable();
|
||||
|
||||
abstract string toString();
|
||||
|
||||
abstract Scope getScope();
|
||||
|
||||
abstract BasicBlock getBasicBlock();
|
||||
|
||||
abstract Location getLocation();
|
||||
|
||||
AstNode asAstNode() {
|
||||
result = this.asCfgNode().getNode()
|
||||
}
|
||||
|
||||
/** For backwards compatibility -- Use asAstNode() instead */
|
||||
deprecated AstNode getNode() {
|
||||
result = this.asAstNode()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class CfgNode extends Node, TCfgNode {
|
||||
|
||||
override ControlFlowNode asCfgNode() {
|
||||
this = TCfgNode(result)
|
||||
}
|
||||
|
||||
override EssaVariable asVariable() {
|
||||
none()
|
||||
}
|
||||
|
||||
override string toString() {
|
||||
result = this.asAstNode().toString()
|
||||
}
|
||||
|
||||
override Scope getScope() {
|
||||
result = this.asCfgNode().getScope()
|
||||
}
|
||||
|
||||
override BasicBlock getBasicBlock() {
|
||||
result = this.asCfgNode().getBasicBlock()
|
||||
}
|
||||
|
||||
override Location getLocation() {
|
||||
result = this.asCfgNode().getLocation()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class EssaNode extends Node, TEssaNode {
|
||||
|
||||
override ControlFlowNode asCfgNode() {
|
||||
none()
|
||||
}
|
||||
|
||||
override EssaVariable asVariable() {
|
||||
this = TEssaNode(result)
|
||||
}
|
||||
|
||||
override string toString() {
|
||||
result = this.asVariable().toString()
|
||||
}
|
||||
|
||||
override Scope getScope() {
|
||||
result = this.asVariable().getScope()
|
||||
}
|
||||
|
||||
override BasicBlock getBasicBlock() {
|
||||
result = this.asVariable().getDefinition().getBasicBlock()
|
||||
}
|
||||
|
||||
override Location getLocation() {
|
||||
result = this.asVariable().getDefinition().getLocation()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private class DataFlowType extends TaintKind {
|
||||
|
||||
DataFlowType() {
|
||||
this = "Data flow" and
|
||||
exists(DataFlow::Configuration c)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
pragma [noinline]
|
||||
private predicate dict_construct(ControlFlowNode itemnode, ControlFlowNode dictnode) {
|
||||
dictnode.(DictNode).getAValue() = itemnode
|
||||
or
|
||||
dictnode.(CallNode).getFunction().pointsTo(ObjectInternal::builtin("dict")) and
|
||||
dictnode.(CallNode).getArgByName(_) = itemnode
|
||||
}
|
||||
|
||||
pragma [noinline]
|
||||
private predicate sequence_construct(ControlFlowNode itemnode, ControlFlowNode seqnode) {
|
||||
seqnode.isLoad() and
|
||||
(
|
||||
seqnode.(ListNode).getElement(_) = itemnode
|
||||
or
|
||||
seqnode.(TupleNode).getElement(_) = itemnode
|
||||
or
|
||||
seqnode.(SetNode).getAnElement() = itemnode
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
/* A call to construct a sequence from a sequence or iterator*/
|
||||
pragma [noinline]
|
||||
private predicate sequence_call(ControlFlowNode fromnode, CallNode tonode) {
|
||||
tonode.getArg(0) = fromnode and
|
||||
exists(ControlFlowNode cls |
|
||||
cls = tonode.getFunction() |
|
||||
cls.pointsTo(ObjectInternal::builtin("list"))
|
||||
or
|
||||
cls.pointsTo(ObjectInternal::builtin("tuple"))
|
||||
or
|
||||
cls.pointsTo(ObjectInternal::builtin("set"))
|
||||
)
|
||||
}
|
||||
|
||||
import semmle.python.security.TaintTracking
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
/** DO NOT USE.
|
||||
* Utility module to expose private parts of TaintTracking for internal testing.
|
||||
*/
|
||||
|
||||
|
||||
import python
|
||||
import semmle.python.dataflow.Implementation as TaintFlowTest
|
||||
@@ -1,858 +1,2 @@
|
||||
/**
|
||||
* # Python Taint Tracking Library
|
||||
*
|
||||
* The taint tracking library is described in three parts.
|
||||
*
|
||||
* 1. Specification of kinds, sources, sinks and flows.
|
||||
* 2. The high level query API
|
||||
* 3. The implementation.
|
||||
*
|
||||
*
|
||||
* ## Specification
|
||||
*
|
||||
* There are four parts to the specification of a taint tracking query.
|
||||
* These are:
|
||||
*
|
||||
* 1. Kinds
|
||||
*
|
||||
* The Python taint tracking library supports arbitrary kinds of taint.
|
||||
* This is useful where you want to track something related to "taint", but that is in itself not dangerous.
|
||||
* For example, we might want to track the flow of request objects.
|
||||
* Request objects are not in themselves tainted, but they do contain tainted data.
|
||||
* For example, the length or timestamp of a request may not pose a risk, but the GET or POST string probably do.
|
||||
* So, we would want to track request objects distinctly from the request data in the GET or POST field.
|
||||
*
|
||||
* Kinds can also specify additional flow steps, but we recommend using the `DataFlowExtension` module,
|
||||
* which is less likely to cause issues with unwanted recursion.
|
||||
*
|
||||
* 2. Sources
|
||||
*
|
||||
* Sources of taint can be added by importing a predefined sub-type of `TaintSource`, or by defining new ones.
|
||||
*
|
||||
* 3. Sinks (or vulnerabilities)
|
||||
*
|
||||
* Sinks can be added by importing a predefined sub-type of `TaintSink`, or by defining new ones.
|
||||
*
|
||||
* 4. Flow extensions
|
||||
*
|
||||
* Additional flow can be added by importing predefined sub-types of `DataFlowExtension::DataFlowNode`
|
||||
* or `DataFlowExtension::DataFlowVariable` or by defining new ones.
|
||||
*
|
||||
*
|
||||
* ## The high-level query API
|
||||
*
|
||||
* The `TaintedNode` fully describes the taint flow graph.
|
||||
* The full graph can be expressed as:
|
||||
*
|
||||
* ```ql
|
||||
* from TaintedNode n, TaintedNode s
|
||||
* where s = n.getASuccessor()
|
||||
* select n, s
|
||||
* ```
|
||||
*
|
||||
* The source -> sink relation can be expressed either using `TaintedNode`:
|
||||
* ```ql
|
||||
* from TaintedNode src, TaintedNode sink
|
||||
* where src.isSource() and sink.isSink() and src.getASuccessor*() = sink
|
||||
* select src, sink
|
||||
* ```
|
||||
* or, using the specification API:
|
||||
* ```ql
|
||||
* from TaintSource src, TaintSink sink
|
||||
* where src.flowsToSink(sink)
|
||||
* select src, sink
|
||||
* ```
|
||||
*
|
||||
* ## The implementation
|
||||
*
|
||||
* The data-flow graph used by the taint-tracking library is the one created by the points-to analysis,
|
||||
* and consists of the base data-flow graph defined in `semmle/python/essa/Essa.qll`
|
||||
* enhanced with precise variable flows, call graph and type information.
|
||||
* This graph is then enhanced with additional flows as specified above.
|
||||
* Since the call graph and points-to information is context sensitive, the taint graph must also be context sensitive.
|
||||
*
|
||||
* The taint graph is a directed graph where each node consists of a
|
||||
* `(CFG node, context, taint)` triple although it could be thought of more naturally
|
||||
* as a number of distinct graphs, one for each input taint-kind consisting of data flow nodes,
|
||||
* `(CFG node, context)` pairs, labelled with their `taint`.
|
||||
*
|
||||
* The `TrackedValue` used in the implementation is not the taint kind specified by the user,
|
||||
* but describes both the kind of taint and how that taint relates to any object referred to by a data-flow graph node or edge.
|
||||
* Currently, only two types of `taint` are supported: simple taint, where the object is actually tainted;
|
||||
* and attribute taint where a named attribute of the referred object is tainted.
|
||||
*
|
||||
* Support for tainted members (both specific members of tuples and the like,
|
||||
* and generic members for mutable collections) are likely to be added in the near future and other forms are possible.
|
||||
* The types of taints are hard-wired with no user-visible extension method at the moment.
|
||||
*/
|
||||
|
||||
import python
|
||||
private import semmle.python.pointsto.Filters as Filters
|
||||
private import semmle.python.objects.ObjectInternal
|
||||
private import semmle.python.dataflow.Implementation
|
||||
import semmle.python.dataflow.Configuration
|
||||
|
||||
/** A 'kind' of taint. This may be almost anything,
|
||||
* but it is typically something like a "user-defined string".
|
||||
* Examples include, data from a http request object,
|
||||
* data from an SMS or other mobile data source,
|
||||
* or, for a super secure system, environment variables or
|
||||
* the local file system.
|
||||
*/
|
||||
abstract class TaintKind extends string {
|
||||
|
||||
bindingset[this]
|
||||
TaintKind() { any() }
|
||||
|
||||
/** Gets the kind of taint that the named attribute will have if an object is tainted with this taint.
|
||||
* In other words, if `x` has this kind of taint then it implies that `x.name`
|
||||
* has `result` kind of taint.
|
||||
*/
|
||||
TaintKind getTaintOfAttribute(string name) { none() }
|
||||
|
||||
/** Gets the kind of taint results from calling the named method if an object is tainted with this taint.
|
||||
* In other words, if `x` has this kind of taint then it implies that `x.name()`
|
||||
* has `result` kind of taint.
|
||||
*/
|
||||
TaintKind getTaintOfMethodResult(string name) { none() }
|
||||
|
||||
/** Gets the taint resulting from the flow step `fromnode` -> `tonode`.
|
||||
*/
|
||||
TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) { none() }
|
||||
|
||||
/** Gets the taint resulting from the flow step `fromnode` -> `tonode`, with `edgeLabel`
|
||||
*/
|
||||
TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode, string edgeLabel) {
|
||||
result = this.getTaintForFlowStep(fromnode, tonode) and edgeLabel = "custom taint flow step for " + this
|
||||
}
|
||||
|
||||
/** DEPRECATED -- Use `TaintFlow.additionalFlowStepVar(EssaVariable fromvar, EssaVariable tovar, TaintKind kind)` instead.
|
||||
*
|
||||
* Holds if this kind of taint passes from variable `fromvar` to variable `tovar`
|
||||
* This predicate is present for completeness. It is unlikely that any `TaintKind`
|
||||
* implementation will ever need to override it.
|
||||
*/
|
||||
predicate additionalFlowStepVar(EssaVariable fromvar, EssaVariable tovar) { none() }
|
||||
|
||||
/** Holds if this kind of taint "taints" `expr`.
|
||||
*/
|
||||
final predicate taints(ControlFlowNode expr) {
|
||||
exists(TaintedNode n |
|
||||
n.getTaintKind() = this and n.getCfgNode() = expr
|
||||
)
|
||||
}
|
||||
|
||||
/** DEPRECATED -- Use getType() instead */
|
||||
ClassObject getClass() {
|
||||
none()
|
||||
}
|
||||
|
||||
/** Gets the class of this kind of taint.
|
||||
* For example, if this were a kind of string taint
|
||||
* the `result` would be `theStrType()`.
|
||||
*/
|
||||
ClassValue getType() {
|
||||
result.(ClassObjectInternal).getSource() = this.getClass()
|
||||
}
|
||||
|
||||
/** Gets the boolean values (may be one, neither, or both) that
|
||||
* may result from the Python expression `bool(this)`
|
||||
*/
|
||||
boolean booleanValue() {
|
||||
/* Default to true as the vast majority of taint is strings and
|
||||
* the empty string is almost always benign.
|
||||
*/
|
||||
result = true
|
||||
}
|
||||
|
||||
string repr() { result = this }
|
||||
|
||||
/** Gets the taint resulting from iterating over this kind of taint.
|
||||
* For example iterating over a text file produces lines. So iterating
|
||||
* over a tainted file would result in tainted strings
|
||||
*/
|
||||
TaintKind getTaintForIteration() { none() }
|
||||
|
||||
predicate flowStep(DataFlow::Node fromnode, DataFlow::Node tonode, string edgeLabel) {
|
||||
this.additionalFlowStepVar(fromnode.asVariable(), tonode.asVariable()) and
|
||||
edgeLabel = "custom taint variable step"
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Alias of `TaintKind`, so the two types can be used interchangeably.
|
||||
*/
|
||||
class FlowLabel = TaintKind;
|
||||
|
||||
/** Taint kinds representing collections of other taint kind.
|
||||
* We use `{kind}` to represent a mapping of string to `kind` and
|
||||
* `[kind]` to represent a flat collection of `kind`.
|
||||
* The use of `{` and `[` is chosen to reflect dict and list literals
|
||||
* in Python. We choose a single character prefix and suffix for simplicity
|
||||
* and ease of preventing infinite recursion.
|
||||
*/
|
||||
abstract class CollectionKind extends TaintKind {
|
||||
|
||||
bindingset[this]
|
||||
CollectionKind() {
|
||||
(this.charAt(0) = "[" or this.charAt(0) = "{") and
|
||||
/* Prevent any collection kinds more than 2 deep */
|
||||
not this.charAt(2) = "[" and not this.charAt(2) = "{"
|
||||
}
|
||||
|
||||
abstract TaintKind getMember();
|
||||
|
||||
abstract predicate flowFromMember(DataFlow::Node fromnode, DataFlow::Node tonode);
|
||||
|
||||
abstract predicate flowToMember(DataFlow::Node fromnode, DataFlow::Node tonode);
|
||||
}
|
||||
|
||||
/** A taint kind representing a flat collections of kinds.
|
||||
* Typically a sequence, but can include sets.
|
||||
*/
|
||||
class SequenceKind extends CollectionKind {
|
||||
|
||||
TaintKind itemKind;
|
||||
|
||||
SequenceKind() {
|
||||
this = "[" + itemKind + "]"
|
||||
}
|
||||
|
||||
TaintKind getItem() {
|
||||
result = itemKind
|
||||
}
|
||||
|
||||
override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
|
||||
exists(BinaryExprNode mod |
|
||||
mod = tonode and
|
||||
mod.getOp() instanceof Mod and
|
||||
mod.getAnOperand() = fromnode and
|
||||
result = this.getItem() and
|
||||
result.getType() = ObjectInternal::builtin("str")
|
||||
)
|
||||
}
|
||||
|
||||
override TaintKind getTaintOfMethodResult(string name) {
|
||||
name = "pop" and result = this.getItem()
|
||||
}
|
||||
|
||||
override string repr() {
|
||||
result = "sequence of " + itemKind
|
||||
}
|
||||
|
||||
override TaintKind getTaintForIteration() {
|
||||
result = itemKind
|
||||
}
|
||||
|
||||
override TaintKind getMember() {
|
||||
result = itemKind
|
||||
}
|
||||
|
||||
override predicate flowFromMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
|
||||
sequence_construct(fromnode.asCfgNode(), tonode.asCfgNode())
|
||||
}
|
||||
|
||||
override predicate flowToMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
|
||||
SequenceKind::itemFlowStep(fromnode.asCfgNode(), tonode.asCfgNode())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
module SequenceKind {
|
||||
|
||||
predicate flowStep(ControlFlowNode fromnode, ControlFlowNode tonode, string edgeLabel) {
|
||||
tonode.(BinaryExprNode).getAnOperand() = fromnode and edgeLabel = "binary operation"
|
||||
or
|
||||
Implementation::copyCall(fromnode, tonode) and
|
||||
edgeLabel = "dict copy"
|
||||
or
|
||||
sequence_call(fromnode, tonode) and edgeLabel = "sequence construction"
|
||||
or
|
||||
subscript_slice(fromnode, tonode) and edgeLabel = "slicing"
|
||||
}
|
||||
|
||||
predicate itemFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
|
||||
subscript_index(fromnode, tonode)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
module DictKind {
|
||||
predicate flowStep(ControlFlowNode fromnode, ControlFlowNode tonode, string edgeLabel) {
|
||||
Implementation::copyCall(fromnode, tonode) and
|
||||
edgeLabel = "dict copy"
|
||||
or
|
||||
tonode.(CallNode).getFunction().pointsTo(ObjectInternal::builtin("dict")) and
|
||||
tonode.(CallNode).getArg(0) = fromnode and
|
||||
edgeLabel = "dict() call"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Helper for sequence flow steps */
|
||||
pragma [noinline]
|
||||
private predicate subscript_index(ControlFlowNode obj, SubscriptNode sub) {
|
||||
sub.isLoad() and
|
||||
sub.getValue() = obj and
|
||||
not sub.getNode().getIndex() instanceof Slice
|
||||
}
|
||||
|
||||
pragma [noinline]
|
||||
private predicate subscript_slice(ControlFlowNode obj, SubscriptNode sub) {
|
||||
sub.isLoad() and
|
||||
sub.getValue() = obj and
|
||||
sub.getNode().getIndex() instanceof Slice
|
||||
}
|
||||
|
||||
|
||||
/** A taint kind representing a mapping of objects to kinds.
|
||||
* Typically a dict, but can include other mappings.
|
||||
*/
|
||||
class DictKind extends CollectionKind {
|
||||
|
||||
TaintKind valueKind;
|
||||
|
||||
DictKind() {
|
||||
this = "{" + valueKind + "}"
|
||||
}
|
||||
|
||||
TaintKind getValue() {
|
||||
result = valueKind
|
||||
}
|
||||
|
||||
override TaintKind getTaintOfMethodResult(string name) {
|
||||
name = "get" and result = valueKind
|
||||
or
|
||||
name = "values" and result.(SequenceKind).getItem() = valueKind
|
||||
or
|
||||
name = "itervalues" and result.(SequenceKind).getItem() = valueKind
|
||||
}
|
||||
|
||||
override string repr() {
|
||||
result = "dict of " + valueKind
|
||||
}
|
||||
|
||||
override TaintKind getMember() {
|
||||
result = valueKind
|
||||
}
|
||||
|
||||
override predicate flowFromMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
|
||||
dict_construct(fromnode.asCfgNode(), tonode.asCfgNode())
|
||||
}
|
||||
|
||||
override predicate flowToMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
|
||||
subscript_index(fromnode.asCfgNode(), tonode.asCfgNode())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/** A type of sanitizer of untrusted data.
|
||||
* Examples include sanitizers for http responses, for DB access or for shell commands.
|
||||
* Usually a sanitizer can only sanitize data for one particular use.
|
||||
* For example, a sanitizer for DB commands would not be safe to use for http responses.
|
||||
*/
|
||||
abstract class Sanitizer extends string {
|
||||
|
||||
bindingset[this]
|
||||
Sanitizer() { any() }
|
||||
|
||||
/** Holds if `taint` cannot flow through `node`. */
|
||||
predicate sanitizingNode(TaintKind taint, ControlFlowNode node) { none() }
|
||||
|
||||
/** Holds if `call` removes removes the `taint` */
|
||||
predicate sanitizingCall(TaintKind taint, FunctionObject callee) { none() }
|
||||
|
||||
/** Holds if `test` shows value to be untainted with `taint` */
|
||||
predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) { none() }
|
||||
|
||||
/** Holds if `test` shows value to be untainted with `taint` */
|
||||
predicate sanitizingSingleEdge(TaintKind taint, SingleSuccessorGuard test) { none() }
|
||||
|
||||
/** Holds if `def` shows value to be untainted with `taint` */
|
||||
predicate sanitizingDefinition(TaintKind taint, EssaDefinition def) { none() }
|
||||
|
||||
}
|
||||
|
||||
/** DEPRECATED -- Use DataFlowExtension instead.
|
||||
* An extension to taint-flow. For adding library or framework specific flows.
|
||||
* Examples include flow from a request to untrusted part of that request or
|
||||
* from a socket to data from that socket.
|
||||
*/
|
||||
abstract class TaintFlow extends string {
|
||||
|
||||
bindingset[this]
|
||||
TaintFlow() { any() }
|
||||
|
||||
/** Holds if `fromnode` being tainted with `fromkind` will result in `tonode` being tainted with `tokind`.
|
||||
* Extensions to `TaintFlow` should override this to provide additional taint steps.
|
||||
*/
|
||||
predicate additionalFlowStep(ControlFlowNode fromnode, TaintKind fromkind, ControlFlowNode tonode, TaintKind tokind) { none() }
|
||||
|
||||
/** Holds if the given `kind` of taint passes from variable `fromvar` to variable `tovar`.
|
||||
* This predicate is present for completeness. Most `TaintFlow` implementations will not need to override it.
|
||||
*/
|
||||
predicate additionalFlowStepVar(EssaVariable fromvar, EssaVariable tovar, TaintKind kind) { none() }
|
||||
|
||||
/** Holds if the given `kind` of taint cannot pass from variable `fromvar` to variable `tovar`.
|
||||
* This predicate is present for completeness. Most `TaintFlow` implementations will not need to override it.
|
||||
*/
|
||||
predicate prunedFlowStepVar(EssaVariable fromvar, EssaVariable tovar, TaintKind kind) { none() }
|
||||
|
||||
}
|
||||
|
||||
/** A source of taintedness.
|
||||
* Users of the taint tracking library should override this
|
||||
* class to provide their own sources.
|
||||
*/
|
||||
abstract class TaintSource extends @py_flow_node {
|
||||
|
||||
string toString() { result = "Taint source" }
|
||||
|
||||
/**
|
||||
* Holds if `this` is a source of taint kind `kind`
|
||||
*
|
||||
* This must be overridden by subclasses to specify sources of taint.
|
||||
*
|
||||
* The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
|
||||
*/
|
||||
abstract predicate isSourceOf(TaintKind kind);
|
||||
|
||||
/**
|
||||
* Holds if `this` is a source of taint kind `kind` for the given context.
|
||||
* Generally, this should not need to be overridden; overriding `isSourceOf(kind)` should be sufficient.
|
||||
*
|
||||
* The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
|
||||
*/
|
||||
predicate isSourceOf(TaintKind kind, TaintTrackingContext context) {
|
||||
context.isTop() and this.isSourceOf(kind)
|
||||
}
|
||||
|
||||
Location getLocation() {
|
||||
result = this.(ControlFlowNode).getLocation()
|
||||
}
|
||||
|
||||
predicate hasLocationInfo(string fp, int bl, int bc, int el, int ec) {
|
||||
this.getLocation().hasLocationInfo(fp, bl, bc, el, ec)
|
||||
}
|
||||
|
||||
/** Gets a TaintedNode for this taint source */
|
||||
TaintedNode getATaintNode() {
|
||||
result.getCfgNode() = this and
|
||||
this.isSourceOf(result.getTaintKind(), result.getContext()) and
|
||||
result.getPath().noAttribute()
|
||||
}
|
||||
|
||||
/** Holds if taint can flow from this source to sink `sink` */
|
||||
final predicate flowsToSink(TaintKind srckind, TaintSink sink) {
|
||||
exists(TaintedNode src, TaintedNode tsink |
|
||||
src = this.getATaintNode() and
|
||||
src.getTaintKind() = srckind and
|
||||
src.flowsTo(tsink) and
|
||||
this.isSourceOf(srckind, _) and
|
||||
sink = tsink.getCfgNode() and
|
||||
sink.sinks(tsink.getTaintKind()) and
|
||||
tsink.getPath().noAttribute() and
|
||||
tsink.isSink()
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if taint can flow from this source to taint sink `sink` */
|
||||
final predicate flowsToSink(TaintSink sink) {
|
||||
this.flowsToSink(_, sink)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Warning: Advanced feature. Users are strongly recommended to use `TaintSource` instead.
|
||||
* A source of taintedness on the ESSA data-flow graph.
|
||||
* Users of the taint tracking library can override this
|
||||
* class to provide their own sources on the ESSA graph.
|
||||
*/
|
||||
abstract class TaintedDefinition extends EssaNodeDefinition {
|
||||
|
||||
/**
|
||||
* Holds if `this` is a source of taint kind `kind`
|
||||
*
|
||||
* This should be overridden by subclasses to specify sources of taint.
|
||||
*
|
||||
* The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
|
||||
*/
|
||||
abstract predicate isSourceOf(TaintKind kind);
|
||||
|
||||
/**
|
||||
* Holds if `this` is a source of taint kind `kind` for the given context.
|
||||
* Generally, this should not need to be overridden; overriding `isSourceOf(kind)` should be sufficient.
|
||||
*
|
||||
* The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
|
||||
*/
|
||||
predicate isSourceOf(TaintKind kind, TaintTrackingContext context) {
|
||||
context.isTop() and this.isSourceOf(kind)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private class DictUpdate extends DataFlowExtension::DataFlowNode {
|
||||
|
||||
MethodCallsiteRefinement call;
|
||||
|
||||
DictUpdate() {
|
||||
exists(CallNode c |
|
||||
c = call.getCall()
|
||||
|
|
||||
c.getFunction().(AttrNode).getName() = "update" and
|
||||
c.getArg(0) = this
|
||||
)
|
||||
}
|
||||
|
||||
override EssaVariable getASuccessorVariable() {
|
||||
call.getVariable() = result
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private class SequenceExtends extends DataFlowExtension::DataFlowNode {
|
||||
|
||||
MethodCallsiteRefinement call;
|
||||
|
||||
SequenceExtends() {
|
||||
exists(CallNode c |
|
||||
c = call.getCall()
|
||||
|
|
||||
c.getFunction().(AttrNode).getName() = "extend" and
|
||||
c.getArg(0) = this
|
||||
)
|
||||
}
|
||||
|
||||
override EssaVariable getASuccessorVariable() {
|
||||
call.getVariable() = result
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** A node that is vulnerable to one or more types of taint.
|
||||
* These nodes provide the sinks when computing the taint flow graph.
|
||||
* An example would be an argument to a write to a http response object,
|
||||
* such an argument would be vulnerable to unsanitized user-input (XSS).
|
||||
*
|
||||
* Users of the taint tracking library should extend this
|
||||
* class to provide their own sink nodes.
|
||||
*/
|
||||
abstract class TaintSink extends @py_flow_node {
|
||||
|
||||
string toString() { result = "Taint sink" }
|
||||
|
||||
/**
|
||||
* Holds if `this` "sinks" taint kind `kind`
|
||||
* Typically this means that `this` is vulnerable to taint kind `kind`.
|
||||
*
|
||||
* This must be overridden by subclasses to specify vulnerabilities or other sinks of taint.
|
||||
*/
|
||||
abstract predicate sinks(TaintKind taint);
|
||||
|
||||
Location getLocation() {
|
||||
result = this.(ControlFlowNode).getLocation()
|
||||
}
|
||||
|
||||
predicate hasLocationInfo(string fp, int bl, int bc, int el, int ec) {
|
||||
this.getLocation().hasLocationInfo(fp, bl, bc, el, ec)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** Extension for data-flow, to help express data-flow paths that are
|
||||
* library or framework specific and cannot be inferred by the general
|
||||
* data-flow machinery.
|
||||
*/
|
||||
module DataFlowExtension {
|
||||
|
||||
/** A control flow node that modifies the basic data-flow. */
|
||||
abstract class DataFlowNode extends @py_flow_node {
|
||||
|
||||
string toString() {
|
||||
result = "Dataflow extension node"
|
||||
}
|
||||
|
||||
/** Gets a successor node for data-flow.
|
||||
* Data (all forms) is assumed to flow from `this` to `result`
|
||||
*/
|
||||
ControlFlowNode getASuccessorNode() { none() }
|
||||
|
||||
/** Gets a successor variable for data-flow.
|
||||
* Data (all forms) is assumed to flow from `this` to `result`.
|
||||
* Note: This is an unlikely form of flow. See `DataFlowVariable.getASuccessorVariable()`
|
||||
*/
|
||||
EssaVariable getASuccessorVariable() { none() }
|
||||
|
||||
/** Holds if data cannot flow from `this` to `succ`,
|
||||
* even though it would normally do so.
|
||||
*/
|
||||
predicate prunedSuccessor(ControlFlowNode succ) { none() }
|
||||
|
||||
/** Gets a successor node, where the successor node will be tainted with `tokind`
|
||||
* when `this` is tainted with `fromkind`.
|
||||
* Extensions to `DataFlowNode` should override this to provide additional taint steps.
|
||||
*/
|
||||
ControlFlowNode getASuccessorNode(TaintKind fromkind, TaintKind tokind) { none() }
|
||||
|
||||
/** Gets a successor node for data-flow with a change of context from callee to caller
|
||||
* (going *up* the call-stack) across call-site `call`.
|
||||
* Data (all forms) is assumed to flow from `this` to `result`
|
||||
* Extensions to `DataFlowNode` should override this to provide additional taint steps.
|
||||
*/
|
||||
ControlFlowNode getAReturnSuccessorNode(CallNode call) { none() }
|
||||
|
||||
/** Gets a successor node for data-flow with a change of context from caller to callee
|
||||
* (going *down* the call-stack) across call-site `call`.
|
||||
* Data (all forms) is assumed to flow from `this` to `result`
|
||||
* Extensions to `DataFlowNode` should override this to provide additional taint steps.
|
||||
*/
|
||||
ControlFlowNode getACalleeSuccessorNode(CallNode call) { none() }
|
||||
|
||||
}
|
||||
|
||||
/** Data flow variable that modifies the basic data-flow. */
|
||||
class DataFlowVariable extends EssaVariable {
|
||||
|
||||
/** Gets a successor node for data-flow.
|
||||
* Data (all forms) is assumed to flow from `this` to `result`
|
||||
* Note: This is an unlikely form of flow. See `DataFlowNode.getASuccessorNode()`
|
||||
*/
|
||||
ControlFlowNode getASuccessorNode() { none() }
|
||||
|
||||
/** Gets a successor variable for data-flow.
|
||||
* Data (all forms) is assumed to flow from `this` to `result`.
|
||||
*/
|
||||
EssaVariable getASuccessorVariable() { none() }
|
||||
|
||||
/** Holds if data cannot flow from `this` to `succ`,
|
||||
* even though it would normally do so.
|
||||
*/
|
||||
predicate prunedSuccessor(EssaVariable succ) { none() }
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
class TaintedPathSource extends TaintTrackingNode {
|
||||
|
||||
TaintedPathSource() {
|
||||
this.isSource()
|
||||
}
|
||||
|
||||
DataFlow::Node getSource() {
|
||||
result = this.getNode()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class TaintedPathSink extends TaintTrackingNode {
|
||||
|
||||
TaintedPathSink() {
|
||||
this.isSink()
|
||||
}
|
||||
|
||||
DataFlow::Node getSink() {
|
||||
result = this.getNode()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Backwards compatible name */
|
||||
class TaintedNode = TaintTrackingNode;
|
||||
|
||||
/* Helpers for Validating classes */
|
||||
|
||||
private import semmle.python.pointsto.PointsTo
|
||||
|
||||
|
||||
/** Data flow module providing an interface compatible with
|
||||
* the other language implementations.
|
||||
*/
|
||||
module DataFlow {
|
||||
|
||||
/** Generic taint kind, source and sink classes for convenience and
|
||||
* compatibility with other language libraries
|
||||
*/
|
||||
|
||||
class Extension = DataFlowExtension::DataFlowNode;
|
||||
|
||||
deprecated abstract class Configuration extends string {
|
||||
|
||||
bindingset[this]
|
||||
Configuration() { this = this }
|
||||
|
||||
abstract predicate isSource(ControlFlowNode source);
|
||||
|
||||
abstract predicate isSink(ControlFlowNode sink);
|
||||
|
||||
private predicate hasFlowPath(TaintedNode source, TaintedNode sink) {
|
||||
source.getConfiguration() = this and
|
||||
this.isSource(source.getCfgNode()) and
|
||||
this.isSink(sink.getCfgNode()) and
|
||||
source.flowsTo(sink)
|
||||
}
|
||||
|
||||
predicate hasFlow(ControlFlowNode source, ControlFlowNode sink) {
|
||||
exists(TaintedNode psource, TaintedNode psink |
|
||||
psource.getCfgNode() = source and
|
||||
psink.getCfgNode() = sink and
|
||||
this.isSource(source) and
|
||||
this.isSink(sink) and
|
||||
this.hasFlowPath(psource, psink)
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private class ConfigurationAdapter extends TaintTracking::Configuration {
|
||||
|
||||
ConfigurationAdapter() {
|
||||
this instanceof Configuration
|
||||
}
|
||||
|
||||
override predicate isSource(DataFlow::Node node, TaintKind kind) {
|
||||
this.(Configuration).isSource(node.asCfgNode()) and
|
||||
kind instanceof DataFlowType
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node node, TaintKind kind) {
|
||||
this.(Configuration).isSink(node.asCfgNode()) and
|
||||
kind instanceof DataFlowType
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private newtype TDataFlowNode =
|
||||
TEssaNode(EssaVariable var)
|
||||
or
|
||||
TCfgNode(ControlFlowNode node)
|
||||
|
||||
abstract class Node extends TDataFlowNode {
|
||||
|
||||
abstract ControlFlowNode asCfgNode();
|
||||
|
||||
abstract EssaVariable asVariable();
|
||||
|
||||
abstract string toString();
|
||||
|
||||
abstract Scope getScope();
|
||||
|
||||
abstract BasicBlock getBasicBlock();
|
||||
|
||||
abstract Location getLocation();
|
||||
|
||||
AstNode asAstNode() {
|
||||
result = this.asCfgNode().getNode()
|
||||
}
|
||||
|
||||
/** For backwards compatibility -- Use asAstNode() instead */
|
||||
deprecated AstNode getNode() {
|
||||
result = this.asAstNode()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class CfgNode extends Node, TCfgNode {
|
||||
|
||||
override ControlFlowNode asCfgNode() {
|
||||
this = TCfgNode(result)
|
||||
}
|
||||
|
||||
override EssaVariable asVariable() {
|
||||
none()
|
||||
}
|
||||
|
||||
override string toString() {
|
||||
result = this.asAstNode().toString()
|
||||
}
|
||||
|
||||
override Scope getScope() {
|
||||
result = this.asCfgNode().getScope()
|
||||
}
|
||||
|
||||
override BasicBlock getBasicBlock() {
|
||||
result = this.asCfgNode().getBasicBlock()
|
||||
}
|
||||
|
||||
override Location getLocation() {
|
||||
result = this.asCfgNode().getLocation()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class EssaNode extends Node, TEssaNode {
|
||||
|
||||
override ControlFlowNode asCfgNode() {
|
||||
none()
|
||||
}
|
||||
|
||||
override EssaVariable asVariable() {
|
||||
this = TEssaNode(result)
|
||||
}
|
||||
|
||||
override string toString() {
|
||||
result = this.asVariable().toString()
|
||||
}
|
||||
|
||||
override Scope getScope() {
|
||||
result = this.asVariable().getScope()
|
||||
}
|
||||
|
||||
override BasicBlock getBasicBlock() {
|
||||
result = this.asVariable().getDefinition().getBasicBlock()
|
||||
}
|
||||
|
||||
override Location getLocation() {
|
||||
result = this.asVariable().getDefinition().getLocation()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private class DataFlowType extends TaintKind {
|
||||
|
||||
DataFlowType() {
|
||||
this = "Data flow" and
|
||||
exists(DataFlow::Configuration c)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
pragma [noinline]
|
||||
private predicate dict_construct(ControlFlowNode itemnode, ControlFlowNode dictnode) {
|
||||
dictnode.(DictNode).getAValue() = itemnode
|
||||
or
|
||||
dictnode.(CallNode).getFunction().pointsTo(ObjectInternal::builtin("dict")) and
|
||||
dictnode.(CallNode).getArgByName(_) = itemnode
|
||||
}
|
||||
|
||||
pragma [noinline]
|
||||
private predicate sequence_construct(ControlFlowNode itemnode, ControlFlowNode seqnode) {
|
||||
seqnode.isLoad() and
|
||||
(
|
||||
seqnode.(ListNode).getElement(_) = itemnode
|
||||
or
|
||||
seqnode.(TupleNode).getElement(_) = itemnode
|
||||
or
|
||||
seqnode.(SetNode).getAnElement() = itemnode
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
/* A call to construct a sequence from a sequence or iterator*/
|
||||
pragma [noinline]
|
||||
private predicate sequence_call(ControlFlowNode fromnode, CallNode tonode) {
|
||||
tonode.getArg(0) = fromnode and
|
||||
exists(ControlFlowNode cls |
|
||||
cls = tonode.getFunction() |
|
||||
cls.pointsTo(ObjectInternal::builtin("list"))
|
||||
or
|
||||
cls.pointsTo(ObjectInternal::builtin("tuple"))
|
||||
or
|
||||
cls.pointsTo(ObjectInternal::builtin("set"))
|
||||
)
|
||||
}
|
||||
|
||||
/** For backwards compatibility */
|
||||
import semmle.python.dataflow.TaintTracking
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import python
|
||||
import semmle.python.security.TaintTest
|
||||
import TaintLib
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import python
|
||||
import semmle.python.security.TaintTest
|
||||
import TaintLib
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user