Python taint-tracking: fill in most of the new configuration-based taint-tracking implementation.

This commit is contained in:
Mark Shannon
2019-07-29 15:25:11 +01:00
parent e8bd9e7341
commit eed2090168
5 changed files with 570 additions and 30 deletions

View File

@@ -1,5 +1,7 @@
import python
import semmle.python.security.TaintTracking
private import semmle.python.objects.ObjectInternal
private import semmle.python.dataflow.Implementation
module TaintTracking {
@@ -7,12 +9,12 @@ module TaintTracking {
class Sink = TaintSink;
class PathSource = TaintedPathSource;
class PathSink = TaintedPathSink;
class Extension = DataFlowExtension::DataFlowNode;
class PathSource = TaintTrackingNode;
class PathSink = TaintTrackingNode;
abstract class Configuration extends string {
/* Required to prevent compiler warning */
@@ -52,7 +54,7 @@ module TaintTracking {
/**
* Holds if `src -> dest` is a flow edge converting taint from `srckind` to `destkind`.
*/
predicate isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node trg, TaintKind srckind, TaintKind destkind) {
predicate isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node dest, TaintKind srckind, TaintKind destkind) {
none()
}
@@ -65,6 +67,11 @@ module TaintTracking {
*/
predicate isBarrierEdge(DataFlow::Node src, DataFlow::Node trg) { none() }
/**
* Holds if control flow from `test` along the `isTrue` edge is prohibited.
*/
predicate isBarrierTest(ControlFlowNode test, boolean isTrue) { none() }
/**
* Holds if flow from `src` to `dest` is prohibited when the incoming taint is `srckind` and the outgoing taint is `destkind`.
* Note that `srckind` and `destkind` can be the same.
@@ -74,9 +81,7 @@ module TaintTracking {
/* Common query API */
predicate hasFlowPath(PathSource source, PathSink sink) {
this.isSource(source.getNode()) and
this.isSink(sink.getNode()) and
source.flowsTo(sink)
this.(TaintTrackingImplementation).hasFlowPath(source, sink)
}
/* Old query API */
@@ -84,8 +89,8 @@ module TaintTracking {
deprecated predicate hasFlow(Source source, Sink sink) {
exists(PathSource psource, PathSink psink |
this.hasFlowPath(psource, psink) and
source = psource.getCfgNode() and
sink = psink.getCfgNode()
source = psource.getNode().asCfgNode() and
sink = psink.getNode().asCfgNode()
)
}
@@ -103,4 +108,3 @@ module TaintTracking {
}
}

View File

@@ -0,0 +1,440 @@
import python
import semmle.python.security.TaintTracking
private import semmle.python.objects.ObjectInternal
/**
 * The calling contexts distinguished by taint tracking: either no context,
 * or "argument `n` of the call carries taint of kind `param`".
 */
newtype TTaintTrackingContext =
    TNoParam()
    or
    TParamContext(TaintKind param, int n) {
        /* Only (kind, index) pairs where `param` taints the `n`th argument of some call. */
        exists(CallNode call, ControlFlowNode arg |
            arg = call.getArg(n) and
            param.taints(arg)
        )
    }
/**
 * A calling context for taint tracking: either "no context" or
 * "parameter `n` is tainted with a given kind".
 */
class TaintTrackingContext extends TTaintTrackingContext {

    /** Gets a human-readable description of this context. */
    string toString() {
        exists(TaintKind taint, int index |
            this = TParamContext(taint, index) and
            result = "Parameter " + index.toString() + " is " + taint
        )
        or
        this = TNoParam() and result = "No context"
    }

    /** Gets the kind of taint carried by parameter `n` in this context, if this is a parameter context. */
    TaintKind getParameterTaint(int n) { this = TParamContext(result, n) }

    /**
     * Gets a context of a caller: the context in which some implementation sees a call
     * whose `n`th argument carries this context's taint with no attribute path.
     */
    TaintTrackingContext getCaller() {
        exists(TaintKind taint, int index, TaintTrackingImplementation impl |
            this = TParamContext(taint, index) and
            impl.callWithTaintedArgument(_, _, result, _, index, TNoAttribute(), taint)
        )
    }
}
/**
 * The attribute paths that taint can be attached to.
 * Only `TNoAttribute` currently has any values: the two attribute branches
 * are guarded by `none()` and are therefore empty.
 */
private newtype TAttributePath =
TNoAttribute()
or
/* A single attribute name. Placeholder: `none()` means no such path exists yet. */
TAttribute(string name) {
none()
}
or
/* A pair of attribute names. Placeholder: `none()` means no such path exists yet. */
TAttributeAttribute(string name1, string name2) {
none()
}
/**
 * An attribute path qualifying a piece of taint.
 * Concrete subclasses supply the textual form and the `fromAttribute` relation.
 */
abstract class AttributePath extends TAttributePath {

    /** Gets a textual representation of this path. */
    abstract string toString();

    /** Gets the textual extension for this path (the empty string for `NoAttribute`). */
    abstract string extension();

    /** Gets the path related to this one by attribute `name`; `getAttribute` is its inverse. */
    abstract AttributePath fromAttribute(string name);

    /** Gets the path `p` such that `p.fromAttribute(name)` is this path. */
    AttributePath getAttribute(string name) {
        result.fromAttribute(name) = this
    }

    /** Holds if this is the empty path. */
    predicate noAttribute() {
        this instanceof TNoAttribute
    }
}
/** The empty attribute path: the taint applies to the value itself. */
class NoAttribute extends TNoAttribute, AttributePath {

    /** The empty path has no path obtained from it via an attribute. */
    override AttributePath fromAttribute(string name) { none() }

    override string extension() { result = "" }

    override string toString() { result = "no attribute" }
}
/**
 * The nodes of the taint-tracking graph: a data-flow node combined with a calling context,
 * an attribute path, a taint kind and the configuration being evaluated.
 */
newtype TTaintTrackingNode =
    TTaintTrackingNode_(DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, TaintTracking::Configuration config) {
        /* Nodes reached by a flow step */
        config.(TaintTrackingImplementation).flowStep(_, node, context, path, kind)
        or
        /* Sources: always in the empty context and with no attribute path */
        context = TNoParam() and
        path = TNoAttribute() and
        (
            config.isSource(node, kind)
            or
            exists(TaintSource legacy |
                legacy.isSourceOf(kind) and
                legacy = node.asCfgNode() and
                config.isSource(legacy)
            )
        )
    }
/**
 * A node in the taint-tracking graph: a data-flow node together with its
 * context, attribute path, taint kind and configuration.
 */
class TaintTrackingNode extends TTaintTrackingNode {

    /** Gets a textual representation: the taint kind followed by the location. */
    string toString() { result = this.getTaintKind() + " at " + this.getLocation() }

    /** Gets the underlying data-flow node. */
    DataFlow::Node getNode() { this = TTaintTrackingNode_(result, _, _, _, _) }

    /** Gets the calling context of this node. */
    TaintTrackingContext getContext() { this = TTaintTrackingNode_(_, result, _, _, _) }

    /** Gets the attribute path of this node. */
    AttributePath getPath() { this = TTaintTrackingNode_(_, _, result, _, _) }

    /** Gets the kind of taint carried by this node. */
    TaintKind getTaintKind() { this = TTaintTrackingNode_(_, _, _, result, _) }

    /** Gets the configuration this node belongs to. */
    TaintTracking::Configuration getConfiguration() { this = TTaintTrackingNode_(_, _, _, _, result) }

    /** Gets the location of the underlying data-flow node. */
    Location getLocation() { result = this.getNode().getLocation() }

    /** Gets a node reachable from this node in a single flow step. */
    TaintTrackingNode getASuccessor() {
        exists(TaintTracking::Configuration cfg, DataFlow::Node succ, TaintTrackingContext succctx,
               AttributePath succpath, TaintKind succkind |
            cfg.(TaintTrackingImplementation).flowStep(this, succ, succctx, succpath, succkind) and
            result = TTaintTrackingNode_(succ, succctx, succpath, succkind, cfg)
        )
    }

    /** Holds if this node is a path source for its configuration. */
    predicate isSource() {
        exists(TaintTrackingImplementation impl |
            impl = this.getConfiguration() and
            impl.isPathSource(this)
        )
    }

    /** Holds if this node is a path sink for its configuration. */
    predicate isSink() {
        exists(TaintTrackingImplementation impl |
            impl = this.getConfiguration() and
            impl.isPathSink(this)
        )
    }
}
class TaintTrackingImplementation extends string {
/**
 * Holds if `source` is a path source and `sink` is a path sink of this configuration,
 * and `sink` is reachable from `source` in zero or more flow steps.
 */
predicate hasFlowPath(TaintTrackingNode source, TaintTrackingNode sink) {
    source.getASuccessor*() = sink and
    this.isPathSink(sink) and
    this.isPathSource(source)
}
/**
 * Holds if `source` is a source node for this configuration.
 * Sources always have the empty context and no attribute path.
 *
 * Both ways of declaring a source are recognised, mirroring the branches that
 * create source nodes in `TTaintTrackingNode_`: the configuration's
 * `isSource(node, kind)` and a legacy `TaintSource` accepted by `isSource(source)`.
 */
predicate isPathSource(TaintTrackingNode source) {
    exists(DataFlow::Node srcnode, TaintKind kind |
        source = TTaintTrackingNode_(srcnode, TNoParam(), TNoAttribute(), kind, this) and
        (
            this.(TaintTracking::Configuration).isSource(srcnode, kind)
            or
            /* Legacy `TaintSource`s get a node in the graph, so they must also be able to start a path. */
            exists(TaintSource legacy |
                this.(TaintTracking::Configuration).isSource(legacy) and
                srcnode.asCfgNode() = legacy and
                legacy.isSourceOf(kind)
            )
        )
    )
}
/**
 * Holds if `sink` is a sink node for this configuration: a node in the empty context,
 * with no attribute path, whose data-flow node and kind satisfy the configuration's `isSink`.
 */
predicate isPathSink(TaintTrackingNode sink) {
    exists(TaintKind sinkkind, DataFlow::Node target |
        this.(TaintTracking::Configuration).isSink(target, sinkkind) and
        sink = TTaintTrackingNode_(target, TNoParam(), TNoAttribute(), sinkkind, this)
    )
}
/** Holds if there is a single flow step from `src` to `dest` under this configuration. */
predicate flowStep(TaintTrackingNode src, TaintTrackingNode dest) {
    exists(DataFlow::Node target, TaintTrackingContext destctx, AttributePath destpath, TaintKind destkind |
        this.flowStep(src, target, destctx, destpath, destkind) and
        dest = TTaintTrackingNode_(target, destctx, destpath, destkind, this)
    )
}
/* Characteristic predicate: the implementation strings are exactly the `TaintTracking::Configuration`s. */
TaintTrackingImplementation() { this instanceof TaintTracking::Configuration }
/**
 * Holds if taint flows in one step from `src` to `node`, arriving with the given
 * `context`, `path` and `kind`. Steps into basic blocks that are not likely
 * reachable are pruned.
 */
predicate flowStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind) {
    node.getBasicBlock().likelyReachable() and
    this.unprunedStep(src, node, context, path, kind)
}
/**
 * Holds if taint flows in one step from `src` to `node`, arriving with the given
 * `context`, `path` and `kind`, before any reachability pruning is applied.
 *
 * This is the union of the individual step predicates, the configuration's
 * additional flow steps, and flow steps defined by the taint kind itself.
 *
 * NOTE(review): `subscriptStep`, `ifExprStep` and `essaFlowStep` are defined on this
 * class but are not part of this union; confirm whether they should be wired in.
 */
predicate unprunedStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind) {
    this.importStep(src, node, context, path, kind)
    or
    this.fromImportStep(src, node, context, path, kind)
    or
    this.attributeLoadStep(src, node, context, path, kind)
    or
    this.getattrStep(src, node, context, path, kind)
    or
    this.useStep(src, node, context, path, kind)
    or
    this.callTaintStep(src, node, context, path, kind)
    or
    this.callFlowStep(src, node, context, path, kind)
    or
    this.iterationStep(src, node, context, path, kind)
    or
    this.yieldStep(src, node, context, path, kind)
    or
    /* Configuration-defined step converting `srckind` to `kind` */
    exists(DataFlow::Node srcnode, TaintKind srckind |
        this.(TaintTracking::Configuration).isAdditionalFlowStep(srcnode, node, srckind, kind) and
        src = TTaintTrackingNode_(srcnode, context, path, srckind, this) and
        path.noAttribute()
    )
    or
    /* Configuration-defined step preserving the kind */
    exists(DataFlow::Node srcnode |
        this.(TaintTracking::Configuration).isAdditionalFlowStep(srcnode, node) and
        src = TTaintTrackingNode_(srcnode, context, path, kind, this) and
        path.noAttribute()
    )
    or
    /* Step defined by the taint kind of the source.
     * The source node must carry `srckind` (not the resulting `kind`);
     * otherwise `srckind` is unrelated to the source and kind-changing steps are wrong.
     */
    exists(DataFlow::Node srcnode, TaintKind srckind |
        src = TTaintTrackingNode_(srcnode, context, path, srckind, this) and
        path.noAttribute() and
        kind = srckind.getTaintForFlowStep(srcnode.asCfgNode(), node.asCfgNode())
    )
}
/** Flow through `import` statements. Not yet implemented: never holds. */
pragma [noinline]
predicate importStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind) {
// TO DO
none()
}
/** Flow through `from ... import ...` statements. Not yet implemented: never holds. */
pragma [noinline]
predicate fromImportStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind) {
// TO DO
none()
}
/**
 * Holds if `node` is an attribute load `obj.attrname` and `src` is the object `obj`,
 * tainted along a path from which `path` is obtained via `attrname`.
 *
 * The taint flows from the object to the attribute load, in the same direction
 * as `getattrStep`: `src` is the object and `node` is the `AttrNode`.
 */
pragma [noinline]
predicate attributeLoadStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind) {
    exists(DataFlow::Node srcnode, AttributePath srcpath, string attrname |
        src = TTaintTrackingNode_(srcnode, context, srcpath, kind, this) and
        srcnode.asCfgNode() = node.asCfgNode().(AttrNode).getObject(attrname) and
        path = srcpath.fromAttribute(attrname)
    )
}
/**
 * Holds if `node` is a call `getattr(obj, "attrname")` to the builtin `getattr`
 * with a string-constant name, `src` is the object `obj`, and `path` is obtained
 * from the source path via `attrname`.
 */
pragma [noinline]
predicate getattrStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind) {
    exists(DataFlow::Node object, AttributePath objpath, string attrname, CallNode getattrcall, ControlFlowNode firstarg |
        getattrcall = node.asCfgNode() and
        getattrcall.getFunction().pointsTo(ObjectInternal::builtin("getattr")) and
        firstarg = getattrcall.getArg(0) and
        firstarg = object.asCfgNode() and
        attrname = getattrcall.getArg(1).getNode().(StrConst).getText() and
        src = TTaintTrackingNode_(object, context, objpath, kind, this) and
        path = objpath.fromAttribute(attrname)
    )
}
/**
 * Holds if `src` is an ESSA-variable node and `node` is a source use of that variable.
 * Context, path and kind are unchanged.
 */
pragma [noinline]
predicate useStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind) {
    exists(DataFlow::Node definition |
        definition.asVariable().getASourceUse() = node.asCfgNode() and
        src = TTaintTrackingNode_(definition, context, path, kind, this)
    )
}
/**
 * Flow from a tainted argument of a call into the callee.
 * Named (keyword) parameters are not handled yet.
 */
pragma [noinline]
predicate callFlowStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind) {
/* Argument to parameter: `node` is the parameter of the callee at position `arg`,
 * and the new context records that this parameter carries `kind`.
 */
exists(CallNode call, PythonFunctionObjectInternal pyfunc, int arg |
this.callWithTaintedArgument(src, call, _, pyfunc, arg, path, kind) and
node.asCfgNode() = pyfunc.getParameter(arg) and
context = TParamContext(kind, arg)
)
or
/* NOTE(review): this disjunct never constrains `node`, so it holds for every
 * data-flow node whenever the other conditions are met. It looks incomplete
 * and its intended target cannot be told from here; confirm.
 */
exists(CallNode call, PythonFunctionObjectInternal pyfunc, int arg |
this.callWithTaintedArgument(src, call, context, pyfunc, arg, path, kind) and
src.getContext() = TParamContext(kind, arg)
)
// TO DO... named parameters
}
/**
 * Holds if `call` is a call to `pyfunc` whose positional argument `arg` is the
 * data-flow node of `src`, where `src` has context `caller`, attribute path `path`
 * and taint kind `kind` under this configuration.
 */
predicate callWithTaintedArgument(TaintTrackingNode src, CallNode call, TaintTrackingContext caller, PythonFunctionObjectInternal pyfunc, int arg, AttributePath path, TaintKind kind) {
    exists(DataFlow::Node argument |
        call = pyfunc.getACall() and
        call.getArg(arg) = argument.asCfgNode() and
        src = TTaintTrackingNode_(argument, caller, path, kind, this)
    )
}
/**
 * Holds if `node` is a method call `obj.name(...)` on the tainted object `src`,
 * and `kind` is the taint that the source's kind assigns to the result of method `name`.
 *
 * The kind of the source (`srckind`) is tracked separately from the resulting `kind`,
 * so that methods whose result has a different kind of taint from the receiver are handled.
 */
pragma [noinline]
predicate callTaintStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind) {
    exists(DataFlow::Node srcnode, CallNode call, TaintKind srckind, string name |
        src = TTaintTrackingNode_(srcnode, context, path, srckind, this) and
        call.getFunction().(AttrNode).getObject(name) = srcnode.asCfgNode() and
        kind = srckind.getTaintOfMethodResult(name) and
        node.asCfgNode() = call
    )
}
/**
 * Holds if `node` is a `for` node iterating over the tainted sequence `src`,
 * and `kind` is the taint the sequence's kind yields on iteration.
 * Only applies when there is no attribute path.
 */
pragma [noinline]
predicate iterationStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind) {
    exists(ForNode loop, DataFlow::Node iterable, TaintKind iterablekind |
        path.noAttribute() and
        loop = node.asCfgNode() and
        loop.iterates(_, iterable.asCfgNode()) and
        src = TTaintTrackingNode_(iterable, context, path, iterablekind, this) and
        iterablekind.getTaintForIteration() = kind
    )
}
/**
 * Holds if `src` is the value of a `yield` inside a generator function and `node`
 * is a call to that function. The resulting `kind` is one whose iteration taint
 * is the kind of the yielded value.
 */
pragma [noinline]
predicate yieldStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind) {
    exists(DataFlow::Node yielded, TaintKind itemkind, PyFunctionObject generator, Yield yieldexpr |
        generator.getFunction().isGenerator() and
        yieldexpr.getScope() = generator.getFunction() and
        yieldexpr.getValue() = yielded.asCfgNode().getNode() and
        generator.getACall() = node.asCfgNode() and
        kind.getTaintForIteration() = itemkind and
        src = TTaintTrackingNode_(yielded, context, path, itemkind, this)
    )
}
/**
 * Holds if `node` is a subscript of the tainted sequence `src`,
 * and `kind` is the item kind of the sequence's kind.
 */
pragma [noinline]
predicate subscriptStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind) {
    exists(DataFlow::Node container, SequenceKind containerkind |
        node.asCfgNode().(SubscriptNode).getObject() = container.asCfgNode() and
        src = TTaintTrackingNode_(container, context, path, containerkind, this) and
        containerkind.getItem() = kind
    )
}
/**
 * Holds if `node` is a conditional expression and `src` is one of its operands.
 * Context, path and kind are unchanged.
 */
pragma [noinline]
predicate ifExprStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind) {
    exists(DataFlow::Node operand |
        node.asCfgNode().(IfExprNode).getAnOperand() = operand.asCfgNode() and
        src = TTaintTrackingNode_(operand, context, path, kind, this)
    )
}
/** Holds if `node` is an ESSA variable whose definition is tainted by `src`. */
pragma [noinline]
predicate essaFlowStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind) {
    exists(EssaDefinition defn |
        defn = node.asVariable().getDefinition() and
        this.taintedDefinition(src, defn, context, path, kind)
    )
}
/**
 * Holds if the ESSA definition `defn` is tainted by `src`.
 * This is the union of the per-definition-type predicates below.
 */
pragma [noinline]
predicate taintedDefinition(TaintTrackingNode src, EssaDefinition defn, TaintTrackingContext context, AttributePath path, TaintKind kind) {
    /* Plain definitions */
    this.taintedAssignment(src, defn, context, path, kind)
    or
    this.taintedAttributeAssignment(src, defn, context, path, kind)
    or
    this.taintedParameterDefinition(src, defn, context, path, kind)
    or
    /* Merges */
    this.taintedPhi(src, defn, context, path, kind)
    or
    /* Refinements at calls */
    this.taintedCallsite(src, defn, context, path, kind)
    or
    this.taintedMethodCallsite(src, defn, context, path, kind)
    or
    /* Refinements on control-flow edges */
    this.taintedUniEdge(src, defn, context, path, kind)
    or
    this.taintedPiNode(src, defn, context, path, kind)
}
/**
 * Holds if the phi-function `defn` is tainted by `src`, where `src` is one of the
 * input variables of `defn`.
 *
 * Inputs arriving along unlikely edges, or pruned by a `DataFlowExtension::DataFlowVariable`,
 * are ignored.
 */
pragma [noinline]
predicate taintedPhi(TaintTrackingNode src, PhiFunction defn, TaintTrackingContext context, AttributePath path, TaintKind kind) {
    exists(DataFlow::Node srcnode, BasicBlock pred, EssaVariable predvar |
        src = TTaintTrackingNode_(srcnode, context, path, kind, this) and
        /* The source must be the input variable; without this any tainted node would taint every phi. */
        srcnode.asVariable() = predvar and
        predvar = defn.getInput(pred) and
        not pred.unlikelySuccessor(defn.getBasicBlock()) and
        not predvar.(DataFlowExtension::DataFlowVariable).prunedSuccessor(defn.getVariable())
    )
}
/** Holds if `defn` is an assignment whose assigned value is the tainted node `src`. */
pragma [noinline]
predicate taintedAssignment(TaintTrackingNode src, AssignmentDefinition defn, TaintTrackingContext context, AttributePath path, TaintKind kind) {
    exists(DataFlow::Node assigned |
        assigned.asCfgNode() = defn.getValue() and
        src = TTaintTrackingNode_(assigned, context, path, kind, this)
    )
}
/**
 * Holds if `defn` is an attribute assignment whose assigned value is the tainted node `src`.
 * The resulting `path` is the one from which the source path is obtained via the
 * assigned attribute's name.
 */
pragma [noinline]
predicate taintedAttributeAssignment(TaintTrackingNode src, AttributeAssignment defn, TaintTrackingContext context, AttributePath path, TaintKind kind) {
    exists(DataFlow::Node assigned, AttributePath valuepath, string attrname |
        attrname = defn.getName() and
        assigned.asCfgNode() = defn.getValue() and
        src = TTaintTrackingNode_(assigned, context, valuepath, kind, this) and
        valuepath.getAttribute(attrname) = path
    )
}
/**
 * Holds if `defn` is a parameter definition whose defining node is the tainted node `src`.
 * Class initializers are not handled yet.
 */
pragma [noinline]
predicate taintedParameterDefinition(TaintTrackingNode src, ParameterDefinition defn, TaintTrackingContext context, AttributePath path, TaintKind kind) {
    exists(DataFlow::Node param |
        param.asCfgNode() = defn.getDefiningNode() and
        src = TTaintTrackingNode_(param, context, path, kind, this)
    )
    // TO DO... class initializers
}
/** Holds if `defn` is a call-site refinement whose input variable is the tainted node `src`. */
pragma [noinline]
predicate taintedCallsite(TaintTrackingNode src, CallsiteRefinement defn, TaintTrackingContext context, AttributePath path, TaintKind kind) {
    /* For simplicity and performance, tainted escaping variables are assumed to stay tainted across calls.
     * Where that assumption is false, it is easy enough to add an additional barrier.
     */
    exists(DataFlow::Node input |
        defn.getInput() = input.asVariable() and
        src = TTaintTrackingNode_(input, context, path, kind, this)
    )
}
/** Holds if `defn` is a method-call-site refinement whose input variable is the tainted node `src`. */
pragma [noinline]
predicate taintedMethodCallsite(TaintTrackingNode src, MethodCallsiteRefinement defn, TaintTrackingContext context, AttributePath path, TaintKind kind) {
    exists(DataFlow::Node input |
        defn.getInput() = input.asVariable() and
        src = TTaintTrackingNode_(input, context, path, kind, this)
    )
}
/**
 * Holds if `defn` is a single-successor guard whose input variable is the tainted node `src`,
 * unless the configuration declares the guard's test, with its sense, to be a barrier.
 */
pragma [noinline]
predicate taintedUniEdge(TaintTrackingNode src, SingleSuccessorGuard defn, TaintTrackingContext context, AttributePath path, TaintKind kind) {
    exists(DataFlow::Node input |
        not this.(TaintTracking::Configuration).isBarrierTest(defn.getTest(), defn.getSense()) and
        defn.getInput() = input.asVariable() and
        src = TTaintTrackingNode_(input, context, path, kind, this)
    )
}
/**
 * Holds if `defn` is a guard whose input variable is the tainted node `src`,
 * unless the configuration declares the guard's test, with its sense, to be a barrier.
 *
 * NOTE(review): the parameter type and body are identical to `taintedUniEdge`.
 * This presumably should range over pi-nodes (two-way edge refinements) rather
 * than `SingleSuccessorGuard`; confirm the intended type.
 */
pragma [noinline]
predicate taintedPiNode(TaintTrackingNode src, SingleSuccessorGuard defn, TaintTrackingContext context, AttributePath path, TaintKind kind) {
exists(DataFlow::Node srcnode |
src = TTaintTrackingNode_(srcnode, context, path, kind, this) and
srcnode.asVariable() = defn.getInput() and
not this.(TaintTracking::Configuration).isBarrierTest(defn.getTest(), defn.getSense())
)
}
}

View File

@@ -170,6 +170,8 @@ abstract class EssaDefinition extends TEssaDefinition {
result.getDefinition() = this
}
abstract BasicBlock getBasicBlock();
}
/** An ESSA definition corresponding to an edge refinement of the underlying variable.
@@ -233,6 +235,10 @@ class EssaEdgeRefinement extends EssaDefinition, TEssaEdgeDefinition {
result = this.getPredecessor().getScope()
}
/** Gets the basic block of this edge refinement, which is the successor block of the edge. */
override BasicBlock getBasicBlock() {
    this.getSuccessor() = result
}
}
/** A Phi-function as specified in classic SSA form. */
@@ -295,7 +301,7 @@ class PhiFunction extends EssaDefinition, TPhiFunction {
}
/** Gets the basic block that succeeds this phi node. */
BasicBlock getBasicBlock() {
override BasicBlock getBasicBlock() {
this = TPhiFunction(_, result)
}
@@ -446,6 +452,10 @@ class EssaNodeDefinition extends EssaDefinition, TEssaNodeDefinition {
)
}
/** Gets the basic block containing the node that defines this variable. */
override BasicBlock getBasicBlock() {
    exists(ControlFlowNode definer |
        definer = this.getDefiningNode() and
        result = definer.getBasicBlock()
    )
}
}
/** A definition of an ESSA variable that takes another ESSA variable as an input.
@@ -512,6 +522,10 @@ class EssaNodeRefinement extends EssaDefinition, TEssaNodeRefinement {
)
}
/** Gets the basic block containing the node at which this refinement occurs. */
override BasicBlock getBasicBlock() {
    exists(ControlFlowNode definer |
        definer = this.getDefiningNode() and
        result = definer.getBasicBlock()
    )
}
}
pragma[noopt]

View File

@@ -1,8 +1,9 @@
import python
import semmle.python.security.TaintTracking
private import semmle.python.dataflow.Implementation
query predicate edges(TaintedNode fromnode, TaintedNode tonode) {
query predicate edges(TaintTrackingNode fromnode, TaintTrackingNode tonode) {
fromnode.getASuccessor() = tonode and
/* Don't record flow past sinks */
not fromnode.isSink()

View File

@@ -89,6 +89,7 @@
import python
private import semmle.python.pointsto.Filters as Filters
private import semmle.python.objects.ObjectInternal
private import semmle.python.dataflow.Implementation
import semmle.python.dataflow.Configuration
/** A 'kind' of taint. This may be almost anything,
@@ -681,6 +682,15 @@ class TaintedNode extends TTaintedNode {
this = TTaintedNode_(_, _, result)
}
/**
 * Gets the data-flow node for this node.
 * Only control-flow-node based data-flow nodes are produced.
 */
DataFlow::Node getDataFlowNode() {
    /* FIX ME! -- Needs to handle ESSA nodes as well */
    this = TTaintedNode_(_, _, result.asCfgNode())
}
/** Gets the data-flow context for this node. */
CallContext getContext() {
this = TTaintedNode_(_, result, _)
@@ -741,10 +751,10 @@ class TaintedNode extends TTaintedNode {
}
class TaintedPathSource extends TaintedNode {
class TaintedPathSource extends TaintTrackingNode {
TaintedPathSource() {
this.getNode().(TaintSource).isSourceOf(this.getTaintKind(), this.getContext())
this.isSource()
}
/** Holds if taint can flow from this source to sink `sink` */
@@ -752,22 +762,18 @@ class TaintedPathSource extends TaintedNode {
this.getASuccessor*() = sink
}
TaintSource getSource() {
result = this.getNode()
}
}
class TaintedPathSink extends TaintedNode {
class TaintedPathSink extends TaintTrackingNode {
TaintedPathSink() {
this.getNode().(TaintSink).sinks(this.getTaintKind())
this.isSink()
}
TaintSink getSink() {
DataFlow::Node getSink() {
result = this.getNode()
}
}
/** This module contains the implementation of taint-flow.
@@ -1613,26 +1619,24 @@ module DataFlow {
* compatibility with other language libraries
*/
class Node = ControlFlowNode;
class Extension = DataFlowExtension::DataFlowNode;
abstract class Configuration extends string {
deprecated abstract class Configuration extends string {
bindingset[this]
Configuration() { this = this }
abstract predicate isSource(Node source);
abstract predicate isSource(ControlFlowNode source);
abstract predicate isSink(Node sink);
abstract predicate isSink(ControlFlowNode sink);
private predicate hasFlowPath(TaintedNode source, TaintedNode sink) {
this.isSource(source.getNode()) and
this.isSink(sink.getNode()) and
this.isSource(source.getCfgNode()) and
this.isSink(sink.getCfgNode()) and
source.getASuccessor*() = sink
}
predicate hasFlow(Node source, Node sink) {
predicate hasFlow(ControlFlowNode source, ControlFlowNode sink) {
exists(TaintedNode psource, TaintedNode psink |
psource.getNode() = source and
psink.getNode() = sink and
@@ -1644,6 +1648,83 @@ module DataFlow {
}
/** The data-flow nodes: one for each control-flow node and one for each ESSA variable. */
private newtype TDataFlowNode =
    TCfgNode(ControlFlowNode node)
    or
    TEssaNode(EssaVariable var)
/**
 * A data-flow node, which is either a control-flow node or an ESSA variable.
 * Exactly one of `asCfgNode()` and `asVariable()` has a result for the subclasses below.
 */
abstract class Node extends TDataFlowNode {

    /** Gets a textual representation of this node. */
    abstract string toString();

    /** Gets the control-flow node for this node, if it is one. */
    abstract ControlFlowNode asCfgNode();

    /** Gets the ESSA variable for this node, if it is one. */
    abstract EssaVariable asVariable();

    /** Gets the scope containing this node. */
    abstract Scope getScope();

    /** Gets the basic block containing this node. */
    abstract BasicBlock getBasicBlock();

    /** Gets the source location of this node. */
    abstract Location getLocation();
}
/** A data-flow node that wraps a control-flow node; every other property delegates to it. */
class CfgNode extends Node, TCfgNode {

    override ControlFlowNode asCfgNode() { this = TCfgNode(result) }

    /** A control-flow based node is never an ESSA variable. */
    override EssaVariable asVariable() { none() }

    override string toString() {
        exists(ControlFlowNode cfg | cfg = this.asCfgNode() | result = cfg.toString())
    }

    override Scope getScope() {
        exists(ControlFlowNode cfg | cfg = this.asCfgNode() | result = cfg.getScope())
    }

    override BasicBlock getBasicBlock() {
        exists(ControlFlowNode cfg | cfg = this.asCfgNode() | result = cfg.getBasicBlock())
    }

    override Location getLocation() {
        exists(ControlFlowNode cfg | cfg = this.asCfgNode() | result = cfg.getLocation())
    }
}
/**
 * A data-flow node that wraps an ESSA variable. Its basic block and location
 * are those of the variable's definition.
 */
class EssaNode extends Node, TEssaNode {

    /** An ESSA based node is never a control-flow node. */
    override ControlFlowNode asCfgNode() { none() }

    override EssaVariable asVariable() { this = TEssaNode(result) }

    override string toString() {
        exists(EssaVariable v | v = this.asVariable() | result = v.toString())
    }

    override Scope getScope() {
        exists(EssaVariable v | v = this.asVariable() | result = v.getScope())
    }

    override BasicBlock getBasicBlock() {
        exists(EssaDefinition defn | defn = this.asVariable().getDefinition() |
            result = defn.getBasicBlock()
        )
    }

    override Location getLocation() {
        exists(EssaDefinition defn | defn = this.asVariable().getDefinition() |
            result = defn.getLocation()
        )
    }
}
}
private class DataFlowType extends TaintKind {