Merge pull request #3701 from yoff/SharedDataflow

Python: Start using the shared data flow libraries
This commit is contained in:
Taus
2020-07-03 16:03:20 +02:00
committed by GitHub
53 changed files with 8975 additions and 8 deletions

View File

@@ -0,0 +1,26 @@
/**
* Provides a library for local (intra-procedural) and global (inter-procedural)
* data flow analysis: deciding whether data can flow from a _source_ to a
* _sink_.
*
* Unless configured otherwise, _flow_ means that the exact value of
* the source may reach the sink. We do not track flow across pointer
* dereferences or array indexing. To track these types of flow, where the
* exact value may not be preserved, import
* `experimental.dataflow.TaintTracking`.
*
* To use global (interprocedural) data flow, extend the class
* `DataFlow::Configuration` as documented on that class. To use local
* (intraprocedural) data flow, call `DataFlow::localFlow` or
* `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
*/
import python
/**
* Provides classes for performing local (intra-procedural) and
* global (inter-procedural) data flow analyses.
*
* The members of this module are exactly those of
* `experimental.dataflow.internal.DataFlowImpl`, made available under the
* `DataFlow::` prefix by the import below.
*/
module DataFlow {
import experimental.dataflow.internal.DataFlowImpl
}

View File

@@ -0,0 +1,26 @@
/**
* Provides a library for local (intra-procedural) and global (inter-procedural)
* data flow analysis: deciding whether data can flow from a _source_ to a
* _sink_.
*
* Unless configured otherwise, _flow_ means that the exact value of
* the source may reach the sink. We do not track flow across pointer
* dereferences or array indexing. To track these types of flow, where the
* exact value may not be preserved, import
* `experimental.dataflow.TaintTracking`.
*
* To use global (interprocedural) data flow through this copy of the
* library, extend the class `DataFlow2::Configuration` as documented on that
* class. To use local (intraprocedural) data flow, call `DataFlow::localFlow`
* or `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
*/
import python
/**
* Provides classes for performing local (intra-procedural) and
* global (inter-procedural) data flow analyses.
*
* The members of this module are exactly those of
* `experimental.dataflow.internal.DataFlowImpl2`, made available under the
* `DataFlow2::` prefix by the import below.
*/
module DataFlow2 {
import experimental.dataflow.internal.DataFlowImpl2
}

View File

@@ -0,0 +1,19 @@
/**
* Provides classes for performing local (intra-procedural) and
* global (inter-procedural) taint-tracking analyses.
*
* To use global (interprocedural) taint tracking, extend the class
* `TaintTracking::Configuration` as documented on that class. To use local
* (intraprocedural) taint tracking, call `TaintTracking::localTaint` or
* `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`.
*/
import python
/**
* Provides classes for performing local (intra-procedural) and
* global (inter-procedural) taint-tracking analyses.
*
* The members of this module are exactly those of
* `experimental.dataflow.internal.tainttracking1.TaintTrackingImpl`, made
* available under the `TaintTracking::` prefix by the import below.
*/
module TaintTracking {
import experimental.dataflow.internal.tainttracking1.TaintTrackingImpl
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,812 @@
private import DataFlowImplSpecific::Private
private import DataFlowImplSpecific::Public
import Cached
cached
private module Cached {
/**
 * Holds if some viable dispatch target of `call` has `p` as its parameter at
 * position `i`. Position `-1` is used for the instance parameter.
 */
pragma[nomagic]
private predicate viableParam(DataFlowCall call, int i, ParameterNode p) {
  exists(DataFlowCallable target |
    target = viableCallable(call) and
    p.isParameterOf(target, i)
  )
}
/**
 * Holds if `call` may pass `arg` to parameter `p`, taking virtual dispatch
 * into account. The argument and the parameter must sit at the same position
 * and have compatible types.
 */
cached
predicate viableParamArg(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
  // the position is only needed to pair up argument and parameter
  exists(int pos |
    arg.argumentOf(call, pos) and
    viableParam(call, pos, p)
  ) and
  compatibleTypes(getNodeType(arg), getNodeType(p))
}
/**
 * Gets a return position of kind `kind` whose callable is a viable dispatch
 * target of `call`.
 */
pragma[nomagic]
private ReturnPosition viableReturnPos(DataFlowCall call, ReturnKindExt kind) {
  result.getKind() = kind and
  result.getCallable() = viableCallable(call)
}
/**
 * Holds if a value at return position `pos` may be returned to `out` by way
 * of `call`, taking virtual dispatch into account.
 */
cached
predicate viableReturnPosOut(DataFlowCall call, ReturnPosition pos, Node out) {
  exists(ReturnKindExt k | out = k.getAnOutNode(call) | pos = viableReturnPos(call, k))
}
/** Provides predicates for calculating flow-through summaries. */
private module FlowThrough {
/**
* The first flow-through approximation:
*
* - Input access paths are abstracted with a Boolean parameter
* that indicates (non-)emptiness.
*/
private module Cand {
/**
 * Holds if `p` can flow to `node` within the same callable using only
 * value-preserving steps.
 *
 * `read` is `true` when it is contents of `p`, rather than `p` itself, that
 * can flow to `node`. At most one read is permitted along the way.
 */
pragma[nomagic]
private predicate parameterValueFlowCand(ParameterNode p, Node node, boolean read) {
  // base case: the parameter itself, with nothing read yet
  read = false and
  node = p
  or
  // local flow: `read` is carried over unchanged
  exists(Node prev |
    simpleLocalFlowStep(prev, node) and
    parameterValueFlowCand(p, prev, read)
  )
  or
  // read: only possible when nothing has been read before
  read = true and
  exists(Node prev |
    readStep(prev, _, node) and
    parameterValueFlowCand(p, prev, false)
  )
  or
  exists(ArgumentNode a |
    // flow through: no prior read
    parameterValueFlowArgCand(p, a, false) and
    argumentValueFlowsThroughCand(a, node, read)
    or
    // flow through: no read inside method
    parameterValueFlowArgCand(p, a, read) and
    argumentValueFlowsThroughCand(a, node, false)
  )
}
/**
* Holds if `parameterValueFlowCand(p, arg, read)` holds for the argument
* node `arg`; that is, the candidate flow relation restricted to nodes that
* are arguments.
*/
pragma[nomagic]
private predicate parameterValueFlowArgCand(ParameterNode p, ArgumentNode arg, boolean read) {
parameterValueFlowCand(p, arg, read)
}
/**
 * Holds if `p` can flow, without any read, to the pre-update node of the
 * post-update node `n`, according to the candidate relation.
 */
pragma[nomagic]
predicate parameterValueFlowsToPreUpdateCand(ParameterNode p, PostUpdateNode n) {
  exists(Node pre | pre = n.getPreUpdateNode() | parameterValueFlowCand(p, pre, false))
}
/**
 * Holds if `p` can flow to a return node of kind `kind` in the same callable
 * using only value-preserving steps, not taking call contexts into account.
 *
 * `read` indicates whether it is contents of `p` that can flow to the return
 * node.
 */
predicate parameterValueFlowReturnCand(ParameterNode p, ReturnKind kind, boolean read) {
  exists(ReturnNode r | r.getKind() = kind | parameterValueFlowCand(p, r, read))
}
/**
 * Holds if `arg` is an argument of `call` for a parameter that can flow to a
 * return node of kind `kind`, with `read` indicating whether contents were
 * read on the way (candidate relation).
 */
pragma[nomagic]
private predicate argumentValueFlowsThroughCand0(
  DataFlowCall call, ArgumentNode arg, ReturnKind kind, boolean read
) {
  exists(ParameterNode target |
    parameterValueFlowReturnCand(target, kind, read) and
    viableParamArg(call, target, arg)
  )
}
/**
 * Holds if `arg` flows to `out` through a call using only value-preserving
 * steps, not taking call contexts into account.
 *
 * `read` indicates whether it is contents of `arg` that can flow to `out`.
 */
predicate argumentValueFlowsThroughCand(ArgumentNode arg, Node out, boolean read) {
  exists(DataFlowCall c, ReturnKind k |
    out = getAnOutNode(c, k) and
    argumentValueFlowsThroughCand0(c, arg, k, read)
  )
}
/**
 * Holds if `n` is reachable from `p` in the candidate relation, and `p` can
 * additionally reach either a return node or the pre-update node of some
 * post-update node.
 */
predicate cand(ParameterNode p, Node n) {
  (
    parameterValueFlowsToPreUpdateCand(p, _)
    or
    parameterValueFlowReturnCand(p, _, _)
  ) and
  parameterValueFlowCand(p, n, _)
}
}
/**
* The final flow-through calculation:
*
* - Calculated flow is either value-preserving (`read = TReadStepTypesNone()`)
* or summarized as a single read step with before and after types recorded
* in the `ReadStepTypesOption` parameter.
* - Types are checked using the `compatibleTypes()` relation.
*/
private module Final {
/**
* Holds if `p` can flow to `node` in the same callable using only
* value-preserving steps and possibly a single read step, not taking
* call contexts into account.
*
* If a read step was taken, then `read` captures the `Content`, the
* container type, and the content type.
*
* This predicate wraps `parameterValueFlow0` and adds a type-compatibility
* check whenever `node` is a `CastingNode`; any other node is accepted
* without a type check.
*/
predicate parameterValueFlow(ParameterNode p, Node node, ReadStepTypesOption read) {
parameterValueFlow0(p, node, read) and
if node instanceof CastingNode
then
// normal flow through: nothing was read, so the type of `p` itself must
// be compatible with the type of `node`
read = TReadStepTypesNone() and
compatibleTypes(getNodeType(p), getNodeType(node))
or
// getter: the type of the content that was read must be compatible with
// the type of `node`
compatibleTypes(read.getContentType(), getNodeType(node))
else any()
}
/**
 * The recursive core of `parameterValueFlow`, before the type check performed
 * at `CastingNode`s is applied to `node`.
 */
pragma[nomagic]
private predicate parameterValueFlow0(ParameterNode p, Node node, ReadStepTypesOption read) {
  // base case: only parameters accepted by the candidate relation are tracked
  read = TReadStepTypesNone() and
  node = p and
  Cand::cand(p, _)
  or
  exists(Node prev |
    // local flow
    simpleLocalFlowStep(prev, node) and
    parameterValueFlow(p, prev, read)
    or
    // read: nothing may have been read before, and the types involved in the
    // read step are recorded in `read`
    parameterValueFlow(p, prev, TReadStepTypesNone()) and
    readStepWithTypes(prev, read.getContainerType(), read.getContent(), node,
      read.getContentType()) and
    Cand::parameterValueFlowReturnCand(p, _, true) and
    compatibleTypes(getNodeType(p), read.getContainerType())
  )
  or
  // flow through a call
  parameterValueFlow0_0(TReadStepTypesNone(), p, node, read)
}
/**
 * Holds if `p` flows to `node` by flowing through a call, where the single
 * permitted read happens either inside the callee or before the call, but not
 * both. `mustBeNone` is the option passed for the side without a read.
 */
pragma[nomagic]
private predicate parameterValueFlow0_0(
  ReadStepTypesOption mustBeNone, ParameterNode p, Node node, ReadStepTypesOption read
) {
  exists(ArgumentNode a |
    // flow through: no prior read
    parameterValueFlowArg(p, a, mustBeNone) and
    argumentValueFlowsThrough(a, read, node)
    or
    // flow through: no read inside method
    parameterValueFlowArg(p, a, read) and
    argumentValueFlowsThrough(a, mustBeNone, node)
  )
}
/**
 * Holds if `p` flows to the argument `arg` with read state `read`, and `arg`
 * is an argument that flows through some call in the candidate relation.
 */
pragma[nomagic]
private predicate parameterValueFlowArg(
  ParameterNode p, ArgumentNode arg, ReadStepTypesOption read
) {
  Cand::argumentValueFlowsThroughCand(arg, _, _) and
  parameterValueFlow(p, arg, read)
}
/**
 * Holds if `arg` is an argument of `call` for a parameter that can flow to a
 * return node of kind `kind` with read state `read`.
 */
pragma[nomagic]
private predicate argumentValueFlowsThrough0(
  DataFlowCall call, ArgumentNode arg, ReturnKind kind, ReadStepTypesOption read
) {
  exists(ParameterNode target |
    parameterValueFlowReturn(target, kind, read) and
    viableParamArg(call, target, arg)
  )
}
/**
 * Holds if `arg` flows to `out` through a call using only value-preserving
 * steps and possibly a single read step, not taking call contexts into
 * account.
 *
 * If a read step was taken, then `read` captures the `Content`, the
 * container type, and the content type.
 */
pragma[nomagic]
predicate argumentValueFlowsThrough(ArgumentNode arg, ReadStepTypesOption read, Node out) {
  exists(DataFlowCall c, ReturnKind k |
    out = getAnOutNode(c, k) and
    argumentValueFlowsThrough0(c, arg, k, read)
  ) and
  (
    // normal flow through
    read = TReadStepTypesNone() and
    compatibleTypes(getNodeType(arg), getNodeType(out))
    or
    // getter
    compatibleTypes(getNodeType(arg), read.getContainerType()) and
    compatibleTypes(read.getContentType(), getNodeType(out))
  )
}
/**
 * Holds if `arg` flows to `out` through a call using only value-preserving
 * steps and a single read step of content `c`, not taking call contexts into
 * account, thus representing a getter-step.
 */
predicate getterStep(ArgumentNode arg, Content c, Node out) {
  exists(ReadStepTypesOption taken |
    taken = TReadStepTypesSome(_, c, _) and
    argumentValueFlowsThrough(arg, taken, out)
  )
}
/**
 * Holds if `p` can flow to a return node of kind `kind` in the same callable
 * using only value-preserving steps and possibly a single read step.
 *
 * If a read step was taken, then `read` captures the `Content`, the
 * container type, and the content type.
 */
private predicate parameterValueFlowReturn(
  ParameterNode p, ReturnKind kind, ReadStepTypesOption read
) {
  exists(ReturnNode r | r.getKind() = kind | parameterValueFlow(p, r, read))
}
}
import Final
}
import FlowThrough
cached
private module DispatchWithCallContext {
/**
 * Holds if the call context `ctx` reduces the set of viable run-time
 * dispatch targets of call `call` in `c`.
 *
 * That is the case when `ctx` may dispatch to `c` and there are strictly
 * fewer targets of `call` in context `ctx` than there are without a context.
 */
cached
predicate reducedViableImplInCallContext(DataFlowCall call, DataFlowCallable c, DataFlowCall ctx) {
  mayBenefitFromCallContext(call, c) and
  c = viableCallable(ctx) and
  // `strictcount` requires `call` to have at least one target overall
  count(viableImplInCallContext(call, ctx)) < strictcount(viableCallable(call))
}
/**
 * Gets a viable run-time dispatch target for the call `call` in the context
 * `ctx`. This is restricted to those calls for which a context makes a
 * difference.
 */
cached
DataFlowCallable prunedViableImplInCallContext(DataFlowCall call, DataFlowCall ctx) {
  reducedViableImplInCallContext(call, _, ctx) and
  result = viableImplInCallContext(call, ctx)
}
/**
 * Holds if flow returning from callable `c` to call `call` might return
 * further and if this path restricts the set of call sites that can be
 * returned to.
 *
 * This compares the number of contexts in which `call` dispatches to `c`
 * with the number of calls that may target the enclosing callable of `call`.
 */
cached
predicate reducedViableImplInReturn(DataFlowCallable c, DataFlowCall call) {
  mayBenefitFromCallContext(call, _) and
  c = viableCallable(call) and
  count(DataFlowCall ctx | c = viableImplInCallContext(call, ctx)) <
    strictcount(DataFlowCall caller | viableCallable(caller) = call.getEnclosingCallable())
}
/**
 * Gets a viable run-time dispatch target for the call `call` in the context
 * `ctx`. This is restricted to those calls and results for which the return
 * flow from the result to `call` restricts the possible context `ctx`.
 */
cached
DataFlowCallable prunedViableImplInCallContextReverse(DataFlowCall call, DataFlowCall ctx) {
  reducedViableImplInReturn(result, call) and
  result = viableImplInCallContext(call, ctx)
}
}
import DispatchWithCallContext
/**
 * Holds if `p` can flow to the pre-update node associated with post-update
 * node `n`, in the same callable, using only value-preserving steps (that
 * is, without any read step).
 */
cached
predicate parameterValueFlowsToPreUpdate(ParameterNode p, PostUpdateNode n) {
  exists(Node pre | pre = n.getPreUpdateNode() |
    parameterValueFlow(p, pre, TReadStepTypesNone())
  )
}
/**
 * Holds if there is a store of content `c` from `node1` into `node2`, where
 * `contentType` and `containerType` are the types of the stored value and of
 * the container.
 *
 * Besides direct store steps (kept only for content that is also read
 * somewhere), this covers reverse steps between post-update nodes whose
 * pre-update nodes are related by a read step or by a getter call.
 */
private predicate store(
  Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType
) {
  // direct store
  storeStep(node1, c, node2) and
  readStep(_, c, _) and
  containerType = getNodeType(node2) and
  contentType = getNodeType(node1)
  or
  // reverse read between the pre-update nodes of two post-update nodes
  exists(Node pre1, Node pre2 |
    pre1 = node1.(PostUpdateNode).getPreUpdateNode() and
    pre2 = node2.(PostUpdateNode).getPreUpdateNode()
  |
    readStep(pre2, c, pre1) and
    containerType = getNodeType(pre2) and
    contentType = getNodeType(pre1)
    or
    argumentValueFlowsThrough(pre2, TReadStepTypesSome(containerType, c, contentType), pre1)
  )
}
/**
 * Holds if data can flow from `node1` to `node2` via a direct assignment to
 * the content described by `tc`, where `contentType` is the type of the
 * stored value.
 *
 * This includes reverse steps through reads when the result of the read has
 * been stored into, in order to handle cases like `x.f1.f2 = y`.
 */
cached
predicate store(Node node1, TypedContent tc, Node node2, DataFlowType contentType) {
  exists(Content c, DataFlowType containerType |
    c = tc.getContent() and
    containerType = tc.getContainerType()
  |
    store(node1, c, node2, contentType, containerType)
  )
}
/**
 * Holds if the call context `call` either improves virtual dispatch in
 * `callable` or if it allows us to prune unreachable nodes in `callable`.
 */
cached
predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
  // some node of `callable` is unreachable when entered through `call`
  exists(Node n |
    isUnreachableInCall(n, call) and
    callable = n.getEnclosingCallable()
  )
  or
  // `call` narrows the dispatch targets of some call inside `callable`
  reducedViableImplInCallContext(_, callable, call)
}
/**
* The algebraic type underlying `CallContext`; the meaning of each branch is
* described on that class.
*/
cached
newtype TCallContext =
TAnyCallContext() or
// only calls accepted by `recordDataFlowCallSite` get a specific context
TSpecificCall(DataFlowCall call) { recordDataFlowCallSite(call, _) } or
TSomeCall() or
// only (callable, call) pairs accepted by `reducedViableImplInReturn`
TReturn(DataFlowCallable c, DataFlowCall call) { reducedViableImplInReturn(c, call) }
/**
* The algebraic type underlying `ReturnPosition`: a callable paired with a
* return kind, restricted to pairs for which the callable actually contains
* a `ReturnNodeExt` of that kind.
*/
cached
newtype TReturnPosition =
TReturnPosition0(DataFlowCallable c, ReturnKindExt kind) {
exists(ReturnNodeExt ret |
c = returnNodeGetEnclosingCallable(ret) and
kind = ret.getKind()
)
}
/**
* The algebraic type underlying `LocalCallContext`: either no restriction,
* or a specific call for which some node is unreachable according to
* `isUnreachableInCall`.
*/
cached
newtype TLocalFlowCallContext =
TAnyLocalCall() or
TSpecificLocalCall(DataFlowCall call) { isUnreachableInCall(_, call) }
/**
* The algebraic type underlying `ReturnKindExt`: either an ordinary return
* kind, or an update of the parameter at position `pos`, where `pos` is a
* position at which some parameter node exists.
*/
cached
newtype TReturnKindExt =
TValueReturn(ReturnKind kind) or
TParamUpdate(int pos) { exists(ParameterNode p | p.isParameterOf(_, pos)) }
/**
* The algebraic type underlying `BooleanOption`: either no value, or one of
* the two Boolean values.
*/
cached
newtype TBooleanOption =
TBooleanNone() or
TBooleanSome(boolean b) { b = true or b = false }
/**
* The algebraic type underlying `TypedContent`: a content `c` paired with a
* container type `t`, restricted to pairs that occur in the 5-ary `store`
* relation.
*/
cached
newtype TTypedContent = MkTypedContent(Content c, DataFlowType t) { store(_, c, _, _, t) }
/**
* The algebraic type underlying `AccessPathFront`: either a nil front that
* carries a type, or a head that carries a typed content.
*/
cached
newtype TAccessPathFront =
TFrontNil(DataFlowType t) or
TFrontHead(TypedContent tc)
/**
* The algebraic type underlying `AccessPathFrontOption`: either no value, or
* an access path front.
*/
cached
newtype TAccessPathFrontOption =
TAccessPathFrontNone() or
TAccessPathFrontSome(AccessPathFront apf)
}
/**
 * A `Node` at which a cast can occur such that the type should be checked.
 *
 * This covers parameters, cast nodes, extended out nodes, and the targets of
 * read steps.
 */
class CastingNode extends Node {
  CastingNode() {
    // For reads, `x.f`, we want to check that the tracked type after the read
    // (which is obtained by popping the head of the access path stack) is
    // compatible with the type of `x.f`.
    readStep(_, _, this)
    or
    this instanceof OutNodeExt
    or
    this instanceof CastNode
    or
    this instanceof ParameterNode
  }
}
/**
 * Holds if there is a read step of content `c` from `n1` to `n2`, where
 * `container` is the type of `n1` and `content` is the type of `n2`.
 */
private predicate readStepWithTypes(
  Node n1, DataFlowType container, Content c, Node n2, DataFlowType content
) {
  content = getNodeType(n2) and
  container = getNodeType(n1) and
  readStep(n1, c, n2)
}
/**
* The algebraic type underlying `ReadStepTypesOption`: either no read step,
* or the container type, content, and content type of a read step that
* exists according to `readStepWithTypes`.
*/
private newtype TReadStepTypesOption =
TReadStepTypesNone() or
TReadStepTypesSome(DataFlowType container, Content c, DataFlowType content) {
readStepWithTypes(_, container, c, _, content)
}
/**
 * An optional summary of a read step: its container type, its content, and
 * its content type. The getters have no result for the "none" value.
 */
private class ReadStepTypesOption extends TReadStepTypesOption {
  /** Holds if this option describes a read step. */
  predicate isSome() { this = TReadStepTypesSome(_, _, _) }

  /** Gets the type of the container that was read from, if any. */
  DataFlowType getContainerType() { this = TReadStepTypesSome(result, _, _) }

  /** Gets the content that was read, if any. */
  Content getContent() { this = TReadStepTypesSome(_, result, _) }

  /** Gets the type of the value that was read, if any. */
  DataFlowType getContentType() { this = TReadStepTypesSome(_, _, result) }

  /** Gets a textual representation of this option. */
  string toString() {
    this.isSome() and result = "Some(..)"
    or
    not this.isSome() and result = "None()"
  }
}
/**
* A call context to restrict the targets of virtual dispatch, prune local flow,
* and match the call sites of flow into a method with flow out of a method.
*
* There are four cases:
* - `TAnyCallContext()` : No restrictions on method flow.
* - `TSpecificCall(DataFlowCall call)` : Flow entered through the
* given `call`. This call improves the set of viable
* dispatch targets for at least one method call in the current callable
* or helps prune unreachable nodes in the current callable.
* - `TSomeCall()` : Flow entered through a parameter. The
* originating call does not improve the set of dispatch targets for any
* method call in the current callable and was therefore not recorded.
* - `TReturn(DataFlowCallable c, DataFlowCall call)` : Flow reached `call`
* from `c` and this dispatch target of `call` implies a reduced set of
* dispatch origins to which data may flow if it should reach a `return`
* statement.
*/
abstract class CallContext extends TCallContext {
/** Gets a textual representation of this call context. */
abstract string toString();
/** Holds if this call context is relevant for `callable`. */
abstract predicate relevantFor(DataFlowCallable callable);
}
/** The call context that places no restrictions on flow; it is relevant for every callable. */
class CallContextAny extends CallContext, TAnyCallContext {
override string toString() { result = "CcAny" }
override predicate relevantFor(DataFlowCallable callable) { any() }
}
/**
* A call context for flow that entered through a call; the visible subclasses
* are `CallContextSpecificCall` and `CallContextSomeCall`.
*/
abstract class CallContextCall extends CallContext { }
/** A call context recording the specific call through which flow entered. */
class CallContextSpecificCall extends CallContextCall, TSpecificCall {
  /** Gets the call through which flow entered. */
  DataFlowCall getCall() { this = TSpecificCall(result) }

  override string toString() { result = "CcCall(" + this.getCall() + ")" }

  override predicate relevantFor(DataFlowCallable callable) {
    exists(DataFlowCall entry | this = TSpecificCall(entry) |
      recordDataFlowCallSite(entry, callable)
    )
  }
}
/**
 * The call context for flow that entered through some unrecorded call. It is
 * relevant for every callable that encloses at least one parameter node.
 */
class CallContextSomeCall extends CallContextCall, TSomeCall {
  override string toString() { result = "CcSomeCall" }

  override predicate relevantFor(DataFlowCallable callable) {
    callable = any(ParameterNode p).getEnclosingCallable()
  }
}
/**
 * A call context for flow that has returned out of a call. It is relevant for
 * the callable enclosing the recorded call.
 */
class CallContextReturn extends CallContext, TReturn {
  override string toString() {
    exists(DataFlowCall site | this = TReturn(_, site) | result = "CcReturn(" + site + ")")
  }

  override predicate relevantFor(DataFlowCallable callable) {
    exists(DataFlowCall site |
      callable = site.getEnclosingCallable() and
      this = TReturn(_, site)
    )
  }
}
/**
* A call context that is relevant for pruning local flow.
*/
abstract class LocalCallContext extends TLocalFlowCallContext {
/** Gets a textual representation of this local call context. */
abstract string toString();
/** Holds if this call context is relevant for `callable`. */
abstract predicate relevantFor(DataFlowCallable callable);
}
/** The local call context that places no restrictions; it is relevant for every callable. */
class LocalCallContextAny extends LocalCallContext, TAnyLocalCall {
override string toString() { result = "LocalCcAny" }
override predicate relevantFor(DataFlowCallable callable) { any() }
}
/**
 * A local call context recording a specific call. It is relevant for the
 * callables in which some node is unreachable when entered through that call.
 */
class LocalCallContextSpecificCall extends LocalCallContext, TSpecificLocalCall {
  DataFlowCall call;

  LocalCallContextSpecificCall() { this = TSpecificLocalCall(call) }

  /** Gets the recorded call. */
  DataFlowCall getCall() { result = call }

  override predicate relevantFor(DataFlowCallable callable) { relevantLocalCCtx(call, callable) }

  override string toString() { result = "LocalCcCall(" + call + ")" }
}
/**
 * Holds if `callable` encloses a node that is unreachable when `callable` is
 * entered through `call`.
 */
private predicate relevantLocalCCtx(DataFlowCall call, DataFlowCallable callable) {
  exists(Node n | isUnreachableInCall(n, call) | callable = n.getEnclosingCallable())
}
/**
* Gets the local call context given the call context and the callable that
* the contexts apply to.
*
* `ctx` must be relevant for `callable`. The result records a specific call
* only when `ctx` is a `CallContextSpecificCall` whose call makes some node of
* `callable` unreachable; in every other case the result is the unrestricted
* local context.
*/
LocalCallContext getLocalCallContext(CallContext ctx, DataFlowCallable callable) {
ctx.relevantFor(callable) and
// the cast has no result for contexts other than `CallContextSpecificCall`,
// so the condition fails for those and the `else` branch is taken
if relevantLocalCCtx(ctx.(CallContextSpecificCall).getCall(), callable)
then result.(LocalCallContextSpecificCall).getCall() = ctx.(CallContextSpecificCall).getCall()
else result instanceof LocalCallContextAny
}
/**
 * A node from which flow can return to the caller. This is either a regular
 * `ReturnNode` or a `PostUpdateNode` corresponding to the value of a
 * parameter.
 */
class ReturnNodeExt extends Node {
  ReturnNodeExt() {
    parameterValueFlowsToPreUpdate(_, this)
    or
    this instanceof ReturnNode
  }

  /** Gets the kind of this returned value. */
  ReturnKindExt getKind() {
    // parameter update: the kind records the position of the parameter
    exists(ParameterNode param, int index |
      result = TParamUpdate(index) and
      param.isParameterOf(_, index) and
      parameterValueFlowsToPreUpdate(param, this)
    )
    or
    // ordinary return: wrap the kind of the underlying return node
    result = TValueReturn(this.(ReturnNode).getKind())
  }
}
/**
 * A node to which data can flow from a call. Either an ordinary out node
 * or a post-update node associated with a call argument.
 */
class OutNodeExt extends Node {
  OutNodeExt() {
    exists(PostUpdateNode post | post = this | post.getPreUpdateNode() instanceof ArgumentNode)
    or
    this instanceof OutNode
  }
}
/**
* An extended return kind. A return kind describes how data can be returned
* from a callable. This can either be through a returned value or an updated
* parameter.
*
* The visible subclasses are `ValueReturnKind` and `ParamUpdateReturnKind`.
*/
abstract class ReturnKindExt extends TReturnKindExt {
/** Gets a textual representation of this return kind. */
abstract string toString();
/** Gets a node corresponding to data flow out of `call`. */
abstract OutNodeExt getAnOutNode(DataFlowCall call);
}
/** An extended return kind that wraps an ordinary `ReturnKind`. */
class ValueReturnKind extends ReturnKindExt, TValueReturn {
  private ReturnKind kind;

  ValueReturnKind() { this = TValueReturn(kind) }

  /** Gets the wrapped return kind. */
  ReturnKind getKind() { result = kind }

  override OutNodeExt getAnOutNode(DataFlowCall call) {
    // two-argument form: the predicate on calls and ordinary return kinds
    exists(ReturnKind k | k = this.getKind() | result = getAnOutNode(call, k))
  }

  override string toString() { result = kind.toString() }
}
/** An extended return kind for data returned through an updated parameter. */
class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate {
  private int pos;

  ParamUpdateReturnKind() { this = TParamUpdate(pos) }

  /** Gets the position of the updated parameter. */
  int getPosition() { result = pos }

  override OutNodeExt getAnOutNode(DataFlowCall call) {
    // the post-update node of the argument at the same position
    exists(ArgumentNode a |
      a.argumentOf(call, this.getPosition()) and
      a = result.(PostUpdateNode).getPreUpdateNode()
    )
  }

  override string toString() { result = "param update " + pos }
}
/** A callable paired with a return kind that is relevant for it. */
class ReturnPosition extends TReturnPosition0 {
  private DataFlowCallable c;
  private ReturnKindExt kind;

  ReturnPosition() { this = TReturnPosition0(c, kind) }

  /** Gets a textual representation of this return position. */
  string toString() { result = "[" + kind + "] " + c }

  /** Gets the return kind. */
  ReturnKindExt getKind() { result = kind }

  /** Gets the callable. */
  DataFlowCallable getCallable() { result = c }
}
/** Gets the callable that encloses the extended return node `ret`. */
pragma[noinline]
private DataFlowCallable returnNodeGetEnclosingCallable(ReturnNodeExt ret) {
result = ret.getEnclosingCallable()
}
/**
 * Gets the return position of kind `kind` in the callable that encloses
 * `ret`. The kind of `ret` itself is not consulted here.
 */
pragma[noinline]
private ReturnPosition getReturnPosition0(ReturnNodeExt ret, ReturnKindExt kind) {
  result.getKind() = kind and
  returnNodeGetEnclosingCallable(ret) = result.getCallable()
}
/**
 * Gets the return position of `ret`: its enclosing callable paired with its
 * own return kind.
 */
pragma[noinline]
ReturnPosition getReturnPosition(ReturnNodeExt ret) {
  exists(ReturnKindExt k | k = ret.getKind() | result = getReturnPosition0(ret, k))
}
/**
 * Holds if flow that is in `callable` with call context `cc` may return to
 * `call`.
 *
 * With an unrestricted context, any call that may target `callable`
 * qualifies. With a return context, `call` is restricted by
 * `prunedViableImplInCallContextReverse`.
 */
bindingset[cc, callable]
predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall call) {
  callable = viableCallable(call) and
  cc instanceof CallContextAny
  or
  exists(DataFlowCallable returnedFrom, DataFlowCall inner |
    cc = TReturn(returnedFrom, inner) and
    inner.getEnclosingCallable() = callable and
    returnedFrom = prunedViableImplInCallContextReverse(inner, call)
  )
}
/**
 * Gets a dispatch target of `call` under call context `cc`.
 *
 * Only a specific call context that narrows the targets of `call` restricts
 * the result; every other context yields all viable targets.
 */
bindingset[call, cc]
DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
  exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
    // the context narrows dispatch: use the pruned targets
    reducedViableImplInCallContext(call, _, ctx) and
    result = prunedViableImplInCallContext(call, ctx)
    or
    // the context does not help: fall back to all targets
    not reducedViableImplInCallContext(call, _, ctx) and
    result = viableCallable(call)
  )
  or
  (
    cc instanceof CallContextSomeCall or
    cc instanceof CallContextAny or
    cc instanceof CallContextReturn
  ) and
  result = viableCallable(call)
}
/** An alias for the three-argument `readStep` predicate. */
predicate read = readStep/3;
/** An optional Boolean value. */
class BooleanOption extends TBooleanOption {
  /** Gets a textual representation: `<none>`, or the wrapped Boolean value. */
  string toString() {
    exists(boolean b | this = TBooleanSome(b) | result = b.toString())
    or
    result = "<none>" and this = TBooleanNone()
  }
}
/** A content paired with the type of an object that contains it. */
class TypedContent extends MkTypedContent {
  private Content c;
  private DataFlowType t;

  TypedContent() { this = MkTypedContent(c, t) }

  /** Gets a textual representation of this content. */
  string toString() { result = c.toString() }

  /** Gets the container type. */
  DataFlowType getContainerType() { result = t }

  /** Gets the content. */
  Content getContent() { result = c }
}
/**
 * The front of an access path. This is either a head or a nil.
 */
abstract class AccessPathFront extends TAccessPathFront {
  /** Gets a textual representation of this access path front. */
  abstract string toString();

  /** Gets the type associated with this access path front. */
  abstract DataFlowType getType();

  /** Gets `true` if this front is a head, and `false` if it is a nil. */
  abstract boolean toBoolNonEmpty();

  /** Holds if this front is a head with typed content `tc`. */
  predicate headUsesContent(TypedContent tc) { this = TFrontHead(tc) }

  /** Holds if this front is a head whose content is cleared at node `n`. */
  predicate isClearedAt(Node n) {
    exists(TypedContent head | this.headUsesContent(head) |
      clearsContent(n, head.getContent())
    )
  }
}
/** An empty access path front, carrying only a type. */
class AccessPathFrontNil extends AccessPathFront, TFrontNil {
private DataFlowType t;
AccessPathFrontNil() { this = TFrontNil(t) }
// rendered through `ppReprType`, which is defined outside this file
override string toString() { result = ppReprType(t) }
override DataFlowType getType() { result = t }
override boolean toBoolNonEmpty() { result = false }
}
/** A non-empty access path front, carrying the typed content at its head. */
class AccessPathFrontHead extends AccessPathFront, TFrontHead {
private TypedContent tc;
AccessPathFrontHead() { this = TFrontHead(tc) }
override string toString() { result = tc.toString() }
// the type of a head is the container type of its typed content
override DataFlowType getType() { result = tc.getContainerType() }
override boolean toBoolNonEmpty() { result = true }
}
/** An optional access path front. */
class AccessPathFrontOption extends TAccessPathFrontOption {
  /** Gets a textual representation: `<none>`, or that of the wrapped front. */
  string toString() {
    exists(AccessPathFront front | this = TAccessPathFrontSome(front) |
      result = front.toString()
    )
    or
    result = "<none>" and this = TAccessPathFrontNone()
  }
}

View File

@@ -0,0 +1,166 @@
/**
* Provides consistency queries for checking invariants in the language-specific
* data-flow classes and predicates.
*/
private import DataFlowImplSpecific::Private
private import DataFlowImplSpecific::Public
private import tainttracking1.TaintTrackingParameter::Private
private import tainttracking1.TaintTrackingParameter::Public
module Consistency {
  /** A node that takes part in at least one of the flow relations checked below. */
  private class RelevantNode extends Node {
    RelevantNode() {
      // call-related nodes
      this instanceof ParameterNode or
      this instanceof ArgumentNode or
      this instanceof ReturnNode or
      this = getAnOutNode(_, _) or
      // endpoints of local and jump steps
      simpleLocalFlowStep(_, this) or
      simpleLocalFlowStep(this, _) or
      jumpStep(_, this) or
      jumpStep(this, _) or
      // endpoints of store and read steps
      storeStep(_, _, this) or
      storeStep(this, _, _) or
      readStep(_, _, this) or
      readStep(this, _, _) or
      // endpoints of default taint steps
      defaultAdditionalTaintStep(_, this) or
      defaultAdditionalTaintStep(this, _)
    }
  }

  /** Reports relevant nodes that do not have exactly one enclosing callable. */
  query predicate uniqueEnclosingCallable(Node n, string msg) {
    n instanceof RelevantNode and
    exists(int total | total = count(n.getEnclosingCallable()) |
      total != 1 and
      msg = "Node should have one enclosing callable but has " + total + "."
    )
  }

  /** Reports relevant nodes that do not have exactly one type. */
  query predicate uniqueType(Node n, string msg) {
    n instanceof RelevantNode and
    exists(int total | total = count(getNodeType(n)) |
      total != 1 and
      msg = "Node should have one type but has " + total + "."
    )
  }

  /** Reports nodes that do not have exactly one location. */
  query predicate uniqueNodeLocation(Node n, string msg) {
    exists(int total |
      total =
        count(string file, int sl, int sc, int el, int ec |
          n.hasLocationInfo(file, sl, sc, el, ec)
        )
    |
      total != 1 and
      msg = "Node should have one location but has " + total + "."
    )
  }

  /** Reports how many nodes have no location, if there are any. */
  query predicate missingLocation(string msg) {
    exists(int total |
      total =
        strictcount(Node n |
          not exists(string file, int sl, int sc, int el, int ec |
            n.hasLocationInfo(file, sl, sc, el, ec)
          )
        )
    |
      msg = "Nodes without location: " + total
    )
  }

  /** Reports nodes that do not have exactly one `toString` result. */
  query predicate uniqueNodeToString(Node n, string msg) {
    exists(int total | total = count(n.toString()) |
      total != 1 and
      msg = "Node should have one toString but has " + total + "."
    )
  }

  /** Reports how many nodes have no `toString` result, if there are any. */
  query predicate missingToString(string msg) {
    exists(int total | total = strictcount(Node n | not exists(n.toString())) |
      msg = "Nodes without toString: " + total
    )
  }

  /** Reports parameters whose declaring callable differs from their enclosing callable. */
  query predicate parameterCallable(ParameterNode p, string msg) {
    msg = "Callable mismatch for parameter." and
    exists(DataFlowCallable owner |
      owner != p.getEnclosingCallable() and
      p.isParameterOf(owner, _)
    )
  }

  /** Reports local flow steps whose endpoints are in different callables. */
  query predicate localFlowIsLocal(Node n1, Node n2, string msg) {
    msg = "Local flow step does not preserve enclosing callable." and
    simpleLocalFlowStep(n1, n2) and
    n1.getEnclosingCallable() != n2.getEnclosingCallable()
  }

  /** Gets a type that is the type of some node. */
  private DataFlowType typeRepr() { result = getNodeType(_) }

  /** Reports node types that are not compatible with themselves. */
  query predicate compatibleTypesReflexive(DataFlowType t, string msg) {
    msg = "Type compatibility predicate is not reflexive." and
    t = typeRepr() and
    not compatibleTypes(t, t)
  }

  /**
   * Reports nodes marked unreachable for a call that cannot target the
   * node's enclosing callable.
   */
  query predicate unreachableNodeCCtx(Node n, DataFlowCall call, string msg) {
    msg = "Call context for isUnreachableInCall is inconsistent with call graph." and
    isUnreachableInCall(n, call) and
    exists(DataFlowCallable enclosing | enclosing = n.getEnclosingCallable() |
      not viableCallable(call) = enclosing
    )
  }

  /** Reports out nodes and argument nodes that are not in the same callable as their call. */
  query predicate localCallNodes(DataFlowCall call, Node n, string msg) {
    n.getEnclosingCallable() != call.getEnclosingCallable() and
    (
      n.(ArgumentNode).argumentOf(call, _) and
      msg = "ArgumentNode and call does not share enclosing callable."
      or
      n = getAnOutNode(call, _) and
      msg = "OutNode and call does not share enclosing callable."
    )
  }

  /** Reports post-update nodes that are their own pre-update node. */
  query predicate postIsNotPre(PostUpdateNode n, string msg) {
    msg = "PostUpdateNode should not equal its pre-update node." and
    n = n.getPreUpdateNode()
  }

  /** Reports post-update nodes that do not have exactly one pre-update node. */
  query predicate postHasUniquePre(PostUpdateNode n, string msg) {
    exists(int total | total = count(n.getPreUpdateNode()) |
      total != 1 and
      msg = "PostUpdateNode should have one pre-update node but has " + total + "."
    )
  }

  /** Reports nodes that are the pre-update node of more than one post-update node. */
  query predicate uniquePostUpdate(Node n, string msg) {
    msg = "Node has multiple PostUpdateNodes." and
    strictcount(PostUpdateNode post | post.getPreUpdateNode() = n) > 1
  }

  /** Reports post-update nodes that are in a different callable than their pre-update node. */
  query predicate postIsInSameCallable(PostUpdateNode n, string msg) {
    msg = "PostUpdateNode does not share callable with its pre-update node." and
    n.getEnclosingCallable() != n.getPreUpdateNode().getEnclosingCallable()
  }

  /** Holds if `n` is the pre-update node of some post-update node. */
  private predicate hasPost(Node n) { n = any(PostUpdateNode post).getPreUpdateNode() }

  /**
   * Reports read-step origins without a post-update node whose read target
   * does have one.
   */
  query predicate reverseRead(Node n, string msg) {
    msg = "Origin of readStep is missing a PostUpdateNode." and
    not hasPost(n) and
    exists(Node target | readStep(n, _, target) | hasPost(target))
  }

  /** Reports store-step targets that are not post-update nodes. */
  query predicate storeIsPostUpdate(Node n, string msg) {
    msg = "Store targets should be PostUpdateNodes." and
    not n instanceof PostUpdateNode and
    storeStep(_, _, n)
  }

  /**
   * Reports argument nodes without a post-update node, unless they are
   * accepted by `isImmutableOrUnobservable`.
   */
  query predicate argHasPostUpdate(ArgumentNode n, string msg) {
    msg = "ArgumentNode is missing PostUpdateNode." and
    not isImmutableOrUnobservable(n) and
    not hasPost(n)
  }
}

View File

@@ -0,0 +1,12 @@
/**
* Provides Python-specific definitions for use in the data flow library.
*/
/**
* The language-specific definitions used internally by the data flow
* library; currently the contents of `DataFlowPrivate`.
*/
module Private {
import DataFlowPrivate
// NOTE(review): the import below is commented out - confirm whether
// `DataFlowDispatch` is meant to be enabled later or the line can go.
// import DataFlowDispatch
}
/**
* The language-specific definitions exposed by the data flow library: the
* contents of `DataFlowPublic` and `DataFlowUtil`.
*/
module Public {
import DataFlowPublic
import DataFlowUtil
}

View File

@@ -0,0 +1,280 @@
private import python
private import DataFlowPublic
//--------
// Data flow graph
//--------
//--------
// Nodes
//--------
/**
* A node associated with an object after an operation that might have
* changed its state.
*
* This can be either the argument to a callable after the callable returns
* (which might have mutated the argument), or the qualifier of a field after
* an update to the field.
*
* Nodes corresponding to AST elements, for example `ExprNode`, usually refer
* to the value before the update with the exception of `ObjectCreation`,
* which represents the value after the constructor has run.
*
* NOTE(review): `ObjectCreation` is not defined in the visible part of this
* file - confirm that the sentence above applies to Python.
*/
abstract class PostUpdateNode extends Node {
/** Gets the node before the state update. */
abstract Node getPreUpdateNode();
}
/** An expression as seen by the data flow library; an alias for `Expr`. */
class DataFlowExpr = Expr;
/**
 * Flow between ESSA variables.
 * This includes both local and global variables.
 * Flow comes from definitions, uses and refinements.
 */
// TODO: Consider constraining `nodeFrom` and `nodeTo` to be in the same scope.
module EssaFlow {
  /**
   * Holds if there is a single flow step from `nodeFrom` to `nodeTo` that is
   * justified by an ESSA definition, a use, a refinement or a phi function.
   */
  predicate essaFlowStep(Node nodeFrom, Node nodeTo) {
    // Assignment: in `x = f(42)` the step goes from the control flow node
    // for `f(42)` to the ESSA variable defined for `x`.
    exists(AssignmentDefinition assignment |
      assignment = nodeTo.(EssaNode).getVar().getDefinition() and
      assignment.getValue() = nodeFrom.(CfgNode).getNode()
    )
    or
    // With statement: in `with f(42) as x:` the step goes from the control
    // flow node for `f(42)` to the ESSA variable defined for `x`.
    // See `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`.
    exists(With withStmt, ControlFlowNode manager, ControlFlowNode target |
      withStmt.getContextExpr() = manager.getNode() and
      withStmt.getOptionalVars() = target.getNode() and
      manager.strictlyDominates(target) and
      manager = nodeFrom.(CfgNode).getNode() and
      target = nodeTo.(EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode()
    )
    or
    // Use: after `y = 42`, in `x = f(y)` the step goes from the ESSA
    // variable for `y` to the control flow node that reads `y`.
    nodeTo.(CfgNode).getNode() = nodeFrom.(EssaNode).getVar().getAUse()
    or
    // Edge refinement: from the refined input to the refining variable.
    exists(EssaEdgeRefinement edgeRefinement |
      edgeRefinement.getInput() = nodeFrom.(EssaNode).getVar() and
      edgeRefinement.getVariable() = nodeTo.(EssaNode).getVar()
    )
    or
    // Node refinement: from the refined input to the refining variable.
    exists(EssaNodeRefinement nodeRefinement |
      nodeRefinement.getInput() = nodeFrom.(EssaNode).getVar() and
      nodeRefinement.getVariable() = nodeTo.(EssaNode).getVar()
    )
    or
    // Phi function: from any of its inputs to the variable it defines.
    exists(PhiFunction phi |
      phi.getAnInput() = nodeFrom.(EssaNode).getVar() and
      phi.getVariable() = nodeTo.(EssaNode).getVar()
    )
  }
}
//--------
// Local flow
//--------
/**
 * This is the local flow predicate that is used as a building block in global
 * data flow. It holds for every `EssaFlow::essaFlowStep` in which neither
 * end point is an `EssaNode` for a `GlobalSsaVariable`; the steps that do
 * involve such a variable are the ones covered by `jumpStep`.
 */
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
  EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
  not nodeTo.(EssaNode).getVar() instanceof GlobalSsaVariable and
  not nodeFrom.(EssaNode).getVar() instanceof GlobalSsaVariable
}
// TODO: Make modules for these headings
//--------
// Global flow
//--------
/** Represents a callable; an alias for `CallableValue`. */
class DataFlowCallable = CallableValue;
/**
 * Represents a call to a callable: a `CallNode` that is a call of some
 * `DataFlowCallable`.
 */
class DataFlowCall extends CallNode {
  // The callable that this call is a call of.
  DataFlowCallable callable;

  DataFlowCall() { callable.getACall() = this }

  /** Gets the callable to which this call goes. */
  DataFlowCallable getCallable() { result = callable }

  /** Gets the enclosing callable of this call. */
  DataFlowCallable getEnclosingCallable() {
    exists(Scope enclosing | enclosing = this.getNode().getScope() |
      result.getScope() = enclosing
    )
  }
}
/** A data flow node that represents a call argument. */
class ArgumentNode extends CfgNode {
  ArgumentNode() { node = any(DataFlowCall call).getArg(_) }

  /** Holds if this argument occurs at the given position in the given call. */
  predicate argumentOf(DataFlowCall call, int pos) { call.getArg(pos) = node }

  /** Gets the call in which this node is an argument. */
  final DataFlowCall getCall() { this.argumentOf(result, _) }
}
/**
* Gets a viable run-time target for the call `call`.
*
* This simply forwards to `DataFlowCall::getCallable`.
*/
DataFlowCallable viableCallable(DataFlowCall call) { result = call.getCallable() }
/** IPA type for return kinds; it has the single branch `TNormalReturnKind`. */
private newtype TReturnKind = TNormalReturnKind()
/**
* A return kind. A return kind describes how a value can be returned
* from a callable. For Python, this is simply a method return.
*
* The underlying type `TReturnKind` has a single branch, so there is exactly
* one return kind.
*/
class ReturnKind extends TReturnKind {
/** Gets a textual representation of this element. */
string toString() { result = "return" }
}
/** A data flow node that represents a value returned by a callable. */
class ReturnNode extends CfgNode {
  // The `return` statement whose value this node is a flow node for.
  Return ret;

  // See `TaintTrackingImplementation::returnFlowStep`
  ReturnNode() { node = ret.getValue().getAFlowNode() }

  /**
   * Gets the kind of this return node.
   *
   * The body is `any()`, so the result ranges over every `ReturnKind`; the
   * underlying newtype has the single branch `TNormalReturnKind`.
   */
  ReturnKind getKind() { any() }

  /**
   * Gets the callable whose scope is the scope of the `return` statement.
   *
   * The scope is taken from the statement itself rather than by looking the
   * statement up with `getAStmt()` on the callable's scope: the latter only
   * matches top-level statements of the body, so a `return` nested inside
   * an `if`, loop, `try` or `with` block would be left without an enclosing
   * callable.
   */
  override DataFlowCallable getEnclosingCallable() { result.getScope() = ret.getScope() }
}
/**
* A data flow node that represents the output of a call.
*
* Every `CfgNode` whose control flow node is a `CallNode` qualifies.
*/
class OutNode extends CfgNode {
OutNode() { node instanceof CallNode }
}
/**
 * Gets a node that can read the value returned from `call` with return kind
 * `kind`. The result is the `OutNode` whose control flow node is `call`, and
 * `kind` is always the normal return kind.
 */
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) {
  kind = TNormalReturnKind() and
  result.getNode() = call
}
//--------
// Type pruning
//--------
/** IPA type for data flow types; it has the single branch `TAnyFlow`. */
newtype TDataFlowType = TAnyFlow()
/**
* A type used for type pruning. The underlying `TDataFlowType` has a single
* branch, so every node gets the same type (see `getNodeType`).
*/
class DataFlowType extends TDataFlowType {
/** Gets a textual representation of this element. */
string toString() { result = "DataFlowType" }
}
/**
* A node that performs a type cast.
*
* The characteristic predicate is `none()`, so this class currently has no
* members.
*/
class CastNode extends Node {
CastNode() { none() }
}
/**
* Holds if `t1` and `t2` are compatible, that is, whether data can flow from
* a node of type `t1` to a node of type `t2`.
*
* The body is `any()`, so all pairs of types are compatible.
*/
pragma[inline]
predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() }
/**
* Gets the type of `node`. This is `TAnyFlow()` for every node.
*/
DataFlowType getNodeType(Node node) { result = TAnyFlow() }
/**
* Gets a string representation of a type returned by `getErasedRepr`.
*
* The body is `none()`, so this currently has no result for any type.
* NOTE(review): `getErasedRepr` is not defined in this file — confirm the
* reference is still accurate.
*/
string ppReprType(DataFlowType t) { none() }
//--------
// Extra flow
//--------
/**
 * Holds if `pred` can flow to `succ`, by jumping from one callable to
 * another. Additional steps specified by the configuration are *not*
 * taken into account.
 */
predicate jumpStep(Node pred, Node succ) {
  // Global variables have ESSA variables of their own, so every ESSA flow
  // step with a global variable at either end is treated as a jump step.
  EssaFlow::essaFlowStep(pred, succ) and
  exists(EssaNode globalEnd | globalEnd = pred or globalEnd = succ |
    globalEnd.getVar() instanceof GlobalSsaVariable
  )
}
//--------
// Field flow
//--------
/**
* Holds if data can flow from `node1` to `node2` via an assignment to
* content `c`.
*
* The body is `none()`, so no store steps are currently modelled.
*/
predicate storeStep(Node node1, Content c, Node node2) { none() }
/**
* Holds if data can flow from `node1` to `node2` via a read of content `c`.
*
* The body is `none()`, so no read steps are currently modelled.
*/
predicate readStep(Node node1, Content c, Node node2) { none() }
/**
* Holds if values stored inside content `c` are cleared at node `n`. For example,
* any value stored inside `f` is cleared at the pre-update node associated with `x`
* in `x.f = newValue`.
*
* The body is `none()`, so no content is currently cleared anywhere.
*/
cached
predicate clearsContent(Node n, Content c) { none() }
//--------
// Fancy context-sensitive guards
//--------
/**
* Holds if the node `n` is unreachable when the call context is `call`.
*
* The body is `none()`, so no node is currently considered unreachable.
*/
predicate isUnreachableInCall(Node n, DataFlowCall call) { none() }
//--------
// Virtual dispatch with call context
//--------
/**
* Gets a viable dispatch target of `call` in the context `ctx`. This is
* restricted to those `call`s for which a context might make a difference.
*
* The body is `none()`, so this currently has no result.
*/
DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) { none() }
/**
* Holds if the set of viable implementations that can be called by `call`
* might be improved by knowing the call context. This is the case if the qualifier accesses a parameter of
* the enclosing callable `c` (including the implicit `this` parameter).
*
* The body is `none()`, so this currently never holds.
* NOTE(review): the mention of an implicit `this` parameter presumably comes
* from another language's implementation — confirm the wording for Python.
*/
predicate mayBenefitFromCallContext(DataFlowCall call, DataFlowCallable c) { none() }
//--------
// Misc
//--------
/**
* Holds if `n` does not require a `PostUpdateNode` as it either cannot be
* modified or its modification cannot be observed, for example if it is a
* freshly created object that is not saved in a variable.
*
* This predicate is only used for consistency checks.
*
* The body is `none()`, so no node is currently exempted.
*/
predicate isImmutableOrUnobservable(Node n) { none() }
/**
* Gets the access path limit, which is 5.
* NOTE(review): presumably the maximum access path length tracked by the
* shared library's field flow — confirm against the library.
*/
int accessPathLimit() { result = 5 }
/**
* Holds if `n` should be hidden from path explanations.
*
* The body is `none()`, so no node is currently hidden.
*/
predicate nodeIsHidden(Node n) { none() }

View File

@@ -0,0 +1,143 @@
/**
* Provides Python-specific definitions for use in the data flow library.
*/
import python
private import DataFlowPrivate
/**
* IPA type for data flow nodes.
*
* Flow between SSA variables is computed in `Essa.qll`.
*
* Flow from SSA variables to control flow nodes is generally via uses.
*
* Flow from control flow nodes to SSA variables is generally via assignments.
*
* The current implementation of these cross flows can be seen in `EssaTaintTracking`.
*/
newtype TNode =
/** A node corresponding to an SSA variable. */
TEssaNode(EssaVariable var) or
/** A node corresponding to a control flow node. */
TCfgNode(ControlFlowNode node)
/**
* An element, viewed as a node in a data flow graph. Either an SSA variable
* (`EssaNode`) or a control flow node (`CfgNode`).
*/
class Node extends TNode {
/** Gets a textual representation of this element. */
string toString() { result = "Data flow node" }
/**
* Gets the scope of this node.
*
* This base implementation has no result; `EssaNode` and `CfgNode`
* override it.
*/
Scope getScope() { none() }
/**
* Gets the enclosing callable of this node, that is, a callable whose
* scope is the scope of this node.
*/
DataFlowCallable getEnclosingCallable() { result.getScope() = this.getScope() }
/**
* Gets the location of this node.
*
* This base implementation has no result; `EssaNode` and `CfgNode`
* override it.
*/
Location getLocation() { none() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
}
/**
* A data flow node corresponding to an SSA variable. Its textual
* representation and scope are those of the variable; its location is that
* of the variable's definition.
*/
class EssaNode extends Node, TEssaNode {
// The SSA variable wrapped by this node.
EssaVariable var;
EssaNode() { this = TEssaNode(var) }
/** Gets the SSA variable that this node corresponds to. */
EssaVariable getVar() { result = var }
/** Gets a textual representation of this element. */
override string toString() { result = var.toString() }
override Scope getScope() { result = var.getScope() }
override Location getLocation() { result = var.getDefinition().getLocation() }
}
/**
* A data flow node corresponding to a control flow node. Its textual
* representation, scope and location are those of the control flow node.
*/
class CfgNode extends Node, TCfgNode {
// The control flow node wrapped by this node.
ControlFlowNode node;
CfgNode() { this = TCfgNode(node) }
/** Gets the control flow node that this node corresponds to. */
ControlFlowNode getNode() { result = node }
/** Gets a textual representation of this element. */
override string toString() { result = node.toString() }
override Scope getScope() { result = node.getScope() }
override Location getLocation() { result = node.getLocation() }
}
/**
* An expression, viewed as a node in a data flow graph.
*
* Note that because of control-flow splitting, one `Expr` may correspond
* to multiple `ExprNode`s, just like it may correspond to multiple
* `ControlFlow::Node`s.
*
* NOTE(review): the class has no characteristic predicate, so as written
* every `Node` is an `ExprNode`; the `ControlFlow::Node` wording presumably
* comes from another language's implementation — confirm both.
*/
class ExprNode extends Node { }
/**
* Gets a node corresponding to expression `e`.
*
* The body is `none()`, so this currently has no result for any expression.
*/
ExprNode exprNode(DataFlowExpr e) { none() }
/**
* The value of a parameter at function entry, viewed as a node in a data
* flow graph.
*
* These are the `EssaNode`s whose variable is a `ParameterDefinition`.
*/
class ParameterNode extends EssaNode {
ParameterNode() { var instanceof ParameterDefinition }
/**
* Holds if this node is the parameter of callable `c` at the
* (zero-based) index `i`.
*/
predicate isParameterOf(DataFlowCallable c, int i) {
var.(ParameterDefinition).getDefiningNode() = c.getParameter(i)
}
/** Gets the callable that this node is a parameter of. */
override DataFlowCallable getEnclosingCallable() { this.isParameterOf(result, _) }
}
/**
* A guard that validates some expression.
*
* To use this in a configuration, extend the class and provide a
* characteristic predicate precisely specifying the guard, and override
* `checks` to specify what is being validated and in which branch.
*
* It is important that all extending classes in scope are disjoint.
*
* NOTE(review): `checks` is currently commented out below, so there is
* nothing to override yet, and `getAGuardedNode` has no results.
*/
class BarrierGuard extends Expr {
// /** Holds if this guard validates `e` upon evaluating to `v`. */
// abstract predicate checks(Expr e, AbstractValue v);
/** Gets a node guarded by this guard. */
final ExprNode getAGuardedNode() {
none()
// exists(Expr e, AbstractValue v |
// this.checks(e, v) and
// this.controlsNode(result.getControlFlowNode(), e, v)
// )
}
}
/**
* A reference contained in an object. This is either a field or a property.
*
* The only value of this class is the string "Content".
*/
class Content extends string {
Content() { this = "Content" }
}

View File

@@ -0,0 +1,18 @@
/**
* Contains utility functions for writing data flow queries
*/
import DataFlowPrivate
import DataFlowPublic
/**
* Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
* (intra-procedural) step.
*
* This forwards directly to `simpleLocalFlowStep`.
*/
predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFrom, nodeTo) }
/**
* Holds if data flows from `source` to `sink` in zero or more local
* (intra-procedural) steps.
*
* This is the reflexive transitive closure of `localFlowStep`.
*/
predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }

View File

@@ -0,0 +1,21 @@
private import python
private import TaintTrackingPublic
private import experimental.dataflow.DataFlow
private import experimental.dataflow.internal.DataFlowPrivate
/**
* Holds if `node` should be a barrier in all global taint flow configurations
* but not in local taint.
*
* The body is `none()`, so there are currently no default barriers.
*/
predicate defaultTaintBarrier(DataFlow::Node node) { none() }
/**
* Holds if the additional step from `pred` to `succ` should be included in all
* global taint flow configurations.
*
* The body is `none()`, so there are currently no default additional steps;
* the intended implementation is kept below as commented-out code.
*/
predicate defaultAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
none()
// localAdditionalTaintStep(pred, succ)
// or
// succ = pred.(DataFlow::NonLocalJumpNode).getAJumpSuccessor(false)
}

View File

@@ -0,0 +1,36 @@
/**
* Provides classes for performing local (intra-procedural) and
* global (inter-procedural) taint-tracking analyses.
*/
private import python
private import TaintTrackingPrivate
private import experimental.dataflow.DataFlow
// /**
// * Holds if taint propagates from `source` to `sink` in zero or more local
// * (intra-procedural) steps.
// */
// predicate localTaint(DataFlow::Node source, DataFlow::Node sink) { localTaintStep*(source, sink) }
// // /**
// // * Holds if taint can flow from `e1` to `e2` in zero or more
// // * local (intra-procedural) steps.
// // */
// // predicate localExprTaint(Expr e1, Expr e2) {
// // localTaint(DataFlow::exprNode(e1), DataFlow::exprNode(e2))
// // }
// // /** A member (property or field) that is tainted if its containing object is tainted. */
// // abstract class TaintedMember extends AssignableMember { }
// /**
// * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
// * (intra-procedural) step.
// */
// predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// // Ordinary data flow
// DataFlow::localFlowStep(nodeFrom, nodeTo)
// or
// localAdditionalTaintStep(nodeFrom, nodeTo)
// }

View File

@@ -0,0 +1,138 @@
# Using the shared dataflow library
## File organisation
The files currently live in `experimental` (whereas the existing implementation lives in `semmle\python\dataflow`).
In there is found `DataFlow.qll`, `DataFlow2.qll` etc. which refer to `internal\DataFlowImpl`, `internal\DataFlowImpl2` etc. respectively. The `DataFlowImplN`-files are all identical copies to avoid mutual recursion. They start off by including two files `internal\DataFlowImplCommon` and `internal\DataFlowImplSpecific`. The former contains all the language-agnostic definitions, while the latter is where we describe our favorite language. `Specific` simply forwards to two other files `internal\DataFlowPrivate.qll` and `internal\DataFlowPublic.qll`. Definitions in the former will be hidden behind a `private` modifier, while those in the latter can be referred to in data flow queries. For instance, the definition of `DataFlow::Node` should likely be in `DataFlowPublic.qll`.
## Define the dataflow graph
In order to use the dataflow library, we need to define the dataflow graph,
that is define the nodes and the edges.
### Define the nodes
The nodes are defined in the type `DataFlow::Node` (found in `DataFlowPublic.qll`).
This should likely be an IPA type, so we can extend it as needed.
Typical cases needed to construct the call graph include
- argument node
- parameter node
- return node
Typical extensions include
- postupdate nodes
- implicit `this`-nodes
### Define the edges
The edges split into local flow (within a function) and global flow (the call graph, between functions/procedures).
Extra flow, such as reading from and writing to global variables, can be captured in `jumpStep`.
The local flow should be obtainable from an SSA computation.
Local flow nodes are generally either control flow nodes or SSA variables.
Flow from control flow nodes to SSA variables comes from SSA variable definitions, while flow from SSA variables to control flow nodes comes from def-use pairs.
The global flow should be obtainable from a `PointsTo` analysis. It is specified via `viableCallable` and
`getAnOutNode`. Consider making `ReturnKind` a singleton IPA type as in java.
Global flow includes local flow within a consistent call context. Thus, for local flow to count as global flow, all relevant nodes should implement `getEnclosingCallable`.
If complicated dispatch needs to be modelled, try using the `[reduced|pruned]viable*` predicates.
## Field flow
To track flow through fields we need to provide a model of fields, that is the `Content` class.
Field access is specified via `readStep` and `storeStep`.
Work is being done to make field flow handle lists and dictionaries and the like.
`PostUpdateNode`s become important when field flow is used, as they track modifications to fields resulting from function calls.
## Type pruning
If type information is available, flows can be discarded on the grounds of type mismatch.
Tracked types are given by the class `DataFlowType` and the predicate `getTypeBound`, and compatibility is recorded in the predicate `compatibleTypes`.
If type pruning is not used, `compatibleTypes` should be implemented as `any`; if it is implemented, say, as `none`, all flows will be pruned.
Further, possible casts are given by the class `CastNode`.
---
# Plan
## Stage I, data flow
### Phase 0, setup
Define minimal IPA type for `DataFlow::Node`
Define all required predicates empty (via `none()`),
except `compatibleTypes` which should be `any()`.
Define `ReturnKind`, `DataFlowType`, and `Content` as singleton IPA types.
### Phase 1, local flow
Implement `simpleLocalFlowStep` based on the existing SSA computation
### Phase 2, global flow
Implement `viableCallable` and `getAnOutNode` based on the existing predicate `PointsTo`.
### Phase 3, field flow
Redefine `Content` and implement `readStep` and `storeStep`.
Review use of post-update nodes.
### Phase 4, type pruning
Use type trackers to obtain relevant type information and redefine `DataFlowType` to contain appropriate cases. Record the type information in `getTypeBound`.
Implement `compatibleTypes` (perhaps simply as the identity).
If necessary, re-implement `getErasedRepr` and `ppReprType`.
If necessary, redefine `CastNode`.
### Phase 5, bonus
Review possible use of `[reduced|pruned]viable*` predicates.
Review need for more elaborate `ReturnKind`.
Review need for non-empty `jumpStep`.
Review need for non-empty `isUnreachableInCall`.
## Stage II, taint tracking
### Phase 0, setup
Implement all predicates empty.
### Phase 1, experiments
Try recovering an existing taint tracking query by implementing sources, sinks, sanitizers, and barriers.
---
# Status
## Achieved
- Copy of shared library; implemented enough predicates to make it compile.
- Simple flow into, out of, and through functions.
- Some tests, in particular a skeleton for something comprehensive.
## TODO
- Implementation has largely been done by finding a plausibly-sounding predicate in the python library to refer to. We should review that we actually have the intended semantics in all places.
- Comprehensive testing.
- The regression tests track the value of guards in order to eliminate impossible data flow. We currently have regressions because of this. We cannot readily replicate the existing method, as it uses the interdefinedness of data flow and taint tracking (there is a boolean taint kind). C++ [does something similar](https://github.com/github/codeql/blob/master/cpp/ql/src/semmle/code/cpp/controlflow/internal/ConstantExprs.qll#L27-L36) for eliminating impossible control flow, which we might be able to replicate (they infer values of "interesting" control flow nodes, which are those needed to determine values of guards).
- Flow for some syntactic constructs is done via extra taint steps in the existing implementation; we should find a way to get data flow for it. Some of this should be covered by field flow.
- A document is being written about proper use of the shared data flow library, this should be adhered to. In particular, we should consider replacing def-use with def-to-first-use and use-to-next-use in local flow.
- We seem to get duplicated results for global flow, as well as flow with and without type (so four times the "unique" results).
- We currently consider control flow nodes like exit nodes for functions, we should probably filter down which ones are of interest.
- We should probably override ToString for a number of data flow nodes.
- Test flow through classes, constructors and methods.
- What happens with named arguments? What does C# do?
- What should the enclosing callable for global variables be? C++ [makes it the variable itself](https://github.com/github/codeql/blob/master/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll#L417), C# seems to not have nodes for these but only for their reads and writes.
- Is `yield` another return type? If not, how is it handled?
- Should `OutNode` include magic function calls?
- Consider creating an internal abstract class for nodes as C# does. Among other things, this can help the optimizer by stating that `getEnclosingCallable` [is functional](https://github.com/github/codeql/blob/master/csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowPublic.qll#L62).

View File

@@ -0,0 +1,115 @@
/**
* Provides an implementation of global (interprocedural) taint tracking.
* This file re-exports the local (intraprocedural) taint-tracking analysis
* from `TaintTrackingParameter::Public` and adds a global analysis, mainly
* exposed through the `Configuration` class. For some languages, this file
* exists in several identical copies, allowing queries to use multiple
* `Configuration` classes that depend on each other without introducing
* mutual recursion among those configurations.
*/
import TaintTrackingParameter::Public
private import TaintTrackingParameter::Private
/**
* A configuration of interprocedural taint tracking analysis. This defines
* sources, sinks, and any other configurable aspect of the analysis. Each
* use of the taint tracking library must define its own unique extension of
* this abstract class.
*
* A taint-tracking configuration is a special data flow configuration
* (`DataFlow::Configuration`) that allows for flow through nodes that do not
* necessarily preserve values but are still relevant from a taint tracking
* perspective. (For example, string concatenation, where one of the operands
* is tainted.)
*
* To create a configuration, extend this class with a subclass whose
* characteristic predicate is a unique singleton string. For example, write
*
* ```ql
* class MyAnalysisConfiguration extends TaintTracking::Configuration {
* MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
* // Override `isSource` and `isSink`.
* // Optionally override `isSanitizer`.
* // Optionally override `isSanitizerIn`.
* // Optionally override `isSanitizerOut`.
* // Optionally override `isSanitizerGuard`.
* // Optionally override `isAdditionalTaintStep`.
* }
* ```
*
* Then, to query whether there is flow between some `source` and `sink`,
* write
*
* ```ql
* exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
* ```
*
* Multiple configurations can coexist, but it is unsupported to depend on
* another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
* overridden predicates that define sources, sinks, or additional steps.
* Instead, the dependency should go to a `TaintTracking2::Configuration` or a
* `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
*/
abstract class Configuration extends DataFlow::Configuration {
bindingset[this]
Configuration() { any() }
/**
* Holds if `source` is a relevant taint source.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSource(DataFlow::Node source);
/**
* Holds if `sink` is a relevant taint sink.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSink(DataFlow::Node sink);
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
// Data flow barriers are the configured sanitizers plus the default taint
// barriers; final, so subclasses customize via `isSanitizer` instead.
final override predicate isBarrier(DataFlow::Node node) {
isSanitizer(node) or
defaultTaintBarrier(node)
}
/** Holds if data flow into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
// Forwards to `isSanitizerIn`.
final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
/** Holds if data flow out of `node` is prohibited. */
predicate isSanitizerOut(DataFlow::Node node) { none() }
// Forwards to `isSanitizerOut`.
final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
/** Holds if data flow through nodes guarded by `guard` is prohibited. */
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
// Forwards to `isSanitizerGuard`.
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }
/**
* Holds if the additional taint propagation step from `node1` to `node2`
* must be taken into account in the analysis.
*/
predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
// Additional data flow steps are the configured taint steps plus the
// default additional taint steps; final, so subclasses customize via
// `isAdditionalTaintStep` instead.
final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isAdditionalTaintStep(node1, node2) or
defaultAdditionalTaintStep(node1, node2)
}
/**
* Holds if taint may flow from `source` to `sink` for this configuration.
*/
// overridden to provide taint-tracking specific qldoc
override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
super.hasFlow(source, sink)
}
}

View File

@@ -0,0 +1,6 @@
import experimental.dataflow.internal.TaintTrackingPublic as Public
/**
* Re-exports the experimental `DataFlow` module under the name `DataFlow`,
* together with the definitions from `TaintTrackingPrivate`.
*/
module Private {
import experimental.dataflow.DataFlow::DataFlow as DataFlow
import experimental.dataflow.internal.TaintTrackingPrivate
}

View File

@@ -0,0 +1,13 @@
import experimental.dataflow.DataFlow
/**
* A configuration to find all flows.
* To be used on tiny programs.
*
* Both `isSource` and `isSink` are `any()`, so every node is a source and a
* sink.
*/
class AllFlowsConfig extends DataFlow::Configuration {
AllFlowsConfig() { this = "AllFlowsConfig" }
override predicate isSource(DataFlow::Node node) { any() }
override predicate isSink(DataFlow::Node node) { any() }
}

View File

@@ -0,0 +1,3 @@
| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | SSA variable x |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |

View File

@@ -0,0 +1,9 @@
import callGraphConfig
// Selects every (source, sink) pair connected by flow under `CallGraphConfig`.
from DataFlow::Node source, DataFlow::Node sink
where any(CallGraphConfig cfg).hasFlow(source, sink)
select source, sink

View File

@@ -0,0 +1,20 @@
import experimental.dataflow.DataFlow
/**
 * A configuration to find the call graph edges: flow from arguments and
 * return nodes to parameters and call outputs.
 */
class CallGraphConfig extends DataFlow::Configuration {
  CallGraphConfig() { this = "CallGraphConfig" }

  /** Holds if `node` is an argument node or a return node. */
  override predicate isSource(DataFlow::Node node) {
    node instanceof DataFlow::ArgumentNode or
    node instanceof DataFlow::ReturnNode
  }

  /** Holds if `node` is a parameter node or the output node of a call. */
  override predicate isSink(DataFlow::Node node) {
    node instanceof DataFlow::ParameterNode or
    node instanceof DataFlow::OutNode
  }
}

View File

@@ -0,0 +1,2 @@
| test.py:1:19:1:19 | SSA variable x |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |

View File

@@ -0,0 +1,5 @@
import callGraphConfig
// Selects every node that `CallGraphConfig` treats as a sink.
from DataFlow::Node sink
where any(CallGraphConfig cfg).isSink(sink)
select sink

View File

@@ -0,0 +1,2 @@
| test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:7:19:7:19 | ControlFlowNode for a |

View File

@@ -0,0 +1,5 @@
import callGraphConfig
// Selects every node that `CallGraphConfig` treats as a source.
from DataFlow::Node source
where any(CallGraphConfig cfg).isSource(source)
select source

View File

@@ -0,0 +1,95 @@
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | Exit node for Module test |
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:7:1:7:1 | GSSA Variable b |
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | Exit node for Module test |
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:7:1:7:1 | GSSA Variable b |
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | Exit node for Module test |
| test.py:0:0:0:0 | GSSA Variable b | test.py:7:1:7:1 | GSSA Variable b |
| test.py:0:0:0:0 | GSSA Variable b | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:0:0:0:0 | Exit node for Module test |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:1:7:1 | GSSA Variable b |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:0:0:0:0 | Exit node for Module test |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:1:7:1 | GSSA Variable b |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:1:19:1:19 | SSA variable x | test.py:0:0:0:0 | Exit node for Module test |
| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y |
| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z |
| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:1:19:1:19 | SSA variable x | test.py:7:1:7:1 | GSSA Variable b |
| test.py:1:19:1:19 | SSA variable x | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:2:3:2:3 | SSA variable y | test.py:0:0:0:0 | Exit node for Module test |
| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z |
| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:2:3:2:3 | SSA variable y | test.py:7:1:7:1 | GSSA Variable b |
| test.py:2:3:2:3 | SSA variable y | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:0:0:0:0 | Exit node for Module test |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:7:1:7:1 | GSSA Variable b |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:3:3:3:3 | SSA variable z | test.py:0:0:0:0 | Exit node for Module test |
| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:3:3:3 | SSA variable z | test.py:7:1:7:1 | GSSA Variable b |
| test.py:3:3:3:3 | SSA variable z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:0:0:0:0 | Exit node for Module test |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:7:1:7:1 | GSSA Variable b |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:4:10:4:10 | ControlFlowNode for z | test.py:0:0:0:0 | Exit node for Module test |
| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:1:7:1 | GSSA Variable b |
| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:6:1:6:1 | GSSA Variable a | test.py:0:0:0:0 | Exit node for Module test |
| test.py:6:1:6:1 | GSSA Variable a | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:6:1:6:1 | GSSA Variable a | test.py:1:19:1:19 | SSA variable x |
| test.py:6:1:6:1 | GSSA Variable a | test.py:2:3:2:3 | SSA variable y |
| test.py:6:1:6:1 | GSSA Variable a | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:6:1:6:1 | GSSA Variable a | test.py:3:3:3:3 | SSA variable z |
| test.py:6:1:6:1 | GSSA Variable a | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:6:1:6:1 | GSSA Variable a | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:6:1:6:1 | GSSA Variable a | test.py:7:1:7:1 | GSSA Variable b |
| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
| test.py:6:1:6:1 | GSSA Variable a | test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:0:0:0:0 | Exit node for Module test |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:1:19:1:19 | SSA variable x |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:2:3:2:3 | SSA variable y |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:3:3:3:3 | SSA variable z |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | GSSA Variable a |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:1:7:1 | GSSA Variable b |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:5:7:20 | GSSA Variable a |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:7:1:7:1 | GSSA Variable b | test.py:0:0:0:0 | Exit node for Module test |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:0:0:0:0 | Exit node for Module test |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | GSSA Variable b |
| test.py:7:5:7:20 | GSSA Variable a | test.py:0:0:0:0 | Exit node for Module test |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:0:0:0:0 | Exit node for Module test |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | SSA variable x |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:2:3:2:3 | SSA variable y |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:3:3:3:3 | SSA variable z |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:1:7:1 | GSSA Variable b |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |

View File

@@ -0,0 +1,10 @@
import allFlowsConfig
// Reports each pair of distinct nodes for which `AllFlowsConfig` has flow.
from DataFlow::Node source, DataFlow::Node sink
where
  any(AllFlowsConfig cfg).hasFlow(source, sink) and
  source != sink
select source, sink

View File

@@ -0,0 +1,118 @@
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | Exit node for Module test |
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | Exit node for Module test |
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | Exit node for Module test |
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | Exit node for Module test |
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:0:0:0:0 | GSSA Variable b | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:0:0:0:0 | GSSA Variable b | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:0:0:0:0 | Exit node for Module test |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:0:0:0:0 | Exit node for Module test |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y |
| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y |
| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y |
| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y |
| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z |
| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z |
| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z |
| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z |
| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z |
| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z |
| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z |
| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z |
| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
| test.py:6:1:6:1 | GSSA Variable a | test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:6:1:6:1 | GSSA Variable a | test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | GSSA Variable a |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | GSSA Variable a |
| test.py:7:1:7:1 | GSSA Variable b | test.py:0:0:0:0 | Exit node for Module test |
| test.py:7:1:7:1 | GSSA Variable b | test.py:0:0:0:0 | Exit node for Module test |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | GSSA Variable b |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | GSSA Variable b |
| test.py:7:5:7:20 | GSSA Variable a | test.py:0:0:0:0 | Exit node for Module test |
| test.py:7:5:7:20 | GSSA Variable a | test.py:0:0:0:0 | Exit node for Module test |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | SSA variable x |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | SSA variable x |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |

View File

@@ -0,0 +1,9 @@
import allFlowsConfig
// Reports each path node together with each of its successors.
from DataFlow::PathNode fromNode, DataFlow::PathNode toNode
where fromNode.getASuccessor() = toNode
select fromNode, toNode

View File

@@ -0,0 +1,51 @@
| test.py:0:0:0:0 | Entry node for Module test | test.py:0:0:0:0 | Entry node for Module test |
| test.py:0:0:0:0 | Exit node for Module test | test.py:0:0:0:0 | Exit node for Module test |
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | GSSA Variable b |
| test.py:0:0:0:0 | SSA variable $ | test.py:0:0:0:0 | Exit node for Module test |
| test.py:0:0:0:0 | SSA variable $ | test.py:0:0:0:0 | SSA variable $ |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
| test.py:1:1:1:21 | Entry node for Function obfuscated_id | test.py:1:1:1:21 | Entry node for Function obfuscated_id |
| test.py:1:1:1:21 | Exit node for Function obfuscated_id | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:1:19:1:19 | SSA variable x | test.py:1:19:1:19 | SSA variable x |
| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y |
| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z |
| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:2:3:2:3 | ControlFlowNode for y | test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:2:3:2:3 | SSA variable y | test.py:2:3:2:3 | SSA variable y |
| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z |
| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:3:3:3 | ControlFlowNode for z | test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:3:3:3:3 | SSA variable z | test.py:3:3:3:3 | SSA variable z |
| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:4:3:4:10 | ControlFlowNode for Return | test.py:4:3:4:10 | ControlFlowNode for Return |
| test.py:4:10:4:10 | ControlFlowNode for z | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:6:1:6:1 | ControlFlowNode for a | test.py:6:1:6:1 | ControlFlowNode for a |
| test.py:6:1:6:1 | GSSA Variable a | test.py:6:1:6:1 | GSSA Variable a |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral |
| test.py:7:1:7:1 | ControlFlowNode for b | test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:7:1:7:1 | GSSA Variable b | test.py:7:1:7:1 | GSSA Variable b |
| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a |

View File

@@ -0,0 +1,9 @@
import experimental.dataflow.DataFlow
// Reports each pair of nodes related by `DataFlow::localFlow`.
from DataFlow::Node fromNode, DataFlow::Node toNode
where DataFlow::localFlow(fromNode, toNode)
select fromNode, toNode

View File

@@ -0,0 +1,9 @@
| test.py:0:0:0:0 | SSA variable $ | test.py:0:0:0:0 | Exit node for Module test |
| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y |
| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z |

View File

@@ -0,0 +1,9 @@
import experimental.dataflow.DataFlow
// Reports each pair of nodes related by a single `DataFlow::localFlowStep`.
from DataFlow::Node fromNode, DataFlow::Node toNode
where DataFlow::localFlowStep(fromNode, toNode)
select fromNode, toNode

View File

@@ -0,0 +1,13 @@
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:7:1:7:1 | GSSA Variable b |
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:7:1:7:1 | GSSA Variable b |
| test.py:0:0:0:0 | GSSA Variable b | test.py:7:1:7:1 | GSSA Variable b |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:1:7:1 | GSSA Variable b |
| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:1:19:1:19 | SSA variable x | test.py:7:1:7:1 | GSSA Variable b |
| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:2:3:2:3 | SSA variable y | test.py:7:1:7:1 | GSSA Variable b |
| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:3:3:3:3 | SSA variable z | test.py:7:1:7:1 | GSSA Variable b |
| test.py:6:1:6:1 | GSSA Variable a | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:6:1:6:1 | GSSA Variable a | test.py:7:1:7:1 | GSSA Variable b |
| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |

View File

@@ -0,0 +1,10 @@
import maximalFlowsConfig
// Reports each pair of distinct nodes for which `MaximalFlowsConfig` has flow.
from DataFlow::Node source, DataFlow::Node sink
where
  any(MaximalFlowsConfig cfg).hasFlow(source, sink) and
  source != sink
select source, sink

View File

@@ -0,0 +1,25 @@
import experimental.dataflow.DataFlow
/**
 * A configuration to find all "maximal" flows.
 * To be used on small programs.
 *
 * Sources are nodes with no ESSA predecessor and sinks are nodes whose
 * variable is not used any further; see the member predicates for the
 * exact conditions.
 */
class MaximalFlowsConfig extends DataFlow::Configuration {
  // The identifying string must differ from the one used by `AllFlowsConfig`:
  // a string value belongs to every class whose characteristic predicate
  // accepts it, so sharing "AllFlowsConfig" would merge the two
  // configurations whenever both libraries are imported together.
  MaximalFlowsConfig() { this = "MaximalFlowsConfig" }

  /**
   * Holds if `node` is a parameter node, or an ESSA node that no other
   * ESSA node reaches through a single local flow step.
   */
  override predicate isSource(DataFlow::Node node) {
    node instanceof DataFlow::ParameterNode
    or
    // `and` binds tighter than `or`: this whole conjunction is the second disjunct.
    node instanceof DataFlow::EssaNode and
    not exists(DataFlow::EssaNode pred | DataFlow::localFlowStep(pred, node))
  }

  /**
   * Holds if `node` is a return node, or an ESSA node whose variable has
   * no source use.
   */
  override predicate isSink(DataFlow::Node node) {
    node instanceof DataFlow::ReturnNode
    or
    node instanceof DataFlow::EssaNode and
    not exists(node.(DataFlow::EssaNode).getVar().getASourceUse())
  }
}

View File

@@ -0,0 +1,30 @@
| test.py:0:0:0:0 | Entry node for Module test |
| test.py:0:0:0:0 | Exit node for Module test |
| test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b |
| test.py:0:0:0:0 | SSA variable $ |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
| test.py:1:1:1:21 | Entry node for Function obfuscated_id |
| test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:1:19:1:19 | SSA variable x |
| test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:2:3:2:3 | SSA variable y |
| test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:3:3:3:3 | SSA variable z |
| test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:4:3:4:10 | ControlFlowNode for Return |
| test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:6:1:6:1 | ControlFlowNode for a |
| test.py:6:1:6:1 | GSSA Variable a |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral |
| test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:7:1:7:1 | GSSA Variable b |
| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | GSSA Variable a |
| test.py:7:19:7:19 | ControlFlowNode for a |

View File

@@ -0,0 +1,5 @@
import allFlowsConfig
// Lists every node that `AllFlowsConfig` accepts as a sink.
from DataFlow::Node sink
where any(AllFlowsConfig cfg).isSink(sink)
select sink

View File

@@ -0,0 +1,30 @@
| test.py:0:0:0:0 | Entry node for Module test |
| test.py:0:0:0:0 | Exit node for Module test |
| test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b |
| test.py:0:0:0:0 | SSA variable $ |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
| test.py:1:1:1:21 | Entry node for Function obfuscated_id |
| test.py:1:1:1:21 | Exit node for Function obfuscated_id |
| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:1:19:1:19 | SSA variable x |
| test.py:2:3:2:3 | ControlFlowNode for y |
| test.py:2:3:2:3 | SSA variable y |
| test.py:2:7:2:7 | ControlFlowNode for x |
| test.py:3:3:3:3 | ControlFlowNode for z |
| test.py:3:3:3:3 | SSA variable z |
| test.py:3:7:3:7 | ControlFlowNode for y |
| test.py:4:3:4:10 | ControlFlowNode for Return |
| test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:6:1:6:1 | ControlFlowNode for a |
| test.py:6:1:6:1 | GSSA Variable a |
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral |
| test.py:7:1:7:1 | ControlFlowNode for b |
| test.py:7:1:7:1 | GSSA Variable b |
| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | GSSA Variable a |
| test.py:7:19:7:19 | ControlFlowNode for a |

View File

@@ -0,0 +1,5 @@
import allFlowsConfig
// Lists every node that `AllFlowsConfig` accepts as a source.
from DataFlow::Node source
where any(AllFlowsConfig cfg).isSource(source)
select source

View File

@@ -0,0 +1,7 @@
def obfuscated_id(x):  # returns x unchanged, routed through the locals y and z
y = x
z = y
return z
a = 42
b = obfuscated_id(a)  # NOTE: .expected files reference line:column positions in this file; do not insert lines

View File

@@ -0,0 +1,103 @@
uniqueEnclosingCallable
| test.py:0:0:0:0 | Exit node for Module test | Node should have one enclosing callable but has 0. |
| test.py:0:0:0:0 | GSSA Variable __name__ | Node should have one enclosing callable but has 0. |
| test.py:0:0:0:0 | GSSA Variable __package__ | Node should have one enclosing callable but has 0. |
| test.py:0:0:0:0 | GSSA Variable test23 | Node should have one enclosing callable but has 0. |
| test.py:0:0:0:0 | GSSA Variable test24 | Node should have one enclosing callable but has 0. |
| test.py:0:0:0:0 | GSSA Variable test_truth | Node should have one enclosing callable but has 0. |
| test.py:0:0:0:0 | GSSA Variable test_update_extend | Node should have one enclosing callable but has 0. |
| test.py:0:0:0:0 | SSA variable $ | Node should have one enclosing callable but has 0. |
| test.py:6:1:6:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:6:5:6:9 | GSSA Variable test1 | Node should have one enclosing callable but has 0. |
| test.py:9:1:9:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:9:5:9:9 | GSSA Variable test2 | Node should have one enclosing callable but has 0. |
| test.py:13:1:13:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:13:5:13:10 | GSSA Variable source | Node should have one enclosing callable but has 0. |
| test.py:16:1:16:14 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:16:5:16:8 | GSSA Variable sink | Node should have one enclosing callable but has 0. |
| test.py:19:1:19:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:19:5:19:9 | GSSA Variable test3 | Node should have one enclosing callable but has 0. |
| test.py:23:1:23:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:23:5:23:9 | GSSA Variable test4 | Node should have one enclosing callable but has 0. |
| test.py:27:1:27:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:27:5:27:9 | GSSA Variable test5 | Node should have one enclosing callable but has 0. |
| test.py:31:1:31:16 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:31:5:31:9 | GSSA Variable test6 | Node should have one enclosing callable but has 0. |
| test.py:39:1:39:16 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:39:5:39:9 | GSSA Variable test7 | Node should have one enclosing callable but has 0. |
| test.py:47:1:47:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:47:5:47:11 | GSSA Variable source2 | Node should have one enclosing callable but has 0. |
| test.py:50:1:50:15 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:50:5:50:9 | GSSA Variable sink2 | Node should have one enclosing callable but has 0. |
| test.py:53:1:53:21 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:53:5:53:9 | GSSA Variable sink3 | Node should have one enclosing callable but has 0. |
| test.py:57:1:57:16 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:57:5:57:9 | GSSA Variable test8 | Node should have one enclosing callable but has 0. |
| test.py:62:1:62:16 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:62:5:62:9 | GSSA Variable test9 | Node should have one enclosing callable but has 0. |
| test.py:69:1:69:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:69:5:69:10 | GSSA Variable test10 | Node should have one enclosing callable but has 0. |
| test.py:76:1:76:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:76:5:76:7 | GSSA Variable hub | Node should have one enclosing callable but has 0. |
| test.py:79:1:79:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:79:5:79:10 | GSSA Variable test11 | Node should have one enclosing callable but has 0. |
| test.py:84:1:84:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:84:5:84:10 | GSSA Variable test12 | Node should have one enclosing callable but has 0. |
| test.py:89:8:89:13 | ControlFlowNode for ImportExpr | Node should have one enclosing callable but has 0. |
| test.py:89:8:89:13 | GSSA Variable module | Node should have one enclosing callable but has 0. |
| test.py:91:1:91:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:91:5:91:10 | GSSA Variable test13 | Node should have one enclosing callable but has 0. |
| test.py:95:1:95:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:95:5:95:10 | GSSA Variable test14 | Node should have one enclosing callable but has 0. |
| test.py:99:1:99:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:99:5:99:10 | GSSA Variable test15 | Node should have one enclosing callable but has 0. |
| test.py:103:1:103:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:103:5:103:10 | GSSA Variable test16 | Node should have one enclosing callable but has 0. |
| test.py:108:1:108:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:108:5:108:10 | GSSA Variable test20 | Node should have one enclosing callable but has 0. |
| test.py:118:1:118:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:118:5:118:10 | GSSA Variable test21 | Node should have one enclosing callable but has 0. |
| test.py:128:1:128:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:128:5:128:10 | GSSA Variable test22 | Node should have one enclosing callable but has 0. |
| test.py:139:20:139:38 | ControlFlowNode for ImportMember | Node should have one enclosing callable but has 0. |
| test.py:139:33:139:38 | GSSA Variable unsafe | Node should have one enclosing callable but has 0. |
| test.py:140:1:140:12 | ControlFlowNode for SINK() | Node should have one enclosing callable but has 0. |
| test.py:140:1:140:12 | GSSA Variable unsafe | Node should have one enclosing callable but has 0. |
| test.py:140:6:140:11 | ControlFlowNode for unsafe | Node should have one enclosing callable but has 0. |
| test.py:142:1:142:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:142:5:142:10 | GSSA Variable test23 | Node should have one enclosing callable but has 0. |
| test.py:146:1:146:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:146:5:146:10 | GSSA Variable test24 | Node should have one enclosing callable but has 0. |
| test.py:151:1:151:29 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:151:5:151:22 | GSSA Variable test_update_extend | Node should have one enclosing callable but has 0. |
| test.py:161:1:161:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| test.py:161:5:161:14 | GSSA Variable test_truth | Node should have one enclosing callable but has 0. |
uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
missingToString
parameterCallable
localFlowIsLocal
compatibleTypesReflexive
unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
storeIsPostUpdate
argHasPostUpdate
| test.py:25:10:25:10 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:29:10:29:10 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:48:19:48:21 | ControlFlowNode for arg | ArgumentNode is missing PostUpdateNode. |
| test.py:51:10:51:12 | ControlFlowNode for arg | ArgumentNode is missing PostUpdateNode. |
| test.py:55:14:55:16 | ControlFlowNode for arg | ArgumentNode is missing PostUpdateNode. |
| test.py:59:11:59:11 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:67:11:67:14 | ControlFlowNode for cond | ArgumentNode is missing PostUpdateNode. |
| test.py:67:17:67:17 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:74:11:74:14 | ControlFlowNode for cond | ArgumentNode is missing PostUpdateNode. |
| test.py:74:17:74:17 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:81:13:81:13 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:86:13:86:13 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |

View File

@@ -0,0 +1 @@
import experimental.dataflow.internal.DataFlowImplConsistency::Consistency

View File

@@ -0,0 +1,171 @@
# This is currently a copy of the integration tests.
# It should contain many syntactic constructs, so should
# perhaps be taken from coverage once that is done.
# (We might even put the consistency check in there.)
def test1():
SINK(SOURCE)
def test2():
s = SOURCE
SINK(s)
def source():
return SOURCE
def sink(arg):
SINK(arg)
def test3():
t = source()
SINK(t)
def test4():
t = SOURCE
sink(t)
def test5():
t = source()
sink(t)
def test6(cond):
if cond:
t = "Safe"
else:
t = SOURCE
if cond:
SINK(t)
def test7(cond):
if cond:
t = SOURCE
else:
t = "Safe"
if cond:
SINK(t)
def source2(arg):
return source(arg)
def sink2(arg):
sink(arg)
def sink3(cond, arg):
if cond:
sink(arg)
def test8(cond):
t = source2()
sink2(t)
#False positive
def test9(cond):
if cond:
t = "Safe"
else:
t = SOURCE
sink3(cond, t)
def test10(cond):
if cond:
t = SOURCE
else:
t = "Safe"
sink3(cond, t)
def hub(arg):
return arg
def test11():
t = SOURCE
t = hub(t)
SINK(t)
def test12():
t = "safe"
t = hub(t)
SINK(t)
import module
def test13():
t = module.dangerous
SINK(t)
def test14():
t = module.safe
SINK(t)
def test15():
t = module.safe2
SINK(t)
def test16():
t = module.dangerous_func()
SINK(t)
def test20(cond):
if cond:
t = CUSTOM_SOURCE
else:
t = SOURCE
if cond:
CUSTOM_SINK(t)
else:
SINK(t)
def test21(cond):
if cond:
t = CUSTOM_SOURCE
else:
t = SOURCE
if not cond:
CUSTOM_SINK(t)
else:
SINK(t)
def test22(cond):
if cond:
t = CUSTOM_SOURCE
else:
t = SOURCE
t = TAINT_FROM_ARG(t)
if cond:
CUSTOM_SINK(t)
else:
SINK(t)
from module import dangerous as unsafe
SINK(unsafe)
def test23():
with SOURCE as t:
SINK(t)
def test24():
s = SOURCE
SANITIZE(s)
SINK(s)
def test_update_extend(x, y):
l = [SOURCE]
d = {"key" : SOURCE}
x.extend(l)
y.update(d)
SINK(x[0])
SINK(y["key"])
l2 = list(l)
d2 = dict(d)
def test_truth():
t = SOURCE
if t:
SINK(t)
else:
SINK(t) # Regression: FP here
if not t:
SINK(t) # Regression: FP here
else:
SINK(t)

View File

@@ -0,0 +1,6 @@
| test.py:20:9:20:14 | ControlFlowNode for SOURCE | test.py:21:10:21:10 | ControlFlowNode for x |
| test.py:25:9:25:16 | ControlFlowNode for Str | test.py:26:10:26:10 | ControlFlowNode for x |
| test.py:29:9:29:17 | ControlFlowNode for Str | test.py:30:10:30:10 | ControlFlowNode for x |
| test.py:33:9:33:10 | ControlFlowNode for IntegerLiteral | test.py:34:10:34:10 | ControlFlowNode for x |
| test.py:37:9:37:12 | ControlFlowNode for FloatLiteral | test.py:38:10:38:10 | ControlFlowNode for x |
| test.py:46:10:46:15 | ControlFlowNode for SOURCE | test.py:47:10:47:10 | ControlFlowNode for x |

View File

@@ -0,0 +1,9 @@
import experimental.dataflow.testConfig
// Reports each (source, sink) pair for which some `TestConfiguration` finds flow.
from DataFlow::Node source, DataFlow::Node sink
where any(TestConfiguration cfg).hasFlow(source, sink)
select source, sink

View File

@@ -0,0 +1,7 @@
| test.py:13:5:13:5 | SSA variable x | test.py:12:1:12:33 | Exit node for Function test_tuple_with_local_flow |
| test.py:13:5:13:5 | SSA variable x | test.py:14:9:14:9 | ControlFlowNode for x |
| test.py:13:10:13:18 | ControlFlowNode for Tuple | test.py:13:5:13:5 | SSA variable x |
| test.py:14:5:14:5 | SSA variable y | test.py:15:5:15:11 | SSA variable y |
| test.py:14:5:14:5 | SSA variable y | test.py:15:10:15:10 | ControlFlowNode for y |
| test.py:14:9:14:12 | ControlFlowNode for Subscript | test.py:14:5:14:5 | SSA variable y |
| test.py:15:5:15:11 | SSA variable y | test.py:12:1:12:33 | Exit node for Function test_tuple_with_local_flow |

View File

@@ -0,0 +1,8 @@
import python
import experimental.dataflow.DataFlow
// Reports the local flow steps whose starting node lies in a callable
// whose name ends with `_with_local_flow`.
from DataFlow::Node nodeFrom, DataFlow::Node nodeTo
where
  exists(string callableName |
    callableName = nodeFrom.getEnclosingCallable().getName() and
    callableName.matches("%\\_with\\_local\\_flow")
  ) and
  DataFlow::localFlowStep(nodeFrom, nodeTo)
select nodeFrom, nodeTo

View File

@@ -0,0 +1,131 @@
# This should cover all the syntactical constructs that we hope to support
# Intended sources should be the variable `SOURCE` and intended sinks should be
# arguments to the function `SINK` (see python/ql/test/experimental/dataflow/testConfig.qll).
#
# Functions whose name ends with "_with_local_flow" will also be tested for local flow.
# SOURCE and SINK are defined here so that we can easily evaluate the test code.
SOURCE = "source"
def SINK(x):
print(x)
def test_tuple_with_local_flow():
x = (3, SOURCE)
y = x[1]
SINK(y)
# List taken from https://docs.python.org/3/reference/expressions.html
# 6.2.1. Identifiers (Names)
def test_names():
x = SOURCE
SINK(x)
# 6.2.2. Literals
def test_string_literal():
x = "source"
SINK(x)
def test_bytes_literal():
x = b"source"
SINK(x)
def test_integer_literal():
x = 42
SINK(x)
def test_floatnumber_literal():
x = 42.0
SINK(x)
def test_imagnumber_literal():
x = 42j
SINK(x)
# 6.2.3. Parenthesized forms
def test_parenthesized_form():
x = (SOURCE)
SINK(x)
# 6.2.5. List displays
def test_list_display():
x = [SOURCE]
SINK(x[0])
def test_list_comprehension():
x = [SOURCE for y in [3]]
SINK(x[0])
def test_nested_list_display():
x = [* [SOURCE]]
SINK(x[0])
# 6.2.6. Set displays
def test_set_display():
x = {SOURCE}
SINK(x.pop())
def test_set_comprehension():
x = {SOURCE for y in [3]}
SINK(x.pop())
def test_nested_set_display():
x = {* {SOURCE}}
SINK(x.pop())
# 6.2.7. Dictionary displays
def test_dict_display():
x = {"s": SOURCE}
SINK(x["s"])
def test_dict_comprehension():
x = {y: SOURCE for y in ["s"]}
SINK(x["s"])
def test_nested_dict_display():
x = {** {"s": SOURCE}}
SINK(x["s"])
# 6.2.8. Generator expressions
def test_generator():
x = (SOURCE for y in [3])
SINK([*x][0])
# List taken from https://docs.python.org/3/reference/expressions.html
# 6. Expressions
# 6.1. Arithmetic conversions
# 6.2. Atoms
# 6.2.1. Identifiers (Names)
# 6.2.2. Literals
# 6.2.3. Parenthesized forms
# 6.2.4. Displays for lists, sets and dictionaries
# 6.2.5. List displays
# 6.2.6. Set displays
# 6.2.7. Dictionary displays
# 6.2.8. Generator expressions
# 6.2.9. Yield expressions
# 6.2.9.1. Generator-iterator methods
# 6.2.9.2. Examples
# 6.2.9.3. Asynchronous generator functions
# 6.2.9.4. Asynchronous generator-iterator methods
# 6.3. Primaries
# 6.3.1. Attribute references
# 6.3.2. Subscriptions
# 6.3.3. Slicings
# 6.3.4. Calls
# 6.4. Await expression
# 6.5. The power operator
# 6.6. Unary arithmetic and bitwise operations
# 6.7. Binary arithmetic operations
# 6.8. Shifting operations
# 6.9. Binary bitwise operations
# 6.10. Comparisons
# 6.10.1. Value comparisons
# 6.10.2. Membership test operations
# 6.10.3. Identity comparisons
# 6.11. Boolean operations
# 6.12. Assignment expressions
# 6.13. Conditional expressions
# 6.14. Lambdas
# 6.15. Expression lists
# 6.16. Evaluation order
# 6.17. Operator precedence

View File

@@ -0,0 +1,16 @@
| test.py:3:10:3:15 | ControlFlowNode for SOURCE | test.py:3:10:3:15 | ControlFlowNode for SOURCE |
| test.py:6:9:6:14 | ControlFlowNode for SOURCE | test.py:7:10:7:10 | ControlFlowNode for s |
| test.py:10:12:10:17 | ControlFlowNode for SOURCE | test.py:13:10:13:12 | ControlFlowNode for arg |
| test.py:10:12:10:17 | ControlFlowNode for SOURCE | test.py:17:10:17:10 | ControlFlowNode for t |
| test.py:20:9:20:14 | ControlFlowNode for SOURCE | test.py:13:10:13:12 | ControlFlowNode for arg |
| test.py:37:13:37:18 | ControlFlowNode for SOURCE | test.py:41:14:41:14 | ControlFlowNode for t |
| test.py:62:13:62:18 | ControlFlowNode for SOURCE | test.py:13:10:13:12 | ControlFlowNode for arg |
| test.py:67:13:67:18 | ControlFlowNode for SOURCE | test.py:13:10:13:12 | ControlFlowNode for arg |
| test.py:76:9:76:14 | ControlFlowNode for SOURCE | test.py:78:10:78:10 | ControlFlowNode for t |
| test.py:108:13:108:18 | ControlFlowNode for SOURCE | test.py:112:14:112:14 | ControlFlowNode for t |
| test.py:139:10:139:15 | ControlFlowNode for SOURCE | test.py:140:14:140:14 | ControlFlowNode for t |
| test.py:143:9:143:14 | ControlFlowNode for SOURCE | test.py:145:10:145:10 | ControlFlowNode for s |
| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:160:14:160:14 | ControlFlowNode for t |
| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:162:14:162:14 | ControlFlowNode for t |
| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:164:14:164:14 | ControlFlowNode for t |
| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:166:14:166:14 | ControlFlowNode for t |

View File

@@ -0,0 +1,16 @@
/**
* The results of this query should be compared to those of
* python/ql/test/library-tests/taint/dataflow/Dataflow.ql.
* A first goal is to have identical results; after that we
* hope to remove the false positive.
*/
import experimental.dataflow.testConfig
// Reports each (source, sink) pair for which some `TestConfiguration` finds flow.
from DataFlow::Node source, DataFlow::Node sink
where any(TestConfiguration cfg).hasFlow(source, sink)
select source, sink

View File

@@ -0,0 +1,167 @@
def test1():
SINK(SOURCE)
def test2():
s = SOURCE
SINK(s)
def source():
return SOURCE
def sink(arg):
SINK(arg)
def test3():
t = source()
SINK(t)
def test4():
t = SOURCE
sink(t)
def test5():
t = source()
sink(t)
def test6(cond):
if cond:
t = "Safe"
else:
t = SOURCE
if cond:
SINK(t)
def test7(cond):
if cond:
t = SOURCE
else:
t = "Safe"
if cond:
SINK(t)
def source2(arg):
return source(arg)
def sink2(arg):
sink(arg)
def sink3(cond, arg):
if cond:
sink(arg)
def test8(cond):
t = source2()
sink2(t)
#False positive
def test9(cond):
if cond:
t = "Safe"
else:
t = SOURCE
sink3(cond, t)
def test10(cond):
if cond:
t = SOURCE
else:
t = "Safe"
sink3(cond, t)
def hub(arg):
return arg
def test11():
t = SOURCE
t = hub(t)
SINK(t)
def test12():
t = "safe"
t = hub(t)
SINK(t)
import module
def test13():
t = module.dangerous
SINK(t)
def test14():
t = module.safe
SINK(t)
def test15():
t = module.safe2
SINK(t)
def test16():
t = module.dangerous_func()
SINK(t)
def test20(cond):
if cond:
t = CUSTOM_SOURCE
else:
t = SOURCE
if cond:
CUSTOM_SINK(t)
else:
SINK(t)
def test21(cond):
if cond:
t = CUSTOM_SOURCE
else:
t = SOURCE
if not cond:
CUSTOM_SINK(t)
else:
SINK(t)
def test22(cond):
if cond:
t = CUSTOM_SOURCE
else:
t = SOURCE
t = TAINT_FROM_ARG(t)
if cond:
CUSTOM_SINK(t)
else:
SINK(t)
from module import dangerous as unsafe
SINK(unsafe)
def test23():
with SOURCE as t:
SINK(t)
def test24():
s = SOURCE
SANITIZE(s)
SINK(s)
def test_update_extend(x, y):
l = [SOURCE]
d = {"key" : SOURCE}
x.extend(l)
y.update(d)
SINK(x[0])
SINK(y["key"])
l2 = list(l)
d2 = dict(d)
def test_truth():
t = SOURCE
if t:
SINK(t)
else:
SINK(t) # Regression: FP here
if not t:
SINK(t) # Regression: FP here
else:
SINK(t)

View File

@@ -0,0 +1,45 @@
/**
* Configuration to test selected data flow.
* Sources in the source code are denoted by the special name `SOURCE`,
* and sinks are denoted by arguments to the special function `SINK`.
* For example, given the test code
* ```python
* def test():
* s = SOURCE
* SINK(s)
* ```
* `SOURCE` will be a source and the second occurrence of `s` will be a sink.
*
* In order to test literals, alternative sources are defined for each type:
*
* for | use
* ----------
* string | `"source"`
* integer | `42`
* float | `42.0`
* complex | `42j` (not supported yet)
*/
import experimental.dataflow.DataFlow
/**
 * A data flow configuration used by the tests.
 *
 * Sources are control-flow-backed nodes that are either a reference to the
 * name `SOURCE`, a string constant with value `"source"`, the integer
 * literal `42`, or the float literal `42.0`.
 * Sinks are the arguments of calls to a function referred to by the name
 * `SINK` or `SINK_F`.
 */
class TestConfiguration extends DataFlow::Configuration {
  TestConfiguration() { this = "TestConfiguration" }

  /** Holds if `node` is one of the marker sources described on this class. */
  override predicate isSource(DataFlow::Node node) {
    // Bind the CFG-backed view of `node` once; every alternative below needs it.
    exists(DataFlow::CfgNode cfgNode | cfgNode = node |
      cfgNode.getNode().(NameNode).getId() = "SOURCE"
      or
      cfgNode.getNode().getNode().(StrConst).getS() = "source"
      or
      cfgNode.getNode().getNode().(IntegerLiteral).getN() = "42"
      or
      cfgNode.getNode().getNode().(FloatLiteral).getN() = "42.0"
      // No support for complex numbers
    )
  }

  /** Holds if `node` is an argument of a call to `SINK` or `SINK_F`. */
  override predicate isSink(DataFlow::Node node) {
    exists(CallNode call |
      node.(DataFlow::CfgNode).getNode() = call.getAnArg() and
      call.getFunction().(NameNode).getId() in ["SINK", "SINK_F"]
    )
  }
}