Merge pull request #13178 from yoff/python-ruby/track-through-summaries-pm

ruby/python: Shared module for typetracking through flow summaries
This commit is contained in:
yoff
2023-06-20 11:19:45 +02:00
committed by GitHub
13 changed files with 1314 additions and 264 deletions

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Type tracking is now aware of flow summaries. This leads to a richer API graph, and may lead to more results in some queries.

View File

@@ -251,6 +251,9 @@ abstract class LibraryCallable extends string {
/** Gets a call to this library callable. */
abstract CallCfgNode getACall();
/** Same as `getACall` but without referring to the call graph or API graph. */
CallCfgNode getACallSimple() { none() }
/** Gets a data-flow node, where this library callable is used as a call-back. */
abstract ArgumentNode getACallback();
}

View File

@@ -0,0 +1,391 @@
/**
* Provides the implementation of type tracking steps through flow summaries.
* To use this, you must implement the `Input` signature. You can then use the predicates in the `Output`
* signature to implement the predicates of the same names inside `TypeTrackerSpecific.qll`.
*/
/** The classes and predicates needed to generate type-tracking steps from summaries. */
signature module Input {
// Dataflow nodes
class Node;
// Content
class TypeTrackerContent;
class TypeTrackerContentFilter;
// Relating content and filters
/**
* Gets a content filter to use for a `WithoutContent[content]` step, (data is not allowed to be stored in `content`)
* or has no result if
* the step should be treated as ordinary flow.
*
* `WithoutContent` is often used to perform strong updates on individual collection elements, but for
* type-tracking this is rarely beneficial and quite expensive. However, `WithoutContent` can be quite useful
* for restricting the type of an object, and in these cases we translate it to a filter.
*/
TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content);
/**
* Gets a content filter to use for a `WithContent[content]` step, (data must be stored in `content`)
* or has no result if
* the step cannot be handled by type-tracking.
*
* `WithContent` is often used to perform strong updates on individual collection elements (or rather
* to preserve those that didn't get updated). But for type-tracking this is rarely beneficial and quite expensive.
* However, `WithContent` can be quite useful for restricting the type of an object, and in these cases we translate it to a filter.
*/
TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content);
// Summaries and their stacks
class SummaryComponent;
class SummaryComponentStack {
SummaryComponent head();
}
/** Gets a singleton stack containing `component`. */
SummaryComponentStack singleton(SummaryComponent component);
/**
* Gets the stack obtained by pushing `head` onto `tail`.
*/
SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail);
/** Gets a singleton stack representing a return. */
SummaryComponent return();
// Relating content to summaries
/** Gets a summary component for content `c`. */
SummaryComponent content(TypeTrackerContent contents);
/** Gets a summary component where data is not allowed to be stored in `contents`. */
SummaryComponent withoutContent(TypeTrackerContent contents);
/** Gets a summary component where data must be stored in `contents`. */
SummaryComponent withContent(TypeTrackerContent contents);
// Callables
class SummarizedCallable {
predicate propagatesFlow(
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
);
}
// Relating nodes to summaries
/** Gets a dataflow node respresenting the argument of `call` indicated by `arg`. */
Node argumentOf(Node call, SummaryComponent arg);
/** Gets a dataflow node respresenting the parameter of `callable` indicated by `param`. */
Node parameterOf(Node callable, SummaryComponent param);
/** Gets a dataflow node respresenting the return of `callable` indicated by `return`. */
Node returnOf(Node callable, SummaryComponent return);
// Relating callables to nodes
/** Gets a dataflow node respresenting a call to `callable`. */
Node callTo(SummarizedCallable callable);
}
/**
* The predicates provided by a summary type tracker.
* These are meant to be used in `TypeTrackerSpecific.qll`
* inside the predicates of the same names.
*/
signature module Output<Input I> {
/**
* Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph.
*/
predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo);
/**
* Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`.
*/
predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content);
/**
* Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
*/
predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content);
/**
* Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`.
*/
predicate basicLoadStoreStep(
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent,
I::TypeTrackerContent storeContent
);
/**
* Holds if type-tracking should step from `nodeFrom` to `nodeTo` but block flow of contents matched by `filter` through here.
*/
predicate basicWithoutContentStep(
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
);
/**
* Holds if type-tracking should step from `nodeFrom` to `nodeTo` if inside a content matched by `filter`.
*/
predicate basicWithContentStep(
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
);
}
/**
* Implementation of the summary type tracker, that is type tracking through flow summaries.
*/
module SummaryFlow<Input I> implements Output<I> {
pragma[nomagic]
private predicate isNonLocal(I::SummaryComponent component) {
component = I::content(_)
or
component = I::withContent(_)
}
pragma[nomagic]
private predicate hasLoadSummary(
I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input,
I::SummaryComponentStack output
) {
callable.propagatesFlow(I::push(I::content(contents), input), output, true) and
not isNonLocal(input.head()) and
not isNonLocal(output.head())
}
pragma[nomagic]
private predicate hasStoreSummary(
I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input,
I::SummaryComponentStack output
) {
not isNonLocal(input.head()) and
not isNonLocal(output.head()) and
(
callable.propagatesFlow(input, I::push(I::content(contents), output), true)
or
// Allow the input to start with an arbitrary WithoutContent[X].
// Since type-tracking only tracks one content deep, and we're about to store into another content,
// we're already preventing the input from being in a content.
callable
.propagatesFlow(I::push(I::withoutContent(_), input),
I::push(I::content(contents), output), true)
)
}
pragma[nomagic]
private predicate hasLoadStoreSummary(
I::SummarizedCallable callable, I::TypeTrackerContent loadContents,
I::TypeTrackerContent storeContents, I::SummaryComponentStack input,
I::SummaryComponentStack output
) {
callable
.propagatesFlow(I::push(I::content(loadContents), input),
I::push(I::content(storeContents), output), true) and
not isNonLocal(input.head()) and
not isNonLocal(output.head())
}
pragma[nomagic]
private predicate hasWithoutContentSummary(
I::SummarizedCallable callable, I::TypeTrackerContentFilter filter,
I::SummaryComponentStack input, I::SummaryComponentStack output
) {
exists(I::TypeTrackerContent content |
callable.propagatesFlow(I::push(I::withoutContent(content), input), output, true) and
filter = I::getFilterFromWithoutContentStep(content) and
not isNonLocal(input.head()) and
not isNonLocal(output.head()) and
input != output
)
}
pragma[nomagic]
private predicate hasWithContentSummary(
I::SummarizedCallable callable, I::TypeTrackerContentFilter filter,
I::SummaryComponentStack input, I::SummaryComponentStack output
) {
exists(I::TypeTrackerContent content |
callable.propagatesFlow(I::push(I::withContent(content), input), output, true) and
filter = I::getFilterFromWithContentStep(content) and
not isNonLocal(input.head()) and
not isNonLocal(output.head()) and
input != output
)
}
private predicate componentLevelStep(I::SummaryComponent component) {
exists(I::TypeTrackerContent content |
component = I::withoutContent(content) and
not exists(I::getFilterFromWithoutContentStep(content))
)
}
/**
* Gets a data flow `I::Node` corresponding an argument or return value of `call`,
* as specified by `component`.
*/
bindingset[call, component]
private I::Node evaluateSummaryComponentLocal(I::Node call, I::SummaryComponent component) {
result = I::argumentOf(call, component)
or
component = I::return() and
result = call
}
/**
* Holds if `callable` is relevant for type-tracking and we therefore want `stack` to
* be evaluated locally at its call sites.
*/
pragma[nomagic]
private predicate dependsOnSummaryComponentStack(
I::SummarizedCallable callable, I::SummaryComponentStack stack
) {
exists(I::callTo(callable)) and
(
callable.propagatesFlow(stack, _, true)
or
callable.propagatesFlow(_, stack, true)
or
// include store summaries as they may skip an initial step at the input
hasStoreSummary(callable, _, stack, _)
)
or
dependsOnSummaryComponentStackCons(callable, _, stack)
}
pragma[nomagic]
private predicate dependsOnSummaryComponentStackCons(
I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail
) {
dependsOnSummaryComponentStack(callable, I::push(head, tail))
}
pragma[nomagic]
private predicate dependsOnSummaryComponentStackConsLocal(
I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail
) {
dependsOnSummaryComponentStackCons(callable, head, tail) and
not isNonLocal(head)
}
pragma[nomagic]
private predicate dependsOnSummaryComponentStackLeaf(
I::SummarizedCallable callable, I::SummaryComponent leaf
) {
dependsOnSummaryComponentStack(callable, I::singleton(leaf))
}
/**
* Gets a data flow I::Node corresponding to the local input or output of `call`
* identified by `stack`, if possible.
*/
pragma[nomagic]
private I::Node evaluateSummaryComponentStackLocal(
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack stack
) {
exists(I::SummaryComponent component |
dependsOnSummaryComponentStackLeaf(callable, component) and
stack = I::singleton(component) and
call = I::callTo(callable) and
result = evaluateSummaryComponentLocal(call, component)
)
or
exists(I::Node prev, I::SummaryComponent head, I::SummaryComponentStack tail |
prev = evaluateSummaryComponentStackLocal(callable, call, tail) and
dependsOnSummaryComponentStackConsLocal(callable, pragma[only_bind_into](head),
pragma[only_bind_out](tail)) and
stack = I::push(pragma[only_bind_out](head), pragma[only_bind_out](tail))
|
result = I::parameterOf(prev, head)
or
result = I::returnOf(prev, head)
or
componentLevelStep(head) and
result = prev
)
}
// Implement Output
predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo) {
exists(
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
I::SummaryComponentStack output
|
callable.propagatesFlow(input, output, true) and
call = I::callTo(callable) and
nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
)
}
predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) {
exists(
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
I::SummaryComponentStack output
|
hasLoadSummary(callable, content, pragma[only_bind_into](input),
pragma[only_bind_into](output)) and
call = I::callTo(callable) and
nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
)
}
predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) {
exists(
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
I::SummaryComponentStack output
|
hasStoreSummary(callable, content, pragma[only_bind_into](input),
pragma[only_bind_into](output)) and
call = I::callTo(callable) and
nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
)
}
predicate basicLoadStoreStep(
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent,
I::TypeTrackerContent storeContent
) {
exists(
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
I::SummaryComponentStack output
|
hasLoadStoreSummary(callable, loadContent, storeContent, pragma[only_bind_into](input),
pragma[only_bind_into](output)) and
call = I::callTo(callable) and
nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
)
}
predicate basicWithoutContentStep(
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
) {
exists(
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
I::SummaryComponentStack output
|
hasWithoutContentSummary(callable, filter, pragma[only_bind_into](input),
pragma[only_bind_into](output)) and
call = I::callTo(callable) and
nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
)
}
predicate basicWithContentStep(
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
) {
exists(
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
I::SummaryComponentStack output
|
hasWithContentSummary(callable, filter, pragma[only_bind_into](input),
pragma[only_bind_into](output)) and
call = I::callTo(callable) and
nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
)
}
}

View File

@@ -61,7 +61,9 @@ predicate capturedJumpStep(Node nodeFrom, Node nodeTo) {
predicate levelStepCall(Node nodeFrom, Node nodeTo) { none() }
/** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */
predicate levelStepNoCall(Node nodeFrom, Node nodeTo) { none() }
predicate levelStepNoCall(Node nodeFrom, Node nodeTo) {
TypeTrackerSummaryFlow::levelStepNoCall(nodeFrom, nodeTo)
}
/**
* Gets the name of a possible piece of content. For Python, this is currently only attribute names,
@@ -108,6 +110,12 @@ predicate basicStoreStep(Node nodeFrom, Node nodeTo, string content) {
nodeFrom = a.getValue() and
nodeTo = a.getObject()
)
or
exists(DataFlowPublic::ContentSet contents |
contents.(DataFlowPublic::AttributeContent).getAttribute() = content
|
TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, contents)
)
}
/**
@@ -119,13 +127,24 @@ predicate basicLoadStep(Node nodeFrom, Node nodeTo, string content) {
nodeFrom = a.getObject() and
nodeTo = a
)
or
exists(DataFlowPublic::ContentSet contents |
contents.(DataFlowPublic::AttributeContent).getAttribute() = content
|
TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, contents)
)
}
/**
* Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`.
*/
predicate basicLoadStoreStep(Node nodeFrom, Node nodeTo, string loadContent, string storeContent) {
none()
exists(DataFlowPublic::ContentSet loadContents, DataFlowPublic::ContentSet storeContents |
loadContents.(DataFlowPublic::AttributeContent).getAttribute() = loadContent and
storeContents.(DataFlowPublic::AttributeContent).getAttribute() = storeContent
|
TypeTrackerSummaryFlow::basicLoadStoreStep(nodeFrom, nodeTo, loadContents, storeContents)
)
}
/**
@@ -144,3 +163,93 @@ predicate basicWithContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter)
class Boolean extends boolean {
Boolean() { this = true or this = false }
}
private import SummaryTypeTracker as SummaryTypeTracker
private import semmle.python.dataflow.new.FlowSummary as FlowSummary
private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
pragma[noinline]
private predicate argumentPositionMatch(
DataFlowPublic::CallCfgNode call, DataFlowPublic::Node arg,
DataFlowDispatch::ParameterPosition ppos
) {
exists(DataFlowDispatch::ArgumentPosition apos |
DataFlowDispatch::parameterMatch(ppos, apos) and
DataFlowDispatch::normalCallArg(call.getNode(), arg, apos)
)
}
private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
// Dataflow nodes
class Node = DataFlowPublic::Node;
// Content
class TypeTrackerContent = DataFlowPublic::ContentSet;
class TypeTrackerContentFilter = ContentFilter;
TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content) { none() }
TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content) { none() }
// Callables
class SummarizedCallable = FlowSummary::SummarizedCallable;
// Summaries and their stacks
class SummaryComponent = FlowSummary::SummaryComponent;
class SummaryComponentStack = FlowSummary::SummaryComponentStack;
predicate singleton = FlowSummary::SummaryComponentStack::singleton/1;
predicate push = FlowSummary::SummaryComponentStack::push/2;
// Relating content to summaries
predicate content = FlowSummary::SummaryComponent::content/1;
SummaryComponent withoutContent(TypeTrackerContent contents) { none() }
SummaryComponent withContent(TypeTrackerContent contents) { none() }
predicate return = FlowSummary::SummaryComponent::return/0;
// Relating nodes to summaries
Node argumentOf(Node call, SummaryComponent arg) {
exists(DataFlowDispatch::ParameterPosition pos |
arg = FlowSummary::SummaryComponent::argument(pos) and
argumentPositionMatch(call, result, pos)
)
}
Node parameterOf(Node callable, SummaryComponent param) {
exists(
DataFlowDispatch::ArgumentPosition apos, DataFlowDispatch::ParameterPosition ppos, Parameter p
|
param = FlowSummary::SummaryComponent::parameter(apos) and
DataFlowDispatch::parameterMatch(ppos, apos) and
// pick the SsaNode rather than the CfgNode
result.asVar().getDefinition().(ParameterDefinition).getParameter() = p and
(
exists(int i | ppos.isPositional(i) |
p = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getArg(i)
)
or
exists(string name | ppos.isKeyword(name) |
p = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getArgByName(name)
)
)
)
}
Node returnOf(Node callable, SummaryComponent return) {
return = FlowSummary::SummaryComponent::return() and
// `result` should be the return value of a callable expression (lambda or function) referenced by `callable`
result.asCfgNode() =
callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getAReturnValueFlowNode()
}
// Relating callables to nodes
Node callTo(SummarizedCallable callable) { result = callable.getACallSimple() }
}
private module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow<SummaryTypeTrackerInput>;

View File

@@ -60,7 +60,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable {
}
private class SummarizedCallableReversed extends SummarizedCallable {
SummarizedCallableReversed() { this = "reversed" }
SummarizedCallableReversed() { this = "list_reversed" }
override DataFlow::CallCfgNode getACall() {
result.getFunction().asCfgNode().(NameNode).getId() = this

View File

@@ -66,3 +66,21 @@ SINK(tainted_list[0]) # $ flow="SOURCE, l:-1 -> tainted_list[0]"
from json import loads as json_loads
tainted_resultlist = json_loads(SOURCE)
SINK(tainted_resultlist[0]) # $ flow="SOURCE, l:-1 -> tainted_resultlist[0]"
# Class methods are not handled right now
class MyClass:
@staticmethod
def foo(x):
return x
def bar(self, x):
return x
through_staticmethod = apply_lambda(MyClass.foo, SOURCE)
through_staticmethod # $ MISSING: flow
mc = MyClass()
through_method = apply_lambda(mc.bar, SOURCE)
through_method # $ MISSING: flow

View File

@@ -0,0 +1,189 @@
private import python
private import semmle.python.dataflow.new.FlowSummary
private import semmle.python.ApiGraphs
/**
* This module ensures that the `callStep` predicate in
* our type tracker implementation does not refer to the
* `getACall` predicate on `SummarizedCallable`.
*/
module RecursionGuard {
private import semmle.python.dataflow.new.internal.TypeTrackerSpecific as TT
private class RecursionGuard extends SummarizedCallable {
RecursionGuard() { this = "TypeTrackingSummariesRecursionGuard" }
override DataFlow::CallCfgNode getACall() {
result.getFunction().asCfgNode().(NameNode).getId() = this and
(TT::callStep(_, _) implies any())
}
override DataFlow::CallCfgNode getACallSimple() { none() }
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
}
predicate test(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
TT::levelStepNoCall(nodeFrom, nodeTo)
}
}
private class SummarizedCallableIdentity extends SummarizedCallable {
SummarizedCallableIdentity() { this = "TTS_identity" }
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[0]" and
output = "ReturnValue" and
preservesValue = true
}
}
// For lambda flow to work, implement lambdaCall and lambdaCreation
private class SummarizedCallableApplyLambda extends SummarizedCallable {
SummarizedCallableApplyLambda() { this = "TTS_apply_lambda" }
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[1]" and
output = "Argument[0].Parameter[0]" and
preservesValue = true
or
input = "Argument[0].ReturnValue" and
output = "ReturnValue" and
preservesValue = true
}
}
private class SummarizedCallableReversed extends SummarizedCallable {
SummarizedCallableReversed() { this = "TTS_reversed" }
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[0].ListElement" and
output = "ReturnValue.ListElement" and
preservesValue = true
}
}
private class SummarizedCallableMap extends SummarizedCallable {
SummarizedCallableMap() { this = "TTS_list_map" }
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[1].ListElement" and
output = "Argument[0].Parameter[0]" and
preservesValue = true
or
input = "Argument[0].ReturnValue" and
output = "ReturnValue.ListElement" and
preservesValue = true
}
}
private class SummarizedCallableAppend extends SummarizedCallable {
SummarizedCallableAppend() { this = "TTS_append_to_list" }
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[0]" and
output = "ReturnValue" and
preservesValue = false
or
input = "Argument[1]" and
output = "ReturnValue.ListElement" and
preservesValue = true
}
}
private class SummarizedCallableJsonLoads extends SummarizedCallable {
SummarizedCallableJsonLoads() { this = "TTS_json.loads" }
override DataFlow::CallCfgNode getACall() {
result = API::moduleImport("json").getMember("loads").getACall()
}
override DataFlow::CallCfgNode getACallSimple() { none() }
override DataFlow::ArgumentNode getACallback() {
result = API::moduleImport("json").getMember("loads").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[0]" and
output = "ReturnValue.ListElement" and
preservesValue = true
}
}
// read and store
private class SummarizedCallableReadSecret extends SummarizedCallable {
SummarizedCallableReadSecret() { this = "TTS_read_secret" }
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[0].Attribute[secret]" and
output = "ReturnValue" and
preservesValue = true
}
}
private class SummarizedCallableSetSecret extends SummarizedCallable {
SummarizedCallableSetSecret() { this = "TTS_set_secret" }
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[1]" and
output = "Argument[0].Attribute[secret]" and
preservesValue = true
}
}

View File

@@ -0,0 +1,78 @@
import sys
import os
# Simple summary
tainted = TTS_identity(tracked) # $ tracked
tainted # $ tracked
# Lambda summary
# I think the missing result is expected because type tracking
# is not allowed to flow back out of a call.
tainted_lambda = TTS_apply_lambda(lambda x: x, tracked) # $ tracked
tainted_lambda # $ MISSING: tracked
# A lambda that directly introduces taint
bad_lambda = TTS_apply_lambda(lambda x: tracked, 1) # $ tracked
bad_lambda # $ tracked
# A lambda that breaks the flow
untainted_lambda = TTS_apply_lambda(lambda x: 1, tracked) # $ tracked
untainted_lambda
# Collection summaries
tainted_list = TTS_reversed([tracked]) # $ tracked
tl = tainted_list[0]
tl # $ MISSING: tracked
# Complex summaries
def add_colon(x):
return x + ":"
tainted_mapped = TTS_list_map(add_colon, [tracked]) # $ tracked
tm = tainted_mapped[0]
tm # $ MISSING: tracked
def explicit_identity(x):
return x
tainted_mapped_explicit = TTS_list_map(explicit_identity, [tracked]) # $ tracked
tainted_mapped_explicit[0] # $ MISSING: tracked
tainted_mapped_summary = TTS_list_map(identity, [tracked]) # $ tracked
tms = tainted_mapped_summary[0]
tms # $ MISSING: tracked
another_tainted_list = TTS_append_to_list([], tracked) # $ tracked
atl = another_tainted_list[0]
atl # $ MISSING: tracked
# This will not work, as the call is not found by `getACallSimple`.
from json import loads as json_loads
tainted_resultlist = json_loads(tracked) # $ tracked
tr = tainted_resultlist[0]
tr # $ MISSING: tracked
x.secret = tracked # $ tracked=secret tracked
r = TTS_read_secret(x) # $ tracked=secret tracked
r # $ tracked
y # $ tracked=secret
TTS_set_secret(y, tracked) # $ tracked tracked=secret
y.secret # $ tracked tracked=secret
# Class methods are not handled right now
class MyClass:
@staticmethod
def foo(x):
return x
def bar(self, x):
return x
through_staticmethod = TTS_apply_lambda(MyClass.foo, tracked) # $ tracked
through_staticmethod # $ MISSING: tracked
mc = MyClass()
through_method = TTS_apply_lambda(mc.bar, tracked) # $ tracked
through_method # $ MISSING: tracked

View File

@@ -0,0 +1,36 @@
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TypeTracker
import TestUtilities.InlineExpectationsTest
import semmle.python.ApiGraphs
import TestSummaries
// -----------------------------------------------------------------------------
// tracked
// -----------------------------------------------------------------------------
private DataFlow::TypeTrackingNode tracked(TypeTracker t) {
t.start() and
result.asCfgNode() = any(NameNode n | n.getId() = "tracked")
or
exists(TypeTracker t2 | result = tracked(t2).track(t2, t))
}
class TrackedTest extends InlineExpectationsTest {
TrackedTest() { this = "TrackedTest" }
override string getARelevantTag() { result = "tracked" }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(DataFlow::Node e, TypeTracker t |
exists(e.getLocation().getFile().getRelativePath()) and
e.getLocation().getStartLine() > 0 and
tracked(t).flowsTo(e) and
// Module variables have no sensible location, and hence can't be annotated.
not e instanceof DataFlow::ModuleVariableNode and
tag = "tracked" and
location = e.getLocation() and
value = t.getAttr() and
element = e.toString()
)
}
}