ruby/python: implement shared module

ruby:
- create new shared file `SummaryTypeTracker.qll`
- move most of the logic into the module
- instantiate the module
- remove old logic, now provided by module

python:
- clone shared file
- instantiate module
- use (some of the) steps provided by the module
This commit is contained in:
Rasmus Lerchedahl Petersen
2023-05-16 14:20:29 +02:00
parent 47b2d48da2
commit 2daa9577bb
6 changed files with 1011 additions and 269 deletions

View File

@@ -0,0 +1,382 @@
/**
* Provides the implementation of a summary type tracker, that is, type tracking through flow summaries.
* To use this, you must implement the `Input` signature. You can then use the predicates in the `Output`
* signature to implement the predicates of the same names inside `TypeTrackerSpecific.qll`.
*/
/**
* The classes and predicates needed to generate a summary type tracker.
*
* A module implementing this signature is passed to `SummaryFlow` to instantiate it.
*/
signature module Input {
// Dataflow nodes
/** A data flow node. */
class Node;
// Content
/** A content, as used by the load and store steps of `Output`. */
class TypeTrackerContent;
/** A content filter, as used by the `WithoutContent` and `WithContent` steps of `Output`. */
class TypeTrackerContentFilter;
// Relating content and filters
/**
* Gets a content filter to use for a `WithoutContent[content]` step, or has no result if
* the step should be treated as ordinary flow.
*
* `WithoutContent` is often used to perform strong updates on individual collection elements, but for
* type-tracking this is rarely beneficial and quite expensive. However, `WithoutContent` can be quite useful
* for restricting the type of an object, and in these cases we translate it to a filter.
*/
TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content);
/**
* Gets a content filter to use for a `WithContent[content]` step, or has no result if
* the step cannot be handled by type-tracking.
*
* `WithContent` is often used to perform strong updates on individual collection elements (or rather
* to preserve those that didn't get updated). But for type-tracking this is rarely beneficial and quite expensive.
* However, `WithContent` can be quite useful for restricting the type of an object, and in these cases we translate it to a filter.
*/
TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content);
// Summaries and their stacks
/** A summary component, that is, a single element of a `SummaryComponentStack`. */
class SummaryComponent;
/** A stack of summary components, built using `singleton` and `push`. */
class SummaryComponentStack {
/** Gets the head of this stack. */
SummaryComponent head();
}
/** Gets a singleton stack containing `component`. */
SummaryComponentStack singleton(SummaryComponent component);
/**
* Gets the stack obtained by pushing `component` onto `stack`.
*/
SummaryComponentStack push(SummaryComponent component, SummaryComponentStack stack);
/** Gets a summary component representing a return. */
SummaryComponent return();
// Relating content to summaries
/** Gets a summary component for content `contents`. */
SummaryComponent content(TypeTrackerContent contents);
/** Gets a summary component where data is not allowed to be stored in `contents`. */
SummaryComponent withoutContent(TypeTrackerContent contents);
/** Gets a summary component where data must be stored in `contents`. */
SummaryComponent withContent(TypeTrackerContent contents);
// Callables
/** A callable that has flow summaries. */
class SummarizedCallable {
/**
* Holds if this callable has a flow summary from `input` to `output`.
*
* `SummaryFlow` only consults summaries for which `preservesValue` is `true`.
*/
predicate propagatesFlow(
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
);
}
// Relating nodes to summaries
/** Gets a dataflow node representing the argument of `call` indicated by `arg`. */
Node argumentOf(Node call, SummaryComponent arg);
/** Gets a dataflow node representing the parameter of `callable` indicated by `param`. */
Node parameterOf(Node callable, SummaryComponent param);
/** Gets a dataflow node representing the return of `callable` indicated by `return`. */
Node returnOf(Node callable, SummaryComponent return);
// Specific summary handling
/** Holds if component should be treated as a level step by type tracking. */
predicate componentLevelStep(SummaryComponent component);
/** Holds if the given component can't be evaluated by `evaluateSummaryComponentStackLocal`. */
predicate isNonLocal(SummaryComponent component);
// Relating callables to nodes
/** Gets a dataflow node representing a call to `callable`. */
Node callTo(SummarizedCallable callable);
}
/**
* The predicates provided by a summary type tracker.
* These are meant to be used in `TypeTrackerSpecific.qll`
* inside the predicates of the same names.
*
* `SummaryFlow` is the module in this file that implements this signature.
* All nodes, contents and filters are the ones supplied by the `Input` module `I`.
*/
signature module Output<Input I> {
/**
* Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph.
*/
predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo);
/**
* Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`.
*/
predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content);
/**
* Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
*/
predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content);
/**
* Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`.
*/
predicate basicLoadStoreStep(
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent,
I::TypeTrackerContent storeContent
);
/**
* Holds if type-tracking should step from `nodeFrom` to `nodeTo` but block flow of contents
* matched by `filter` through here.
*/
predicate basicWithoutContentStep(
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
);
/**
* Holds if type-tracking should step from `nodeFrom` to `nodeTo` if inside a content
* matched by `filter`.
*/
predicate basicWithContentStep(
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
);
}
/**
* Implementation of the summary type tracker, that is, type tracking through flow summaries.
*
* Each `Output` predicate follows the same pattern: find a summarized callable with a
* value-preserving summary of the relevant shape, pick a call to that callable, and relate the
* nodes obtained by evaluating the summary's input and output stacks locally at that call.
*/
module SummaryFlow<Input I> implements Output<I> {
/**
* Holds if `callable` has a value-preserving summary whose input is `input` with a
* `content(contents)` component pushed onto it and whose output is `output`, and neither
* the head of `input` nor the head of `output` is non-local.
*/
pragma[nomagic]
private predicate hasLoadSummary(
I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input,
I::SummaryComponentStack output
) {
callable.propagatesFlow(I::push(I::content(contents), input), output, true) and
not I::isNonLocal(input.head()) and
not I::isNonLocal(output.head())
}
/**
* Holds if `callable` has a value-preserving summary whose output is `output` with a
* `content(contents)` component pushed onto it and whose input is `input`, optionally with
* a `withoutContent` component pushed onto it. Neither the head of `input` nor the head of
* `output` may be non-local.
*/
pragma[nomagic]
private predicate hasStoreSummary(
I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input,
I::SummaryComponentStack output
) {
not I::isNonLocal(input.head()) and
not I::isNonLocal(output.head()) and
(
callable.propagatesFlow(input, I::push(I::content(contents), output), true)
or
// Allow the input to start with an arbitrary WithoutContent[X].
// Since type-tracking only tracks one content deep, and we're about to store into another content,
// we're already preventing the input from being in a content.
callable
.propagatesFlow(I::push(I::withoutContent(_), input),
I::push(I::content(contents), output), true)
)
}
/**
* Holds if `callable` has a value-preserving summary whose input is `input` with a
* `content(loadContents)` component pushed onto it and whose output is `output` with a
* `content(storeContents)` component pushed onto it, and neither the head of `input` nor
* the head of `output` is non-local.
*/
pragma[nomagic]
private predicate hasLoadStoreSummary(
I::SummarizedCallable callable, I::TypeTrackerContent loadContents,
I::TypeTrackerContent storeContents, I::SummaryComponentStack input,
I::SummaryComponentStack output
) {
callable
.propagatesFlow(I::push(I::content(loadContents), input),
I::push(I::content(storeContents), output), true) and
not I::isNonLocal(input.head()) and
not I::isNonLocal(output.head())
}
/**
* Holds if `callable` has a value-preserving summary whose input is `input` with a
* `withoutContent(content)` component pushed onto it and whose output is `output`, where
* `filter` is the filter that `I::getFilterFromWithoutContentStep` gives for `content`.
* The heads of `input` and `output` must not be non-local, and `input` and `output` must differ.
*/
pragma[nomagic]
private predicate hasWithoutContentSummary(
I::SummarizedCallable callable, I::TypeTrackerContentFilter filter,
I::SummaryComponentStack input, I::SummaryComponentStack output
) {
exists(I::TypeTrackerContent content |
callable.propagatesFlow(I::push(I::withoutContent(content), input), output, true) and
filter = I::getFilterFromWithoutContentStep(content) and
not I::isNonLocal(input.head()) and
not I::isNonLocal(output.head()) and
input != output
)
}
/**
* Holds if `callable` has a value-preserving summary whose input is `input` with a
* `withContent(content)` component pushed onto it and whose output is `output`, where
* `filter` is the filter that `I::getFilterFromWithContentStep` gives for `content`.
* The heads of `input` and `output` must not be non-local, and `input` and `output` must differ.
*/
pragma[nomagic]
private predicate hasWithContentSummary(
I::SummarizedCallable callable, I::TypeTrackerContentFilter filter,
I::SummaryComponentStack input, I::SummaryComponentStack output
) {
exists(I::TypeTrackerContent content |
callable.propagatesFlow(I::push(I::withContent(content), input), output, true) and
filter = I::getFilterFromWithContentStep(content) and
not I::isNonLocal(input.head()) and
not I::isNonLocal(output.head()) and
input != output
)
}
/**
* Gets a data flow node corresponding to an argument or the return value of `call`,
* as specified by `component`.
*
* For an argument component this is the node given by `I::argumentOf`; for the return
* component this is `call` itself.
*/
bindingset[call, component]
private I::Node evaluateSummaryComponentLocal(I::Node call, I::SummaryComponent component) {
result = I::argumentOf(call, component)
or
component = I::return() and
result = call
}
/**
* Holds if `callable` is relevant for type-tracking and we therefore want `stack` to
* be evaluated locally at its call sites.
*
* This is the case when there is a call to `callable` and `stack` is the input or output of
* one of its value-preserving summaries (or the input of a store summary), and also when
* `stack` is the tail of a stack for which this predicate already holds.
*/
pragma[nomagic]
private predicate dependsOnSummaryComponentStack(
I::SummarizedCallable callable, I::SummaryComponentStack stack
) {
exists(I::callTo(callable)) and
(
callable.propagatesFlow(stack, _, true)
or
callable.propagatesFlow(_, stack, true)
or
// include store summaries as they may skip an initial step at the input
hasStoreSummary(callable, _, stack, _)
)
or
dependsOnSummaryComponentStackCons(callable, _, stack)
}
/**
* Holds if the stack obtained by pushing `head` onto `tail` is one that
* `dependsOnSummaryComponentStack` holds for, together with `callable`.
*/
pragma[nomagic]
private predicate dependsOnSummaryComponentStackCons(
I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail
) {
dependsOnSummaryComponentStack(callable, I::push(head, tail))
}
/**
* Holds if `dependsOnSummaryComponentStackCons(callable, head, tail)` holds and
* `head` is not a non-local component.
*/
pragma[nomagic]
private predicate dependsOnSummaryComponentStackConsLocal(
I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail
) {
dependsOnSummaryComponentStackCons(callable, head, tail) and
not I::isNonLocal(head)
}
/**
* Holds if the singleton stack containing `leaf` is one that
* `dependsOnSummaryComponentStack` holds for, together with `callable`.
*/
pragma[nomagic]
private predicate dependsOnSummaryComponentStackLeaf(
I::SummarizedCallable callable, I::SummaryComponent leaf
) {
dependsOnSummaryComponentStack(callable, I::singleton(leaf))
}
/**
* Gets a data flow node corresponding to the local input or output of `call`
* identified by `stack`, if possible.
*
* A singleton stack is evaluated directly at `call`, which must be a call to `callable`.
* A stack of the form `push(head, tail)` is evaluated by first evaluating `tail` to a node
* `prev` and then interpreting `head` relative to `prev`: as a parameter of `prev`, as a
* return of `prev`, or as `prev` itself when `head` is a level-step component.
*/
pragma[nomagic]
private I::Node evaluateSummaryComponentStackLocal(
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack stack
) {
// Base case: a single component, evaluated at the call itself.
exists(I::SummaryComponent component |
dependsOnSummaryComponentStackLeaf(callable, component) and
stack = I::singleton(component) and
call = I::callTo(callable) and
result = evaluateSummaryComponentLocal(call, component)
)
or
// Recursive case: evaluate the tail, then take one more step for the (local) head.
exists(I::Node prev, I::SummaryComponent head, I::SummaryComponentStack tail |
prev = evaluateSummaryComponentStackLocal(callable, call, tail) and
dependsOnSummaryComponentStackConsLocal(callable, pragma[only_bind_into](head),
pragma[only_bind_out](tail)) and
stack = I::push(pragma[only_bind_out](head), pragma[only_bind_out](tail))
|
result = I::parameterOf(prev, head)
or
result = I::returnOf(prev, head)
or
I::componentLevelStep(head) and
result = prev
)
}
// Implement Output
/**
* Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph.
*
* Such a step exists when some call to a summarized callable has a value-preserving summary
* whose input stack evaluates locally to `nodeFrom` and whose output stack evaluates locally
* to `nodeTo`.
*/
predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo) {
exists(
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
I::SummaryComponentStack output
|
callable.propagatesFlow(input, output, true) and
call = I::callTo(callable) and
nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
)
}
/**
* Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`,
* as described by a load summary (see `hasLoadSummary`) of a callable called at some call.
*/
predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) {
exists(
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
I::SummaryComponentStack output
|
hasLoadSummary(callable, content, pragma[only_bind_into](input),
pragma[only_bind_into](output)) and
call = I::callTo(callable) and
nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
)
}
/**
* Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`,
* as described by a store summary (see `hasStoreSummary`) of a callable called at some call.
*/
predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) {
exists(
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
I::SummaryComponentStack output
|
hasStoreSummary(callable, content, pragma[only_bind_into](input),
pragma[only_bind_into](output)) and
call = I::callTo(callable) and
nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
)
}
/**
* Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`,
* as described by a load-store summary (see `hasLoadStoreSummary`) of a callable called at some call.
*/
predicate basicLoadStoreStep(
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent,
I::TypeTrackerContent storeContent
) {
exists(
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
I::SummaryComponentStack output
|
hasLoadStoreSummary(callable, loadContent, storeContent, pragma[only_bind_into](input),
pragma[only_bind_into](output)) and
call = I::callTo(callable) and
nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
)
}
/**
* Holds if type-tracking should step from `nodeFrom` to `nodeTo` but block flow of contents
* matched by `filter` through here, as described by a summary matching `hasWithoutContentSummary`.
*/
predicate basicWithoutContentStep(
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
) {
exists(
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
I::SummaryComponentStack output
|
hasWithoutContentSummary(callable, filter, pragma[only_bind_into](input),
pragma[only_bind_into](output)) and
call = I::callTo(callable) and
nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
)
}
/**
* Holds if type-tracking should step from `nodeFrom` to `nodeTo` if inside a content
* matched by `filter`, as described by a summary matching `hasWithContentSummary`.
*/
predicate basicWithContentStep(
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
) {
exists(
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
I::SummaryComponentStack output
|
hasWithContentSummary(callable, filter, pragma[only_bind_into](input),
pragma[only_bind_into](output)) and
call = I::callTo(callable) and
nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input) and
nodeTo = evaluateSummaryComponentStackLocal(callable, call, output)
)
}
}

View File

@@ -61,7 +61,9 @@ predicate capturedJumpStep(Node nodeFrom, Node nodeTo) {
predicate levelStepCall(Node nodeFrom, Node nodeTo) { none() }
/** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */
predicate levelStepNoCall(Node nodeFrom, Node nodeTo) { none() }
predicate levelStepNoCall(Node nodeFrom, Node nodeTo) {
TypeTrackerSummaryFlow::levelStepNoCall(nodeFrom, nodeTo)
}
/**
* Gets the name of a possible piece of content. For Python, this is currently only attribute names,
@@ -108,6 +110,12 @@ predicate basicStoreStep(Node nodeFrom, Node nodeTo, string content) {
nodeFrom = a.getValue() and
nodeTo = a.getObject()
)
or
exists(DataFlowPublic::ContentSet contents |
contents.(DataFlowPublic::AttributeContent).getAttribute() = content
|
TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, contents)
)
}
/**
@@ -119,13 +127,24 @@ predicate basicLoadStep(Node nodeFrom, Node nodeTo, string content) {
nodeFrom = a.getObject() and
nodeTo = a
)
or
exists(DataFlowPublic::ContentSet contents |
contents.(DataFlowPublic::AttributeContent).getAttribute() = content
|
TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, contents)
)
}
/**
* Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`.
*/
predicate basicLoadStoreStep(Node nodeFrom, Node nodeTo, string loadContent, string storeContent) {
none()
exists(DataFlowPublic::ContentSet loadContents, DataFlowPublic::ContentSet storeContents |
loadContents.(DataFlowPublic::AttributeContent).getAttribute() = loadContent and
storeContents.(DataFlowPublic::AttributeContent).getAttribute() = storeContent
|
TypeTrackerSummaryFlow::basicLoadStoreStep(nodeFrom, nodeTo, loadContents, storeContents)
)
}
/**
@@ -144,3 +163,108 @@ predicate basicWithContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter)
class Boolean extends boolean {
Boolean() { this = true or this = false }
}
private import SummaryTypeTracker as SummaryTypeTracker
private import semmle.python.dataflow.new.FlowSummary as FlowSummary
private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
/**
 * Holds if `arg` is an argument of the data flow call underlying `call`, passed at an
 * argument position that matches the parameter position `ppos`.
 */
pragma[noinline]
private predicate argumentPositionMatch(
  DataFlowPublic::CallCfgNode call, DataFlowPublic::ArgumentNode arg,
  DataFlowDispatch::ParameterPosition ppos
) {
  exists(DataFlowPrivate::DataFlowCall dataFlowCall, DataFlowDispatch::ArgumentPosition argPos |
    DataFlowDispatch::parameterMatch(ppos, argPos) and
    arg.argumentOf(dataFlowCall, argPos) and
    dataFlowCall.getNode() = call.asCfgNode()
  )
}
/**
* The Python-specific input to the shared summary type tracker.
*
* Maps the abstract classes and predicates of `SummaryTypeTracker::Input` onto the
* Python data flow and flow summary libraries.
*/
module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
// Dataflow nodes
class Node = DataFlowPublic::Node;
// Content
class TypeTrackerContent = DataFlowPublic::ContentSet;
class TypeTrackerContentFilter = ContentFilter;
/** Has no result: no filter is produced for any `WithoutContent` step. */
TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content) { none() }
/** Has no result: no filter is produced for any `WithContent` step. */
TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content) { none() }
// Callables
class SummarizedCallable = FlowSummary::SummarizedCallable;
// Summaries and their stacks
class SummaryComponent = FlowSummary::SummaryComponent;
class SummaryComponentStack = FlowSummary::SummaryComponentStack;
/** Gets a singleton stack containing `component`. */
SummaryComponentStack singleton(SummaryComponent component) {
result = FlowSummary::SummaryComponentStack::singleton(component)
}
/** Gets the stack obtained by pushing `component` onto `stack`. */
SummaryComponentStack push(SummaryComponent component, SummaryComponentStack stack) {
result = FlowSummary::SummaryComponentStack::push(component, stack)
}
// Relating content to summaries
/** Gets a summary component for content `contents`. */
SummaryComponent content(TypeTrackerContent contents) {
result = FlowSummary::SummaryComponent::content(contents)
}
/** Has no result: `WithoutContent` components are not mapped here. */
SummaryComponent withoutContent(TypeTrackerContent contents) { none() }
/** Has no result: `WithContent` components are not mapped here. */
SummaryComponent withContent(TypeTrackerContent contents) { none() }
/** Gets the summary component representing a return. */
SummaryComponent return() { result = FlowSummary::SummaryComponent::return() }
// Relating nodes to summaries
/**
* Gets a dataflow node representing the argument of `call` indicated by `arg`.
*
* `arg` must be an argument component for some parameter position; the result is an
* argument of `call` whose argument position matches that parameter position.
*/
Node argumentOf(Node call, SummaryComponent arg) {
exists(DataFlowDispatch::ParameterPosition pos |
arg = FlowSummary::SummaryComponent::argument(pos) and
argumentPositionMatch(call, result, pos)
)
}
/**
* Gets a dataflow node representing the parameter of `callable` indicated by `param`.
*
* `callable` is resolved through its local sources to a `CallableExpr`; the parameter is
* then looked up in the inner scope of that expression, either by index (positional) or
* by name (keyword).
*/
Node parameterOf(Node callable, SummaryComponent param) {
exists(
DataFlowDispatch::ArgumentPosition apos, DataFlowDispatch::ParameterPosition ppos, Parameter p
|
param = FlowSummary::SummaryComponent::parameter(apos) and
DataFlowDispatch::parameterMatch(ppos, apos) and
// pick the SsaNode rather than the CfgNode
result.asVar().getDefinition().(ParameterDefinition).getParameter() = p and
(
exists(int i | ppos.isPositional(i) |
p = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getArg(i)
)
or
exists(string name | ppos.isKeyword(name) |
p = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getArgByName(name)
)
)
)
}
/**
* Gets a dataflow node representing the return of `callable` indicated by `return`.
*
* `return` must be the return component; the result is a return value node of the
* inner scope of a `CallableExpr` that is a local source of `callable`.
*/
Node returnOf(Node callable, SummaryComponent return) {
return = FlowSummary::SummaryComponent::return() and
// result should be return value of callable which should be a lambda
result.asCfgNode() =
callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getAReturnValueFlowNode()
}
// Specific summary handling
/** Never holds: no component is treated as a level step. */
predicate componentLevelStep(SummaryComponent component) { none() }
/** Holds if `component` is a content component; these are treated as non-local. */
pragma[nomagic]
predicate isNonLocal(SummaryComponent component) {
component = FlowSummary::SummaryComponent::content(_)
}
// Relating callables to nodes
/** Gets a dataflow node representing a call to `callable`, as given by `getACallSimple`. */
Node callTo(SummarizedCallable callable) { result = callable.getACallSimple() }
}
/** The shared summary type tracker, instantiated with the input module `SummaryTypeTrackerInput`. */
module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow<SummaryTypeTrackerInput>;

View File

@@ -3,7 +3,7 @@ import os
# Simple summary
tainted = identity(tracked) # $ tracked
tainted # $ MISSING: tracked
tainted # $ tracked
# Lambda summary
# I think the missing result is expected because type tracking
@@ -13,7 +13,7 @@ tainted_lambda # $ MISSING: tracked
# A lambda that directly introduces taint
bad_lambda = apply_lambda(lambda x: tracked, 1) # $ tracked
bad_lambda # $ MISSING: tracked
bad_lambda # $ tracked
# A lambda that breaks the flow
untainted_lambda = apply_lambda(lambda x: 1, tracked) # $ tracked
@@ -52,9 +52,9 @@ tr = tainted_resultlist[0]
tr # $ MISSING: tracked
x.secret = tracked # $ tracked=secret tracked
r = read_secret(x) # $ tracked=secret MISSING: tracked
r # $ MISSING: tracked
r = read_secret(x) # $ tracked=secret tracked
r # $ tracked
y # $ MISSING: tracked=secret
set_secret(y, tracked) # $ tracked MISSING: tracked=secret
y.secret # $ MISSING: tracked tracked=secret
y # $ tracked=secret
set_secret(y, tracked) # $ tracked tracked=secret
y.secret # $ tracked tracked=secret