python: some summary flows

This commit is contained in:
Rasmus Lerchedahl Petersen
2022-04-04 15:31:52 +02:00
committed by GitHub
parent 8c263b349f
commit 4024ce4777
8 changed files with 298 additions and 7 deletions

View File

@@ -115,3 +115,63 @@ private class SummarizedCallableAdapter extends Impl::Public::SummarizedCallable
}
/** Alias that exposes `Impl::Public::RequiredSummaryComponentStack` under an unqualified name. */
class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack;
// private module ForTesting {
// private class SummarizedCallableIdentity extends SummarizedCallable {
// SummarizedCallableIdentity() { this = "identity" }
// override Call getACall() { result.getFunc().(Name).getId() = this }
// override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// input = "Argument[0]" and
// output = "ReturnValue" and
// preservesValue = true
// }
// }
// // For lambda flow to work, implement lambdaCall and lambdaCreation
// private class SummarizedCallableApplyLambda extends SummarizedCallable {
// SummarizedCallableApplyLambda() { this = "apply_lambda" }
// override Call getACall() { result.getFunc().(Name).getId() = this }
// override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// input = "Argument[1]" and
// output = "Parameter[0] of Argument[0]" and
// preservesValue = true
// or
// input = "ReturnValue of Argument[0]" and
// output = "ReturnValue" and
// preservesValue = true
// }
// }
// private class SummarizedCallableReversed extends SummarizedCallable {
// SummarizedCallableReversed() { this = "reversed" }
// override Call getACall() { result.getFunc().(Name).getId() = this }
// override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// input = "ListElement of Argument[0]" and
// output = "ListElement of ReturnValue" and
// preservesValue = true
// }
// }
// private class SummarizedCallableMap extends SummarizedCallable {
// SummarizedCallableMap() { this = "map" }
// override Call getACall() { result.getFunc().(Name).getId() = this }
// override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// input = "ListElement of Argument[1]" and
// output = "Parameter[0] of Argument[0]" and
// preservesValue = true
// or
// input = "ReturnValue of Argument[0]" and
// output = "ListElement of ReturnValue" and
// preservesValue = true
// }
// }
// // Typetracking needs to use a local flow step not including summaries
// // Typetracking needs to use a call graph not including summaries
// // private class SummarizedCallableJsonLoads extends SummarizedCallable {
// // SummarizedCallableJsonLoads() { this = "json.loads" }
// // override Call getACall() {
// // result = API::moduleImport("json").getMember("loads").getACall().asExpr()
// // }
// // override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// // input = "Argument[0]" and
// // output = "ListElement of ReturnValue" and
// // preservesValue = true
// // }
// // }
// }

View File

@@ -365,6 +365,8 @@ class DataFlowLambda extends DataFlowCallable, TLambda {
override FunctionValue getCallableValue() {
  // The function value whose origin node is the definition of `lambda`.
  lambda.getDefinition() = result.getOrigin().getNode()
}
/** Gets the expression given by `lambda.getDefinition()`. */
Expr getDefinition() {
  lambda.getDefinition() = result
}
}
/** A class representing the scope in which a `ModuleVariableNode` appears. */
@@ -386,6 +388,24 @@ class DataFlowModuleScope extends DataFlowCallable, TModule {
// Never has a result: the body is `none()`.
override CallableValue getCallableValue() { none() }
}
/**
 * A data-flow callable wrapping a `LibraryCallable`, i.e. the
 * `TLibraryCallable` branch of the callable type.
 */
class LibraryCallableValue extends DataFlowCallable, TLibraryCallable {
  LibraryCallable callable;

  LibraryCallableValue() { TLibraryCallable(callable) = this }

  /** Gets the textual representation of the wrapped library callable. */
  override string toString() { callable.toString() = result }

  /** Gets a call node whose underlying AST node is reported by `callable.getACall()`. */
  override CallNode getACall() { callable.getACall() = result.getNode() }

  /** Has no result: the body is `none()`. */
  override Scope getScope() { none() }

  /** Has no result: the body is `none()`. */
  override NameNode getParameter(int n) { none() }

  /** Gets the wrapped library callable itself, used as the name. */
  override string getName() { callable = result }

  /** Gets the wrapped library callable. */
  override LibraryCallable asLibraryCallable() { callable = result }
}
/**
* IPA type for DataFlowCall.
*
@@ -406,11 +426,15 @@ newtype TDataFlowCall =
TMethodCall(CallNode call) { call = any(FunctionValue f).getAMethodCall() } or
TClassCall(CallNode call) { call = any(ClassValue c | not c.isAbsent()).getACall() } or
TSpecialCall(SpecialMethodCallNode special) or
/** A call to a summarized callable */
TLibraryCall(CallNode call) { call.getNode() = any(LibraryCallable lc).getACall() } or
/** A synthesized call inside a summarized callable */
TSummaryCall(FlowSummaryImpl::Public::SummarizedCallable c, Node receiver) {
FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
}
class TDataFlowSourceCall = TFunctionCall or TMethodCall or TClassCall or TSpecialCall;
/**
 * The union of the `TDataFlowCall` branches for function, method, class,
 * special and library calls. `TSummaryCall` is not part of this union.
 */
class TDataFlowSourceCall =
TFunctionCall or TMethodCall or TClassCall or TSpecialCall or TLibraryCall;
/** A call. */
abstract class DataFlowCall extends TDataFlowCall {
@@ -562,6 +586,24 @@ class SpecialCall extends DataFlowSourceCall, TSpecialCall {
}
}
/**
 * A call to a library callable: a `TLibraryCall` whose call node has an
 * underlying AST node reported by `callable.getACall()`.
 */
class LibraryCall extends DataFlowSourceCall, TLibraryCall {
  CallNode call;
  LibraryCallable callable;

  LibraryCall() {
    callable.getACall() = call.getNode() and
    TLibraryCall(call) = this
  }

  /** Gets the textual representation of the underlying call node. */
  override string toString() { call.toString() = result }

  // TODO: Implement Python calling convention?
  /** Gets the data-flow node for the `n`th argument of the underlying call node. */
  override Node getArg(int n) { TCfgNode(call.getArg(n)) = result }

  /** Gets the underlying call node. */
  override ControlFlowNode getNode() { call = result }

  /** Gets the data-flow callable that wraps `callable`. */
  override DataFlowCallable getCallable() { callable = result.asLibraryCallable() }

  /** Gets a callable whose scope equals the scope of the call's AST node. */
  override DataFlowCallable getEnclosingCallable() {
    call.getNode().getScope() = result.getScope()
  }
}
/**
* A synthesized call inside a callable with a flow summary.
*
@@ -601,7 +643,7 @@ class SummaryParameterNode extends ParameterNode, TSummaryParameterNode {
}
/** A data-flow node used to model flow summaries. */
private class SummaryNode extends Node, TSummaryNode {
class SummaryNode extends Node, TSummaryNode {
private FlowSummaryImpl::Public::SummarizedCallable c;
private FlowSummaryImpl::Private::SummaryNodeState state;
@@ -637,7 +679,14 @@ private class SummaryPostUpdateNode extends SummaryNode, PostUpdateNode {
}
/** Gets a viable run-time target for the call `call`. */
DataFlowCallable viableCallable(DataFlowSourceCall call) { result = call.getCallable() }
DataFlowCallable viableCallable(DataFlowSourceCall call) {
  // A library callable is a target when the AST node of the call is one of
  // the calls it reports.
  exists(LibraryCallable lib |
    lib.getACall() = call.getNode().getNode() and
    TLibraryCallable(lib) = result
  )
  or
  // Additionally, any target reported by the call itself.
  call.getCallable() = result
}
private newtype TReturnKind = TNormalReturnKind()

View File

@@ -22,7 +22,7 @@ import DataFlowDispatchPointsTo
DataFlowCallable nodeGetEnclosingCallable(Node n) {
  // Thin wrapper exposing `Node.getEnclosingCallable` as a top-level predicate.
  n.getEnclosingCallable() = result
}
/** Holds if `p` is a `ParameterNode` of `c` with position `pos`. */
predicate isParameterNode(SourceParameterNode p, DataFlowCallable c, ParameterPosition pos) {
predicate isParameterNode(ParameterNode p, DataFlowCallable c, ParameterPosition pos) {
// Delegates to `ParameterNode.isParameterOf`; `p` ranges over every `ParameterNode`.
p.isParameterOf(c, pos)
}
@@ -892,10 +892,26 @@ predicate nodeIsHidden(Node n) {
// Lambda call kinds are not distinguished here: the kind type is an alias for `Unit`.
class LambdaCallKind = Unit;
/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) { none() }
predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) {
  // The kind carries no information; `exists(kind)` merely mentions the
  // parameter without constraining it.
  exists(kind) and
  (
    // lambda: the creation is the defining expression of the lambda itself
    creation.asExpr() = c.(DataFlowLambda).getDefinition()
    or
    // normal function: the creation is a name, passed as a call argument,
    // that matches both the name of a defined function and the name of `c`
    exists(Call call, Name f, FunctionDef def |
      f = call.getAnArg() and
      // Tie `creation` to the argument expression. Without this conjunct
      // `creation` is unconstrained and the predicate holds for every node.
      creation.asExpr() = f and
      def.getDefinedFunction().getName() = f.getId() and
      // c.getCallableValue() = def.getDefinedFunction().getDefinition() and
      c.getName() = f.getId()
    )
  )
}
/** Holds if `call` is a lambda call of kind `kind` where `receiver` is the lambda expression. */
predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { none() }
predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) {
  // The kind is left unconstrained; only summary calls act as lambda calls.
  exists(kind) and
  call.(SummaryCall).getReceiver() = receiver
}
/** Extra data-flow steps needed for lambda flow analysis. */
// No extra steps are modelled: the body is `none()`, so this never holds.
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }

View File

@@ -5,12 +5,19 @@
private import python
private import DataFlowPrivate
import DataFlowPublic
private import FlowSummaryImpl as FlowSummaryImpl
/**
* Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
* (intra-procedural) step.
*/
predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFrom, nodeTo) }
predicate localFlowStep(Node nodeFrom, Node nodeTo) {
  // Simple flow through library code is included in the exposed local
  // step relation, even though flow is technically inter-procedural
  FlowSummaryImpl::Private::Steps::summaryThroughStep(nodeFrom, nodeTo, true)
  or
  // Ordinary intra-procedural steps
  simpleLocalFlowStep(nodeFrom, nodeTo)
}
/**
* Holds if data flows from `source` to `sink` in zero or more local

View File

@@ -0,0 +1,73 @@
private import python
private import semmle.python.dataflow.new.FlowSummary
private import semmle.python.ApiGraphs
/**
 * Test summary for calls to a plain name `identity`: argument 0 flows to the
 * return value, preserving the value.
 */
private class SummarizedCallableIdentity extends SummarizedCallable {
  SummarizedCallableIdentity() { "identity" = this }

  /** Gets a call whose callee is a plain name equal to this string. */
  override Call getACall() { this = result.getFunc().(Name).getId() }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    preservesValue = true and
    output = "ReturnValue" and
    input = "Argument[0]"
  }
}
// For lambda flow to work, implement lambdaCall and lambdaCreation
/**
 * Test summary for calls to a plain name `apply_lambda`: argument 1 flows to
 * parameter 0 of argument 0, and the return value of argument 0 flows to the
 * return value of the call. Both flows preserve the value.
 */
private class SummarizedCallableApplyLambda extends SummarizedCallable {
  SummarizedCallableApplyLambda() { "apply_lambda" = this }

  /** Gets a call whose callee is a plain name equal to this string. */
  override Call getACall() { this = result.getFunc().(Name).getId() }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      input = "Argument[1]" and output = "Parameter[0] of Argument[0]"
      or
      input = "ReturnValue of Argument[0]" and output = "ReturnValue"
    )
  }
}
/**
 * Test summary for calls to a plain name `reversed`: list elements of
 * argument 0 flow to list elements of the return value, preserving the value.
 */
private class SummarizedCallableReversed extends SummarizedCallable {
  SummarizedCallableReversed() { "reversed" = this }

  /** Gets a call whose callee is a plain name equal to this string. */
  override Call getACall() { this = result.getFunc().(Name).getId() }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    preservesValue = true and
    output = "ListElement of ReturnValue" and
    input = "ListElement of Argument[0]"
  }
}
/**
 * Test summary for calls to a plain name `map`: list elements of argument 1
 * flow to parameter 0 of argument 0, and the return value of argument 0 flows
 * to list elements of the return value. Both flows preserve the value.
 */
private class SummarizedCallableMap extends SummarizedCallable {
  SummarizedCallableMap() { "map" = this }

  /** Gets a call whose callee is a plain name equal to this string. */
  override Call getACall() { this = result.getFunc().(Name).getId() }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      input = "ListElement of Argument[1]" and output = "Parameter[0] of Argument[0]"
      or
      input = "ReturnValue of Argument[0]" and output = "ListElement of ReturnValue"
    )
  }
}
// Typetracking needs to use a local flow step not including summaries
// Typetracking needs to use a call graph not including summaries
// private class SummarizedCallableJsonLoads extends SummarizedCallable {
// SummarizedCallableJsonLoads() { this = "json.loads" }
// override Call getACall() {
// result = API::moduleImport("json").getMember("loads").getACall().asExpr()
// }
// override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// input = "Argument[0]" and
// output = "ListElement of ReturnValue" and
// preservesValue = true
// }
// }

View File

@@ -0,0 +1,19 @@
edges
| summaries.py:2:12:2:28 | ControlFlowNode for identity() | summaries.py:3:7:3:13 | ControlFlowNode for tainted |
| summaries.py:2:12:2:28 | ControlFlowNode for identity() | summaries.py:13:26:13:34 | ControlFlowNode for List |
| summaries.py:2:12:2:28 | ControlFlowNode for identity() | summaries.py:34:7:34:27 | ControlFlowNode for Subscript |
| summaries.py:2:21:2:27 | ControlFlowNode for Str | summaries.py:2:12:2:28 | ControlFlowNode for identity() |
| summaries.py:13:26:13:34 | ControlFlowNode for List | summaries.py:14:7:14:21 | ControlFlowNode for Subscript |
nodes
| summaries.py:2:12:2:28 | ControlFlowNode for identity() | semmle.label | ControlFlowNode for identity() |
| summaries.py:2:21:2:27 | ControlFlowNode for Str | semmle.label | ControlFlowNode for Str |
| summaries.py:3:7:3:13 | ControlFlowNode for tainted | semmle.label | ControlFlowNode for tainted |
| summaries.py:13:26:13:34 | ControlFlowNode for List | semmle.label | ControlFlowNode for List |
| summaries.py:14:7:14:21 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| summaries.py:34:7:34:27 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
subpaths
invalidSpecComponent
#select
| summaries.py:3:7:3:13 | ControlFlowNode for tainted | summaries.py:2:21:2:27 | ControlFlowNode for Str | summaries.py:3:7:3:13 | ControlFlowNode for tainted | $@ | summaries.py:2:21:2:27 | ControlFlowNode for Str | ControlFlowNode for Str |
| summaries.py:14:7:14:21 | ControlFlowNode for Subscript | summaries.py:2:21:2:27 | ControlFlowNode for Str | summaries.py:14:7:14:21 | ControlFlowNode for Subscript | $@ | summaries.py:2:21:2:27 | ControlFlowNode for Str | ControlFlowNode for Str |
| summaries.py:34:7:34:27 | ControlFlowNode for Subscript | summaries.py:2:21:2:27 | ControlFlowNode for Str | summaries.py:34:7:34:27 | ControlFlowNode for Subscript | $@ | summaries.py:2:21:2:27 | ControlFlowNode for Str | ControlFlowNode for Str |

View File

@@ -0,0 +1,34 @@
# Simple summary
tainted = identity("taint")
sink(tainted)
# Lambda summary
tainted_lambda = apply_lambda(lambda x: x + 1, tainted)
sink(tainted_lambda)
untainted_lambda = apply_lambda(lambda x: 1, tainted)
sink(untainted_lambda) # should not see flow: the lambda ignores its argument
# Collection summaries
tainted_list = reversed([tainted])
sink(tainted_list[0])
# Complex summaries
def add_colon(x):
    return x + ":"
tainted_mapped = map(add_colon, [tainted])
sink(tainted_mapped[0])
def explicit_identity(x):
    return x
tainted_mapped_explicit = map(explicit_identity, [tainted])
sink(tainted_mapped_explicit[0])
tainted_mapped_summary = map(identity, [tainted])
sink(tainted_mapped_summary[0])
from json import loads as json_loads
tainted_resultlist = json_loads(tainted)
sink(tainted_resultlist[0])

View File

@@ -0,0 +1,33 @@
/**
* @kind path-problem
*/
import python
import semmle.python.dataflow.new.FlowSummary
import DataFlow::PathGraph
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.internal.FlowSummaryImpl
import semmle.python.ApiGraphs
private import TestSummaries
/**
 * Holds if `s` is an input or output specification of `sc` and
 * `Private::External::invalidSpecComponent` holds for `s` and `c`.
 */
query predicate invalidSpecComponent(SummarizedCallable sc, string s, string c) {
  Private::External::invalidSpecComponent(s, c) and
  (
    sc.propagatesFlowExt(_, s, _)
    or
    sc.propagatesFlowExt(s, _, _)
  )
}
/**
 * Taint-tracking configuration for the test: sources are string constants
 * with value "taint", sinks are arguments of calls to a plain name `sink`.
 */
class Conf extends TaintTracking::Configuration {
  Conf() { "FlowSummaries" = this }

  override predicate isSource(DataFlow::Node src) {
    exists(StrConst literal | literal = src.asExpr() | literal.getS() = "taint")
  }

  override predicate isSink(DataFlow::Node sink) {
    exists(Call sinkCall |
      sink.asExpr() = sinkCall.getAnArg() and
      sinkCall.getFunc().(Name).getId() = "sink"
    )
  }
}
// Report every flow path found by `Conf`; the message is a `$@` placeholder
// that is filled in with the source node and its textual representation.
from DataFlow::PathNode source, DataFlow::PathNode sink, Conf conf
where conf.hasFlowPath(source, sink)
select sink, source, sink, "$@", source, source.toString()