python: use new syntax for flow summaries

also convert to inline tests
This commit is contained in:
Rasmus Lerchedahl Petersen
2022-04-05 08:58:28 +02:00
committed by GitHub
parent 4024ce4777
commit 177dea5307
9 changed files with 216 additions and 112 deletions

View File

@@ -7,7 +7,9 @@ private import internal.DataFlowUtil
private import internal.DataFlowPrivate
// import all instances below
private module Summaries { }
private module Summaries {
// private import TestSummaries
}
class SummaryComponent = Impl::Public::SummaryComponent;
@@ -115,63 +117,3 @@ private class SummarizedCallableAdapter extends Impl::Public::SummarizedCallable
}
class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack;
// private module ForTesting {
// private class SummarizedCallableIdentity extends SummarizedCallable {
// SummarizedCallableIdentity() { this = "identity" }
// override Call getACall() { result.getFunc().(Name).getId() = this }
// override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// input = "Argument[0]" and
// output = "ReturnValue" and
// preservesValue = true
// }
// }
// // For lambda flow to work, implement lambdaCall and lambdaCreation
// private class SummarizedCallableApplyLambda extends SummarizedCallable {
// SummarizedCallableApplyLambda() { this = "apply_lambda" }
// override Call getACall() { result.getFunc().(Name).getId() = this }
// override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// input = "Argument[1]" and
// output = "Parameter[0] of Argument[0]" and
// preservesValue = true
// or
// input = "ReturnValue of Argument[0]" and
// output = "ReturnValue" and
// preservesValue = true
// }
// }
// private class SummarizedCallableReversed extends SummarizedCallable {
// SummarizedCallableReversed() { this = "reversed" }
// override Call getACall() { result.getFunc().(Name).getId() = this }
// override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// input = "ListElement of Argument[0]" and
// output = "ListElement of ReturnValue" and
// preservesValue = true
// }
// }
// private class SummarizedCallableMap extends SummarizedCallable {
// SummarizedCallableMap() { this = "map" }
// override Call getACall() { result.getFunc().(Name).getId() = this }
// override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// input = "ListElement of Argument[1]" and
// output = "Parameter[0] of Argument[0]" and
// preservesValue = true
// or
// input = "ReturnValue of Argument[0]" and
// output = "ListElement of ReturnValue" and
// preservesValue = true
// }
// }
// // Typetracking needs to use a local flow step not including summaries
// // Typetracking needs to use a call graph not including summaries
// // private class SummarizedCallableJsonLoads extends SummarizedCallable {
// // SummarizedCallableJsonLoads() { this = "json.loads" }
// // override Call getACall() {
// // result = API::moduleImport("json").getMember("loads").getACall().asExpr()
// // }
// // override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// // input = "Argument[0]" and
// // output = "ListElement of ReturnValue" and
// // preservesValue = true
// // }
// // }
// }

View File

@@ -0,0 +1,73 @@
private import python
private import semmle.python.dataflow.new.FlowSummary
private import semmle.python.ApiGraphs
/**
 * A flow summary, used by the tests, for calls to a function named `identity`.
 *
 * The summary has a single value-preserving step from `Argument[0]` to the
 * return value of the call.
 */
private class SummarizedCallableIdentity extends SummarizedCallable {
SummarizedCallableIdentity() { this = "identity" }
// Matches purely on the callee expression being a `Name` whose identifier
// equals this summary's string; what that name refers to is not resolved here.
override Call getACall() { result.getFunc().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[0]" and
output = "ReturnValue" and
preservesValue = true
}
}
// For lambda flow to work, implement lambdaCall and lambdaCreation
/**
 * A flow summary, used by the tests, for calls to a function named
 * `apply_lambda`, which takes a callable as `Argument[0]` and a value as
 * `Argument[1]`.
 *
 * Two value-preserving steps are declared:
 * - `Argument[1]` flows into parameter 0 of the callable passed as `Argument[0]`.
 * - The return value of that callable flows to the return value of the call.
 */
private class SummarizedCallableApplyLambda extends SummarizedCallable {
SummarizedCallableApplyLambda() { this = "apply_lambda" }
// Matches purely on the callee expression being a `Name` whose identifier
// equals this summary's string; what that name refers to is not resolved here.
override Call getACall() { result.getFunc().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// Step 1: the value argument is passed to the callable.
input = "Argument[1]" and
output = "Argument[0].Parameter[0]" and
preservesValue = true
or
// Step 2: whatever the callable returns is returned by `apply_lambda`.
input = "Argument[0].ReturnValue" and
output = "ReturnValue" and
preservesValue = true
}
}
/**
 * A flow summary, used by the tests, for calls to a function named `reversed`.
 *
 * The summary has a single value-preserving step from the list elements of
 * `Argument[0]` to the list elements of the return value.
 */
private class SummarizedCallableReversed extends SummarizedCallable {
SummarizedCallableReversed() { this = "reversed" }
// Matches purely on the callee expression being a `Name` whose identifier
// equals this summary's string; what that name refers to is not resolved here.
override Call getACall() { result.getFunc().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// Element-to-element flow: the container itself is not the source or target.
input = "Argument[0].ListElement" and
output = "ReturnValue.ListElement" and
preservesValue = true
}
}
/**
 * A flow summary, used by the tests, for calls to a function named `map`,
 * which takes a callable as `Argument[0]` and a list as `Argument[1]`.
 *
 * Two value-preserving steps are declared:
 * - List elements of `Argument[1]` flow into parameter 0 of the callable
 *   passed as `Argument[0]`.
 * - The return value of that callable flows to the list elements of the
 *   return value of the call.
 */
private class SummarizedCallableMap extends SummarizedCallable {
SummarizedCallableMap() { this = "map" }
// Matches purely on the callee expression being a `Name` whose identifier
// equals this summary's string; what that name refers to is not resolved here.
override Call getACall() { result.getFunc().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// Step 1: each element of the input list is passed to the callable.
input = "Argument[1].ListElement" and
output = "Argument[0].Parameter[0]" and
preservesValue = true
or
// Step 2: each result of the callable becomes an element of the returned list.
input = "Argument[0].ReturnValue" and
output = "ReturnValue.ListElement" and
preservesValue = true
}
}
// The summary below is left commented out for the following reasons:
// - Typetracking needs to use a local flow step not including summaries
// - Typetracking needs to use a call graph not including summaries
// private class SummarizedCallableJsonLoads extends SummarizedCallable {
// SummarizedCallableJsonLoads() { this = "json.loads" }
// override Call getACall() {
// result = API::moduleImport("json").getMember("loads").getACall().asExpr()
// }
// override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// input = "Argument[0]" and
// output = "ReturnValue.ListElement" and
// preservesValue = true
// }
// }

View File

@@ -54,10 +54,12 @@ predicate summaryElement(DataFlowCallable c, string input, string output, string
/**
* Gets the summary component for specification component `c`, if any.
*
* This covers all the Python-specific components of a flow summary, and
* is currently empty.
* This covers all the Python-specific components of a flow summary.
*/
SummaryComponent interpretComponentSpecific(AccessPathToken c) {
// Wildcard token: stands for an argument at any positional position.
c = "Argument[_]" and // Ruby has this
result = FlowSummary::SummaryComponent::argument(any(ParameterPosition pos | pos.isPositional(_)))
or
// Token for the elements of a list.
c = "ListElement" and
result = FlowSummary::SummaryComponent::listElement()
}

View File

@@ -0,0 +1,2 @@
missingAnnotationOnSINK
failures

View File

@@ -0,0 +1,3 @@
import python
private import TestSummaries
import experimental.dataflow.TestUtil.NormalDataflowTest

View File

@@ -22,10 +22,10 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable {
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[1]" and
output = "Parameter[0] of Argument[0]" and
output = "Argument[0].Parameter[0]" and
preservesValue = true
or
input = "ReturnValue of Argument[0]" and
input = "Argument[0].ReturnValue" and
output = "ReturnValue" and
preservesValue = true
}
@@ -37,8 +37,8 @@ private class SummarizedCallableReversed extends SummarizedCallable {
override Call getACall() { result.getFunc().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "ListElement of Argument[0]" and
output = "ListElement of ReturnValue" and
input = "Argument[0].ListElement" and
output = "ReturnValue.ListElement" and
preservesValue = true
}
}
@@ -49,12 +49,12 @@ private class SummarizedCallableMap extends SummarizedCallable {
override Call getACall() { result.getFunc().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "ListElement of Argument[1]" and
output = "Parameter[0] of Argument[0]" and
input = "Argument[1].ListElement" and
output = "Argument[0].Parameter[0]" and
preservesValue = true
or
input = "ReturnValue of Argument[0]" and
output = "ListElement of ReturnValue" and
input = "Argument[0].ReturnValue" and
output = "ReturnValue.ListElement" and
preservesValue = true
}
}
@@ -67,7 +67,7 @@ private class SummarizedCallableMap extends SummarizedCallable {
// }
// override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// input = "Argument[0]" and
// output = "ListElement of ReturnValue" and
// output = "ReturnValue.ListElement" and
// preservesValue = true
// }
// }

View File

@@ -1,19 +1,70 @@
edges
| summaries.py:2:12:2:28 | ControlFlowNode for identity() | summaries.py:3:7:3:13 | ControlFlowNode for tainted |
| summaries.py:2:12:2:28 | ControlFlowNode for identity() | summaries.py:13:26:13:34 | ControlFlowNode for List |
| summaries.py:2:12:2:28 | ControlFlowNode for identity() | summaries.py:34:7:34:27 | ControlFlowNode for Subscript |
| summaries.py:2:21:2:27 | ControlFlowNode for Str | summaries.py:2:12:2:28 | ControlFlowNode for identity() |
| summaries.py:13:26:13:34 | ControlFlowNode for List | summaries.py:14:7:14:21 | ControlFlowNode for Subscript |
| summaries.py:10:10:10:17 | ControlFlowNode for Str | summaries.py:32:20:32:25 | ControlFlowNode for SOURCE |
| summaries.py:10:10:10:17 | ControlFlowNode for Str | summaries.py:36:48:36:53 | ControlFlowNode for SOURCE |
| summaries.py:10:10:10:17 | ControlFlowNode for Str | summaries.py:44:25:44:32 | ControlFlowNode for List |
| summaries.py:10:10:10:17 | ControlFlowNode for Str | summaries.py:44:26:44:31 | ControlFlowNode for SOURCE |
| summaries.py:10:10:10:17 | ControlFlowNode for Str | summaries.py:51:34:51:39 | ControlFlowNode for SOURCE |
| summaries.py:10:10:10:17 | ControlFlowNode for Str | summaries.py:57:51:57:56 | ControlFlowNode for SOURCE |
| summaries.py:10:10:10:17 | ControlFlowNode for Str | summaries.py:60:41:60:46 | ControlFlowNode for SOURCE |
| summaries.py:10:10:10:17 | ControlFlowNode for Str | summaries.py:65:6:65:26 | ControlFlowNode for Subscript |
| summaries.py:32:11:32:26 | ControlFlowNode for identity() | summaries.py:33:6:33:12 | ControlFlowNode for tainted |
| summaries.py:32:20:32:25 | ControlFlowNode for SOURCE | summaries.py:32:11:32:26 | ControlFlowNode for identity() |
| summaries.py:36:18:36:54 | ControlFlowNode for apply_lambda() | summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda |
| summaries.py:36:48:36:53 | ControlFlowNode for SOURCE | summaries.py:36:18:36:54 | ControlFlowNode for apply_lambda() |
| summaries.py:44:16:44:33 | ControlFlowNode for reversed() [List element] | summaries.py:45:6:45:17 | ControlFlowNode for tainted_list [List element] |
| summaries.py:44:25:44:32 | ControlFlowNode for List | summaries.py:45:6:45:20 | ControlFlowNode for Subscript |
| summaries.py:44:25:44:32 | ControlFlowNode for List [List element] | summaries.py:44:16:44:33 | ControlFlowNode for reversed() [List element] |
| summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | summaries.py:44:25:44:32 | ControlFlowNode for List [List element] |
| summaries.py:45:6:45:17 | ControlFlowNode for tainted_list [List element] | summaries.py:45:6:45:20 | ControlFlowNode for Subscript |
| summaries.py:51:18:51:41 | ControlFlowNode for map() [List element] | summaries.py:52:6:52:19 | ControlFlowNode for tainted_mapped [List element] |
| summaries.py:51:33:51:40 | ControlFlowNode for List [List element] | summaries.py:51:18:51:41 | ControlFlowNode for map() [List element] |
| summaries.py:51:34:51:39 | ControlFlowNode for SOURCE | summaries.py:51:33:51:40 | ControlFlowNode for List [List element] |
| summaries.py:52:6:52:19 | ControlFlowNode for tainted_mapped [List element] | summaries.py:52:6:52:22 | ControlFlowNode for Subscript |
| summaries.py:57:27:57:58 | ControlFlowNode for map() [List element] | summaries.py:58:6:58:28 | ControlFlowNode for tainted_mapped_explicit [List element] |
| summaries.py:57:50:57:57 | ControlFlowNode for List [List element] | summaries.py:57:27:57:58 | ControlFlowNode for map() [List element] |
| summaries.py:57:51:57:56 | ControlFlowNode for SOURCE | summaries.py:57:50:57:57 | ControlFlowNode for List [List element] |
| summaries.py:58:6:58:28 | ControlFlowNode for tainted_mapped_explicit [List element] | summaries.py:58:6:58:31 | ControlFlowNode for Subscript |
| summaries.py:60:26:60:48 | ControlFlowNode for map() [List element] | summaries.py:61:6:61:27 | ControlFlowNode for tainted_mapped_summary [List element] |
| summaries.py:60:40:60:47 | ControlFlowNode for List [List element] | summaries.py:60:26:60:48 | ControlFlowNode for map() [List element] |
| summaries.py:60:41:60:46 | ControlFlowNode for SOURCE | summaries.py:60:40:60:47 | ControlFlowNode for List [List element] |
| summaries.py:61:6:61:27 | ControlFlowNode for tainted_mapped_summary [List element] | summaries.py:61:6:61:30 | ControlFlowNode for Subscript |
nodes
| summaries.py:2:12:2:28 | ControlFlowNode for identity() | semmle.label | ControlFlowNode for identity() |
| summaries.py:2:21:2:27 | ControlFlowNode for Str | semmle.label | ControlFlowNode for Str |
| summaries.py:3:7:3:13 | ControlFlowNode for tainted | semmle.label | ControlFlowNode for tainted |
| summaries.py:13:26:13:34 | ControlFlowNode for List | semmle.label | ControlFlowNode for List |
| summaries.py:14:7:14:21 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| summaries.py:34:7:34:27 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| summaries.py:10:10:10:17 | ControlFlowNode for Str | semmle.label | ControlFlowNode for Str |
| summaries.py:32:11:32:26 | ControlFlowNode for identity() | semmle.label | ControlFlowNode for identity() |
| summaries.py:32:20:32:25 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
| summaries.py:33:6:33:12 | ControlFlowNode for tainted | semmle.label | ControlFlowNode for tainted |
| summaries.py:36:18:36:54 | ControlFlowNode for apply_lambda() | semmle.label | ControlFlowNode for apply_lambda() |
| summaries.py:36:48:36:53 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
| summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda | semmle.label | ControlFlowNode for tainted_lambda |
| summaries.py:44:16:44:33 | ControlFlowNode for reversed() [List element] | semmle.label | ControlFlowNode for reversed() [List element] |
| summaries.py:44:25:44:32 | ControlFlowNode for List | semmle.label | ControlFlowNode for List |
| summaries.py:44:25:44:32 | ControlFlowNode for List [List element] | semmle.label | ControlFlowNode for List [List element] |
| summaries.py:44:26:44:31 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
| summaries.py:45:6:45:17 | ControlFlowNode for tainted_list [List element] | semmle.label | ControlFlowNode for tainted_list [List element] |
| summaries.py:45:6:45:20 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| summaries.py:51:18:51:41 | ControlFlowNode for map() [List element] | semmle.label | ControlFlowNode for map() [List element] |
| summaries.py:51:33:51:40 | ControlFlowNode for List [List element] | semmle.label | ControlFlowNode for List [List element] |
| summaries.py:51:34:51:39 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
| summaries.py:52:6:52:19 | ControlFlowNode for tainted_mapped [List element] | semmle.label | ControlFlowNode for tainted_mapped [List element] |
| summaries.py:52:6:52:22 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| summaries.py:57:27:57:58 | ControlFlowNode for map() [List element] | semmle.label | ControlFlowNode for map() [List element] |
| summaries.py:57:50:57:57 | ControlFlowNode for List [List element] | semmle.label | ControlFlowNode for List [List element] |
| summaries.py:57:51:57:56 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
| summaries.py:58:6:58:28 | ControlFlowNode for tainted_mapped_explicit [List element] | semmle.label | ControlFlowNode for tainted_mapped_explicit [List element] |
| summaries.py:58:6:58:31 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| summaries.py:60:26:60:48 | ControlFlowNode for map() [List element] | semmle.label | ControlFlowNode for map() [List element] |
| summaries.py:60:40:60:47 | ControlFlowNode for List [List element] | semmle.label | ControlFlowNode for List [List element] |
| summaries.py:60:41:60:46 | ControlFlowNode for SOURCE | semmle.label | ControlFlowNode for SOURCE |
| summaries.py:61:6:61:27 | ControlFlowNode for tainted_mapped_summary [List element] | semmle.label | ControlFlowNode for tainted_mapped_summary [List element] |
| summaries.py:61:6:61:30 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| summaries.py:65:6:65:26 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
subpaths
invalidSpecComponent
#select
| summaries.py:3:7:3:13 | ControlFlowNode for tainted | summaries.py:2:21:2:27 | ControlFlowNode for Str | summaries.py:3:7:3:13 | ControlFlowNode for tainted | $@ | summaries.py:2:21:2:27 | ControlFlowNode for Str | ControlFlowNode for Str |
| summaries.py:14:7:14:21 | ControlFlowNode for Subscript | summaries.py:2:21:2:27 | ControlFlowNode for Str | summaries.py:14:7:14:21 | ControlFlowNode for Subscript | $@ | summaries.py:2:21:2:27 | ControlFlowNode for Str | ControlFlowNode for Str |
| summaries.py:34:7:34:27 | ControlFlowNode for Subscript | summaries.py:2:21:2:27 | ControlFlowNode for Str | summaries.py:34:7:34:27 | ControlFlowNode for Subscript | $@ | summaries.py:2:21:2:27 | ControlFlowNode for Str | ControlFlowNode for Str |
| summaries.py:33:6:33:12 | ControlFlowNode for tainted | summaries.py:10:10:10:17 | ControlFlowNode for Str | summaries.py:33:6:33:12 | ControlFlowNode for tainted | $@ | summaries.py:10:10:10:17 | ControlFlowNode for Str | ControlFlowNode for Str |
| summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda | summaries.py:10:10:10:17 | ControlFlowNode for Str | summaries.py:37:6:37:19 | ControlFlowNode for tainted_lambda | $@ | summaries.py:10:10:10:17 | ControlFlowNode for Str | ControlFlowNode for Str |
| summaries.py:45:6:45:20 | ControlFlowNode for Subscript | summaries.py:10:10:10:17 | ControlFlowNode for Str | summaries.py:45:6:45:20 | ControlFlowNode for Subscript | $@ | summaries.py:10:10:10:17 | ControlFlowNode for Str | ControlFlowNode for Str |
| summaries.py:52:6:52:22 | ControlFlowNode for Subscript | summaries.py:10:10:10:17 | ControlFlowNode for Str | summaries.py:52:6:52:22 | ControlFlowNode for Subscript | $@ | summaries.py:10:10:10:17 | ControlFlowNode for Str | ControlFlowNode for Str |
| summaries.py:58:6:58:31 | ControlFlowNode for Subscript | summaries.py:10:10:10:17 | ControlFlowNode for Str | summaries.py:58:6:58:31 | ControlFlowNode for Subscript | $@ | summaries.py:10:10:10:17 | ControlFlowNode for Str | ControlFlowNode for Str |
| summaries.py:61:6:61:30 | ControlFlowNode for Subscript | summaries.py:10:10:10:17 | ControlFlowNode for Str | summaries.py:61:6:61:30 | ControlFlowNode for Subscript | $@ | summaries.py:10:10:10:17 | ControlFlowNode for Str | ControlFlowNode for Str |
| summaries.py:65:6:65:26 | ControlFlowNode for Subscript | summaries.py:10:10:10:17 | ControlFlowNode for Str | summaries.py:65:6:65:26 | ControlFlowNode for Subscript | $@ | summaries.py:10:10:10:17 | ControlFlowNode for Str | ControlFlowNode for Str |

View File

@@ -1,34 +1,65 @@
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects
# These are defined so that we can evaluate the test code.
NONSOURCE = "not a source"
SOURCE = "source"
def is_source(x):
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
def SINK(x):
if is_source(x):
print("OK")
else:
print("Unexpected flow", x)
def SINK_F(x):
if is_source(x):
print("Unexpected flow", x)
else:
print("OK")
# Simple summary
tainted = identity("taint")
sink(tainted)
tainted = identity(SOURCE)
SINK(tainted) # $ flow="SOURCE, l:-1 -> tainted"
# Lambda summary
tainted_lambda = apply_lambda(lambda x: x + 1, tainted)
sink(tainted_lambda)
# Lambda summary
tainted_lambda = apply_lambda(lambda x: x + 1, SOURCE)
SINK(tainted_lambda) # $ flow="SOURCE, l:-1 -> tainted_lambda"
untainted_lambda = apply_lambda(lambda x: 1, tainted)
sink(tainted_lambda) # should not see flow
# A lambda that breaks the flow
untainted_lambda = apply_lambda(lambda x: 1, SOURCE)
SINK_F(untainted_lambda) # $ SPURIOUS: flow="SOURCE, l:-1 -> untainted_lambda"
# Collection summaries
tainted_list = reversed([tainted])
sink(tainted_list[0])
# Collection summaries
tainted_list = reversed([SOURCE])
SINK(tainted_list[0]) # $ flow="SOURCE, l:-1 -> tainted_list[0]"
# Complex summaries
def add_colon(x):
return x + ":"
# Complex summaries
def add_colon(x):
return x + ":"
tainted_mapped = map(add_colon, [tainted])
sink(tainted_mapped[0])
tainted_mapped = map(add_colon, [SOURCE])
SINK(tainted_mapped[0]) # $ flow="SOURCE, l:-1 -> tainted_mapped[0]"
def explicit_identity(x):
return x
def explicit_identity(x):
return x
tainted_mapped_explicit = map(explicit_identity, [tainted])
sink(tainted_mapped_explicit[0])
tainted_mapped_explicit = map(explicit_identity, [SOURCE])
SINK(tainted_mapped_explicit[0]) # $ flow="SOURCE, l:-1 -> tainted_mapped_explicit[0]"
tainted_mapped_summary = map(identity, [tainted])
sink(tainted_mapped_summary[0])
tainted_mapped_summary = map(identity, [SOURCE])
SINK(tainted_mapped_summary[0]) # $ flow="SOURCE, l:-1 -> tainted_mapped_summary[0]"
from json import loads as json_loads
tainted_resultlist = json_loads(tainted)
sink(tainted_resultlist[0])
from json import loads as json_loads
tainted_resultlist = json_loads(SOURCE)
SINK(tainted_resultlist[0]) # $ MISSING: flow

View File

@@ -18,11 +18,11 @@ query predicate invalidSpecComponent(SummarizedCallable sc, string s, string c)
class Conf extends TaintTracking::Configuration {
Conf() { this = "FlowSummaries" }
override predicate isSource(DataFlow::Node src) { src.asExpr().(StrConst).getS() = "taint" }
override predicate isSource(DataFlow::Node src) { src.asExpr().(StrConst).getS() = "source" }
override predicate isSink(DataFlow::Node sink) {
exists(Call mc |
mc.getFunc().(Name).getId() = "sink" and
mc.getFunc().(Name).getId() = "SINK" and
mc.getAnArg() = sink.asExpr()
)
}