Merge pull request #13178 from yoff/python-ruby/track-through-summaries-pm

ruby/python: Shared module for typetracking through flow summaries
This commit is contained in:
yoff
2023-06-20 11:19:45 +02:00
committed by GitHub
13 changed files with 1314 additions and 264 deletions

View File

@@ -60,7 +60,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable {
}
private class SummarizedCallableReversed extends SummarizedCallable {
SummarizedCallableReversed() { this = "reversed" }
SummarizedCallableReversed() { this = "list_reversed" }
override DataFlow::CallCfgNode getACall() {
result.getFunction().asCfgNode().(NameNode).getId() = this

View File

@@ -66,3 +66,21 @@ SINK(tainted_list[0]) # $ flow="SOURCE, l:-1 -> tainted_list[0]"
from json import loads as json_loads
tainted_resultlist = json_loads(SOURCE)
SINK(tainted_resultlist[0]) # $ flow="SOURCE, l:-1 -> tainted_resultlist[0]"
# Class methods are not handled right now
class MyClass:
@staticmethod
def foo(x):
return x
def bar(self, x):
return x
through_staticmethod = apply_lambda(MyClass.foo, SOURCE)
through_staticmethod # $ MISSING: flow
mc = MyClass()
through_method = apply_lambda(mc.bar, SOURCE)
through_method # $ MISSING: flow

View File

@@ -0,0 +1,189 @@
private import python
private import semmle.python.dataflow.new.FlowSummary
private import semmle.python.ApiGraphs
/**
* This module ensures that the `callStep` predicate in
* our type tracker implementation does not refer to the
* `getACall` predicate on `SummarizedCallable`.
*/
module RecursionGuard {
private import semmle.python.dataflow.new.internal.TypeTrackerSpecific as TT
private class RecursionGuard extends SummarizedCallable {
RecursionGuard() { this = "TypeTrackingSummariesRecursionGuard" }
override DataFlow::CallCfgNode getACall() {
result.getFunction().asCfgNode().(NameNode).getId() = this and
(TT::callStep(_, _) implies any())
}
override DataFlow::CallCfgNode getACallSimple() { none() }
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
}
predicate test(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
TT::levelStepNoCall(nodeFrom, nodeTo)
}
}
private class SummarizedCallableIdentity extends SummarizedCallable {
SummarizedCallableIdentity() { this = "TTS_identity" }
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[0]" and
output = "ReturnValue" and
preservesValue = true
}
}
// For lambda flow to work, implement lambdaCall and lambdaCreation
private class SummarizedCallableApplyLambda extends SummarizedCallable {
SummarizedCallableApplyLambda() { this = "TTS_apply_lambda" }
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[1]" and
output = "Argument[0].Parameter[0]" and
preservesValue = true
or
input = "Argument[0].ReturnValue" and
output = "ReturnValue" and
preservesValue = true
}
}
private class SummarizedCallableReversed extends SummarizedCallable {
SummarizedCallableReversed() { this = "TTS_reversed" }
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[0].ListElement" and
output = "ReturnValue.ListElement" and
preservesValue = true
}
}
private class SummarizedCallableMap extends SummarizedCallable {
SummarizedCallableMap() { this = "TTS_list_map" }
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[1].ListElement" and
output = "Argument[0].Parameter[0]" and
preservesValue = true
or
input = "Argument[0].ReturnValue" and
output = "ReturnValue.ListElement" and
preservesValue = true
}
}
private class SummarizedCallableAppend extends SummarizedCallable {
SummarizedCallableAppend() { this = "TTS_append_to_list" }
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[0]" and
output = "ReturnValue" and
preservesValue = false
or
input = "Argument[1]" and
output = "ReturnValue.ListElement" and
preservesValue = true
}
}
private class SummarizedCallableJsonLoads extends SummarizedCallable {
SummarizedCallableJsonLoads() { this = "TTS_json.loads" }
override DataFlow::CallCfgNode getACall() {
result = API::moduleImport("json").getMember("loads").getACall()
}
override DataFlow::CallCfgNode getACallSimple() { none() }
override DataFlow::ArgumentNode getACallback() {
result = API::moduleImport("json").getMember("loads").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[0]" and
output = "ReturnValue.ListElement" and
preservesValue = true
}
}
// read and store
private class SummarizedCallableReadSecret extends SummarizedCallable {
SummarizedCallableReadSecret() { this = "TTS_read_secret" }
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[0].Attribute[secret]" and
output = "ReturnValue" and
preservesValue = true
}
}
private class SummarizedCallableSetSecret extends SummarizedCallable {
SummarizedCallableSetSecret() { this = "TTS_set_secret" }
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[1]" and
output = "Argument[0].Attribute[secret]" and
preservesValue = true
}
}

View File

@@ -0,0 +1,78 @@
import sys
import os
# Simple summary
tainted = TTS_identity(tracked) # $ tracked
tainted # $ tracked
# Lambda summary
# I think the missing result is expected because type tracking
# is not allowed to flow back out of a call.
tainted_lambda = TTS_apply_lambda(lambda x: x, tracked) # $ tracked
tainted_lambda # $ MISSING: tracked
# A lambda that directly introduces taint
bad_lambda = TTS_apply_lambda(lambda x: tracked, 1) # $ tracked
bad_lambda # $ tracked
# A lambda that breaks the flow
untainted_lambda = TTS_apply_lambda(lambda x: 1, tracked) # $ tracked
untainted_lambda
# Collection summaries
tainted_list = TTS_reversed([tracked]) # $ tracked
tl = tainted_list[0]
tl # $ MISSING: tracked
# Complex summaries
def add_colon(x):
return x + ":"
tainted_mapped = TTS_list_map(add_colon, [tracked]) # $ tracked
tm = tainted_mapped[0]
tm # $ MISSING: tracked
def explicit_identity(x):
return x
tainted_mapped_explicit = TTS_list_map(explicit_identity, [tracked]) # $ tracked
tainted_mapped_explicit[0] # $ MISSING: tracked
tainted_mapped_summary = TTS_list_map(identity, [tracked]) # $ tracked
tms = tainted_mapped_summary[0]
tms # $ MISSING: tracked
another_tainted_list = TTS_append_to_list([], tracked) # $ tracked
atl = another_tainted_list[0]
atl # $ MISSING: tracked
# This will not work, as the call is not found by `getACallSimple`.
from json import loads as json_loads
tainted_resultlist = json_loads(tracked) # $ tracked
tr = tainted_resultlist[0]
tr # $ MISSING: tracked
x.secret = tracked # $ tracked=secret tracked
r = TTS_read_secret(x) # $ tracked=secret tracked
r # $ tracked
y # $ tracked=secret
TTS_set_secret(y, tracked) # $ tracked tracked=secret
y.secret # $ tracked tracked=secret
# Class methods are not handled right now
class MyClass:
@staticmethod
def foo(x):
return x
def bar(self, x):
return x
through_staticmethod = TTS_apply_lambda(MyClass.foo, tracked) # $ tracked
through_staticmethod # $ MISSING: tracked
mc = MyClass()
through_method = TTS_apply_lambda(mc.bar, tracked) # $ tracked
through_method # $ MISSING: tracked

View File

@@ -0,0 +1,36 @@
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TypeTracker
import TestUtilities.InlineExpectationsTest
import semmle.python.ApiGraphs
import TestSummaries
// -----------------------------------------------------------------------------
// tracked
// -----------------------------------------------------------------------------
private DataFlow::TypeTrackingNode tracked(TypeTracker t) {
t.start() and
result.asCfgNode() = any(NameNode n | n.getId() = "tracked")
or
exists(TypeTracker t2 | result = tracked(t2).track(t2, t))
}
class TrackedTest extends InlineExpectationsTest {
TrackedTest() { this = "TrackedTest" }
override string getARelevantTag() { result = "tracked" }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(DataFlow::Node e, TypeTracker t |
exists(e.getLocation().getFile().getRelativePath()) and
e.getLocation().getStartLine() > 0 and
tracked(t).flowsTo(e) and
// Module variables have no sensible location, and hence can't be annotated.
not e instanceof DataFlow::ModuleVariableNode and
tag = "tracked" and
location = e.getLocation() and
value = t.getAttr() and
element = e.toString()
)
}
}