Python: Better handling of sensitive functions

This solution was the best I could come up with, but it _is_ a bit
brittle since you need to remember to add this additional taint step
to any configuration that relies on sensitive data sources... I don't
see an easy way around this though :|
This commit is contained in:
Rasmus Wriedt Larsen
2021-06-10 15:07:03 +02:00
parent f167143a84
commit ea0c1d7db3
4 changed files with 60 additions and 6 deletions

View File

@@ -93,6 +93,8 @@ private module SensitiveDataModeling {
/**
* Gets a reference to a string constant that, if used as the key in a lookup,
* indicates the presence of sensitive data with `classification`.
*
* Also see `extraStepForCalls`.
*/
DataFlow::Node sensitiveLookupStringConst(SensitiveDataClassification classification) {
sensitiveLookupStringConst(DataFlow::TypeTracker::end(), classification).flowsTo(result)
@@ -105,12 +107,49 @@ private module SensitiveDataModeling {
SensitiveFunctionCall() {
this.getFunction() = sensitiveFunction(classification)
or
// to cover functions that we don't have the definition for, and where the
// reference to the function has not already been marked as being sensitive
nameIndicatesSensitiveData(this.getFunction().asCfgNode().(NameNode).getId(), classification)
}
override SensitiveDataClassification getClassification() { result = classification }
}
/**
* Holds if the step from `nodeFrom` to `nodeTo` should be considered a
* taint-flow step for sensitive-data, to ensure calls are handled correctly.
*
* To handle calls properly, while preserving a good source for path explanations,
* you need to include this predicate as an additional taint step in your taint-tracking
* configurations.
*
* The core problem can be illustrated by the example below. If we consider the
* `print` a sink, what path and what source do we want to show? My initial approach
* would be to use type-tracking to propagate from the `not_found.get_passwd` attribute
* lookup, to the use of `non_sensitive_name`, and then create a new `SensitiveDataSource::Range`
* like `SensitiveFunctionCall`. Although that seems likely to work, it will also end up
* with a non-optimal path, which starts at _bad source_, and therefore doesn't show
* how we figured out that `non_sensitive_name`
* could be a function that returns a password (and in cases where there is many calls to
* `my_func` it will be annoying for someone to figure this out manually).
*
* By including this additional taint-step in the taint-tracking configuration, it's possible
* to get a path explanation going from _good source_ to the sink.
*
* ```python
* def my_func(non_sensitive_name):
* x = non_sensitive_name() # <-- bad source
* print(x) # <-- sink
*
* import not_found
* f = not_found.get_passwd # <-- good source
* my_func(f)
* ```
*/
predicate extraStepForCalls(DataFlow::Node nodeFrom, DataFlow::CallCfgNode nodeTo) {
nodeTo.getFunction() = nodeFrom
}
/**
* Any kind of variable assignment (also including with/for) where the name indicates
* it contains sensitive data.
@@ -200,3 +239,5 @@ private module SensitiveDataModeling {
override SensitiveDataClassification getClassification() { result = classification }
}
}
predicate sensitiveDataExtraStepForCalls = SensitiveDataModeling::extraStepForCalls/2;

View File

@@ -13,6 +13,7 @@ private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.dataflow.new.SensitiveDataSources
/**
* Provides a taint-tracking configuration for detecting use of a broken or weak
@@ -38,6 +39,10 @@ module NormalHashFunction {
or
node instanceof Sanitizer
}
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
sensitiveDataExtraStepForCalls(node1, node2)
}
}
}
@@ -70,5 +75,9 @@ module ComputationallyExpensiveHashFunction {
or
node instanceof Sanitizer
}
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
sensitiveDataExtraStepForCalls(node1, node2)
}
}
}

View File

@@ -40,6 +40,10 @@ class SensitiveUseConfiguration extends TaintTracking::Configuration {
override predicate isSink(DataFlow::Node node) {
node = API::builtin("print").getACall().getArg(_)
}
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
sensitiveDataExtraStepForCalls(node1, node2)
}
}
// import DataFlow::PathGraph
// from SensitiveUseConfiguration cfg, DataFlow::PathNode source, DataFlow::PathNode sink

View File

@@ -29,17 +29,17 @@ x = unkown_func_not_even_imported_get_password() # $ SensitiveDataSource=passwor
print(x) # $ SensitiveUse=password
f = get_passwd
x = f() # $ MISSING: SensitiveDataSource=password
print(x) # $ MISSING: SensitiveUse=password
x = f()
print(x) # $ SensitiveUse=password
import not_found
f = not_found.get_passwd # $ SensitiveDataSource=password
x = f() # $ MISSING: SensitiveDataSource=password
print(x) # $ MISSING: SensitiveUse=password
x = f()
print(x) # $ SensitiveUse=password
def my_func(non_sensitive_name):
x = non_sensitive_name() # $ MISSING: SensitiveDataSource=password
print(x) # $ MISSING: SensitiveUse=password
x = non_sensitive_name()
print(x) # $ SensitiveUse=password
f = not_found.get_passwd # $ SensitiveDataSource=password
my_func(f)