mirror of
https://github.com/github/codeql.git
synced 2026-04-30 11:15:13 +02:00
Python: Better handling of sensitive functions
This solution was the best I could come up with, but it _is_ a bit brittle since you need to remember to add this additional taint step to any configuration that relies on sensitive data sources... I don't see an easy way around this though :|
This commit is contained in:
@@ -93,6 +93,8 @@ private module SensitiveDataModeling {
|
||||
/**
|
||||
* Gets a reference to a string constant that, if used as the key in a lookup,
|
||||
* indicates the presence of sensitive data with `classification`.
|
||||
*
|
||||
* Also see `extraStepForCalls`.
|
||||
*/
|
||||
DataFlow::Node sensitiveLookupStringConst(SensitiveDataClassification classification) {
|
||||
sensitiveLookupStringConst(DataFlow::TypeTracker::end(), classification).flowsTo(result)
|
||||
@@ -105,12 +107,49 @@ private module SensitiveDataModeling {
|
||||
SensitiveFunctionCall() {
|
||||
this.getFunction() = sensitiveFunction(classification)
|
||||
or
|
||||
// to cover functions that we don't have the definition for, and where the
|
||||
// reference to the function has not already been marked as being sensitive
|
||||
nameIndicatesSensitiveData(this.getFunction().asCfgNode().(NameNode).getId(), classification)
|
||||
}
|
||||
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the step from `nodeFrom` to `nodeTo` should be considered a
|
||||
* taint-flow step for sensitive-data, to ensure calls are handled correctly.
|
||||
*
|
||||
* To handle calls properly, while preserving a good source for path explanations,
|
||||
* you need to include this predicate as an additional taint step in your taint-tracking
|
||||
* configurations.
|
||||
*
|
||||
* The core problem can be illustrated by the example below. If we consider the
|
||||
* `print` a sink, what path and what source do we want to show? My initial approach
|
||||
* would be to use type-tracking to propagate from the `not_found.get_passwd` attribute
|
||||
* lookup, to the use of `non_sensitive_name`, and then create a new `SensitiveDataSource::Range`
|
||||
* like `SensitiveFunctionCall`. Although that seems likely to work, it will also end up
|
||||
* with a non-optimal path, which starts at _bad source_, and therefore doesn't show
|
||||
* how we figured out that `non_sensitive_name`
|
||||
* could be a function that returns a password (and in cases where there is many calls to
|
||||
* `my_func` it will be annoying for someone to figure this out manually).
|
||||
*
|
||||
* By including this additional taint-step in the taint-tracking configuration, it's possible
|
||||
* to get a path explanation going from _good source_ to the sink.
|
||||
*
|
||||
* ```python
|
||||
* def my_func(non_sensitive_name):
|
||||
* x = non_sensitive_name() # <-- bad source
|
||||
* print(x) # <-- sink
|
||||
*
|
||||
* import not_found
|
||||
* f = not_found.get_passwd # <-- good source
|
||||
* my_func(f)
|
||||
* ```
|
||||
*/
|
||||
predicate extraStepForCalls(DataFlow::Node nodeFrom, DataFlow::CallCfgNode nodeTo) {
|
||||
nodeTo.getFunction() = nodeFrom
|
||||
}
|
||||
|
||||
/**
|
||||
* Any kind of variable assignment (also including with/for) where the name indicates
|
||||
* it contains sensitive data.
|
||||
@@ -200,3 +239,5 @@ private module SensitiveDataModeling {
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
}
|
||||
}
|
||||
|
||||
predicate sensitiveDataExtraStepForCalls = SensitiveDataModeling::extraStepForCalls/2;
|
||||
|
||||
@@ -13,6 +13,7 @@ private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.BarrierGuards
|
||||
private import semmle.python.dataflow.new.SensitiveDataSources
|
||||
|
||||
/**
|
||||
* Provides a taint-tracking configuration for detecting use of a broken or weak
|
||||
@@ -38,6 +39,10 @@ module NormalHashFunction {
|
||||
or
|
||||
node instanceof Sanitizer
|
||||
}
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
|
||||
sensitiveDataExtraStepForCalls(node1, node2)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -70,5 +75,9 @@ module ComputationallyExpensiveHashFunction {
|
||||
or
|
||||
node instanceof Sanitizer
|
||||
}
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
|
||||
sensitiveDataExtraStepForCalls(node1, node2)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,6 +40,10 @@ class SensitiveUseConfiguration extends TaintTracking::Configuration {
|
||||
override predicate isSink(DataFlow::Node node) {
|
||||
node = API::builtin("print").getACall().getArg(_)
|
||||
}
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
|
||||
sensitiveDataExtraStepForCalls(node1, node2)
|
||||
}
|
||||
}
|
||||
// import DataFlow::PathGraph
|
||||
// from SensitiveUseConfiguration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
|
||||
@@ -29,17 +29,17 @@ x = unkown_func_not_even_imported_get_password() # $ SensitiveDataSource=passwor
|
||||
print(x) # $ SensitiveUse=password
|
||||
|
||||
f = get_passwd
|
||||
x = f() # $ MISSING: SensitiveDataSource=password
|
||||
print(x) # $ MISSING: SensitiveUse=password
|
||||
x = f()
|
||||
print(x) # $ SensitiveUse=password
|
||||
|
||||
import not_found
|
||||
f = not_found.get_passwd # $ SensitiveDataSource=password
|
||||
x = f() # $ MISSING: SensitiveDataSource=password
|
||||
print(x) # $ MISSING: SensitiveUse=password
|
||||
x = f()
|
||||
print(x) # $ SensitiveUse=password
|
||||
|
||||
def my_func(non_sensitive_name):
|
||||
x = non_sensitive_name() # $ MISSING: SensitiveDataSource=password
|
||||
print(x) # $ MISSING: SensitiveUse=password
|
||||
x = non_sensitive_name()
|
||||
print(x) # $ SensitiveUse=password
|
||||
f = not_found.get_passwd # $ SensitiveDataSource=password
|
||||
my_func(f)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user