Python: Model sensitive data based on variable names

This commit is contained in:
Rasmus Wriedt Larsen
2021-06-04 11:10:50 +02:00
parent f5fd0f8d1c
commit 350f79e1e1
3 changed files with 49 additions and 3 deletions

View File

@@ -111,6 +111,43 @@ private module SensitiveDataModeling {
override SensitiveDataClassification getClassification() { result = classification }
}
/**
* Any kind of variable assignment (also including with/for) where the name indicates
* it contains sensitive data.
*
* Note: We _could_ make any access to a variable with a sensitive name a source of
* sensitive data, but to make path explanations in data-flow/taint-tracking good,
* we don't want that, since it works against allowing users to understand the flow
* in the program (which is the whole point).
*
* Note: To make data-flow/taint-tracking work, the expression that is _assigned_ to
* the variable is marked as the source (as compared to marking the variable as the
* source).
*/
class SensitiveVariableAssignment extends SensitiveDataSource::Range {
SensitiveDataClassification classification;
SensitiveVariableAssignment() {
exists(DefinitionNode def |
nameIndicatesSensitiveData(def.(NameNode).getId(), classification) and
(
this.asCfgNode() = def.getValue()
or
this.asCfgNode() = def.getValue().(ForNode).getSequence()
) and
not this.asExpr() instanceof FunctionExpr and
not this.asExpr() instanceof ClassExpr
)
or
exists(With with |
nameIndicatesSensitiveData(with.getOptionalVars().(Name).getId(), classification) and
this.asExpr() = with.getContextExpr()
)
}
override SensitiveDataClassification getClassification() { result = classification }
}
/** An attribute access that is considered a source of sensitive data. */
class SensitiveAttributeAccess extends SensitiveDataSource::Range {
SensitiveDataClassification classification;

View File

@@ -1,5 +1,6 @@
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import TestUtilities.InlineExpectationsTest
import semmle.python.dataflow.new.SensitiveDataSources
private import semmle.python.ApiGraphs
@@ -21,7 +22,7 @@ class SensitiveDataSourcesTest extends InlineExpectationsTest {
or
exists(DataFlow::Node use |
use = API::builtin("print").getACall().getArg(_) and
DataFlow::localFlow(source, use) and
TaintTracking::localTaint(source, use) and
location = use.getLocation() and
element = use.toString() and
value = source.getClassification() and

View File

@@ -29,12 +29,20 @@ foo = ObjectFromDatabase()
foo.secret # $ SensitiveDataSource=secret
foo.username # $ SensitiveDataSource=id
# based on variable/parameter names
def my_func(password): # $ SensitiveDataSource=password
print(password) # $ SensitiveUse=password
password = some_function()
print(password) # $ MISSING: SensitiveUse=password
password = some_function() # $ SensitiveDataSource=password
print(password) # $ SensitiveUse=password
for password in some_function2(): # $ SensitiveDataSource=password
print(password) # $ SensitiveUse=password
with some_function3() as password: # $ SensitiveDataSource=password
print(password) # $ SensitiveUse=password
# Special handling of lookups of sensitive properties
request.args["password"], # $ SensitiveDataSource=password