mirror of
https://github.com/github/codeql.git
synced 2026-04-30 11:15:13 +02:00
Python: Model sensitive data based on variable names
This commit is contained in:
@@ -111,6 +111,43 @@ private module SensitiveDataModeling {
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
}
|
||||
|
||||
/**
 * A value bound to a variable whose name indicates that it holds sensitive data.
 * Besides ordinary assignments, this also covers `with` and `for` bindings.
 *
 * Note: Every access to a variable with a sensitive name _could_ be treated as a
 * source of sensitive data. That is deliberately not done: it would degrade the
 * path explanations produced by data-flow/taint-tracking and work against users
 * understanding the flow in the program (which is the whole point).
 *
 * Note: For data-flow/taint-tracking to work, the source is the expression that is
 * _assigned_ to the variable, rather than the variable itself.
 */
class SensitiveVariableAssignment extends SensitiveDataSource::Range {
  SensitiveDataClassification classification;

  SensitiveVariableAssignment() {
    // `with <expr> as <name>:` -- the context expression is the source.
    exists(With withStmt |
      this.asExpr() = withStmt.getContextExpr() and
      nameIndicatesSensitiveData(withStmt.getOptionalVars().(Name).getId(), classification)
    )
    or
    // Any other definition of a sensitively-named variable: the source is either the
    // defining value itself or, when that value is a `for` node, its sequence.
    exists(DefinitionNode defn |
      // Function and class expressions are never sources (presumably so that a
      // function/class that merely has a sensitive-sounding name is not flagged).
      not (
        this.asExpr() instanceof FunctionExpr
        or
        this.asExpr() instanceof ClassExpr
      ) and
      (
        defn.getValue() = this.asCfgNode()
        or
        defn.getValue().(ForNode).getSequence() = this.asCfgNode()
      ) and
      nameIndicatesSensitiveData(defn.(NameNode).getId(), classification)
    )
  }

  override SensitiveDataClassification getClassification() {
    result = classification
  }
}
|
||||
|
||||
/** An attribute access that is considered a source of sensitive data. */
|
||||
class SensitiveAttributeAccess extends SensitiveDataSource::Range {
|
||||
SensitiveDataClassification classification;
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import TestUtilities.InlineExpectationsTest
|
||||
import semmle.python.dataflow.new.SensitiveDataSources
|
||||
private import semmle.python.ApiGraphs
|
||||
@@ -21,7 +22,7 @@ class SensitiveDataSourcesTest extends InlineExpectationsTest {
|
||||
or
|
||||
exists(DataFlow::Node use |
|
||||
use = API::builtin("print").getACall().getArg(_) and
|
||||
DataFlow::localFlow(source, use) and
|
||||
TaintTracking::localTaint(source, use) and
|
||||
location = use.getLocation() and
|
||||
element = use.toString() and
|
||||
value = source.getClassification() and
|
||||
|
||||
@@ -29,12 +29,20 @@ foo = ObjectFromDatabase()
|
||||
foo.secret # $ SensitiveDataSource=secret
|
||||
foo.username # $ SensitiveDataSource=id
|
||||
|
||||
|
||||
# based on variable/parameter names
|
||||
# Fixture: a function whose parameter is named `password`. The trailing inline
# annotations record the expected results for the parameter itself and for its
# use as an argument to `print`.
def my_func(password): # $ SensitiveDataSource=password
|
||||
print(password) # $ SensitiveUse=password
|
||||
|
||||
password = some_function()
|
||||
print(password) # $ MISSING: SensitiveUse=password
|
||||
password = some_function() # $ SensitiveDataSource=password
|
||||
print(password) # $ SensitiveUse=password
|
||||
|
||||
for password in some_function2(): # $ SensitiveDataSource=password
|
||||
print(password) # $ SensitiveUse=password
|
||||
|
||||
with some_function3() as password: # $ SensitiveDataSource=password
|
||||
print(password) # $ SensitiveUse=password
|
||||
|
||||
|
||||
# Special handling of lookups of sensitive properties
|
||||
request.args["password"], # $ SensitiveDataSource=password
|
||||
|
||||
Reference in New Issue
Block a user