Python: Port sensitive data modeling

No longer using points-to 🎉
This commit is contained in:
Rasmus Wriedt Larsen
2021-06-03 14:17:29 +02:00
parent 3b68c87b6c
commit 00a71a1c41
3 changed files with 110 additions and 17 deletions

View File

@@ -8,7 +8,6 @@ private import semmle.python.dataflow.new.DataFlow
// Need to import since frameworks can extend `RemoteFlowSource::Range`
private import semmle.python.Frameworks
private import semmle.python.Concepts
private import semmle.python.security.SensitiveData as OldSensitiveData
private import semmle.python.security.internal.SensitiveDataHeuristics as SensitiveDataHeuristics
// We export these explicitly, so we don't also export the `HeuristicNames` module.
@@ -49,17 +48,94 @@ module SensitiveDataSource {
}
}
// TODO: rewrite this to not rely on the old points-to implementation
private class PortOfOldModeling extends SensitiveDataSource::Range {
OldSensitiveData::SensitiveData::Source oldSensitiveSource;
/** Actual sensitive data modeling */
private module SensitiveDataModeling {
private import SensitiveDataHeuristics::HeuristicNames
PortOfOldModeling() { this.asCfgNode() = oldSensitiveSource }
/**
* Gets a reference to a function that is considered to be a sensitive source of
* `classification`.
*/
private DataFlow::LocalSourceNode sensitiveFunction(
DataFlow::TypeTracker t, SensitiveDataClassification classification
) {
t.start() and
exists(Function f |
nameIndicatesSensitiveData(f.getName(), classification) and
result.asExpr() = f.getDefinition()
)
or
exists(DataFlow::TypeTracker t2 | result = sensitiveFunction(t2, classification).track(t2, t))
}
override SensitiveDataClassification getClassification() {
exists(OldSensitiveData::SensitiveData classification |
oldSensitiveSource.isSourceOf(classification)
|
classification = "sensitive.data." + result
/**
* Gets a reference to a function that is considered to be a sensitive source of
* `classification`.
*/
DataFlow::Node sensitiveFunction(SensitiveDataClassification classification) {
sensitiveFunction(DataFlow::TypeTracker::end(), classification).flowsTo(result)
}
/**
* Gets a reference to a string constant that, if used as the key in a lookup,
* indicates the presence of sensitive data with `classification`.
*/
private DataFlow::LocalSourceNode sensitiveLookupStringConst(
DataFlow::TypeTracker t, SensitiveDataClassification classification
) {
t.start() and
nameIndicatesSensitiveData(result.asExpr().(StrConst).getText(), classification)
or
exists(DataFlow::TypeTracker t2 |
result = sensitiveLookupStringConst(t2, classification).track(t2, t)
)
}
/**
* Gets a reference to a string constant that, if used as the key in a lookup,
* indicates the presence of sensitive data with `classification`.
*/
DataFlow::Node sensitiveLookupStringConst(SensitiveDataClassification classification) {
sensitiveLookupStringConst(DataFlow::TypeTracker::end(), classification).flowsTo(result)
}
/** A function call that is considered a source of sensitive data. */
class SensitiveFunctionCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
SensitiveDataClassification classification;
SensitiveFunctionCall() {
this.getFunction() = sensitiveFunction(classification)
or
nameIndicatesSensitiveData(this.getFunction().asCfgNode().(NameNode).getId(), classification)
}
override SensitiveDataClassification getClassification() { result = classification }
}
/** An attribute access that is considered a source of sensitive data. */
class SensitiveAttributeAccess extends SensitiveDataSource::Range {
SensitiveDataClassification classification;
SensitiveAttributeAccess() {
nameIndicatesSensitiveData(this.(DataFlow::AttrRead).getAttributeName(), classification)
or
// I considered excluding any `from ... import something_sensitive`, but then realized that
// we should flag up `form ... import password as ...` as a password
this.(DataFlow::AttrRead).getAttributeNameExpr() = sensitiveLookupStringConst(classification)
}
override SensitiveDataClassification getClassification() { result = classification }
}
/** A call to `get` on an object, where the key indicates the result will be sensitive data. */
class SensitiveGetCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
SensitiveDataClassification classification;
SensitiveGetCall() {
this.getFunction().asCfgNode().(AttrNode).getName() = "get" and
this.getArg(0) = sensitiveLookupStringConst(classification)
}
override SensitiveDataClassification getClassification() { result = classification }
}
}

View File

@@ -2,19 +2,32 @@ import python
import semmle.python.dataflow.new.DataFlow
import TestUtilities.InlineExpectationsTest
import semmle.python.dataflow.new.SensitiveDataSources
private import semmle.python.ApiGraphs
class SensitiveDataSourcesTest extends InlineExpectationsTest {
SensitiveDataSourcesTest() { this = "SensitiveDataSourcesTest" }
override string getARelevantTag() { result = "SensitiveDataSource" }
override string getARelevantTag() { result in ["SensitiveDataSource", "SensitiveUse"] }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
exists(SensitiveDataSource source |
location = source.getLocation() and
element = source.toString() and
value = source.getClassification() and
tag = "SensitiveDataSource"
(
location = source.getLocation() and
element = source.toString() and
value = source.getClassification() and
tag = "SensitiveDataSource"
)
or
exists(DataFlow::Node use |
use = API::builtin("print").getACall().getArg(_) and
DataFlow::localFlow(source, use) and
location = use.getLocation() and
element = use.toString() and
value = source.getClassification() and
tag = "SensitiveUse"
)
)
}
}

View File

@@ -1,5 +1,6 @@
from not_found import get_passwd, account_id
from not_found import get_passwd # $ SensitiveDataSource=password
from not_found import account_id # $ SensitiveDataSource=id
def get_password():
pass
@@ -30,7 +31,7 @@ foo.username # $ SensitiveDataSource=id
# plain variables
password = some_function()
print(password) # $ MISSING: SensitiveDataSource=password
print(password) # $ MISSING: SensitiveUse=password
# Special handling of lookups of sensitive properties
request.args["password"], # $ MISSING: SensitiveDataSource=password
@@ -41,3 +42,6 @@ request.args.get(x) # $ SensitiveDataSource=password
# I don't think handling `getlist` is super important, just included it to show what we don't handle
request.args.getlist("password")[0] # $ MISSING: SensitiveDataSource=password
from not_found import password2 as foo # $ SensitiveDataSource=password
print(foo) # $ SensitiveUse=password