mirror of
https://github.com/github/codeql.git
synced 2026-04-30 11:15:13 +02:00
Python: Port sensitive data modeling
No longer using points-to 🎉
This commit is contained in:
@@ -8,7 +8,6 @@ private import semmle.python.dataflow.new.DataFlow
|
||||
// Need to import since frameworks can extend `RemoteFlowSource::Range`
|
||||
private import semmle.python.Frameworks
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.security.SensitiveData as OldSensitiveData
|
||||
private import semmle.python.security.internal.SensitiveDataHeuristics as SensitiveDataHeuristics
|
||||
|
||||
// We export these explicitly, so we don't also export the `HeuristicNames` module.
|
||||
@@ -49,17 +48,94 @@ module SensitiveDataSource {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: rewrite this to not rely on the old points-to implementation
|
||||
private class PortOfOldModeling extends SensitiveDataSource::Range {
|
||||
OldSensitiveData::SensitiveData::Source oldSensitiveSource;
|
||||
/** Actual sensitive data modeling */
|
||||
private module SensitiveDataModeling {
|
||||
private import SensitiveDataHeuristics::HeuristicNames
|
||||
|
||||
PortOfOldModeling() { this.asCfgNode() = oldSensitiveSource }
|
||||
/**
|
||||
* Gets a reference to a function that is considered to be a sensitive source of
|
||||
* `classification`.
|
||||
*/
|
||||
private DataFlow::LocalSourceNode sensitiveFunction(
|
||||
DataFlow::TypeTracker t, SensitiveDataClassification classification
|
||||
) {
|
||||
t.start() and
|
||||
exists(Function f |
|
||||
nameIndicatesSensitiveData(f.getName(), classification) and
|
||||
result.asExpr() = f.getDefinition()
|
||||
)
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = sensitiveFunction(t2, classification).track(t2, t))
|
||||
}
|
||||
|
||||
override SensitiveDataClassification getClassification() {
|
||||
exists(OldSensitiveData::SensitiveData classification |
|
||||
oldSensitiveSource.isSourceOf(classification)
|
||||
|
|
||||
classification = "sensitive.data." + result
|
||||
/**
|
||||
* Gets a reference to a function that is considered to be a sensitive source of
|
||||
* `classification`.
|
||||
*/
|
||||
DataFlow::Node sensitiveFunction(SensitiveDataClassification classification) {
|
||||
sensitiveFunction(DataFlow::TypeTracker::end(), classification).flowsTo(result)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a string constant that, if used as the key in a lookup,
|
||||
* indicates the presence of sensitive data with `classification`.
|
||||
*/
|
||||
private DataFlow::LocalSourceNode sensitiveLookupStringConst(
|
||||
DataFlow::TypeTracker t, SensitiveDataClassification classification
|
||||
) {
|
||||
t.start() and
|
||||
nameIndicatesSensitiveData(result.asExpr().(StrConst).getText(), classification)
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 |
|
||||
result = sensitiveLookupStringConst(t2, classification).track(t2, t)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a string constant that, if used as the key in a lookup,
|
||||
* indicates the presence of sensitive data with `classification`.
|
||||
*/
|
||||
DataFlow::Node sensitiveLookupStringConst(SensitiveDataClassification classification) {
|
||||
sensitiveLookupStringConst(DataFlow::TypeTracker::end(), classification).flowsTo(result)
|
||||
}
|
||||
|
||||
/** A function call that is considered a source of sensitive data. */
|
||||
class SensitiveFunctionCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
|
||||
SensitiveDataClassification classification;
|
||||
|
||||
SensitiveFunctionCall() {
|
||||
this.getFunction() = sensitiveFunction(classification)
|
||||
or
|
||||
nameIndicatesSensitiveData(this.getFunction().asCfgNode().(NameNode).getId(), classification)
|
||||
}
|
||||
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
}
|
||||
|
||||
/** An attribute access that is considered a source of sensitive data. */
|
||||
class SensitiveAttributeAccess extends SensitiveDataSource::Range {
|
||||
SensitiveDataClassification classification;
|
||||
|
||||
SensitiveAttributeAccess() {
|
||||
nameIndicatesSensitiveData(this.(DataFlow::AttrRead).getAttributeName(), classification)
|
||||
or
|
||||
// I considered excluding any `from ... import something_sensitive`, but then realized that
|
||||
// we should flag up `form ... import password as ...` as a password
|
||||
this.(DataFlow::AttrRead).getAttributeNameExpr() = sensitiveLookupStringConst(classification)
|
||||
}
|
||||
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
}
|
||||
|
||||
/** A call to `get` on an object, where the key indicates the result will be sensitive data. */
|
||||
class SensitiveGetCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
|
||||
SensitiveDataClassification classification;
|
||||
|
||||
SensitiveGetCall() {
|
||||
this.getFunction().asCfgNode().(AttrNode).getName() = "get" and
|
||||
this.getArg(0) = sensitiveLookupStringConst(classification)
|
||||
}
|
||||
|
||||
override SensitiveDataClassification getClassification() { result = classification }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,19 +2,32 @@ import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import TestUtilities.InlineExpectationsTest
|
||||
import semmle.python.dataflow.new.SensitiveDataSources
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
class SensitiveDataSourcesTest extends InlineExpectationsTest {
|
||||
SensitiveDataSourcesTest() { this = "SensitiveDataSourcesTest" }
|
||||
|
||||
override string getARelevantTag() { result = "SensitiveDataSource" }
|
||||
override string getARelevantTag() { result in ["SensitiveDataSource", "SensitiveUse"] }
|
||||
|
||||
override predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(location.getFile().getRelativePath()) and
|
||||
exists(SensitiveDataSource source |
|
||||
location = source.getLocation() and
|
||||
element = source.toString() and
|
||||
value = source.getClassification() and
|
||||
tag = "SensitiveDataSource"
|
||||
(
|
||||
location = source.getLocation() and
|
||||
element = source.toString() and
|
||||
value = source.getClassification() and
|
||||
tag = "SensitiveDataSource"
|
||||
)
|
||||
or
|
||||
exists(DataFlow::Node use |
|
||||
use = API::builtin("print").getACall().getArg(_) and
|
||||
DataFlow::localFlow(source, use) and
|
||||
location = use.getLocation() and
|
||||
element = use.toString() and
|
||||
value = source.getClassification() and
|
||||
tag = "SensitiveUse"
|
||||
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
|
||||
from not_found import get_passwd, account_id
|
||||
from not_found import get_passwd # $ SensitiveDataSource=password
|
||||
from not_found import account_id # $ SensitiveDataSource=id
|
||||
|
||||
def get_password():
|
||||
pass
|
||||
@@ -30,7 +31,7 @@ foo.username # $ SensitiveDataSource=id
|
||||
|
||||
# plain variables
|
||||
password = some_function()
|
||||
print(password) # $ MISSING: SensitiveDataSource=password
|
||||
print(password) # $ MISSING: SensitiveUse=password
|
||||
|
||||
# Special handling of lookups of sensitive properties
|
||||
request.args["password"], # $ MISSING: SensitiveDataSource=password
|
||||
@@ -41,3 +42,6 @@ request.args.get(x) # $ SensitiveDataSource=password
|
||||
|
||||
# I don't think handling `getlist` is super important, just included it to show what we don't handle
|
||||
request.args.getlist("password")[0] # $ MISSING: SensitiveDataSource=password
|
||||
|
||||
from not_found import password2 as foo # $ SensitiveDataSource=password
|
||||
print(foo) # $ SensitiveUse=password
|
||||
|
||||
Reference in New Issue
Block a user