Python: Add py/weak-sensitive-data-hashing query

This commit is contained in:
Rasmus Wriedt Larsen
2021-04-22 11:49:38 +02:00
parent 499adc26a3
commit ac83c695ad
12 changed files with 488 additions and 0 deletions

View File

@@ -0,0 +1,103 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Using a broken or weak cryptographic hash function can leave data
vulnerable, and should not be used in security related code.
</p>
<p>
A strong cryptographic hash function should be resistant to:
</p>
<ul>
<li>
pre-image attacks: if you know a hash value <code>h(x)</code>,
you should not be able to easily find the input <code>x</code>.
</li>
<li>
collision attacks: if you know a hash value <code>h(x)</code>,
you should not be able to easily find a different input <code>y</code>
such that hash value is the same <code>h(x) = h(y)</code>.
</li>
</ul>
<p>
In cases with a limited input space, such as for passwords, the hash
function also needs to be computationally expensive to be resistant to
brute-force attacks.
</p>
<p>
As an example, both MD5 and SHA-1 is known to be vulnerable to collision attacks.
</p>
<p>
Since it's OK to use a weak cryptographic hash function in a non-security
context, this query only alerts when these are used to hash sensitive
data (such as passwords, certificates, usernames).
</p>
<p>
Use of broken or weak cryptographic algorithms that are not hashing algorithms, is
handled by the <code>py/weak-cryptographic-algorithm</code> query.
</p>
</overview>
<recommendation>
<p>
Ensure that you use a strong, modern cryptographic hash function:
</p>
<ul>
<li>
such as Argon2, scrypt, bcrypt, or PBKDF2 for passwords and other data with limited input space.
</li>
<li>
such as SHA-2, or SHA-3 in other cases.
</li>
</ul>
</recommendation>
<example>
<p>
The following example shows two functions for checking whether the hash
of a certificate matches a known value -- to prevent tampering.
The first function uses MD5 that is known to be vulnerable to collision attacks.
The second function uses SHA-256 that is a strong cryptographic hashing function.
</p>
<sample src="examples/weak_certificate_hashing.py" />
</example>
<example>
<p>
The following example shows two functions for hashing passwords.
The first function uses SHA-256 to hash passwords. Although SHA-256 is a
strong cryptographic hash function, it is not suitable for password
hashing since it is not computationally expensive.
</p>
<sample src="examples/weak_password_hashing_bad.py" />
<p>
The second function uses Argon2 (through the <code>argon2-cffi</code>
PyPI package), which is a strong password hashing algorithm (and
includes a per-password salt by default).
</p>
<sample src="examples/weak_password_hashing_good.py" />
</example>
<references>
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html">Password Storage Cheat Sheet</a></li>
</references>
</qhelp>

View File

@@ -0,0 +1,47 @@
/**
* @name Use of a broken or weak cryptographic hashing algorithm on sensitive data
* @description Using broken or weak cryptographic hashing algorithms can compromise security.
* @kind path-problem
* @problem.severity warning
* @precision high
* @id py/weak-sensitive-data-hashing
* @tags security
* external/cwe/cwe-327
* external/cwe/cwe-916
*/
import python
import semmle.python.security.dataflow.WeakSensitiveDataHashing
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import DataFlow::PathGraph
from
DataFlow::PathNode source, DataFlow::PathNode sink, string ending, string algorithmName,
string classification
where
exists(NormalHashFunction::Configuration config |
config.hasFlowPath(source, sink) and
algorithmName = sink.getNode().(NormalHashFunction::Sink).getAlgorithmName() and
classification = source.getNode().(NormalHashFunction::Source).getClassification() and
ending = "."
)
or
exists(ComputationallyExpensiveHashFunction::Configuration config |
config.hasFlowPath(source, sink) and
algorithmName = sink.getNode().(ComputationallyExpensiveHashFunction::Sink).getAlgorithmName() and
classification =
source.getNode().(ComputationallyExpensiveHashFunction::Source).getClassification() and
(
sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
ending = "."
or
not sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
ending =
" for " + classification +
" hashing, since it is not a computationally expensive hash function."
)
)
select sink.getNode(), source, sink,
"$@ is used in a hashing algorithm (" + algorithmName + ") that is insecure" + ending,
source.getNode(), "Sensitive data (" + classification + ")"

View File

@@ -0,0 +1,9 @@
import hashlib
def certificate_matches_known_hash_bad(certificate, known_hash):
hash = hashlib.md5(certificate).hexdigest() # BAD
return hash == known_hash
def certificate_matches_known_hash_good(certificate, known_hash):
hash = hashlib.sha256(certificate).hexdigest() # GOOD
return hash == known_hash

View File

@@ -0,0 +1,4 @@
import hashlib
def get_password_hash(password: str, salt: str):
return hashlib.sha256(password + salt).hexdigest() # BAD

View File

@@ -0,0 +1,9 @@
from argon2 import PasswordHasher
def get_initial_hash(password: str):
ph = PasswordHasher()
return ph.hash(password) # GOOD
def check_password(password: str, known_hash):
ph = PasswordHasher()
return ph.verify(known_hash, password) # GOOD

View File

@@ -0,0 +1,74 @@
/**
* Provides a taint-tracking configuration for detecting use of a broken or weak
* cryptographic hashing algorithm on sensitive data.
*
* Note, for performance reasons: only import this file if
* `WeakSensitiveDataHashing::Configuration` is needed, otherwise
* `SqlInjectionCustomizations` should be imported instead.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
/**
* Provides a taint-tracking configuration for detecting use of a broken or weak
* cryptographic hash function on sensitive data, that does NOT require a
* computationally expensive hash function.
*/
module NormalHashFunction {
import WeakSensitiveDataHashingCustomizations::NormalHashFunction
/**
* A taint-tracking configuration for detecting use of a broken or weak
* cryptographic hashing algorithm on sensitive data.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "NormalHashFunction" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) {
super.isSanitizer(node)
or
node instanceof Sanitizer
}
}
}
/**
* Provides a taint-tracking configuration for detecting use of a broken or weak
* cryptographic hashing algorithm on passwords.
*
* Passwords has stricter requirements on the hashing algorithm used (must be
* computationally expensive to prevent brute-force attacks).
*/
module ComputationallyExpensiveHashFunction {
import WeakSensitiveDataHashingCustomizations::ComputationallyExpensiveHashFunction
/**
* A taint-tracking configuration for detecting use of a broken or weak
* cryptographic hashing algorithm on passwords.
*
* Passwords has stricter requirements on the hashing algorithm used (must be
* computationally expensive to prevent brute-force attacks).
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "ComputationallyExpensiveHashFunction" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) {
super.isSanitizer(node)
or
node instanceof Sanitizer
}
}
}

View File

@@ -0,0 +1,157 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "use of a broken or weak cryptographic hashing algorithm on sensitive data"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.dataflow.new.SensitiveDataSources
private import semmle.python.dataflow.new.BarrierGuards
/**
* Provides default sources, sinks and sanitizers for detecting
* "use of a broken or weak cryptographic hashing algorithm on sensitive data"
* vulnerabilities on sensitive data that does NOT require computationally expensive
* hashing, as well as extension points for adding your own.
*
* Also see the `ComputationallyExpensiveHashFunction` module.
*/
module NormalHashFunction {
/**
* A data flow source for "use of a broken or weak cryptographic hashing algorithm on
* sensitive data" vulnerabilities.
*/
abstract class Source extends DataFlow::Node {
Source() { not this instanceof ComputationallyExpensiveHashFunction::Source }
/** Gets the classification of the sensitive data. */
abstract string getClassification();
}
/**
* A data flow sink for "use of a broken or weak cryptographic hashing algorithm on
* sensitive data" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node {
/**
* Gets the name of the weak hashing algorithm.
*/
abstract string getAlgorithmName();
}
/**
* A sanitizer for "use of a broken or weak cryptographic hashing algorithm on
* sensitive data" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A source of sensitive data, considered as a flow source.
*/
class SensitiveDataSourceAsSource extends Source, SensitiveDataSource {
override string getClassification() { result = SensitiveDataSource.super.getClassification() }
}
/** The input to a hashing operation using a weak algorithm, considered as a flow sink. */
class WeakHashingOperationInputSink extends Sink {
Cryptography::HashingAlgorithm algorithm;
WeakHashingOperationInputSink() {
exists(Cryptography::CryptographicOperation operation |
algorithm = operation.getAlgorithm() and
algorithm.isWeak() and
this = operation.getAnInput()
)
}
override string getAlgorithmName() { result = algorithm.getName() }
}
}
/**
* Provides default sources, sinks and sanitizers for detecting
* "use of a broken or weak cryptographic hashing algorithm on sensitive data"
* vulnerabilities on sensitive data that DOES require computationally expensive
* hashing, as well as extension points for adding your own.
*
* Also see the `NormalHashFunction` module.
*/
module ComputationallyExpensiveHashFunction {
/**
* A data flow source of sensitive data that requires computationally expensive
* hashing for "use of a broken or weak cryptographic hashing algorithm on sensitive
* data" vulnerabilities.
*/
abstract class Source extends DataFlow::Node {
/** Gets the classification of the sensitive data. */
abstract string getClassification();
}
/**
* A data flow sink for sensitive data that requires computationally expensive
* hashing for "use of a broken or weak cryptographic hashing algorithm on sensitive
* data" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node {
/**
* Gets the name of the weak hashing algorithm.
*/
abstract string getAlgorithmName();
/**
* Holds if this sink is for a computationally expensive hash function (meaning that
* hash function is just weak in some other regard.
*/
abstract predicate isComputationallyExpensive();
}
/**
* A sanitizer of sensitive data that requires computationally expensive
* hashing for "use of a broken or weak cryptographic hashing
* algorithm on sensitive data" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A source of passwords, considered as a flow source.
*/
class PasswordSourceAsSource extends Source, SensitiveDataSource {
PasswordSourceAsSource() {
// TODO: once https://github.com/github/codeql/pull/5739 has been merged,
// don't use hardcoded value anymore
SensitiveDataSource.super.getClassification() = "password"
}
override string getClassification() { result = SensitiveDataSource.super.getClassification() }
}
/**
* The input to a password hashing operation using a weak algorithm, considered as a
* flow sink.
*/
class WeakPasswordHashingOperationInputSink extends Sink {
Cryptography::CryptographicAlgorithm algorithm;
WeakPasswordHashingOperationInputSink() {
(
algorithm instanceof Cryptography::PasswordHashingAlgorithm and
algorithm.isWeak()
or
algorithm instanceof Cryptography::HashingAlgorithm
) and
exists(Cryptography::CryptographicOperation operation |
algorithm = operation.getAlgorithm() and
this = operation.getAnInput()
)
}
override string getAlgorithmName() { result = algorithm.getName() }
override predicate isComputationallyExpensive() {
algorithm instanceof Cryptography::PasswordHashingAlgorithm
}
}
}

View File

@@ -0,0 +1,3 @@
Note that the tests in this directory are very shallow, and simply show that the query is able to produce alerts.
More in-depth tests can be found for the individual frameworks that we have modeled `Cryptography::CryptographicOperation` for.

View File

@@ -0,0 +1,27 @@
edges
| test_cryptodome.py:6:17:6:33 | ControlFlowNode for get_certificate() | test_cryptodome.py:8:19:8:27 | ControlFlowNode for dangerous |
| test_cryptodome.py:13:17:13:30 | ControlFlowNode for get_password() | test_cryptodome.py:15:19:15:27 | ControlFlowNode for dangerous |
| test_cryptodome.py:20:17:20:30 | ControlFlowNode for get_password() | test_cryptodome.py:24:19:24:27 | ControlFlowNode for dangerous |
| test_cryptography.py:7:17:7:33 | ControlFlowNode for get_certificate() | test_cryptography.py:9:19:9:27 | ControlFlowNode for dangerous |
| test_cryptography.py:15:17:15:30 | ControlFlowNode for get_password() | test_cryptography.py:17:19:17:27 | ControlFlowNode for dangerous |
| test_cryptography.py:23:17:23:30 | ControlFlowNode for get_password() | test_cryptography.py:27:19:27:27 | ControlFlowNode for dangerous |
nodes
| test_cryptodome.py:6:17:6:33 | ControlFlowNode for get_certificate() | semmle.label | ControlFlowNode for get_certificate() |
| test_cryptodome.py:8:19:8:27 | ControlFlowNode for dangerous | semmle.label | ControlFlowNode for dangerous |
| test_cryptodome.py:13:17:13:30 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
| test_cryptodome.py:15:19:15:27 | ControlFlowNode for dangerous | semmle.label | ControlFlowNode for dangerous |
| test_cryptodome.py:20:17:20:30 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
| test_cryptodome.py:24:19:24:27 | ControlFlowNode for dangerous | semmle.label | ControlFlowNode for dangerous |
| test_cryptography.py:7:17:7:33 | ControlFlowNode for get_certificate() | semmle.label | ControlFlowNode for get_certificate() |
| test_cryptography.py:9:19:9:27 | ControlFlowNode for dangerous | semmle.label | ControlFlowNode for dangerous |
| test_cryptography.py:15:17:15:30 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
| test_cryptography.py:17:19:17:27 | ControlFlowNode for dangerous | semmle.label | ControlFlowNode for dangerous |
| test_cryptography.py:23:17:23:30 | ControlFlowNode for get_password() | semmle.label | ControlFlowNode for get_password() |
| test_cryptography.py:27:19:27:27 | ControlFlowNode for dangerous | semmle.label | ControlFlowNode for dangerous |
#select
| test_cryptodome.py:8:19:8:27 | ControlFlowNode for dangerous | test_cryptodome.py:6:17:6:33 | ControlFlowNode for get_certificate() | test_cryptodome.py:8:19:8:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure. | test_cryptodome.py:6:17:6:33 | ControlFlowNode for get_certificate() | Sensitive data (certificate) |
| test_cryptodome.py:15:19:15:27 | ControlFlowNode for dangerous | test_cryptodome.py:13:17:13:30 | ControlFlowNode for get_password() | test_cryptodome.py:15:19:15:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptodome.py:13:17:13:30 | ControlFlowNode for get_password() | Sensitive data (password) |
| test_cryptodome.py:24:19:24:27 | ControlFlowNode for dangerous | test_cryptodome.py:20:17:20:30 | ControlFlowNode for get_password() | test_cryptodome.py:24:19:24:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (SHA256) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptodome.py:20:17:20:30 | ControlFlowNode for get_password() | Sensitive data (password) |
| test_cryptography.py:9:19:9:27 | ControlFlowNode for dangerous | test_cryptography.py:7:17:7:33 | ControlFlowNode for get_certificate() | test_cryptography.py:9:19:9:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure. | test_cryptography.py:7:17:7:33 | ControlFlowNode for get_certificate() | Sensitive data (certificate) |
| test_cryptography.py:17:19:17:27 | ControlFlowNode for dangerous | test_cryptography.py:15:17:15:30 | ControlFlowNode for get_password() | test_cryptography.py:17:19:17:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptography.py:15:17:15:30 | ControlFlowNode for get_password() | Sensitive data (password) |
| test_cryptography.py:27:19:27:27 | ControlFlowNode for dangerous | test_cryptography.py:23:17:23:30 | ControlFlowNode for get_password() | test_cryptography.py:27:19:27:27 | ControlFlowNode for dangerous | $@ is used in a hashing algorithm (SHA256) that is insecure for password hashing, since it is not a computationally expensive hash function. | test_cryptography.py:23:17:23:30 | ControlFlowNode for get_password() | Sensitive data (password) |

View File

@@ -0,0 +1 @@
Security/CWE-327/WeakSensitiveDataHashing.ql

View File

@@ -0,0 +1,25 @@
from Cryptodome.Hash import MD5, SHA256
from my_module import get_password, get_certificate
def get_badly_hashed_certificate():
dangerous = get_certificate()
hasher = MD5.new()
hasher.update(dangerous) # NOT OK
return hasher.hexdigest()
def get_badly_hashed_password():
dangerous = get_password()
hasher = MD5.new()
hasher.update(dangerous) # NOT OK
return hasher.hexdigest()
def get_badly_hashed_password2():
dangerous = get_password()
# Although SHA-256 is a strong cryptographic hash functions,
# it is not suitable for password hashing.
hasher = SHA256.new()
hasher.update(dangerous) # NOT OK
return hasher.hexdigest()

View File

@@ -0,0 +1,29 @@
from cryptography.hazmat.primitives import hashes
from binascii import hexlify
from my_module import get_password, get_certificate
def get_badly_hashed_certificate():
dangerous = get_certificate()
hasher = hashes.Hash(hashes.MD5())
hasher.update(dangerous) # NOT OK
digest = hasher.finalize()
return hexlify(digest).decode("utf-8")
def get_badly_hashed_password():
dangerous = get_password()
hasher = hashes.Hash(hashes.MD5())
hasher.update(dangerous) # NOT OK
digest = hasher.finalize()
return hexlify(digest).decode("utf-8")
def get_badly_hashed_password2():
dangerous = get_password()
# Although SHA-256 is a strong cryptographic hash functions,
# it is not suitable for password hashing.
hasher = hashes.Hash(hashes.SHA256())
hasher.update(dangerous) # NOT OK
digest = hasher.finalize()
return hexlify(digest).decode("utf-8")