Merge pull request #16781 from alexrford/rb/weak-sensitive-data-hashing

Add `rb/weak-sensitive-data-hashing` query port
This commit is contained in:
Alex Ford
2024-07-25 14:11:42 +01:00
committed by GitHub
11 changed files with 563 additions and 0 deletions

View File

@@ -0,0 +1,238 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "use of a broken or weak cryptographic hashing algorithm on sensitive data"
* vulnerabilities, as well as extension points for adding your own.
*/
private import ruby
private import codeql.ruby.Concepts
private import codeql.ruby.security.SensitiveActions
private import codeql.ruby.dataflow.BarrierGuards
private import codeql.ruby.dataflow.SSA
private module SensitiveDataSources {
/**
* A data flow source of sensitive data, such as secrets, certificates, or passwords.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SensitiveDataSource::Range` instead.
*/
class SensitiveDataSource extends DataFlow::Node instanceof SensitiveDataSource::Range {
/**
* Gets the classification of the sensitive data.
*/
SensitiveDataClassification getClassification() { result = super.getClassification() }
}
/** Provides a class for modeling new sources of sensitive data, such as secrets, certificates, or passwords. */
module SensitiveDataSource {
/**
* A data flow source of sensitive data, such as secrets, certificates, or passwords.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `SensitiveDataSource` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the classification of the sensitive data.
*/
abstract SensitiveDataClassification getClassification();
}
}
/**
* A call to a method that may return sensitive data.
*/
class SensitiveMethodCall extends SensitiveDataSource::Range instanceof SensitiveCall {
override SensitiveDataClassification getClassification() {
result = SensitiveCall.super.getClassification()
}
}
/**
* An assignment to a variable that may contain sensitive data.
*/
class SensitiveVariableAssignment extends SensitiveDataSource::Range, DataFlow::SsaDefinitionNode {
SensitiveNode sensitiveNode;
SensitiveVariableAssignment() {
this.getDefinition().(Ssa::WriteDefinition).getWriteAccess() = sensitiveNode.asExpr()
}
override SensitiveDataClassification getClassification() {
result = sensitiveNode.getClassification()
}
}
/**
* A read from a hash value that may return sensitive data.
*/
class SensitiveHashValueAccess extends SensitiveDataSource::Range instanceof SensitiveNode {
SensitiveHashValueAccess() {
this.asExpr() instanceof Cfg::CfgNodes::ExprNodes::ElementReferenceCfgNode
}
override SensitiveDataClassification getClassification() {
result = SensitiveNode.super.getClassification()
}
}
/**
* A parameter node that may contain sensitive data.
*/
class SensitiveParameter extends SensitiveDataSource::Range, DataFlow::ParameterNode instanceof SensitiveNode
{
override SensitiveDataClassification getClassification() {
result = SensitiveNode.super.getClassification()
}
}
}
/**
* Provides default sources, sinks and sanitizers for detecting
* "use of a broken or weak cryptographic hashing algorithm on sensitive data"
* vulnerabilities on sensitive data that does NOT require computationally expensive
* hashing, as well as extension points for adding your own.
*
* Also see the `ComputationallyExpensiveHashFunction` module.
*/
module NormalHashFunction {
/**
* A data flow source for "use of a broken or weak cryptographic hashing algorithm on
* sensitive data" vulnerabilities.
*/
abstract class Source extends DataFlow::Node {
Source() { not this instanceof ComputationallyExpensiveHashFunction::Source }
/** Gets the classification of the sensitive data. */
abstract string getClassification();
}
/**
* A data flow sink for "use of a broken or weak cryptographic hashing algorithm on
* sensitive data" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node {
/**
* Gets the name of the weak hashing algorithm.
*/
abstract string getAlgorithmName();
}
/**
* A sanitizer for "use of a broken or weak cryptographic hashing algorithm on
* sensitive data" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A source of sensitive data, considered as a flow source.
*/
class SensitiveDataSourceAsSource extends Source instanceof SensitiveDataSources::SensitiveDataSource
{
override SensitiveDataClassification getClassification() {
result = SensitiveDataSources::SensitiveDataSource.super.getClassification()
}
}
/** The input to a hashing operation using a weak algorithm, considered as a flow sink. */
class WeakHashingOperationInputSink extends Sink {
Cryptography::HashingAlgorithm algorithm;
WeakHashingOperationInputSink() {
exists(Cryptography::CryptographicOperation operation |
algorithm = operation.getAlgorithm() and
algorithm.isWeak() and
this = operation.getAnInput()
)
}
override string getAlgorithmName() { result = algorithm.getName() }
}
}
/**
* Provides default sources, sinks and sanitizers for detecting
* "use of a broken or weak cryptographic hashing algorithm on sensitive data"
* vulnerabilities on sensitive data that DOES require computationally expensive
* hashing, as well as extension points for adding your own.
*
* Also see the `NormalHashFunction` module.
*/
module ComputationallyExpensiveHashFunction {
/**
* A data flow source of sensitive data that requires computationally expensive
* hashing for "use of a broken or weak cryptographic hashing algorithm on sensitive
* data" vulnerabilities.
*/
abstract class Source extends DataFlow::Node {
/** Gets the classification of the sensitive data. */
abstract string getClassification();
}
/**
* A data flow sink for sensitive data that requires computationally expensive
* hashing for "use of a broken or weak cryptographic hashing algorithm on sensitive
* data" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node {
/**
* Gets the name of the weak hashing algorithm.
*/
abstract string getAlgorithmName();
/**
* Holds if this sink is for a computationally expensive hash function (meaning that
* hash function is just weak in some other regard.
*/
abstract predicate isComputationallyExpensive();
}
/**
* A sanitizer of sensitive data that requires computationally expensive
* hashing for "use of a broken or weak cryptographic hashing
* algorithm on sensitive data" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A source of passwords, considered as a flow source.
*/
class PasswordSourceAsSource extends Source instanceof SensitiveDataSources::SensitiveDataSource {
PasswordSourceAsSource() {
this.(SensitiveDataSources::SensitiveDataSource).getClassification() =
SensitiveDataClassification::password()
}
override SensitiveDataClassification getClassification() {
result = SensitiveDataSources::SensitiveDataSource.super.getClassification()
}
}
/**
* The input to a password hashing operation using a weak algorithm, considered as a
* flow sink.
*/
class WeakPasswordHashingOperationInputSink extends Sink {
Cryptography::CryptographicAlgorithm algorithm;
WeakPasswordHashingOperationInputSink() {
(
algorithm instanceof Cryptography::PasswordHashingAlgorithm and
algorithm.isWeak()
or
algorithm instanceof Cryptography::HashingAlgorithm // Note that HashingAlgorithm and PasswordHashingAlgorithm are disjoint
) and
exists(Cryptography::CryptographicOperation operation |
algorithm = operation.getAlgorithm() and
this = operation.getAnInput()
)
}
override string getAlgorithmName() { result = algorithm.getName() }
override predicate isComputationallyExpensive() {
algorithm instanceof Cryptography::PasswordHashingAlgorithm
}
}
}

View File

@@ -0,0 +1,87 @@
/**
* Provides a taint-tracking configuration for detecting use of a broken or weak
* cryptographic hashing algorithm on sensitive data.
*
* Note, for performance reasons: only import this file if
* `WeakSensitiveDataHashing::Configuration` is needed, otherwise
* `WeakSensitiveDataHashingCustomizations` should be imported instead.
*/
private import ruby
private import codeql.ruby.Concepts
private import codeql.ruby.TaintTracking
private import codeql.ruby.dataflow.RemoteFlowSources
private import codeql.ruby.dataflow.BarrierGuards
private import codeql.ruby.security.SensitiveActions
/**
* Provides a taint-tracking configuration for detecting use of a broken or weak
* cryptographic hash function on sensitive data, that does NOT require a
* computationally expensive hash function.
*/
module NormalHashFunction {
import WeakSensitiveDataHashingCustomizations::NormalHashFunction
private module Config implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof Source }
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}
/** Global taint-tracking for detecting "use of a broken or weak cryptographic hashing algorithm on sensitive data" vulnerabilities. */
module Flow = TaintTracking::Global<Config>;
}
/**
* Provides a taint-tracking configuration for detecting use of a broken or weak
* cryptographic hashing algorithm on passwords.
*
* Passwords has stricter requirements on the hashing algorithm used (must be
* computationally expensive to prevent brute-force attacks).
*/
module ComputationallyExpensiveHashFunction {
import WeakSensitiveDataHashingCustomizations::ComputationallyExpensiveHashFunction
/**
* Passwords has stricter requirements on the hashing algorithm used (must be
* computationally expensive to prevent brute-force attacks).
*/
private module Config implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof Source }
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
}
/** Global taint-tracking for detecting "use of a broken or weak cryptographic hashing algorithm on passwords" vulnerabilities. */
module Flow = TaintTracking::Global<Config>;
}
/**
* Global taint-tracking for detecting both variants of "use of a broken or weak
* cryptographic hashing algorithm on sensitive data" vulnerabilities.
*
* See convenience predicates `normalHashFunctionFlowPath` and
* `computationallyExpensiveHashFunctionFlowPath`.
*/
module WeakSensitiveDataHashingFlow =
DataFlow::MergePathGraph<NormalHashFunction::Flow::PathNode,
ComputationallyExpensiveHashFunction::Flow::PathNode, NormalHashFunction::Flow::PathGraph,
ComputationallyExpensiveHashFunction::Flow::PathGraph>;
/** Holds if data can flow from `source` to `sink` with `NormalHashFunction::Flow`. */
predicate normalHashFunctionFlowPath(
WeakSensitiveDataHashingFlow::PathNode source, WeakSensitiveDataHashingFlow::PathNode sink
) {
NormalHashFunction::Flow::flowPath(source.asPathNode1(), sink.asPathNode1())
}
/** Holds if data can flow from `source` to `sink` with `ComputationallyExpensiveHashFunction::Flow`. */
predicate computationallyExpensiveHashFunctionFlowPath(
WeakSensitiveDataHashingFlow::PathNode source, WeakSensitiveDataHashingFlow::PathNode sink
) {
ComputationallyExpensiveHashFunction::Flow::flowPath(source.asPathNode2(), sink.asPathNode2())
}

View File

@@ -0,0 +1,4 @@
---
category: newQuery
---
* Added a new query, `rb/weak-sensitive-data-hashing`, to detect cases where sensitive data is hashed using a weak cryptographic hashing algorithm.

View File

@@ -0,0 +1,104 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Using a broken or weak cryptographic hash function can leave data
vulnerable, and should not be used in security related code.
</p>
<p>
A strong cryptographic hash function should be resistant to:
</p>
<ul>
<li>
pre-image attacks: if you know a hash value <code>h(x)</code>,
you should not be able to easily find the input <code>x</code>.
</li>
<li>
collision attacks: if you know a hash value <code>h(x)</code>,
you should not be able to easily find a different input <code>y</code>
with the same hash value <code>h(x) = h(y)</code>.
</li>
</ul>
<p>
In cases with a limited input space, such as for passwords, the hash
function also needs to be computationally expensive to be resistant to
brute-force attacks. Passwords should also have an unique salt applied
before hashing, but that is not considered by this query.
</p>
<p>
As an example, both MD5 and SHA-1 are known to be vulnerable to collision attacks.
</p>
<p>
Since it's OK to use a weak cryptographic hash function in a non-security
context, this query only alerts when these are used to hash sensitive
data (such as passwords, certificates, usernames).
</p>
<p>
Use of broken or weak cryptographic algorithms that are not hashing algorithms, is
handled by the <code>rb/weak-cryptographic-algorithm</code> query.
</p>
</overview>
<recommendation>
<p>
Ensure that you use a strong, modern cryptographic hash function:
</p>
<ul>
<li>
such as Argon2, scrypt, bcrypt, or PBKDF2 for passwords and other data with limited input space.
</li>
<li>
such as SHA-2, or SHA-3 in other cases.
</li>
</ul>
</recommendation>
<example>
<p>
The following example shows two functions for checking whether the hash
of a certificate matches a known value -- to prevent tampering.
The first function uses MD5 that is known to be vulnerable to collision attacks.
The second function uses SHA-256 that is a strong cryptographic hashing function.
</p>
<sample src="examples/weak_certificate_hashing.rb" />
</example>
<example>
<p>
The following example shows two functions for hashing passwords.
The first function uses SHA-256 to hash passwords. Although SHA-256 is a
strong cryptographic hash function, it is not suitable for password
hashing since it is not computationally expensive.
</p>
<sample src="examples/weak_password_hashing_bad.rb" />
<p>
The second function uses Argon2 (through the <code>argon2</code>
gem), which is a strong password hashing algorithm (and
includes a per-password salt by default).
</p>
<sample src="examples/weak_password_hashing_good.rb" />
</example>
<references>
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html">Password Storage Cheat Sheet</a></li>
</references>
</qhelp>

View File

@@ -0,0 +1,43 @@
/**
* @name Use of a broken or weak cryptographic hashing algorithm on sensitive data
* @description Using broken or weak cryptographic hashing algorithms can compromise security.
* @kind path-problem
* @problem.severity warning
* @security-severity 7.5
* @precision high
* @id rb/weak-sensitive-data-hashing
* @tags security
* external/cwe/cwe-327
* external/cwe/cwe-328
* external/cwe/cwe-916
*/
import ruby
import codeql.ruby.security.WeakSensitiveDataHashingQuery
import WeakSensitiveDataHashingFlow::PathGraph
from
WeakSensitiveDataHashingFlow::PathNode source, WeakSensitiveDataHashingFlow::PathNode sink,
string ending, string algorithmName, string classification
where
normalHashFunctionFlowPath(source, sink) and
algorithmName = sink.getNode().(NormalHashFunction::Sink).getAlgorithmName() and
classification = source.getNode().(NormalHashFunction::Source).getClassification() and
ending = "."
or
computationallyExpensiveHashFunctionFlowPath(source, sink) and
algorithmName = sink.getNode().(ComputationallyExpensiveHashFunction::Sink).getAlgorithmName() and
classification =
source.getNode().(ComputationallyExpensiveHashFunction::Source).getClassification() and
(
sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
ending = "."
or
not sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
ending =
" for " + classification +
" hashing, since it is not a computationally expensive hash function."
)
select sink.getNode(), source, sink,
"$@ is used in a hashing algorithm (" + algorithmName + ") that is insecure" + ending,
source.getNode(), "Sensitive data (" + classification + ")"

View File

@@ -0,0 +1,11 @@
require 'openssl'
def certificate_matches_known_hash_bad(certificate, known_hash)
hash = OpenSSL::Digest.new('SHA1').digest certificate
hash == known_hash
end
def certificate_matches_known_hash_good(certificate, known_hash)
hash = OpenSSL::Digest.new('SHA256').digest certificate
hash == known_hash
end

View File

@@ -0,0 +1,5 @@
require 'openssl'
def get_password_hash(password, salt)
OpenSSL::Digest.new('SHA256').digest(password + salt) # BAD
end

View File

@@ -0,0 +1,9 @@
require 'argon2'
def get_initial_hash(password)
Argon2::Password.create(password)
end
def check_password(password, known_hash)
Argon2::Password.verify_password(password, known_hash)
end

View File

@@ -0,0 +1,28 @@
edges
| weak_hashing.rb:3:1:3:8 | password | weak_hashing.rb:6:1:6:1 | x | provenance | |
| weak_hashing.rb:3:1:3:8 | password | weak_hashing.rb:10:23:10:30 | password | provenance | |
| weak_hashing.rb:3:1:3:8 | password | weak_hashing.rb:11:32:11:39 | password | provenance | |
| weak_hashing.rb:4:1:4:8 | username | weak_hashing.rb:12:23:12:30 | username | provenance | |
| weak_hashing.rb:6:1:6:1 | x | weak_hashing.rb:13:23:13:23 | x | provenance | |
| weak_hashing.rb:30:25:30:38 | password_param | weak_hashing.rb:32:25:32:38 | password_param | provenance | |
nodes
| weak_hashing.rb:3:1:3:8 | password | semmle.label | password |
| weak_hashing.rb:4:1:4:8 | username | semmle.label | username |
| weak_hashing.rb:6:1:6:1 | x | semmle.label | x |
| weak_hashing.rb:10:23:10:30 | password | semmle.label | password |
| weak_hashing.rb:11:32:11:39 | password | semmle.label | password |
| weak_hashing.rb:12:23:12:30 | username | semmle.label | username |
| weak_hashing.rb:13:23:13:23 | x | semmle.label | x |
| weak_hashing.rb:24:23:24:36 | call to get_password | semmle.label | call to get_password |
| weak_hashing.rb:28:23:28:42 | ...[...] | semmle.label | ...[...] |
| weak_hashing.rb:30:25:30:38 | password_param | semmle.label | password_param |
| weak_hashing.rb:32:25:32:38 | password_param | semmle.label | password_param |
subpaths
#select
| weak_hashing.rb:10:23:10:30 | password | weak_hashing.rb:3:1:3:8 | password | weak_hashing.rb:10:23:10:30 | password | $@ is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function. | weak_hashing.rb:3:1:3:8 | password | Sensitive data (password) |
| weak_hashing.rb:11:32:11:39 | password | weak_hashing.rb:3:1:3:8 | password | weak_hashing.rb:11:32:11:39 | password | $@ is used in a hashing algorithm (SHA1) that is insecure for password hashing, since it is not a computationally expensive hash function. | weak_hashing.rb:3:1:3:8 | password | Sensitive data (password) |
| weak_hashing.rb:12:23:12:30 | username | weak_hashing.rb:4:1:4:8 | username | weak_hashing.rb:12:23:12:30 | username | $@ is used in a hashing algorithm (MD5) that is insecure. | weak_hashing.rb:4:1:4:8 | username | Sensitive data (id) |
| weak_hashing.rb:13:23:13:23 | x | weak_hashing.rb:3:1:3:8 | password | weak_hashing.rb:13:23:13:23 | x | $@ is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function. | weak_hashing.rb:3:1:3:8 | password | Sensitive data (password) |
| weak_hashing.rb:24:23:24:36 | call to get_password | weak_hashing.rb:24:23:24:36 | call to get_password | weak_hashing.rb:24:23:24:36 | call to get_password | $@ is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function. | weak_hashing.rb:24:23:24:36 | call to get_password | Sensitive data (password) |
| weak_hashing.rb:28:23:28:42 | ...[...] | weak_hashing.rb:28:23:28:42 | ...[...] | weak_hashing.rb:28:23:28:42 | ...[...] | $@ is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function. | weak_hashing.rb:28:23:28:42 | ...[...] | Sensitive data (password) |
| weak_hashing.rb:32:25:32:38 | password_param | weak_hashing.rb:30:25:30:38 | password_param | weak_hashing.rb:32:25:32:38 | password_param | $@ is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function. | weak_hashing.rb:30:25:30:38 | password_param | Sensitive data (password) |

View File

@@ -0,0 +1 @@
queries/security/cwe-327/WeakSensitiveDataHashing.ql

View File

@@ -0,0 +1,33 @@
require 'openssl'
password = "abcde"
username = "some_user"
some_data = "foo"
x = password
Digest::MD5.hexdigest(some_data) # OK: input is not sensitive
Digest::SHA256.hexdigest(password) # OK: strong hash algorithm
Digest::MD5.hexdigest(password) # BAD: weak hash function used for sensitive data
OpenSSL::Digest.digest('SHA1', password) # BAD: weak hash function used for sensitive data
Digest::MD5.hexdigest(username) # BAD: weak hash function used for sensitive data
Digest::MD5.hexdigest(x) # BAD: weak hash function used for sensitive data
def get_safe_data()
return "hello"
end
def get_password()
return "changeme"
end
Digest::MD5.hexdigest(get_safe_data()) # OK: input is not sensitive
Digest::MD5.hexdigest(get_password()) # BAD: weak hash function used for sensitive data
some_hash = {password: "changeme", foo: "bar"}
Digest::MD5.hexdigest(some_hash[:foo]) # OK: input is not sensitive
Digest::MD5.hexdigest(some_hash[:password]) # BAD: weak hash function used for sensitive data
def a_method(safe_data, password_param)
Digest::MD5.hexdigest(safe_data) # OK: input is not sensitive
Digest::MD5.hexdigest(password_param) # BAD: weak hash function used for sensitive data
end