Add query for hashing sensitive data with weak hashing algorithm

This commit is contained in:
Owen Mansel-Chan
2025-10-31 15:56:18 +00:00
committed by Owen Mansel-Chan
parent 713e19f6f1
commit 52d7e2dd18
10 changed files with 536 additions and 1 deletions

View File

@@ -0,0 +1,172 @@
/**
* Provides default sources, sinks and sanitizers for detecting "use of a
* broken or weak cryptographic hashing algorithm on sensitive data"
* vulnerabilities, as well as extension points for adding your own. This is
* divided into two general cases:
* - hashing sensitive data
* - hashing passwords (which requires the hashing algorithm to be
* sufficiently computationally expensive in addition to other requirements)
*/
import go
import semmle.go.dataflow.internal.DataFlowPrivate
private import semmle.go.security.SensitiveActions
/**
* Provides default sources, sinks and sanitizers for detecting "use of a broken or weak
* cryptographic hashing algorithm on sensitive data" vulnerabilities on sensitive data that does
* NOT require computationally expensive hashing, as well as extension points for adding your own.
*
* Also see the `ComputationallyExpensiveHashFunction` module.
*/
module NormalHashFunction {
/**
* A data flow source for "use of a broken or weak cryptographic hashing algorithm on sensitive
* data" vulnerabilities that does not require computationally expensive hashing. That is, a
* piece of sensitive data that is not a password.
*
* Extend this class to add your own sources. Nodes that are also
* `ComputationallyExpensiveHashFunction::Source`s are excluded by the characteristic
* predicate below.
*/
abstract class Source extends DataFlow::Node {
// Exclude anything already treated as a password-style source, so that a node is never a
// source for both this module and `ComputationallyExpensiveHashFunction`.
Source() { not this instanceof ComputationallyExpensiveHashFunction::Source }
/**
* Gets the classification of the sensitive data.
*/
abstract string getClassification();
}
/**
* A data flow sink for "use of a broken or weak cryptographic hashing algorithm on sensitive
* data" vulnerabilities that applies to data that does not require computationally expensive
* hashing. That is, a broken or weak hashing algorithm.
*
* Extend this class to add your own sinks.
*/
abstract class Sink extends DataFlow::Node {
/**
* Gets the name of the weak hashing algorithm.
*/
abstract string getAlgorithmName();
}
/**
* A barrier for "use of a broken or weak cryptographic hashing algorithm on sensitive data"
* vulnerabilities that applies to data that does not require computationally expensive hashing.
*
* Extend this class to add your own barriers.
*/
abstract class Barrier extends DataFlow::Node { }
/**
* A flow source modeled by `SensitiveExpr` (from the `SensitiveActions` library): any
* sensitive expression whose classification is neither `password` nor `id`.
*/
class SensitiveDataAsSource extends Source {
SensitiveExpr::Classification classification;
SensitiveDataAsSource() {
classification = this.asExpr().(SensitiveExpr).getClassification() and
not classification = SensitiveExpr::password() and // (covered in ComputationallyExpensiveHashFunction)
not classification = SensitiveExpr::id() // (not accurate enough)
}
override SensitiveExpr::Classification getClassification() { result = classification }
}
/**
* A flow sink modeled by the `Cryptography` module: an input to a cryptographic operation
* whose algorithm is a hashing algorithm that reports itself as weak.
*/
class WeakHashingOperationInputAsSink extends Sink {
Cryptography::HashingAlgorithm algorithm;
WeakHashingOperationInputAsSink() {
exists(Cryptography::CryptographicOperation operation |
algorithm.isWeak() and
algorithm = operation.getAlgorithm() and
this = operation.getAnInput()
)
}
override string getAlgorithmName() { result = algorithm.getName() }
}
}
/**
* Provides default sources, sinks and sanitizers for detecting "use of a broken or weak
* cryptographic hashing algorithm on sensitive data" vulnerabilities on sensitive data that DOES
* require computationally expensive hashing, as well as extension points for adding your own.
*
* Also see the `NormalHashFunction` module.
*/
module ComputationallyExpensiveHashFunction {
/**
* A data flow source for "use of a broken or weak cryptographic hashing algorithm on sensitive
* data" vulnerabilities that does require computationally expensive hashing. That is, a
* password.
*
* Extend this class to add your own sources.
*/
abstract class Source extends DataFlow::Node {
/**
* Gets the classification of the sensitive data.
*/
abstract string getClassification();
}
/**
* A data flow sink for "use of a broken or weak cryptographic hashing algorithm on sensitive
* data" vulnerabilities that applies to data that does require computationally expensive
* hashing. That is, a broken or weak hashing algorithm or one that is not computationally
* expensive enough for password hashing.
*
* Extend this class to add your own sinks.
*/
abstract class Sink extends DataFlow::Node {
/**
* Gets the name of the weak hashing algorithm.
*/
abstract string getAlgorithmName();
/**
* Holds if this sink is for a computationally expensive hash function (meaning that the hash
* function is just weak in some other regard).
*/
abstract predicate isComputationallyExpensive();
}
/**
* A barrier for "use of a broken or weak cryptographic hashing algorithm on sensitive data"
* vulnerabilities that applies to data that does require computationally expensive hashing.
*
* Extend this class to add your own barriers.
*/
abstract class Barrier extends DataFlow::Node { }
/**
* A flow source modeled by `SensitiveExpr` (from the `SensitiveActions` library): any
* sensitive expression classified as a password.
*/
class PasswordAsSource extends Source {
SensitiveExpr::Classification classification;
PasswordAsSource() {
classification = this.asExpr().(SensitiveExpr).getClassification() and
classification = SensitiveExpr::password()
}
override SensitiveExpr::Classification getClassification() { result = classification }
}
/**
* A flow sink modeled by the `Cryptography` module: an input to a cryptographic operation
* whose algorithm is either a weak password hashing algorithm, or any (weak or strong)
* ordinary hashing algorithm.
*/
class WeakPasswordHashingOperationInputSink extends Sink {
Cryptography::CryptographicAlgorithm algorithm;
WeakPasswordHashingOperationInputSink() {
exists(Cryptography::CryptographicOperation operation |
(
// A dedicated password hashing algorithm is only a sink when it is itself weak.
algorithm instanceof Cryptography::PasswordHashingAlgorithm and
algorithm.isWeak()
or
// An ordinary hashing algorithm is always a sink here, with no `isWeak()` check.
algorithm instanceof Cryptography::HashingAlgorithm // Note that HashingAlgorithm and PasswordHashingAlgorithm are disjoint
) and
algorithm = operation.getAlgorithm() and
this = operation.getAnInput()
)
}
override string getAlgorithmName() { result = algorithm.getName() }
// Only password hashing algorithms count as computationally expensive; the query uses this
// to choose the ending of the alert message.
override predicate isComputationallyExpensive() {
algorithm instanceof Cryptography::PasswordHashingAlgorithm
}
}
}

View File

@@ -0,0 +1,104 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Using a broken or weak cryptographic hash function can leave data
vulnerable. Such functions should not be used in security-related code.
</p>
<p>
A strong cryptographic hash function should be resistant to:
</p>
<ul>
<li>
pre-image attacks: if you know a hash value <code>h(x)</code>,
you should not be able to easily find the input <code>x</code>.
</li>
<li>
collision attacks: if you know a hash value <code>h(x)</code>,
you should not be able to easily find a different input <code>y</code>
with the same hash value <code>h(x) = h(y)</code>.
</li>
</ul>
<p>
In cases with a limited input space, such as for passwords, the hash
function also needs to be computationally expensive to be resistant to
brute-force attacks. Passwords should also have a unique salt applied
before hashing, but that is not considered by this query.
</p>
<p>
As an example, both MD5 and SHA-1 are known to be vulnerable to collision attacks.
</p>
<p>
Since it's OK to use a weak cryptographic hash function in a non-security
context, this query only alerts when these are used to hash sensitive
data (such as passwords, certificates, usernames).
</p>
<p>
Use of broken or weak cryptographic algorithms that are not hashing algorithms is
handled by the <code>go/weak-crypto-algorithm</code> query.
</p>
</overview>
<recommendation>
<p>
Ensure that you use a strong, modern cryptographic hash function:
</p>
<ul>
<li>
such as Argon2, scrypt, bcrypt, or PBKDF2 for passwords and other data with limited input space.
</li>
<li>
such as SHA-2, or SHA-3 in other cases.
</li>
</ul>
</recommendation>
<example>
<p>
The following example shows two functions for checking whether the hash
of a certificate matches a known value -- to prevent tampering.
The first function uses MD5 that is known to be vulnerable to collision attacks.
The second function uses SHA-256 that is a strong cryptographic hashing function.
</p>
<sample src="examples/weak_certificate_hashing.rb" />
</example>
<example>
<p>
The following example shows two functions for hashing passwords.
The first function uses SHA-256 to hash passwords. Although SHA-256 is a
strong cryptographic hash function, it is not suitable for password
hashing since it is not computationally expensive.
</p>
<sample src="examples/weak_password_hashing_bad.rb" />
<p>
The second function uses Argon2 (through the <code>argon2</code>
gem), which is a strong password hashing algorithm (and
includes a per-password salt by default).
</p>
<sample src="examples/weak_password_hashing_good.rb" />
</example>
<references>
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html">Password Storage Cheat Sheet</a></li>
</references>
</qhelp>

View File

@@ -0,0 +1,114 @@
/**
* @name Use of a broken or weak cryptographic hashing algorithm on sensitive data
* @description Using broken or weak cryptographic hashing algorithms can compromise security.
* @kind path-problem
* @problem.severity warning
* @security-severity 7.5
* @precision high
* @id go/weak-sensitive-data-hashing
* @tags security
* external/cwe/cwe-327
* external/cwe/cwe-328
* external/cwe/cwe-916
*/
import go
import semmle.go.security.WeakSensitiveDataHashingCustomizations
/**
 * Taint tracking from sensitive data that does NOT need a computationally
 * expensive hash function (that is, anything other than passwords) to the
 * inputs of broken or weak cryptographic hash functions.
 */
module NormalHashFunctionFlow {
  import NormalHashFunction

  /** The data flow configuration backing this module's taint tracking. */
  private module Config implements DataFlow::ConfigSig {
    predicate isSource(DataFlow::Node source) { source instanceof Source }

    predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

    predicate isBarrier(DataFlow::Node node) { node instanceof Barrier }

    // Stop flow from entering a source, so that each alert starts at the
    // source closest to the sink.
    predicate isBarrierIn(DataFlow::Node node) { node instanceof Source }

    // Stop flow from leaving a sink, so that each alert ends at the sink
    // closest to the source.
    predicate isBarrierOut(DataFlow::Node node) { node instanceof Sink }
  }

  import TaintTracking::Global<Config>
}
/**
 * Taint tracking from passwords to the inputs of hash functions that are
 * broken, weak, or not suitable for password hashing.
 *
 * Passwords have stricter requirements on the hashing algorithm used: it must
 * be computationally expensive, to prevent brute-force attacks.
 */
module ComputationallyExpensiveHashFunctionFlow {
  import ComputationallyExpensiveHashFunction

  /** The data flow configuration backing this module's taint tracking. */
  private module Config implements DataFlow::ConfigSig {
    predicate isSource(DataFlow::Node source) { source instanceof Source }

    predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

    predicate isBarrier(DataFlow::Node node) { node instanceof Barrier }

    // Stop flow from entering a source, so that each alert starts at the
    // source closest to the sink.
    predicate isBarrierIn(DataFlow::Node node) { node instanceof Source }

    // Stop flow from leaving a sink, so that each alert ends at the sink
    // closest to the source.
    predicate isBarrierOut(DataFlow::Node node) { node instanceof Sink }
  }

  import TaintTracking::Global<Config>
}
/**
* Global taint-tracking for detecting both variants of "use of a broken or weak
* cryptographic hashing algorithm on sensitive data" vulnerabilities. The two configurations are
* merged to generate a combined path graph.
*
* `NormalHashFunctionFlow` is passed as the first graph and
* `ComputationallyExpensiveHashFunctionFlow` as the second, so the query reads their path
* nodes back through `asPathNode1()` and `asPathNode2()` respectively.
*/
module WeakSensitiveDataHashingFlow =
DataFlow::MergePathGraph<NormalHashFunctionFlow::PathNode,
ComputationallyExpensiveHashFunctionFlow::PathNode, NormalHashFunctionFlow::PathGraph,
ComputationallyExpensiveHashFunctionFlow::PathGraph>;
import WeakSensitiveDataHashingFlow::PathGraph
from
WeakSensitiveDataHashingFlow::PathNode source, WeakSensitiveDataHashingFlow::PathNode sink,
string ending, string algorithmName, string classification
where
// Case 1: sensitive data that is not a password reaches a weak hashing algorithm.
NormalHashFunctionFlow::flowPath(source.asPathNode1(), sink.asPathNode1()) and
algorithmName = sink.getNode().(NormalHashFunction::Sink).getAlgorithmName() and
classification = source.getNode().(NormalHashFunction::Source).getClassification() and
ending = "."
or
// Case 2: a password reaches a hashing algorithm that is unsuitable for passwords.
ComputationallyExpensiveHashFunctionFlow::flowPath(source.asPathNode2(), sink.asPathNode2()) and
algorithmName = sink.getNode().(ComputationallyExpensiveHashFunction::Sink).getAlgorithmName() and
classification =
source.getNode().(ComputationallyExpensiveHashFunction::Source).getClassification() and
(
// The algorithm is computationally expensive but flagged anyway: plain "insecure" message.
sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
ending = "."
or
// The algorithm is not computationally expensive: say so in the message.
not sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
ending =
" for " + classification +
" hashing, since it is not a computationally expensive hash function."
)
// The alert is placed on the sink; the `$@` placeholder links to the source node.
select sink.getNode(), source, sink,
"$@ is used in a hashing algorithm (" + algorithmName + ") that is insecure" + ending,
source.getNode(), "Sensitive data (" + classification + ")"

View File

@@ -0,0 +1,22 @@
#select
| hashing.go:20:8:20:22 | secretByteSlice | hashing.go:20:8:20:22 | secretByteSlice | hashing.go:20:8:20:22 | secretByteSlice | $@ is used in a hashing algorithm (MD5) that is insecure. | hashing.go:20:8:20:22 | secretByteSlice | Sensitive data (secret) |
| hashing.go:21:10:21:24 | secretByteSlice | hashing.go:21:10:21:24 | secretByteSlice | hashing.go:21:10:21:24 | secretByteSlice | $@ is used in a hashing algorithm (MD5) that is insecure. | hashing.go:21:10:21:24 | secretByteSlice | Sensitive data (secret) |
| hashing.go:22:20:22:31 | secretString | hashing.go:22:20:22:31 | secretString | hashing.go:22:20:22:31 | secretString | $@ is used in a hashing algorithm (MD5) that is insecure. | hashing.go:22:20:22:31 | secretString | Sensitive data (secret) |
| hashing.go:23:10:23:24 | secretByteSlice | hashing.go:23:10:23:24 | secretByteSlice | hashing.go:23:10:23:24 | secretByteSlice | $@ is used in a hashing algorithm (MD5) that is insecure. | hashing.go:23:10:23:24 | secretByteSlice | Sensitive data (secret) |
| hashing.go:25:17:25:31 | secretByteSlice | hashing.go:25:17:25:31 | secretByteSlice | hashing.go:25:17:25:31 | secretByteSlice | $@ is used in a hashing algorithm (SHA1) that is insecure. | hashing.go:25:17:25:31 | secretByteSlice | Sensitive data (secret) |
| hashing.go:26:11:26:25 | secretByteSlice | hashing.go:26:11:26:25 | secretByteSlice | hashing.go:26:11:26:25 | secretByteSlice | $@ is used in a hashing algorithm (SHA1) that is insecure. | hashing.go:26:11:26:25 | secretByteSlice | Sensitive data (secret) |
| hashing.go:28:16:28:30 | secretByteSlice | hashing.go:28:16:28:30 | secretByteSlice | hashing.go:28:16:28:30 | secretByteSlice | $@ is used in a hashing algorithm (MD4) that is insecure. | hashing.go:28:16:28:30 | secretByteSlice | Sensitive data (secret) |
| hashing.go:29:22:29:36 | secretByteSlice | hashing.go:29:22:29:36 | secretByteSlice | hashing.go:29:22:29:36 | secretByteSlice | $@ is used in a hashing algorithm (RIPEMD160) that is insecure. | hashing.go:29:22:29:36 | secretByteSlice | Sensitive data (secret) |
| hashing.go:80:16:80:23 | password | hashing.go:80:16:80:23 | password | hashing.go:80:16:80:23 | password | $@ is used in a hashing algorithm (SHA256) that is insecure for password hashing, since it is not a computationally expensive hash function. | hashing.go:80:16:80:23 | password | Sensitive data (password) |
edges
nodes
| hashing.go:20:8:20:22 | secretByteSlice | semmle.label | secretByteSlice |
| hashing.go:21:10:21:24 | secretByteSlice | semmle.label | secretByteSlice |
| hashing.go:22:20:22:31 | secretString | semmle.label | secretString |
| hashing.go:23:10:23:24 | secretByteSlice | semmle.label | secretByteSlice |
| hashing.go:25:17:25:31 | secretByteSlice | semmle.label | secretByteSlice |
| hashing.go:26:11:26:25 | secretByteSlice | semmle.label | secretByteSlice |
| hashing.go:28:16:28:30 | secretByteSlice | semmle.label | secretByteSlice |
| hashing.go:29:22:29:36 | secretByteSlice | semmle.label | secretByteSlice |
| hashing.go:80:16:80:23 | password | semmle.label | password |
subpaths

View File

@@ -0,0 +1,4 @@
query: Security/CWE-327/WeakSensitiveDataHashing.ql
postprocess:
- utils/test/PrettyPrintModels.ql
- utils/test/InlineExpectationsTestQuery.ql

View File

@@ -1,3 +1,5 @@
module test
go 1.24
go 1.24.0
require golang.org/x/crypto v0.43.0

View File

@@ -0,0 +1,81 @@
package main
//go:generate depstubber -vendor golang.org/x/crypto/md4 "" New
//go:generate depstubber -vendor golang.org/x/crypto/ripemd160 "" New
import (
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"crypto/sha3"
"crypto/sha512"
"io"
"golang.org/x/crypto/md4"
"golang.org/x/crypto/ripemd160"
)
func WeakHashes() { // Test fixture: MD5, SHA-1, MD4 and RIPEMD-160 on sensitive values are expected to alert; the same hashes on `public` are not.
h := md5.New()
h.Sum(secretByteSlice) // $ Alert[go/weak-sensitive-data-hashing] CryptographicOperation="MD5. init from line 19."
h.Write(secretByteSlice) // $ Alert[go/weak-sensitive-data-hashing] CryptographicOperation="MD5. init from line 19."
io.WriteString(h, secretString) // $ Alert[go/weak-sensitive-data-hashing] CryptographicOperation="MD5. init from line 19."
md5.Sum(secretByteSlice) // $ Alert[go/weak-sensitive-data-hashing] CryptographicOperation="MD5. init from line 23."
sha1.New().Sum(secretByteSlice) // $ Alert[go/weak-sensitive-data-hashing] CryptographicOperation="SHA1. init from line 25."
sha1.Sum(secretByteSlice) // $ Alert[go/weak-sensitive-data-hashing] CryptographicOperation="SHA1. init from line 26."
md4.New().Sum(secretByteSlice) // $ Alert[go/weak-sensitive-data-hashing] CryptographicOperation="MD4. init from line 28."
ripemd160.New().Sum(secretByteSlice) // $ Alert[go/weak-sensitive-data-hashing] CryptographicOperation="RIPEMD160. init from line 29."
// Only alert when sensitive data is hashed.
md5.New().Sum(public) // $ CryptographicOperation="MD5. init from line 32."
md5.Sum(public) // $ CryptographicOperation="MD5. init from line 33."
sha1.New().Sum(public) // $ CryptographicOperation="SHA1. init from line 34."
sha1.Sum(public) // $ CryptographicOperation="SHA1. init from line 35."
}
func StrongHashes() { // Test fixture: SHA-2, SHA-3 and SHAKE on sensitive values are annotated as operations only, with no alert expected.
sha256.New224().Sum(secretByteSlice) // $ CryptographicOperation="SHA224. init from line 39."
sha256.Sum224(secretByteSlice) // $ CryptographicOperation="SHA224. init from line 40."
sha256.New().Sum(secretByteSlice) // $ CryptographicOperation="SHA256. init from line 42."
sha256.Sum256(secretByteSlice) // $ CryptographicOperation="SHA256. init from line 43."
sha512.New().Sum(secretByteSlice) // $ CryptographicOperation="SHA512. init from line 45."
sha512.Sum512(secretByteSlice) // $ CryptographicOperation="SHA512. init from line 46."
sha512.New384().Sum(secretByteSlice) // $ CryptographicOperation="SHA384. init from line 48."
sha512.Sum384(secretByteSlice) // $ CryptographicOperation="SHA384. init from line 49."
sha512.New512_224().Sum(secretByteSlice) // $ CryptographicOperation="SHA512224. init from line 51."
sha512.Sum512_224(secretByteSlice) // $ CryptographicOperation="SHA512224. init from line 52."
sha512.New512_256().Sum(secretByteSlice) // $ CryptographicOperation="SHA512256. init from line 54."
sha512.Sum512_256(secretByteSlice) // $ CryptographicOperation="SHA512256. init from line 55."
sha3.New224().Sum(secretByteSlice) // $ CryptographicOperation="SHA3224. init from line 57."
sha3.Sum224(secretByteSlice) // $ CryptographicOperation="SHA3224. init from line 58."
sha3.New256().Sum(secretByteSlice) // $ CryptographicOperation="SHA3256. init from line 60."
sha3.Sum256(secretByteSlice) // $ CryptographicOperation="SHA3256. init from line 61."
sha3.New384().Sum(secretByteSlice) // $ CryptographicOperation="SHA3384. init from line 63."
sha3.Sum384(secretByteSlice) // $ CryptographicOperation="SHA3384. init from line 64."
sha3.New512().Sum(secretByteSlice) // $ CryptographicOperation="SHA3512. init from line 66."
sha3.Sum512(secretByteSlice) // $ CryptographicOperation="SHA3512. init from line 67."
sha3.NewSHAKE128().Write(secretByteSlice) // $ CryptographicOperation="SHAKE128. init from line 69."
sha3.NewCSHAKE128(nil, nil).Write(secretByteSlice) // $ CryptographicOperation="SHAKE128. init from line 70."
sha3.SumSHAKE128(secretByteSlice, 100) // $ CryptographicOperation="SHAKE128. init from line 71."
sha3.NewSHAKE256().Write(secretByteSlice) // $ CryptographicOperation="SHAKE256. init from line 73."
sha3.NewCSHAKE256(nil, nil).Write(secretByteSlice) // $ CryptographicOperation="SHAKE256. init from line 74."
sha3.SumSHAKE256(secretByteSlice, 100) // $ CryptographicOperation="SHAKE256. init from line 75."
}
func PasswordHashing() { // Test fixture: SHA-256 applied to a variable named `password` is expected to alert.
password := []byte("")
sha256.Sum256(password) // $ Alert[go/weak-sensitive-data-hashing] CryptographicOperation="SHA256. init from line 80."
}

View File

@@ -0,0 +1,16 @@
// Code generated by depstubber. DO NOT EDIT.
// This is a simple stub for golang.org/x/crypto/md4, strictly for use in testing.
// See the LICENSE file for information about the licensing of the original library.
// Source: golang.org/x/crypto/md4 (exports: ; functions: New)
// Package md4 is a stub of golang.org/x/crypto/md4, generated by depstubber.
package md4
import (
hash "hash"
)
// New is a test-only stub of md4.New; it always returns nil.
func New() hash.Hash {
return nil
}

View File

@@ -0,0 +1,16 @@
// Code generated by depstubber. DO NOT EDIT.
// This is a simple stub for golang.org/x/crypto/ripemd160, strictly for use in testing.
// See the LICENSE file for information about the licensing of the original library.
// Source: golang.org/x/crypto/ripemd160 (exports: ; functions: New)
// Package ripemd160 is a stub of golang.org/x/crypto/ripemd160, generated by depstubber.
package ripemd160
import (
hash "hash"
)
// New is a test-only stub of ripemd160.New; it always returns nil.
func New() hash.Hash {
return nil
}

View File

@@ -0,0 +1,4 @@
# golang.org/x/crypto v0.43.0
## explicit
golang.org/x/crypto/md4
golang.org/x/crypto/ripemd160