mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Merge branch 'main' into moresensitive2
This commit is contained in:
@@ -754,7 +754,11 @@ public class AutoBuild {
|
||||
continue;
|
||||
}
|
||||
Path odir = cfg.getParent().resolve(root.getCompilerOptions().getOutDir()).toAbsolutePath().normalize();
|
||||
outDirs.add(odir);
|
||||
// Only exclude outDirs that are proper subdirectories of the source root
|
||||
// This prevents excluding all code when outDir points outside the source root or to the source root itself
|
||||
if (tryRelativize(LGTM_SRC, odir) != null && !odir.equals(LGTM_SRC)) {
|
||||
outDirs.add(odir);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
// ignore malformed tsconfig or missing fields
|
||||
|
||||
@@ -235,6 +235,51 @@ public class AutoBuildTests {
|
||||
runTest();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void skipFilesInTsconfigOutDirPointingToParent() throws IOException {
|
||||
// Test that outDir pointing to parent directory (outside source root) is ignored
|
||||
addFile(true, LGTM_SRC, "tsconfig.json");
|
||||
Path config = Paths.get(LGTM_SRC.toString(), "tsconfig.json");
|
||||
Files.write(config,
|
||||
"{\"compilerOptions\":{\"outDir\":\"..\"}}".getBytes(StandardCharsets.UTF_8));
|
||||
|
||||
// All files should be extracted since outDir pointing outside source root should be ignored
|
||||
addFile(true, LGTM_SRC, "src", "app.ts");
|
||||
addFile(true, LGTM_SRC, "main.js");
|
||||
|
||||
runTest();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void skipFilesInTsconfigOutDirPointingToSourceRoot() throws IOException {
|
||||
// Test that outDir pointing to source root itself is ignored
|
||||
addFile(true, LGTM_SRC, "tsconfig.json");
|
||||
Path config = Paths.get(LGTM_SRC.toString(), "tsconfig.json");
|
||||
Files.write(config,
|
||||
"{\"compilerOptions\":{\"outDir\":\".\"}}".getBytes(StandardCharsets.UTF_8));
|
||||
|
||||
// All files should be extracted since outDir pointing to source root should be ignored
|
||||
addFile(true, LGTM_SRC, "src", "app.ts");
|
||||
addFile(true, LGTM_SRC, "main.js");
|
||||
|
||||
runTest();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void skipFilesInTsconfigOutDirWithRelativePath() throws IOException {
|
||||
// Test that outDir with relative path "somedir/.." (resolves to root) is ignored
|
||||
addFile(true, LGTM_SRC, "tsconfig.json");
|
||||
Path config = Paths.get(LGTM_SRC.toString(), "tsconfig.json");
|
||||
Files.write(config,
|
||||
"{\"compilerOptions\":{\"outDir\":\"somedir/..\"}}".getBytes(StandardCharsets.UTF_8));
|
||||
|
||||
// All files should be extracted since outDir resolving to root should be ignored
|
||||
addFile(true, LGTM_SRC, "src", "app.ts");
|
||||
addFile(true, LGTM_SRC, "main.js");
|
||||
|
||||
runTest();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void includeFile() throws IOException {
|
||||
envVars.put("LGTM_INDEX_INCLUDE", "tst.js");
|
||||
|
||||
@@ -4,6 +4,10 @@
|
||||
|
||||
* Enhanced modeling for the `execa` library, adding support for command execution methods `execaCommand`, `execaCommandSync`, `$`, and `$.sync`, as well as file system operations through `inputFile`, `pipeStdout`, `pipeAll`, and `pipeStderr`.
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
* The JavaScript extractor no longer ignores source files located in the directory given by the `outDir` compiler option of `tsconfig.json` if doing so would result in excluding all source code.
|
||||
|
||||
## 2.6.6
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
@@ -3,3 +3,7 @@
|
||||
### Minor Analysis Improvements
|
||||
|
||||
* Enhanced modeling for the `execa` library, adding support for command execution methods `execaCommand`, `execaCommandSync`, `$`, and `$.sync`, as well as file system operations through `inputFile`, `pipeStdout`, `pipeAll`, and `pipeStderr`.
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
* The JavaScript extractor no longer ignores source files located in the directory given by the `outDir` compiler option of `tsconfig.json` if doing so would result in excluding all source code.
|
||||
|
||||
@@ -6,6 +6,7 @@ extractor: javascript
|
||||
library: true
|
||||
upgrades: upgrades
|
||||
dependencies:
|
||||
codeql/concepts: ${workspace}
|
||||
codeql/dataflow: ${workspace}
|
||||
codeql/mad: ${workspace}
|
||||
codeql/regex: ${workspace}
|
||||
|
||||
@@ -5,7 +5,11 @@
|
||||
*/
|
||||
|
||||
import javascript
|
||||
private import semmle.javascript.dataflow.internal.sharedlib.DataFlowArg
|
||||
private import codeql.threatmodels.ThreatModels
|
||||
private import codeql.concepts.ConceptsShared
|
||||
|
||||
private module ConceptsShared = ConceptsMake<Location, JSDataFlow>;
|
||||
|
||||
/**
|
||||
* A data flow source, for a specific threat-model.
|
||||
@@ -206,7 +210,7 @@ abstract class PersistentWriteAccess extends DataFlow::Node {
|
||||
* Provides models for cryptographic things.
|
||||
*/
|
||||
module Cryptography {
|
||||
private import semmle.javascript.internal.ConceptsShared::Cryptography as SC
|
||||
private import ConceptsShared::Cryptography as SC
|
||||
|
||||
/**
|
||||
* A data-flow node that is an application of a cryptographic algorithm. For example,
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
import javascript
|
||||
import semmle.javascript.Concepts::Cryptography
|
||||
private import semmle.javascript.security.internal.CryptoAlgorithmNames
|
||||
private import codeql.concepts.internal.CryptoAlgorithmNames
|
||||
|
||||
/**
|
||||
* A key used in a cryptographic algorithm.
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
/**
|
||||
* This file contains imports required for the JavaScript version of `ConceptsShared.qll`.
|
||||
* Since they are language-specific, they can't be placed directly in that file, as it is shared between languages.
|
||||
*/
|
||||
|
||||
import semmle.javascript.dataflow.DataFlow::DataFlow as DataFlow
|
||||
import semmle.javascript.security.CryptoAlgorithms as CryptoAlgorithms
|
||||
@@ -1,181 +0,0 @@
|
||||
/**
|
||||
* Provides Concepts which are shared across languages.
|
||||
*
|
||||
* Each language has a language specific `Concepts.qll` file that can import the
|
||||
* shared concepts from this file. A language can either re-export the concept directly,
|
||||
* or can add additional member-predicates that are needed for that language.
|
||||
*
|
||||
* Moving forward, `Concepts.qll` will be the staging ground for brand new concepts from
|
||||
* each language, but we will maintain a discipline of moving those concepts to
|
||||
* `ConceptsShared.qll` ASAP.
|
||||
*/
|
||||
|
||||
private import ConceptsImports
|
||||
|
||||
/**
|
||||
* Provides models for cryptographic concepts.
|
||||
*
|
||||
* Note: The `CryptographicAlgorithm` class currently doesn't take weak keys into
|
||||
* consideration for the `isWeak` member predicate. So RSA is always considered
|
||||
* secure, although using a low number of bits will actually make it insecure. We plan
|
||||
* to improve our libraries in the future to more precisely capture this aspect.
|
||||
*/
|
||||
module Cryptography {
|
||||
class CryptographicAlgorithm = CryptoAlgorithms::CryptographicAlgorithm;
|
||||
|
||||
class EncryptionAlgorithm = CryptoAlgorithms::EncryptionAlgorithm;
|
||||
|
||||
class HashingAlgorithm = CryptoAlgorithms::HashingAlgorithm;
|
||||
|
||||
class PasswordHashingAlgorithm = CryptoAlgorithms::PasswordHashingAlgorithm;
|
||||
|
||||
/**
|
||||
* A data flow node that is an application of a cryptographic algorithm. For example,
|
||||
* encryption, decryption, signature-validation.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `CryptographicOperation::Range` instead.
|
||||
*/
|
||||
class CryptographicOperation extends DataFlow::Node instanceof CryptographicOperation::Range {
|
||||
/** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */
|
||||
CryptographicAlgorithm getAlgorithm() { result = super.getAlgorithm() }
|
||||
|
||||
/** Gets the data flow node where the cryptographic algorithm used in this operation is configured. */
|
||||
DataFlow::Node getInitialization() { result = super.getInitialization() }
|
||||
|
||||
/** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */
|
||||
DataFlow::Node getAnInput() { result = super.getAnInput() }
|
||||
|
||||
/**
|
||||
* Gets the block mode used to perform this cryptographic operation.
|
||||
*
|
||||
* This predicate is only expected to have a result if two conditions hold:
|
||||
* 1. The operation is an encryption operation, i.e. the algorithm used is an `EncryptionAlgorithm`, and
|
||||
* 2. The algorithm used is a block cipher (not a stream cipher).
|
||||
*
|
||||
* If either of these conditions does not hold, then this predicate should have no result.
|
||||
*/
|
||||
BlockMode getBlockMode() { result = super.getBlockMode() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling new applications of cryptographic algorithms. */
|
||||
module CryptographicOperation {
|
||||
/**
|
||||
* A data flow node that is an application of a cryptographic algorithm. For example,
|
||||
* encryption, decryption, signature-validation.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `CryptographicOperation` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets the data flow node where the cryptographic algorithm used in this operation is configured. */
|
||||
abstract DataFlow::Node getInitialization();
|
||||
|
||||
/** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */
|
||||
abstract CryptographicAlgorithm getAlgorithm();
|
||||
|
||||
/** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */
|
||||
abstract DataFlow::Node getAnInput();
|
||||
|
||||
/**
|
||||
* Gets the block mode used to perform this cryptographic operation.
|
||||
*
|
||||
* This predicate is only expected to have a result if two conditions hold:
|
||||
* 1. The operation is an encryption operation, i.e. the algorithm used is an `EncryptionAlgorithm`, and
|
||||
* 2. The algorithm used is a block cipher (not a stream cipher).
|
||||
*
|
||||
* If either of these conditions does not hold, then this predicate should have no result.
|
||||
*/
|
||||
abstract BlockMode getBlockMode();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A cryptographic block cipher mode of operation. This can be used to encrypt
|
||||
* data of arbitrary length using a block encryption algorithm.
|
||||
*/
|
||||
class BlockMode extends string {
|
||||
BlockMode() {
|
||||
this =
|
||||
[
|
||||
"ECB", "CBC", "GCM", "CCM", "CFB", "OFB", "CTR", "OPENPGP",
|
||||
"XTS", // https://csrc.nist.gov/publications/detail/sp/800-38e/final
|
||||
"EAX" // https://en.wikipedia.org/wiki/EAX_mode
|
||||
]
|
||||
}
|
||||
|
||||
/** Holds if this block mode is considered to be insecure. */
|
||||
predicate isWeak() { this = "ECB" }
|
||||
|
||||
/** Holds if the given string appears to match this block mode. */
|
||||
bindingset[s]
|
||||
predicate matchesString(string s) { s.toUpperCase().matches("%" + this + "%") }
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides classes for modeling HTTP-related APIs. */
|
||||
module Http {
|
||||
/** Provides classes for modeling HTTP clients. */
|
||||
module Client {
|
||||
/**
|
||||
* A data flow node that makes an outgoing HTTP request.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `Http::Client::Request::Range` instead.
|
||||
*/
|
||||
class Request extends DataFlow::Node instanceof Request::Range {
|
||||
/**
|
||||
* Gets a data flow node that contributes to the URL of the request.
|
||||
* Depending on the framework, a request may have multiple nodes which contribute to the URL.
|
||||
*/
|
||||
DataFlow::Node getAUrlPart() { result = super.getAUrlPart() }
|
||||
|
||||
/** Gets a string that identifies the framework used for this request. */
|
||||
string getFramework() { result = super.getFramework() }
|
||||
|
||||
/**
|
||||
* Holds if this request is made using a mode that disables SSL/TLS
|
||||
* certificate validation, where `disablingNode` represents the point at
|
||||
* which the validation was disabled, and `argumentOrigin` represents the origin
|
||||
* of the argument that disabled the validation (which could be the same node as
|
||||
* `disablingNode`).
|
||||
*/
|
||||
predicate disablesCertificateValidation(
|
||||
DataFlow::Node disablingNode, DataFlow::Node argumentOrigin
|
||||
) {
|
||||
super.disablesCertificateValidation(disablingNode, argumentOrigin)
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new HTTP requests. */
|
||||
module Request {
|
||||
/**
|
||||
* A data flow node that makes an outgoing HTTP request.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `Http::Client::Request` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets a data flow node that contributes to the URL of the request.
|
||||
* Depending on the framework, a request may have multiple nodes which contribute to the URL.
|
||||
*/
|
||||
abstract DataFlow::Node getAUrlPart();
|
||||
|
||||
/** Gets a string that identifies the framework used for this request. */
|
||||
abstract string getFramework();
|
||||
|
||||
/**
|
||||
* Holds if this request is made using a mode that disables SSL/TLS
|
||||
* certificate validation, where `disablingNode` represents the point at
|
||||
* which the validation was disabled, and `argumentOrigin` represents the origin
|
||||
* of the argument that disabled the validation (which could be the same node as
|
||||
* `disablingNode`).
|
||||
*/
|
||||
abstract predicate disablesCertificateValidation(
|
||||
DataFlow::Node disablingNode, DataFlow::Node argumentOrigin
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,117 +1,5 @@
|
||||
/**
|
||||
* Provides classes modeling cryptographic algorithms, separated into strong and weak variants.
|
||||
*
|
||||
* The classification into strong and weak is based on Wikipedia, OWASP and Google (2021).
|
||||
*/
|
||||
|
||||
private import internal.CryptoAlgorithmNames
|
||||
|
||||
/**
|
||||
* A cryptographic algorithm.
|
||||
*/
|
||||
private newtype TCryptographicAlgorithm =
|
||||
MkHashingAlgorithm(string name, boolean isWeak) {
|
||||
isStrongHashingAlgorithm(name) and isWeak = false
|
||||
or
|
||||
isWeakHashingAlgorithm(name) and isWeak = true
|
||||
} or
|
||||
MkEncryptionAlgorithm(string name, boolean isWeak) {
|
||||
isStrongEncryptionAlgorithm(name) and isWeak = false
|
||||
or
|
||||
isWeakEncryptionAlgorithm(name) and isWeak = true
|
||||
} or
|
||||
MkPasswordHashingAlgorithm(string name, boolean isWeak) {
|
||||
isStrongPasswordHashingAlgorithm(name) and isWeak = false
|
||||
or
|
||||
isWeakPasswordHashingAlgorithm(name) and isWeak = true
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the most specific `CryptographicAlgorithm` that matches the given `name`.
|
||||
* A matching algorithm is one where the name of the algorithm matches the start of `name`, with allowances made for different name formats.
|
||||
* In the case that multiple `CryptographicAlgorithm`s match the given `name`, the algorithm(s) with the longest name will be selected. This is intended to select more specific versions of algorithms when multiple versions could match - for example "SHA3_224" matches against both "SHA3" and "SHA3224", but the latter is a more precise match.
|
||||
*/
|
||||
bindingset[name]
|
||||
private CryptographicAlgorithm getBestAlgorithmForName(string name) {
|
||||
result =
|
||||
max(CryptographicAlgorithm algorithm |
|
||||
algorithm.getName() =
|
||||
[
|
||||
name.toUpperCase(), // the full name
|
||||
name.toUpperCase().regexpCapture("^([\\w]+)(?:-.*)?$", 1), // the name prior to any dashes or spaces
|
||||
name.toUpperCase().regexpCapture("^([A-Z0-9]+)(?:(-|_).*)?$", 1) // the name prior to any dashes, spaces, or underscores
|
||||
].regexpReplaceAll("[-_ ]", "") // strip dashes, underscores, and spaces
|
||||
|
|
||||
algorithm order by algorithm.getName().length()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A cryptographic algorithm.
|
||||
*/
|
||||
abstract class CryptographicAlgorithm extends TCryptographicAlgorithm {
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() { result = this.getName() }
|
||||
|
||||
/**
|
||||
* Gets the normalized name of this algorithm (upper-case, no spaces, dashes or underscores).
|
||||
*/
|
||||
abstract string getName();
|
||||
|
||||
/**
|
||||
* Holds if the name of this algorithm is the most specific match for `name`.
|
||||
* This predicate matches quite liberally to account for different ways of formatting algorithm names, e.g. using dashes, underscores, or spaces as separators, including or not including block modes of operation, etc.
|
||||
*/
|
||||
bindingset[name]
|
||||
predicate matchesName(string name) { this = getBestAlgorithmForName(name) }
|
||||
|
||||
/**
|
||||
* Holds if this algorithm is weak.
|
||||
*/
|
||||
abstract predicate isWeak();
|
||||
}
|
||||
|
||||
/**
|
||||
* A hashing algorithm such as `MD5` or `SHA512`.
|
||||
*/
|
||||
class HashingAlgorithm extends MkHashingAlgorithm, CryptographicAlgorithm {
|
||||
string name;
|
||||
boolean isWeak;
|
||||
|
||||
HashingAlgorithm() { this = MkHashingAlgorithm(name, isWeak) }
|
||||
|
||||
override string getName() { result = name }
|
||||
|
||||
override predicate isWeak() { isWeak = true }
|
||||
}
|
||||
|
||||
/**
|
||||
* An encryption algorithm such as `DES` or `AES512`.
|
||||
*/
|
||||
class EncryptionAlgorithm extends MkEncryptionAlgorithm, CryptographicAlgorithm {
|
||||
string name;
|
||||
boolean isWeak;
|
||||
|
||||
EncryptionAlgorithm() { this = MkEncryptionAlgorithm(name, isWeak) }
|
||||
|
||||
override string getName() { result = name }
|
||||
|
||||
override predicate isWeak() { isWeak = true }
|
||||
|
||||
/** Holds if this algorithm is a stream cipher. */
|
||||
predicate isStreamCipher() { isStreamCipher(name) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A password hashing algorithm such as `PBKDF2` or `SCRYPT`.
|
||||
*/
|
||||
class PasswordHashingAlgorithm extends MkPasswordHashingAlgorithm, CryptographicAlgorithm {
|
||||
string name;
|
||||
boolean isWeak;
|
||||
|
||||
PasswordHashingAlgorithm() { this = MkPasswordHashingAlgorithm(name, isWeak) }
|
||||
|
||||
override string getName() { result = name }
|
||||
|
||||
override predicate isWeak() { isWeak = true }
|
||||
}
|
||||
import codeql.concepts.CryptoAlgorithms
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
*/
|
||||
|
||||
import javascript
|
||||
import semmle.javascript.security.internal.SensitiveDataHeuristics
|
||||
import codeql.concepts.internal.SensitiveDataHeuristics
|
||||
private import HeuristicNames
|
||||
|
||||
/** An expression that might contain sensitive data. */
|
||||
|
||||
@@ -1,84 +0,0 @@
|
||||
/**
|
||||
* Names of cryptographic algorithms, separated into strong and weak variants.
|
||||
*
|
||||
* The names are normalized: upper-case, no spaces, dashes or underscores.
|
||||
*
|
||||
* The names are inspired by the names used in real world crypto libraries.
|
||||
*
|
||||
* The classification into strong and weak is based on Wikipedia, OWASP and Google (2021).
|
||||
*/
|
||||
|
||||
/**
|
||||
* Holds if `name` corresponds to a strong hashing algorithm.
|
||||
*/
|
||||
predicate isStrongHashingAlgorithm(string name) {
|
||||
name =
|
||||
[
|
||||
// see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#blake2
|
||||
// and https://www.blake2.net/
|
||||
"BLAKE2", "BLAKE2B", "BLAKE2S",
|
||||
// see https://github.com/BLAKE3-team/BLAKE3
|
||||
"BLAKE3",
|
||||
//
|
||||
"DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2",
|
||||
"SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512",
|
||||
// see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#cryptography.hazmat.primitives.hashes.SHAKE128
|
||||
"SHAKE128", "SHAKE256",
|
||||
// see https://cryptography.io/en/latest/hazmat/primitives/cryptographic-hashes/#sm3
|
||||
"SM3",
|
||||
// see https://security.stackexchange.com/a/216297
|
||||
"WHIRLPOOL",
|
||||
]
|
||||
}
|
||||
|
||||
/**
 * Holds if `name` corresponds to a weak hashing algorithm.
 *
 * `name` is compared against the literal entries below, which are written in
 * upper case without spaces, dashes or underscores.
 */
predicate isWeakHashingAlgorithm(string name) {
  name =
    [
      // "HAVEL128" is a long-standing misspelling of the HAVAL-128 hash function.
      // The correctly spelled "HAVAL128" is listed so that real-world names are
      // recognized; the legacy spelling is kept so existing matches are preserved.
      "HAVAL128", "HAVEL128", "MD2", "MD4", "MD5", "PANAMA", "RIPEMD", "RIPEMD128", "RIPEMD256",
      "RIPEMD160", "RIPEMD320", "SHA0", "SHA1"
    ]
}
|
||||
|
||||
/**
|
||||
* Holds if `name` corresponds to a strong encryption algorithm.
|
||||
*/
|
||||
predicate isStrongEncryptionAlgorithm(string name) {
|
||||
name =
|
||||
[
|
||||
"AES", "AES128", "AES192", "AES256", "AES512", "AES-128", "AES-192", "AES-256", "AES-512",
|
||||
"ARIA", "BLOWFISH", "BF", "ECIES", "CAST", "CAST5", "CAMELLIA", "CAMELLIA128", "CAMELLIA192",
|
||||
"CAMELLIA256", "CAMELLIA-128", "CAMELLIA-192", "CAMELLIA-256", "CHACHA", "GOST", "GOST89",
|
||||
"IDEA", "RABBIT", "RSA", "SEED", "SM4"
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `name` corresponds to a weak encryption algorithm.
|
||||
*/
|
||||
predicate isWeakEncryptionAlgorithm(string name) {
|
||||
name =
|
||||
[
|
||||
"DES", "3DES", "DES3", "TRIPLEDES", "DESX", "TDEA", "TRIPLEDEA", "ARC2", "RC2", "ARC4", "RC4",
|
||||
"ARCFOUR", "ARC5", "RC5"
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `name` corresponds to a strong password hashing algorithm.
|
||||
*/
|
||||
predicate isStrongPasswordHashingAlgorithm(string name) {
|
||||
name = ["ARGON2", "PBKDF2", "BCRYPT", "SCRYPT"]
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `name` corresponds to a weak password hashing algorithm.
|
||||
*/
|
||||
predicate isWeakPasswordHashingAlgorithm(string name) { name = "EVPKDF" }
|
||||
|
||||
/**
|
||||
* Holds if `name` corresponds to a stream cipher.
|
||||
*/
|
||||
predicate isStreamCipher(string name) { name = ["CHACHA", "RC4", "ARC4", "ARCFOUR", "RABBIT"] }
|
||||
@@ -1,193 +0,0 @@
|
||||
/**
|
||||
* INTERNAL: Do not use.
|
||||
*
|
||||
* Provides classes and predicates for identifying strings that may indicate the presence of sensitive data.
|
||||
* This logic is kept in one place so that it can be shared across our CodeQL analyses of different languages.
|
||||
*
|
||||
* 'Sensitive' data in general is anything that should not be sent around in unencrypted form.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A classification of different kinds of sensitive data:
|
||||
*
|
||||
* - secret: generic secret or trusted data;
|
||||
* - id: a user name or other account information;
|
||||
* - password: a password or authorization key;
|
||||
* - certificate: a certificate.
|
||||
* - private: private data such as credit card numbers
|
||||
*
|
||||
* While classifications are represented as strings, this should not be relied upon.
|
||||
* Instead, use the predicates in `SensitiveDataClassification::` to work with
|
||||
* classifications.
|
||||
*/
|
||||
class SensitiveDataClassification extends string {
|
||||
SensitiveDataClassification() { this in ["secret", "id", "password", "certificate", "private"] }
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides predicates to select the different kinds of sensitive data we support.
|
||||
*/
|
||||
module SensitiveDataClassification {
|
||||
/** Gets the classification for secret or trusted data. */
|
||||
SensitiveDataClassification secret() { result = "secret" }
|
||||
|
||||
/** Gets the classification for user names or other account information. */
|
||||
SensitiveDataClassification id() { result = "id" }
|
||||
|
||||
/** Gets the classification for passwords or authorization keys. */
|
||||
SensitiveDataClassification password() { result = "password" }
|
||||
|
||||
/** Gets the classification for certificates. */
|
||||
SensitiveDataClassification certificate() { result = "certificate" }
|
||||
|
||||
/** Gets the classification for private data. */
|
||||
SensitiveDataClassification private() { result = "private" }
|
||||
}
|
||||
|
||||
/**
|
||||
* INTERNAL: Do not use.
|
||||
*
|
||||
* Provides heuristics for identifying names related to sensitive information.
|
||||
*/
|
||||
module HeuristicNames {
|
||||
/**
|
||||
* Gets a regular expression that identifies strings that may indicate the presence of secret
|
||||
* or trusted data.
|
||||
*/
|
||||
string maybeSecret() {
|
||||
result = "(?is).*((?<!is|is_)secret|(?<!un|un_|is|is_)trusted(?!_iter)|confidential).*"
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a regular expression that identifies strings that may indicate the presence of
|
||||
* user names or other account information.
|
||||
*/
|
||||
string maybeAccountInfo() {
|
||||
result = "(?is).*(acc(ou)?nt|puid|user.?(name|id)|session.?(id|key)).*" or
|
||||
result = "(?s).*([uU]|^|_|[a-z](?=U))([uU][iI][dD]).*"
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a regular expression that identifies strings that may indicate the presence of
|
||||
* a password or an authorization key.
|
||||
*/
|
||||
string maybePassword() {
|
||||
result =
|
||||
"(?is).*(pass(wd|word|code|.?phrase)(?!.*question)|(auth(entication|ori[sz]ation)?).?key|oauth|"
|
||||
+ "api.?(key|token)|([_-]|\\b)mfa([_-]|\\b)).*"
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a regular expression that identifies strings that may indicate the presence of
|
||||
* a certificate.
|
||||
*/
|
||||
string maybeCertificate() { result = "(?is).*(cert)(?!.*(format|name|ification)).*" }
|
||||
|
||||
/**
|
||||
* Gets a regular expression that identifies strings that may indicate the presence of
|
||||
* private data.
|
||||
*/
|
||||
string maybePrivate() {
|
||||
result =
|
||||
"(?is).*(" +
|
||||
// Inspired by multiple sources including the list on https://cwe.mitre.org/data/definitions/359.html
|
||||
// Government identifiers, such as Social Security Numbers
|
||||
"social.?security|employer.?identification|national.?insurance|resident.?id|" +
|
||||
"passport.?(num|no)|([_-]|\\b)ssn([_-]|\\b)|" +
|
||||
// Contact information, such as home addresses
|
||||
"post.?code|zip.?code|home.?addr|" +
|
||||
// and telephone numbers
|
||||
"(mob(ile)?|home).?(num|no|tel|phone)|(tel|fax|phone).?(num|no)|telephone|" +
|
||||
"emergency.?contact|" +
|
||||
// Geographic location - where the user is (or was)
|
||||
"latitude|longitude|nationality|" +
|
||||
// Financial data - such as credit card numbers, salary, bank accounts, and debts
|
||||
"(credit|debit|bank|visa).?(card|num|no|acc(ou)?nt)|acc(ou)?nt.?(no|num|credit)|routing.?num|"
|
||||
+ "salary|billing|beneficiary|credit.?(rating|score)|([_-]|\\b)(ccn|cvv|iban)([_-]|\\b)|" +
|
||||
// Communications - e-mail addresses, private e-mail messages, SMS text messages, chat logs, etc.
|
||||
// "e(mail|_mail)|" + // this seems too noisy
|
||||
// Health - medical conditions, insurance status, prescription records
|
||||
"birth.?da(te|y)|da(te|y).?(of.?)?birth|gender|([_-]|\\b)sex([_-]|\\b)|" +
|
||||
"medical|(health|care).?plan|healthkit|appointment|prescription|patient.?(id|record)|" +
|
||||
"blood.?(type|alcohol|glucose|pressure)|heart.?(rate|rhythm)|body.?(mass|fat)|" +
|
||||
"menstrua|pregnan|insulin|inhaler|" +
|
||||
// Relationships - work and family
|
||||
"employ(er|ee)|spouse|maiden.?name|" +
|
||||
// Device information
|
||||
"mac.?addr" +
|
||||
// ---
|
||||
").*"
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a regular expression that identifies strings that may indicate the presence
|
||||
* of sensitive data, with `classification` describing the kind of sensitive data involved.
|
||||
*/
|
||||
string maybeSensitiveRegexp(SensitiveDataClassification classification) {
|
||||
result = maybeSecret() and classification = SensitiveDataClassification::secret()
|
||||
or
|
||||
result = maybeAccountInfo() and classification = SensitiveDataClassification::id()
|
||||
or
|
||||
result = maybePassword() and classification = SensitiveDataClassification::password()
|
||||
or
|
||||
result = maybeCertificate() and
|
||||
classification = SensitiveDataClassification::certificate()
|
||||
or
|
||||
result = maybePrivate() and
|
||||
classification = SensitiveDataClassification::private()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a regular expression that identifies strings that may indicate the presence of data
|
||||
* that is hashed or encrypted, and hence rendered non-sensitive, or contains special characters
|
||||
* suggesting nouns within the string do not represent the meaning of the whole string (e.g. a URL or a SQL query).
|
||||
*
|
||||
* We also filter out common words like `certain` and `concert`, since otherwise these could
|
||||
* be matched by the certificate regular expressions. Same for `accountable` (account), or
|
||||
* `secretarial` (secret).
|
||||
*/
|
||||
string notSensitiveRegexp() {
|
||||
result =
|
||||
"(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|(?<!pass)code)|"
|
||||
+ "certain|concert|secretar|account(ant|ab|ing|ed)|file|path|([_-]|\\b)url).*"
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `name` may indicate the presence of sensitive data, and `name` does not indicate that
|
||||
* the data is in fact non-sensitive (for example since it is hashed or encrypted).
|
||||
*
|
||||
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (for any
|
||||
* classification), and none of the regexps from `notSensitiveRegexp` matches `name`.
|
||||
*/
|
||||
bindingset[name]
|
||||
predicate nameIndicatesSensitiveData(string name) {
|
||||
exists(string combinedRegexp |
|
||||
// Combine all the maybe-sensitive regexps into one using non-capturing groups and |.
|
||||
combinedRegexp =
|
||||
"(?:" + strictconcat(string r | r = maybeSensitiveRegexp(_) | r, ")|(?:") + ")"
|
||||
|
|
||||
name.regexpMatch(combinedRegexp)
|
||||
) and
|
||||
not name.regexpMatch(notSensitiveRegexp())
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `name` may indicate the presence of sensitive data, and
|
||||
* `name` does not indicate that the data is in fact non-sensitive (for example since
|
||||
* it is hashed or encrypted). `classification` describes the kind of sensitive data
|
||||
* involved.
|
||||
*
|
||||
* That is, one of the regexps from `maybeSensitiveRegexp` matches `name` (with the
|
||||
* given classification), and none of the regexps from `notSensitiveRegexp` matches
|
||||
* `name`.
|
||||
*
|
||||
* When the set of names is large, it's worth using `nameIndicatesSensitiveData/1` as a first
|
||||
* pass, since that combines all the regexps into one, and should be faster. Then call this
|
||||
* predicate to get the classification(s).
|
||||
*/
|
||||
bindingset[name]
|
||||
predicate nameIndicatesSensitiveData(string name, SensitiveDataClassification classification) {
|
||||
name.regexpMatch(maybeSensitiveRegexp(classification)) and
|
||||
not name.regexpMatch(notSensitiveRegexp())
|
||||
}
|
||||
}
|
||||
@@ -6,7 +6,7 @@
|
||||
* @problem.severity warning
|
||||
* @precision high
|
||||
* @tags quality
|
||||
* maintainability
|
||||
* reliability
|
||||
* error-handling
|
||||
* frameworks/nodejs
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user