diff --git a/cpp/ql/lib/change-notes/2022-03-28-private-data.md b/cpp/ql/lib/change-notes/2022-03-28-private-data.md new file mode 100644 index 00000000000..1c02c3ddf98 --- /dev/null +++ b/cpp/ql/lib/change-notes/2022-03-28-private-data.md @@ -0,0 +1,4 @@ +--- +category: feature +--- +* A new library `semmle.code.cpp.security.PrivateData` has been added. The new library heuristically detects variables and functions dealing with sensitive private data, such as e-mail addresses and credit card numbers. diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/security/PrivateCleartextWrite.qll b/cpp/ql/lib/experimental/semmle/code/cpp/security/PrivateCleartextWrite.qll index 5438722fd08..f82330f3fbd 100644 --- a/cpp/ql/lib/experimental/semmle/code/cpp/security/PrivateCleartextWrite.qll +++ b/cpp/ql/lib/experimental/semmle/code/cpp/security/PrivateCleartextWrite.qll @@ -4,7 +4,7 @@ import cpp import semmle.code.cpp.dataflow.TaintTracking -import experimental.semmle.code.cpp.security.PrivateData +import semmle.code.cpp.security.PrivateData import semmle.code.cpp.security.FileWrite import semmle.code.cpp.security.BufferWrite diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/security/PrivateData.qll b/cpp/ql/lib/experimental/semmle/code/cpp/security/PrivateData.qll deleted file mode 100644 index ec37e8ce86c..00000000000 --- a/cpp/ql/lib/experimental/semmle/code/cpp/security/PrivateData.qll +++ /dev/null @@ -1,52 +0,0 @@ -/** - * Provides classes and predicates for identifying private data and functions for security. - * - * 'Private' data in general is anything that would compromise user privacy if exposed. This - * library tries to guess where private data may either be stored in a variable or produced by a - * function. - * - * This library is not concerned with credentials. See `SensitiveActions` for expressions related - * to credentials. - */ - -import cpp - -/** A string for `match` that identifies strings that look like they represent private data. */ -private string privateNames() { - result = - [ - // Inspired by the list on https://cwe.mitre.org/data/definitions/359.html - // Government identifiers, such as Social Security Numbers - "%social%security%number%", - // Contact information, such as home addresses and telephone numbers - "%postcode%", "%zipcode%", - // result = "%telephone%" or - // Geographic location - where the user is (or was) - "%latitude%", "%longitude%", - // Financial data - such as credit card numbers, salary, bank accounts, and debts - "%creditcard%", "%salary%", "%bankaccount%", - // Communications - e-mail addresses, private e-mail messages, SMS text messages, chat logs, etc. - // result = "%email%" or - // result = "%mobile%" or - "%employer%", - // Health - medical conditions, insurance status, prescription records - "%medical%" - ] -} - -/** An expression that might contain private data. */ -abstract class PrivateDataExpr extends Expr { } - -/** A functiond call that might produce private data. */ -class PrivateFunctionCall extends PrivateDataExpr, FunctionCall { - PrivateFunctionCall() { - exists(string s | this.getTarget().getName().toLowerCase() = s | s.matches(privateNames())) - } -} - -/** An access to a variable that might contain private data. */ -class PrivateVariableAccess extends PrivateDataExpr, VariableAccess { - PrivateVariableAccess() { - exists(string s | this.getTarget().getName().toLowerCase() = s | s.matches(privateNames())) - } -} diff --git a/cpp/ql/lib/semmle/code/cpp/security/PrivateData.qll b/cpp/ql/lib/semmle/code/cpp/security/PrivateData.qll new file mode 100644 index 00000000000..faca0d8513e --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/security/PrivateData.qll @@ -0,0 +1,67 @@ +/** + * Provides classes for heuristically identifying variables and functions that + * might contain or return sensitive private data. + * + * 'Private' data in general is anything that would compromise user privacy if + * exposed. This library tries to guess where private data may either be stored + * in a variable or returned by a function call. + * + * This library is not concerned with credentials. See `SensitiveExprs.qll` for + * expressions related to credentials. + */ + +import cpp + +/** + * A string for `regexpMatch` that identifies strings that look like they + * represent private data. + */ +private string privateNames() { + result = + ".*(" + + // Inspired by the list on https://cwe.mitre.org/data/definitions/359.html + // Government identifiers, such as Social Security Numbers + "social.?security|" + + // Contact information, such as home addresses and telephone numbers + "post.?code|zip.?code|telephone|" + + // Geographic location - where the user is (or was) + "latitude|longitude|" + + // Financial data - such as credit card numbers, salary, bank accounts, and debts + "credit.?card|salary|bank.?account|" + + // Communications - e-mail addresses, private e-mail messages, SMS text messages, chat logs, etc. + "email|mobile|employer|" + + // Health - medical conditions, insurance status, prescription records + "medical" + + // --- + ").*" +} + +/** + * A variable that might contain sensitive private information. + */ +class PrivateDataVariable extends Variable { + PrivateDataVariable() { + this.getName().toLowerCase().regexpMatch(privateNames()) and + not this.getUnspecifiedType() instanceof IntegralType + } +} + +/** + * A function that might return sensitive private information. + */ +class PrivateDataFunction extends Function { + PrivateDataFunction() { + this.getName().toLowerCase().regexpMatch(privateNames()) and + not this.getUnspecifiedType() instanceof IntegralType + } +} + +/** + * An expression whose value might be sensitive private information. + */ +class PrivateDataExpr extends Expr { + PrivateDataExpr() { + this.(VariableAccess).getTarget() instanceof PrivateDataVariable or + this.(FunctionCall).getTarget() instanceof PrivateDataFunction + } +} diff --git a/cpp/ql/lib/semmle/code/cpp/security/SensitiveExprs.qll b/cpp/ql/lib/semmle/code/cpp/security/SensitiveExprs.qll index 389129835cb..431f6893fa1 100644 --- a/cpp/ql/lib/semmle/code/cpp/security/SensitiveExprs.qll +++ b/cpp/ql/lib/semmle/code/cpp/security/SensitiveExprs.qll @@ -1,13 +1,16 @@ /** * Provides classes for heuristically identifying variables and functions that - * might contain or return a password or other sensitive information. + * might contain or return a password or other credential. + * + * This library is not concerned with other kinds of sensitive private + * information. See `PrivateData.qll` for expressions related to that. */ import cpp /** * Holds if the name `s` suggests something might contain or return a password - * or other sensitive information. + * or other credential. */ bindingset[s] private predicate suspicious(string s) { @@ -16,7 +19,7 @@ private predicate suspicious(string s) { } /** - * A variable that might contain a password or other sensitive information. + * A variable that might contain a password or other credential. */ class SensitiveVariable extends Variable { SensitiveVariable() { @@ -26,7 +29,7 @@ class SensitiveVariable extends Variable { } /** - * A function that might return a password or other sensitive information. + * A function that might return a password or other credential. */ class SensitiveFunction extends Function { SensitiveFunction() { @@ -36,7 +39,7 @@ class SensitiveFunction extends Function { } /** - * An expression whose value might be a password or other sensitive information. + * An expression whose value might be a password or other credential. */ class SensitiveExpr extends Expr { SensitiveExpr() { diff --git a/cpp/ql/src/Security/CWE/CWE-311/CleartextTransmission.ql b/cpp/ql/src/Security/CWE/CWE-311/CleartextTransmission.ql index 8a9d4f9b991..72d96d0da3f 100644 --- a/cpp/ql/src/Security/CWE/CWE-311/CleartextTransmission.ql +++ b/cpp/ql/src/Security/CWE/CWE-311/CleartextTransmission.ql @@ -9,28 +9,43 @@ * @id cpp/cleartext-transmission * @tags security * external/cwe/cwe-319 + * external/cwe/cwe-359 */ import cpp import semmle.code.cpp.security.SensitiveExprs +import semmle.code.cpp.security.PrivateData import semmle.code.cpp.dataflow.TaintTracking import semmle.code.cpp.models.interfaces.FlowSource import semmle.code.cpp.commons.File import DataFlow::PathGraph +class SourceVariable extends Variable { + SourceVariable() { + this instanceof SensitiveVariable or + this instanceof PrivateDataVariable + } +} + +class SourceFunction extends Function { + SourceFunction() { + this instanceof SensitiveFunction or + this instanceof PrivateDataFunction + } +} + /** * A DataFlow node corresponding to a variable or function call that * might contain or return a password or other sensitive information. */ -class SensitiveNode extends DataFlow::Node { - SensitiveNode() { - this.asExpr() = any(SensitiveVariable sv).getInitializer().getExpr() or - this.asExpr().(VariableAccess).getTarget() = - any(SensitiveVariable sv).(GlobalOrNamespaceVariable) or - this.asExpr().(VariableAccess).getTarget() = any(SensitiveVariable v | v instanceof Field) or - this.asUninitialized() instanceof SensitiveVariable or - this.asParameter() instanceof SensitiveVariable or - this.asExpr().(FunctionCall).getTarget() instanceof SensitiveFunction +class SourceNode extends DataFlow::Node { + SourceNode() { + this.asExpr() = any(SourceVariable sv).getInitializer().getExpr() or + this.asExpr().(VariableAccess).getTarget() = any(SourceVariable sv).(GlobalOrNamespaceVariable) or + this.asExpr().(VariableAccess).getTarget() = any(SourceVariable v | v instanceof Field) or + this.asUninitialized() instanceof SourceVariable or + this.asParameter() instanceof SourceVariable or + this.asExpr().(FunctionCall).getTarget() instanceof SourceFunction } } @@ -207,7 +222,7 @@ class Encrypted extends Expr { class FromSensitiveConfiguration extends TaintTracking::Configuration { FromSensitiveConfiguration() { this = "FromSensitiveConfiguration" } - override predicate isSource(DataFlow::Node source) { source instanceof SensitiveNode } + override predicate isSource(DataFlow::Node source) { source instanceof SourceNode } override predicate isSink(DataFlow::Node sink) { sink.asExpr() = any(NetworkSendRecv nsr).getDataExpr() diff --git a/cpp/ql/src/change-notes/2022-03-28-cleartext-transmission.md b/cpp/ql/src/change-notes/2022-03-28-cleartext-transmission.md new file mode 100644 index 00000000000..4f75b43f295 --- /dev/null +++ b/cpp/ql/src/change-notes/2022-03-28-cleartext-transmission.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The `cpp/cleartext-transmission` query now recognizes additional sources, for sensitive private data such as e-mail addresses and credit card numbers. diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-311/semmle/tests/CleartextTransmission.expected b/cpp/ql/test/query-tests/Security/CWE/CWE-311/semmle/tests/CleartextTransmission.expected index a63099bb875..28d67ade614 100644 --- a/cpp/ql/test/query-tests/Security/CWE/CWE-311/semmle/tests/CleartextTransmission.expected +++ b/cpp/ql/test/query-tests/Security/CWE/CWE-311/semmle/tests/CleartextTransmission.expected @@ -92,6 +92,10 @@ edges | test3.cpp:398:18:398:25 | password | test3.cpp:400:33:400:40 | password | | test3.cpp:421:21:421:28 | password | test3.cpp:421:3:421:17 | call to decrypt_inplace | | test3.cpp:429:7:429:14 | password | test3.cpp:431:8:431:15 | password | +| test3.cpp:526:44:526:54 | my_latitude | test3.cpp:527:15:527:20 | buffer | +| test3.cpp:532:45:532:58 | home_longitude | test3.cpp:533:15:533:20 | buffer | +| test3.cpp:551:47:551:58 | salaryString | test3.cpp:552:15:552:20 | buffer | +| test3.cpp:556:19:556:30 | salaryString | test3.cpp:559:15:559:20 | buffer | | test.cpp:41:23:41:43 | cleartext password! | test.cpp:48:21:48:27 | call to encrypt | | test.cpp:41:23:41:43 | cleartext password! | test.cpp:48:29:48:39 | thePassword | | test.cpp:66:23:66:43 | cleartext password! | test.cpp:76:21:76:27 | call to encrypt | @@ -221,6 +225,25 @@ nodes | test3.cpp:421:21:421:28 | password | semmle.label | password | | test3.cpp:429:7:429:14 | password | semmle.label | password | | test3.cpp:431:8:431:15 | password | semmle.label | password | +| test3.cpp:507:18:507:39 | social_security_number | semmle.label | social_security_number | +| test3.cpp:508:18:508:33 | socialSecurityNo | semmle.label | socialSecurityNo | +| test3.cpp:509:18:509:29 | homePostCode | semmle.label | homePostCode | +| test3.cpp:510:18:510:28 | my_zip_code | semmle.label | my_zip_code | +| test3.cpp:511:18:511:26 | telephone | semmle.label | telephone | +| test3.cpp:512:18:512:36 | mobile_phone_number | semmle.label | mobile_phone_number | +| test3.cpp:513:18:513:22 | email | semmle.label | email | +| test3.cpp:514:18:514:38 | my_credit_card_number | semmle.label | my_credit_card_number | +| test3.cpp:515:18:515:35 | my_bank_account_no | semmle.label | my_bank_account_no | +| test3.cpp:516:18:516:29 | employerName | semmle.label | employerName | +| test3.cpp:517:18:517:29 | medical_info | semmle.label | medical_info | +| test3.cpp:526:44:526:54 | my_latitude | semmle.label | my_latitude | +| test3.cpp:527:15:527:20 | buffer | semmle.label | buffer | +| test3.cpp:532:45:532:58 | home_longitude | semmle.label | home_longitude | +| test3.cpp:533:15:533:20 | buffer | semmle.label | buffer | +| test3.cpp:551:47:551:58 | salaryString | semmle.label | salaryString | +| test3.cpp:552:15:552:20 | buffer | semmle.label | buffer | +| test3.cpp:556:19:556:30 | salaryString | semmle.label | salaryString | +| test3.cpp:559:15:559:20 | buffer | semmle.label | buffer | | test.cpp:41:23:41:43 | cleartext password! | semmle.label | cleartext password! | | test.cpp:48:21:48:27 | call to encrypt | semmle.label | call to encrypt | | test.cpp:48:29:48:39 | thePassword | semmle.label | thePassword | @@ -254,3 +277,18 @@ subpaths | test3.cpp:414:3:414:6 | call to recv | test3.cpp:414:17:414:24 | password | test3.cpp:414:17:414:24 | password | This operation receives into 'password', which may put unencrypted sensitive data into $@ | test3.cpp:414:17:414:24 | password | password | | test3.cpp:420:3:420:6 | call to recv | test3.cpp:420:17:420:24 | password | test3.cpp:420:17:420:24 | password | This operation receives into 'password', which may put unencrypted sensitive data into $@ | test3.cpp:420:17:420:24 | password | password | | test3.cpp:431:2:431:6 | call to fgets | test3.cpp:429:7:429:14 | password | test3.cpp:431:8:431:15 | password | This operation receives into 'password', which may put unencrypted sensitive data into $@ | test3.cpp:429:7:429:14 | password | password | +| test3.cpp:507:2:507:5 | call to send | test3.cpp:507:18:507:39 | social_security_number | test3.cpp:507:18:507:39 | social_security_number | This operation transmits 'social_security_number', which may contain unencrypted sensitive data from $@ | test3.cpp:507:18:507:39 | social_security_number | social_security_number | +| test3.cpp:508:2:508:5 | call to send | test3.cpp:508:18:508:33 | socialSecurityNo | test3.cpp:508:18:508:33 | socialSecurityNo | This operation transmits 'socialSecurityNo', which may contain unencrypted sensitive data from $@ | test3.cpp:508:18:508:33 | socialSecurityNo | socialSecurityNo | +| test3.cpp:509:2:509:5 | call to send | test3.cpp:509:18:509:29 | homePostCode | test3.cpp:509:18:509:29 | homePostCode | This operation transmits 'homePostCode', which may contain unencrypted sensitive data from $@ | test3.cpp:509:18:509:29 | homePostCode | homePostCode | +| test3.cpp:510:2:510:5 | call to send | test3.cpp:510:18:510:28 | my_zip_code | test3.cpp:510:18:510:28 | my_zip_code | This operation transmits 'my_zip_code', which may contain unencrypted sensitive data from $@ | test3.cpp:510:18:510:28 | my_zip_code | my_zip_code | +| test3.cpp:511:2:511:5 | call to send | test3.cpp:511:18:511:26 | telephone | test3.cpp:511:18:511:26 | telephone | This operation transmits 'telephone', which may contain unencrypted sensitive data from $@ | test3.cpp:511:18:511:26 | telephone | telephone | +| test3.cpp:512:2:512:5 | call to send | test3.cpp:512:18:512:36 | mobile_phone_number | test3.cpp:512:18:512:36 | mobile_phone_number | This operation transmits 'mobile_phone_number', which may contain unencrypted sensitive data from $@ | test3.cpp:512:18:512:36 | mobile_phone_number | mobile_phone_number | +| test3.cpp:513:2:513:5 | call to send | test3.cpp:513:18:513:22 | email | test3.cpp:513:18:513:22 | email | This operation transmits 'email', which may contain unencrypted sensitive data from $@ | test3.cpp:513:18:513:22 | email | email | +| test3.cpp:514:2:514:5 | call to send | test3.cpp:514:18:514:38 | my_credit_card_number | test3.cpp:514:18:514:38 | my_credit_card_number | This operation transmits 'my_credit_card_number', which may contain unencrypted sensitive data from $@ | test3.cpp:514:18:514:38 | my_credit_card_number | my_credit_card_number | +| test3.cpp:515:2:515:5 | call to send | test3.cpp:515:18:515:35 | my_bank_account_no | test3.cpp:515:18:515:35 | my_bank_account_no | This operation transmits 'my_bank_account_no', which may contain unencrypted sensitive data from $@ | test3.cpp:515:18:515:35 | my_bank_account_no | my_bank_account_no | +| test3.cpp:516:2:516:5 | call to send | test3.cpp:516:18:516:29 | employerName | test3.cpp:516:18:516:29 | employerName | This operation transmits 'employerName', which may contain unencrypted sensitive data from $@ | test3.cpp:516:18:516:29 | employerName | employerName | +| test3.cpp:517:2:517:5 | call to send | test3.cpp:517:18:517:29 | medical_info | test3.cpp:517:18:517:29 | medical_info | This operation transmits 'medical_info', which may contain unencrypted sensitive data from $@ | test3.cpp:517:18:517:29 | medical_info | medical_info | +| test3.cpp:527:3:527:6 | call to send | test3.cpp:526:44:526:54 | my_latitude | test3.cpp:527:15:527:20 | buffer | This operation transmits 'buffer', which may contain unencrypted sensitive data from $@ | test3.cpp:526:44:526:54 | my_latitude | my_latitude | +| test3.cpp:533:3:533:6 | call to send | test3.cpp:532:45:532:58 | home_longitude | test3.cpp:533:15:533:20 | buffer | This operation transmits 'buffer', which may contain unencrypted sensitive data from $@ | test3.cpp:532:45:532:58 | home_longitude | home_longitude | +| test3.cpp:552:3:552:6 | call to send | test3.cpp:551:47:551:58 | salaryString | test3.cpp:552:15:552:20 | buffer | This operation transmits 'buffer', which may contain unencrypted sensitive data from $@ | test3.cpp:551:47:551:58 | salaryString | salaryString | +| test3.cpp:559:3:559:6 | call to send | test3.cpp:556:19:556:30 | salaryString | test3.cpp:559:15:559:20 | buffer | This operation transmits 'buffer', which may contain unencrypted sensitive data from $@ | test3.cpp:556:19:556:30 | salaryString | salaryString | diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-311/semmle/tests/test3.cpp b/cpp/ql/test/query-tests/Security/CWE/CWE-311/semmle/tests/test3.cpp index f54d2a09e8d..663297de78b 100644 --- a/cpp/ql/test/query-tests/Security/CWE/CWE-311/semmle/tests/test3.cpp +++ b/cpp/ql/test/query-tests/Security/CWE/CWE-311/semmle/tests/test3.cpp @@ -5,7 +5,7 @@ typedef unsigned long size_t; int stdout_fileno = STDOUT_FILENO; size_t strlen(const char *s); - +int snprintf(char *s, size_t n, const char *format, ...); void send(int fd, const void *buf, size_t bufLen, int d); void recv(int fd, void *buf, size_t bufLen, int d); void read(int fd, void *buf, size_t bufLen); @@ -474,3 +474,88 @@ void test_tty() recv(f, password, 256, val()); // GOOD: from terminal or stdin } } + +// --- + +struct person_info +{ + // sensitive + char *social_security_number; + char *socialSecurityNo; + char *homePostCode; + char *my_zip_code; + char *telephone; + char *mobile_phone_number; + char *email; + char *my_credit_card_number; + char *my_bank_account_no; + char *employerName; + char medical_info[8 * 1024]; + char *license_key; + double my_latitude; + double home_longitude; + int newSalary; + char *salaryString; + // not sensitive + char *license_key_hash; + char *my_zip_file; +}; + +void tests2(person_info *pi) +{ + // direct cases + send(val(), pi->social_security_number, strlen(pi->social_security_number), val()); // BAD + send(val(), pi->socialSecurityNo, strlen(pi->socialSecurityNo), val()); // BAD + send(val(), pi->homePostCode, strlen(pi->homePostCode), val()); // BAD + send(val(), pi->my_zip_code, strlen(pi->my_zip_code), val()); // BAD + send(val(), pi->telephone, strlen(pi->telephone), val()); // BAD + send(val(), pi->mobile_phone_number, strlen(pi->mobile_phone_number), val()); // BAD + send(val(), pi->email, strlen(pi->email), val()); // BAD + send(val(), pi->my_credit_card_number, strlen(pi->my_credit_card_number), val()); // BAD + send(val(), pi->my_bank_account_no, strlen(pi->my_bank_account_no), val()); // BAD + send(val(), pi->employerName, strlen(pi->employerName), val()); // BAD + send(val(), pi->medical_info, strlen(pi->medical_info), val()); // BAD + send(val(), pi->license_key, strlen(pi->license_key), val()); // BAD [NOT DETECTED] + send(val(), pi->license_key_hash, strlen(pi->license_key_hash), val()); // GOOD + send(val(), pi->my_zip_file, strlen(pi->my_zip_file), val()); // GOOD + + // indirect cases + { + char buffer[1024]; + + snprintf(buffer, 1024, "lat = %f\n", pi->my_latitude); + send(val(), buffer, strlen(buffer), val()); // BAD + } + { + char buffer[1024]; + + snprintf(buffer, 1024, "long = %f\n", pi->home_longitude); + send(val(), buffer, strlen(buffer), val()); // BAD + } + { + char buffer[1024]; + + snprintf(buffer, 1024, "salary = %i\n", pi->newSalary); + send(val(), buffer, strlen(buffer), val()); // BAD [NOT DETECTED] + } + { + char buffer[1024]; + int sal = pi->newSalary; + + snprintf(buffer, 1024, "salary = %i\n", sal); + send(val(), buffer, strlen(buffer), val()); // BAD [NOT DETECTED] + } + { + char buffer[1024]; + + snprintf(buffer, 1024, "salary = %s\n", pi->salaryString); + send(val(), buffer, strlen(buffer), val()); // BAD + } + { + char buffer[1024]; + char *sal = pi->salaryString; + + snprintf(buffer, 1024, "salary = %s\n", sal); + send(val(), buffer, strlen(buffer), val()); // BAD + } +}