Merge branch 'main' into amammad-js-CodeInjection_execa

This commit is contained in:
erik-krogh
2024-05-23 08:02:22 +02:00
271 changed files with 6686 additions and 5907 deletions

View File

@@ -735,6 +735,7 @@ public class AutoBuild {
.collect(Collectors.toList());
filesToExtract = filesToExtract.stream()
.filter(p -> !isFileTooLarge(p))
.sorted(PATH_ORDERING)
.collect(Collectors.toCollection(() -> new LinkedHashSet<>()));
@@ -1010,6 +1011,15 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set<Path>
return config;
}
/**
 * Decides whether a file must be skipped because of its size, and emits a warning when
 * it is.
 *
 * The limit is {@code maximumFileSizeInMegabytes}, with one megabyte counted as
 * 1,000,000 bytes.
 *
 * @param f the file whose on-disk length is checked
 * @return {@code true} if the file exceeds the limit (a warning has been issued),
 *     {@code false} otherwise
 */
private boolean isFileTooLarge(Path f) {
  final long sizeInBytes = f.toFile().length();
  final boolean exceedsLimit = sizeInBytes > 1_000_000L * this.maximumFileSizeInMegabytes;
  if (!exceedsLimit) {
    return false;
  }
  // Report the actual size in megabytes alongside the configured limit.
  String message =
      "Skipping " + f + " because it is too large ("
          + StringUtil.printFloat(sizeInBytes / 1_000_000.0)
          + " MB). The limit is " + this.maximumFileSizeInMegabytes + " MB.";
  warn(message);
  return true;
}
private Set<Path> extractTypeScript(
Set<Path> files,
Set<Path> extractedFiles,
@@ -1051,9 +1061,10 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set<Path>
// compiler can parse them for us.
continue;
}
if (!extractedFiles.contains(sourcePath)) {
typeScriptFiles.add(sourcePath);
if (extractedFiles.contains(sourcePath)) {
continue;
}
typeScriptFiles.add(sourcePath);
}
typeScriptFiles.sort(PATH_ORDERING);
extractTypeScriptFiles(typeScriptFiles, extractedFiles, extractors);
@@ -1236,11 +1247,6 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set<Path>
warn("Skipping " + file + ", which does not exist.");
return;
}
long fileSize = f.length();
if (fileSize > 1_000_000L * this.maximumFileSizeInMegabytes) {
warn("Skipping " + file + " because it is too large (" + StringUtil.printFloat(fileSize / 1_000_000.0) + " MB). The limit is " + this.maximumFileSizeInMegabytes + " MB.");
return;
}
try {
long start = logBeginProcess("Extracting " + file);

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Additional heuristics for a new sensitive data classification for private information (e.g. credit card numbers) have been added to the shared `SensitiveDataHeuristics.qll` library. This may produce additional results for queries that use sensitive data, such as `js/clear-text-storage-sensitive-data` and `js/clear-text-logging`.

View File

@@ -0,0 +1,4 @@
---
category: fix
---
* Fixed a bug where very large TypeScript files would cause database creation to crash. Files larger than 10 MB were already excluded from analysis, but the file size check was not applied to TypeScript files.

View File

@@ -14,13 +14,14 @@
* - id: a user name or other account information;
* - password: a password or authorization key;
* - certificate: a certificate;
* - private: private data such as credit card numbers.
*
* While classifications are represented as strings, this should not be relied upon.
* Instead, use the predicates in `SensitiveDataClassification::` to work with
* classifications.
*/
class SensitiveDataClassification extends string {
SensitiveDataClassification() { this in ["secret", "id", "password", "certificate"] }
SensitiveDataClassification() { this in ["secret", "id", "password", "certificate", "private"] }
}
/**
@@ -38,6 +39,9 @@ module SensitiveDataClassification {
/** Gets the classification for certificates. */
SensitiveDataClassification certificate() { result = "certificate" }
/** Gets the classification for private data. */
SensitiveDataClassification private() { result = "private" }
}
/**
@@ -77,6 +81,40 @@ module HeuristicNames {
*/
string maybeCertificate() { result = "(?is).*(cert)(?!.*(format|name|ification)).*" }
/**
* Gets a regular expression that identifies strings that may indicate the presence of
* private data.
*
* The expression is a single alternation, assembled below category by category
* (government identifiers, contact information, geographic location, financial data,
* health, and relationships). The alternation is wrapped in `.*( ... ).*`, so an
* alternative may occur anywhere within the string being tested.
*
* NOTE(review): the leading `(?is)` presumably enables case-insensitive matching and lets
* `.` match line terminators, as in Java regular expression syntax - confirm against the
* regex engine used by the consumers of this predicate.
*/
string maybePrivate() {
result =
"(?is).*(" +
// Inspired by the list on https://cwe.mitre.org/data/definitions/359.html
// Government identifiers, such as Social Security Numbers
"social.?security|employer.?identification|national.?insurance|resident.?id|" +
// `ssn` is only accepted when delimited on both sides by `_`, `-` or a word boundary,
// so that it does not match in the middle of a longer word.
"passport.?(num|no)|([_-]|\\b)ssn([_-]|\\b)|" +
// Contact information, such as home addresses
"post.?code|zip.?code|home.?addr|" +
// and telephone numbers
"(mob(ile)?|home).?(num|no|tel|phone)|(tel|fax|phone).?(num|no)|telephone|" +
"emergency.?contact|" +
// Geographic location - where the user is (or was)
"latitude|longitude|nationality|" +
// Financial data - such as credit card numbers, salary, bank accounts, and debts
"(credit|debit|bank|visa).?(card|num|no|acc(ou)?nt)|acc(ou)?nt.?(no|num|credit)|" +
// `ccn` is delimited in the same way as `ssn` above.
"salary|billing|credit.?(rating|score)|([_-]|\\b)ccn([_-]|\\b)|" +
// Communications - e-mail addresses, private e-mail messages, SMS text messages, chat logs, etc.
// "e(mail|_mail)|" + // this seems too noisy
// Health - medical conditions, insurance status, prescription records
"birth.?da(te|y)|da(te|y).?(of.?)?birth|" +
"medical|(health|care).?plan|healthkit|appointment|prescription|" +
"blood.?(type|alcohol|glucose|pressure)|heart.?(rate|rhythm)|body.?(mass|fat)|" +
"menstrua|pregnan|insulin|inhaler|" +
// Relationships - work and family
"employ(er|ee)|spouse|maiden.?name" +
// --- end of the alternatives; the last one above has no trailing `|`
").*"
}
/**
* Gets a regular expression that identifies strings that may indicate the presence
* of sensitive data, with `classification` describing the kind of sensitive data involved.
@@ -90,6 +128,9 @@ module HeuristicNames {
or
result = maybeCertificate() and
classification = SensitiveDataClassification::certificate()
or
result = maybePrivate() and
classification = SensitiveDataClassification::private()
}
/**