Merge branch 'main' into post-release-prep/codeql-cli-2.10.0

2025-12-21 11:16:30 +01:00 · 2022-06-27 20:37:25 +02:00
parent d506f448ef dfe276aa18
commit cc57cb8af5
331 changed files with 13701 additions and 4340 deletions
--- a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java
+++ b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java
@@ -217,9 +217,6 @@ public class FileExtractor {
      }

      private boolean hasBadFileHeader(File f, String lcExt, ExtractorConfig config) {
-        if (!".ts".equals(lcExt)) {
-          return false;
-        }
        try (FileInputStream fis = new FileInputStream(f)) {
          byte[] bytes = new byte[fileHeaderSize];
          int length = fis.read(bytes);
--- a/javascript/extractor/src/com/semmle/js/extractor/Main.java
+++ b/javascript/extractor/src/com/semmle/js/extractor/Main.java
@@ -43,7 +43,7 @@ public class Main {
   * A version identifier that should be updated every time the extractor changes in such a way that
   * it may produce different tuples for the same file under the same {@link ExtractorConfig}.
   */
-  public static final String EXTRACTOR_VERSION = "2022-06-08";
+  public static final String EXTRACTOR_VERSION = "2022-06-27";

  public static final Pattern NEWLINE = Pattern.compile("\n");

--- a/javascript/ql/lib/change-notes/2022-06-22-sensitive-common-words.md
+++ b/javascript/ql/lib/change-notes/2022-06-22-sensitive-common-words.md
@@ -0,0 +1,4 @@
+---
+category: minorAnalysis
+---
+* Improved modeling of sensitive data sources, so common words like `certain` and `secretary` are no longer considered a certificate and a secret (respectively).
--- a/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll
+++ b/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll
@@ -50,7 +50,7 @@ module HeuristicNames {
   * Gets a regular expression that identifies strings that may indicate the presence of secret
   * or trusted data.
   */
-  string maybeSecret() { result = "(?is).*((?<!is)secret|(?<!un|is)trusted).*" }
+  string maybeSecret() { result = "(?is).*((?<!is|is_)secret|(?<!un|un_|is|is_)trusted).*" }

  /**
   * Gets a regular expression that identifies strings that may indicate the presence of
@@ -96,10 +96,14 @@ module HeuristicNames {
   * Gets a regular expression that identifies strings that may indicate the presence of data
   * that is hashed or encrypted, and hence rendered non-sensitive, or contains special characters
   * suggesting nouns within the string do not represent the meaning of the whole string (e.g. a URL or a SQL query).
+   *
+   * We also filter out common words like `certain` and `concert`, since otherwise these could
+   * be matched by the certificate regular expressions. Same for `accountable` (account), or
+   * `secretarial` (secret).
   */
  string notSensitiveRegexp() {
    result =
-      "(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|code)).*"
+      "(?is).*([^\\w$.-]|redact|censor|obfuscate|hash|md5|sha|random|((?<!un)(en))?(crypt|code)|certain|concert|secretar|accountant|accountab).*"
  }

  /**
--- a/javascript/ql/src/Declarations/UnusedVariable.ql
+++ b/javascript/ql/src/Declarations/UnusedVariable.ql
@@ -165,6 +165,9 @@ predicate whitelisted(UnusedLocal v) {
    or
    // ignore ambient declarations - too noisy
    vd.isAmbient()
+    or
+    // ignore variables in template placeholders, as each placeholder sees a different copy of the variable
+    vd.getTopLevel() instanceof Templating::TemplateTopLevel
  )
  or
  exists(Expr eval | eval instanceof DirectEval or eval instanceof GeneratedCodeExpr |