Merge branch 'main' into orm

2026-03-06 15:49:08 +01:00 · 2022-03-01 12:01:54 +01:00
parent 98c60a706e 94cb5c2be4
commit cd58c12bbe
108 changed files with 4343 additions and 1908 deletions
--- a/python/ql/lib/CHANGELOG.md
+++ b/python/ql/lib/CHANGELOG.md
@@ -1,3 +1,9 @@
+## 0.0.10
+
+### Deprecated APIs
+
+* The old points-to based modeling has been deprecated. Use the new type-tracking/API-graphs based modeling instead.
+
 ## 0.0.9

 ## 0.0.8
--- a/python/ql/lib/change-notes/2022-02-25-regex-group-characters.md
+++ b/python/ql/lib/change-notes/2022-02-25-regex-group-characters.md
@@ -0,0 +1,4 @@
+---
+category: minorAnalysis
+---
+* The regular expression parser now groups sequences of normal characters. This reduces the number of instances of `RegExpNormalChar`.
--- a/python/ql/lib/change-notes/2022-01-19-deprecate-old-library-modeling.md
+++ b/python/ql/lib/change-notes/2022-01-19-deprecate-old-library-modeling.md
@@ -1,4 +1,5 @@
---
-category: deprecated
---
+## 0.0.10
+
+### Deprecated APIs
+
 * The old points-to based modeling has been deprecated. Use the new type-tracking/API-graphs based modeling instead.
--- a/python/ql/lib/codeql-pack.release.yml
+++ b/python/ql/lib/codeql-pack.release.yml
@@ -1,2 +1,2 @@
 ---
-lastReleaseVersion: 0.0.9
+lastReleaseVersion: 0.0.10
--- a/python/ql/lib/qlpack.yml
+++ b/python/ql/lib/qlpack.yml
@@ -1,5 +1,5 @@
 name: codeql/python-all
-version: 0.0.10-dev
+version: 0.0.11-dev
 groups: python
 dbscheme: semmlecode.python.dbscheme
 extractor: python
--- a/python/ql/lib/semmle/python/RegexTreeView.qll
+++ b/python/ql/lib/semmle/python/RegexTreeView.qll
@@ -39,7 +39,12 @@ newtype TRegExpParent =
  /** A special character */
  TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or
  /** A normal character */
-  TRegExpNormalChar(Regex re, int start, int end) { re.normalCharacter(start, end) } or
+  TRegExpNormalChar(Regex re, int start, int end) {
+    re.normalCharacterSequence(start, end)
+    or
+    re.escapedCharacter(start, end) and
+    not re.specialCharacter(start, end, _)
+  } or
  /** A back reference */
  TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }

--- a/python/ql/lib/semmle/python/regex.qll
+++ b/python/ql/lib/semmle/python/regex.qll
@@ -427,6 +427,7 @@ abstract class RegexString extends Expr {
  }

  predicate normalCharacter(int start, int end) {
+    end = start + 1 and
    this.character(start, end) and
    not this.specialCharacter(start, end, _)
  }
@@ -446,6 +447,49 @@ abstract class RegexString extends Expr {
    )
  }

+  /**
+   * Holds if the range [start:end) is a single constant item consisting of only 'normal' characters.
+   */
+  predicate normalCharacterSequence(int start, int end) {
+    // a normal character inside a character set is interpreted on its own
+    this.normalCharacter(start, end) and
+    this.inCharSet(start)
+    or
+    // outside a character set, a maximal run of normal characters is considered as one constant
+    exists(int s, int e |
+      e = max(int i | this.normalCharacterRun(s, i)) and
+      not this.inCharSet(s)
+    |
+      // 'abc' can be considered one constant, but in 'abc+' the run is split
+      // into 'ab' and 'c', since the qualifier found at `e` only applies to 'c';
+      // `start < end` avoids an empty prefix when the run is a single character.
+      if this.qualifier(e, _, _, _)
+      then
+        end = e and start = e - 1
+        or
+        end = e - 1 and start = s and start < end
+      else (
+        end = e and
+        start = s
+      )
+    )
+  }
+
+  private predicate normalCharacterRun(int start, int end) {
+    (
+      this.normalCharacterRun(start, end - 1)
+      or
+      start = end - 1 and not this.normalCharacter(start - 1, start)
+    ) and
+    this.normalCharacter(end - 1, end)
+  }
+
+  private predicate characterItem(int start, int end) {
+    this.normalCharacterSequence(start, end) or
+    this.escapedCharacter(start, end) or
+    this.specialCharacter(start, end, _)
+  }
+
  /** Whether the text in the range start,end is a group */
  predicate group(int start, int end) {
    this.groupContents(start, end, _, _)
@@ -717,7 +761,7 @@ abstract class RegexString extends Expr {
  string getBackrefName(int start, int end) { this.named_backreference(start, end, result) }

  private predicate baseItem(int start, int end) {
-    this.character(start, end) and
+    this.characterItem(start, end) and
    not exists(int x, int y | this.charSet(x, y) and x <= start and y >= end)
    or
    this.group(start, end)
@@ -837,14 +881,14 @@ abstract class RegexString extends Expr {
  }

  private predicate item_start(int start) {
-    this.character(start, _) or
+    this.characterItem(start, _) or
    this.isGroupStart(start) or
    this.charSet(start, _) or
    this.backreference(start, _)
  }

  private predicate item_end(int end) {
-    this.character(_, end)
+    this.characterItem(_, end)
    or
    exists(int endm1 | this.isGroupEnd(endm1) and end = endm1 + 1)
    or
@@ -953,7 +997,7 @@ abstract class RegexString extends Expr {
   */
  predicate firstItem(int start, int end) {
    (
-      this.character(start, end)
+      this.characterItem(start, end)
      or
      this.qualifiedItem(start, end, _, _)
      or
@@ -968,7 +1012,7 @@ abstract class RegexString extends Expr {
   */
  predicate lastItem(int start, int end) {
    (
-      this.character(start, end)
+      this.characterItem(start, end)
      or
      this.qualifiedItem(start, end, _, _)
      or
--- a/python/ql/src/CHANGELOG.md
+++ b/python/ql/src/CHANGELOG.md
@@ -1,3 +1,10 @@
+## 0.0.10
+
+### New Queries
+
+* The query "LDAP query built from user-controlled sources" (`py/ldap-injection`) has been promoted from experimental to the main query pack. Its results will now appear by default. This query was originally [submitted as an experimental query by @jorgectf](https://github.com/github/codeql/pull/5443).
+* The query "Log Injection" (`py/log-injection`) has been promoted from experimental to the main query pack. Its results will now appear when `security-extended` is used. This query was originally [submitted as an experimental query by @haby0](https://github.com/github/codeql/pull/6182).
+
 ## 0.0.9

 ### Bug Fixes
--- a/python/ql/src/change-notes/2022-02-25-promote-log-injection.md
+++ b/python/ql/src/change-notes/2022-02-25-promote-log-injection.md
@@ -1,4 +0,0 @@
---
-category: newQuery
---
-* The query "Log Injection" (`py/log-injection`) has been promoted from experimental to the main query pack. Its results will now appear when `security-extended` is used. This query was originally [submitted as an experimental query by @haby0](https://github.com/github/codeql/pull/6182).
--- a/python/ql/src/change-notes/2022-02-28-promote-ldap-injection.md
+++ b/python/ql/src/change-notes/2022-02-28-promote-ldap-injection.md
@@ -1,4 +0,0 @@
---
-category: newQuery
---
-* The query "LDAP query built from user-controlled sources" (`py/ldap-injection`) has been promoted from experimental to the main query pack. Its results will now appear by default. This query was originally [submitted as an experimental query by @jorgectf](https://github.com/github/codeql/pull/5443).
--- a/python/ql/src/change-notes/released/0.0.10.md
+++ b/python/ql/src/change-notes/released/0.0.10.md
@@ -0,0 +1,6 @@
+## 0.0.10
+
+### New Queries
+
+* The query "LDAP query built from user-controlled sources" (`py/ldap-injection`) has been promoted from experimental to the main query pack. Its results will now appear by default. This query was originally [submitted as an experimental query by @jorgectf](https://github.com/github/codeql/pull/5443).
+* The query "Log Injection" (`py/log-injection`) has been promoted from experimental to the main query pack. Its results will now appear when `security-extended` is used. This query was originally [submitted as an experimental query by @haby0](https://github.com/github/codeql/pull/6182).
--- a/python/ql/src/codeql-pack.release.yml
+++ b/python/ql/src/codeql-pack.release.yml
@@ -1,2 +1,2 @@
 ---
-lastReleaseVersion: 0.0.9
+lastReleaseVersion: 0.0.10
--- a/python/ql/src/qlpack.yml
+++ b/python/ql/src/qlpack.yml
@@ -1,5 +1,5 @@
 name: codeql/python-queries
-version: 0.0.10-dev
+version: 0.0.11-dev
 groups: 
  - python
  - queries
--- a/python/ql/test/library-tests/regex/FirstLast.expected
+++ b/python/ql/test/library-tests/regex/FirstLast.expected
@@ -1,6 +1,6 @@
-| 012345678 | first | 0 | 1 |
-| 012345678 | last | 8 | 9 |
-| (?!not-this)^[A-Z_]+$ | first | 3 | 4 |
+| 012345678 | first | 0 | 9 |
+| 012345678 | last | 0 | 9 |
+| (?!not-this)^[A-Z_]+$ | first | 3 | 11 |
 | (?!not-this)^[A-Z_]+$ | first | 12 | 13 |
 | (?!not-this)^[A-Z_]+$ | first | 13 | 19 |
 | (?!not-this)^[A-Z_]+$ | first | 13 | 20 |
@@ -27,9 +27,9 @@
 | (?m)^(?!$) | last | 4 | 5 |
 | (?m)^(?!$) | last | 8 | 9 |
 | (\\033\|~{) | first | 1 | 5 |
-| (\\033\|~{) | first | 6 | 7 |
+| (\\033\|~{) | first | 6 | 8 |
 | (\\033\|~{) | last | 1 | 5 |
-| (\\033\|~{) | last | 7 | 8 |
+| (\\033\|~{) | last | 6 | 8 |
 | [\ufffd-\ufffd] | first | 0 | 5 |
 | [\ufffd-\ufffd] | last | 0 | 5 |
 | [\ufffd-\ufffd][\ufffd-\ufffd] | first | 0 | 5 |
@@ -52,8 +52,8 @@
 | \\A[+-]?\\d+ | last | 7 | 9 |
 | \\A[+-]?\\d+ | last | 7 | 10 |
 | \\Afoo\\Z | first | 0 | 2 |
-| \\Afoo\\Z | first | 2 | 3 |
-| \\Afoo\\Z | last | 4 | 5 |
+| \\Afoo\\Z | first | 2 | 5 |
+| \\Afoo\\Z | last | 2 | 5 |
 | \\Afoo\\Z | last | 5 | 7 |
 | \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | first | 0 | 2 |
 | \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | last | 28 | 32 |
@@ -86,24 +86,24 @@
 | ^[A-Z_]+$(?<!not-this) | last | 1 | 7 |
 | ^[A-Z_]+$(?<!not-this) | last | 1 | 8 |
 | ^[A-Z_]+$(?<!not-this) | last | 8 | 9 |
-| ^[A-Z_]+$(?<!not-this) | last | 20 | 21 |
+| ^[A-Z_]+$(?<!not-this) | last | 13 | 21 |
 | ax{01,3} | first | 0 | 1 |
 | ax{01,3} | last | 1 | 2 |
 | ax{01,3} | last | 1 | 8 |
-| ax{01,3} | last | 7 | 8 |
+| ax{01,3} | last | 3 | 8 |
 | ax{3,} | first | 0 | 1 |
 | ax{3,} | last | 1 | 2 |
 | ax{3,} | last | 1 | 6 |
-| ax{3,} | last | 5 | 6 |
+| ax{3,} | last | 3 | 6 |
 | ax{3} | first | 0 | 1 |
 | ax{3} | last | 1 | 2 |
 | ax{3} | last | 1 | 5 |
-| ax{3} | last | 4 | 5 |
+| ax{3} | last | 3 | 5 |
 | ax{,3} | first | 0 | 1 |
 | ax{,3} | last | 0 | 1 |
 | ax{,3} | last | 1 | 2 |
 | ax{,3} | last | 1 | 6 |
-| ax{,3} | last | 5 | 6 |
+| ax{,3} | last | 3 | 6 |
 | x\| | first | 0 | 1 |
 | x\| | last | 0 | 1 |
 | x\|(?<!\\w)l | first | 0 | 1 |
@@ -111,5 +111,5 @@
 | x\|(?<!\\w)l | first | 9 | 10 |
 | x\|(?<!\\w)l | last | 0 | 1 |
 | x\|(?<!\\w)l | last | 9 | 10 |
-| x{Not qual} | first | 0 | 1 |
-| x{Not qual} | last | 10 | 11 |
+| x{Not qual} | first | 0 | 11 |
+| x{Not qual} | last | 0 | 11 |
--- a/python/ql/test/library-tests/regex/Regex.ql
+++ b/python/ql/test/library-tests/regex/Regex.ql
@@ -6,6 +6,10 @@ predicate part(Regex r, int start, int end, string kind) {
  or
  r.normalCharacter(start, end) and kind = "char"
  or
+  r.escapedCharacter(start, end) and
+  kind = "char" and
+  not r.specialCharacter(start, end, _)
+  or
  r.specialCharacter(start, end, kind)
  or
  r.sequence(start, end) and kind = "sequence"
--- a/python/ql/test/query-tests/Security/CWE-730-ReDoS/ReDoS.expected
+++ b/python/ql/test/query-tests/Security/CWE-730-ReDoS/ReDoS.expected
@@ -59,7 +59,7 @@
 | redos.py:220:25:220:29 | [^X]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'W'. |
 | redos.py:223:30:223:30 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'W' and containing many repetitions of 'bW'. |
 | redos.py:229:30:229:30 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'W' and containing many repetitions of 'bW'. |
-| redos.py:241:27:241:27 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'a' and containing many repetitions of 'ba'. |
+| redos.py:241:26:241:27 | ab | This part of the regular expression may cause exponential backtracking on strings starting with 'a' and containing many repetitions of 'ab'. |
 | redos.py:247:25:247:31 | [\\n\\s]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
 | redos.py:256:25:256:27 | \\w* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
 | redos.py:256:37:256:39 | \\w* | This part of the regular expression may cause exponential backtracking on strings starting with 'foobarbaz' and containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |