Python: Rewrite attribute lookup helpers for better performance

Not that they actually have a huge problem right now; it's just that using the old pattern HAS led to bad performance in the past. See https://github.com/github/codeql/pull/4361
2026-05-02 20:25:13 +02:00 · 2020-09-30 17:24:50 +02:00
parent 4adc26eb62
commit c4a2e1d6d1
1 changed files with 38 additions and 10 deletions
--- a/python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll
@@ -29,7 +29,7 @@ private module Stdlib {
   *
   * For example, using `attr_name = "system"` will get all uses of `os.system`.
   */
-  private DataFlow::Node os_attr(string attr_name, DataFlow::TypeTracker t) {
+  private DataFlow::Node os_attr(DataFlow::TypeTracker t, string attr_name) {
    attr_name in ["system", "popen",
          // exec
          "execl", "execle", "execlp", "execlpe", "execv", "execve", "execvp", "execvpe",
@@ -41,10 +41,24 @@ private module Stdlib {
      result = DataFlow::importMember("os", attr_name)
      or
      t.startInAttr(attr_name) and
-      result = os()
-      or
-      exists(DataFlow::TypeTracker t2 | result = os_attr(attr_name, t2).track(t2, t))
+      result = DataFlow::importModule("os")
    )
+    or
+    // Due to bad performance when using normal setup with `os_attr(t2, attr_name).track(t2, t)`
+    // we have inlined that code and forced a join
+    exists(DataFlow::TypeTracker t2 |
+      exists(DataFlow::StepSummary summary |
+        os_attr_first_join(t2, attr_name, result, summary) and
+        t = t2.append(summary)
+      )
+    )
+  }
+
+  pragma[nomagic]
+  private predicate os_attr_first_join(
+    DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
+  ) {
+    DataFlow::StepSummary::step(os_attr(t2, attr_name), res, summary)
  }

  /**
@@ -54,7 +68,7 @@ private module Stdlib {
   * For example, using `"system"` will get all uses of `os.system`.
   */
  private DataFlow::Node os_attr(string attr_name) {
-    result = os_attr(attr_name, DataFlow::TypeTracker::end())
+    result = os_attr(DataFlow::TypeTracker::end(), attr_name)
  }

  /**
@@ -148,17 +162,31 @@ private module Stdlib {
   *
   * For example, using `attr_name = "Popen"` will get all uses of `subprocess.Popen`.
   */
-  private DataFlow::Node subprocess_attr(string attr_name, DataFlow::TypeTracker t) {
+  private DataFlow::Node subprocess_attr(DataFlow::TypeTracker t, string attr_name) {
    attr_name in ["Popen", "call", "check_call", "check_output", "run"] and
    (
      t.start() and
      result = DataFlow::importMember("subprocess", attr_name)
      or
      t.startInAttr(attr_name) and
-      result = subprocess()
-      or
-      exists(DataFlow::TypeTracker t2 | result = subprocess_attr(attr_name, t2).track(t2, t))
+      result = DataFlow::importModule("subprocess")
    )
+    or
+    // Due to bad performance when using normal setup with `subprocess_attr(t2, attr_name).track(t2, t)`
+    // we have inlined that code and forced a join
+    exists(DataFlow::TypeTracker t2 |
+      exists(DataFlow::StepSummary summary |
+        subprocess_attr_first_join(t2, attr_name, result, summary) and
+        t = t2.append(summary)
+      )
+    )
+  }
+
+  pragma[nomagic]
+  private predicate subprocess_attr_first_join(
+    DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
+  ) {
+    DataFlow::StepSummary::step(subprocess_attr(t2, attr_name), res, summary)
  }

  /**
@@ -168,7 +196,7 @@ private module Stdlib {
   * For example, using `attr_name = "Popen"` will get all uses of `subprocess.Popen`.
   */
  private DataFlow::Node subprocess_attr(string attr_name) {
-    result = subprocess_attr(attr_name, DataFlow::TypeTracker::end())
+    result = subprocess_attr(DataFlow::TypeTracker::end(), attr_name)
  }

  /**