Python: Use type-tracking for integer literal tracking

Like we've done for pretty much everything else. An experiment to see what this means for query performance.
2026-04-30 19:26:02 +02:00 · 2021-02-23 17:16:49 +01:00
parent 27987717dc
commit c195c64982
4 changed files with 28 additions and 4212 deletions
--- a/python/ql/src/semmle/python/Concepts.qll
+++ b/python/ql/src/semmle/python/Concepts.qll
@@ -6,7 +6,6 @@

 import python
 private import semmle.python.dataflow.new.DataFlow
-private import semmle.python.dataflow.new.DataFlowOnlyInternalUse
 private import semmle.python.dataflow.new.RemoteFlowSources
 private import semmle.python.dataflow.new.TaintTracking
 private import semmle.python.Frameworks
@@ -563,19 +562,34 @@ module Cryptography {

    /** Provides classes for modeling new key-pair generation APIs. */
    module KeyGeneration {
-      /**
-       * A data-flow configuration for tracking integer literals.
-       */
-      private class IntegerLiteralTrackerConfiguration extends DataFlowOnlyInternalUse::Configuration {
-        IntegerLiteralTrackerConfiguration() { this = "IntegerLiteralTrackerConfiguration" }
+      /** Gets a reference (tracked with `t`) to an integer literal with value `keySize`; `origin` is the node of that literal. */
+      private DataFlow::Node keysizeTracker(
+        DataFlow::TypeTracker t, int keySize, DataFlow::Node origin
+      ) {
+        t.start() and
+        result.asExpr().(IntegerLiteral).getValue() = keySize and
+        origin = result
+        or
+        // Due to bad performance when using the normal setup with `keysizeTracker(t2, keySize, origin).track(t2, t)`, we have inlined that code and forced a join
+        exists(DataFlow::TypeTracker t2 |
+          exists(DataFlow::StepSummary summary |
+            keysizeTracker_first_join(t2, keySize, origin, result, summary) and
+            t = t2.append(summary)
+          )
+        )
+      }

-        override predicate isSource(DataFlow::Node source) {
-          source = DataFlow::exprNode(any(IntegerLiteral size))
-        }
+      pragma[nomagic]
+      private predicate keysizeTracker_first_join(
+        DataFlow::TypeTracker t2, int keySize, DataFlow::Node origin, DataFlow::Node res,
+        DataFlow::StepSummary summary
+      ) {
+        DataFlow::StepSummary::step(keysizeTracker(t2, keySize, origin), res, summary)
+      }

-        override predicate isSink(DataFlow::Node sink) {
-          sink = any(KeyGeneration::Range kg).getKeySizeArg()
-        }
+      /** Gets a reference to an integer literal with value `keySize` (type tracking completed); `origin` is the node of that literal. */
+      private DataFlow::Node keysizeTracker(int keySize, DataFlow::Node origin) {
+        result = keysizeTracker(DataFlow::TypeTracker::end(), keySize, origin)
      }

      /**
@@ -596,11 +610,7 @@ module Cryptography {
         * explains how we obtained this specific key size.
         */
        int getKeySizeWithOrigin(DataFlow::Node origin) {
-          exists(IntegerLiteral size, IntegerLiteralTrackerConfiguration config |
-            origin.asExpr() = size and
-            config.hasFlow(origin, this.getKeySizeArg()) and
-            result = size.getValue()
-          )
+          this.getKeySizeArg() = keysizeTracker(result, origin)
        }

        /** Gets the minimum key size (in bits) for this algorithm to be considered secure. */
--- a/python/ql/src/semmle/python/dataflow/new/DataFlowOnlyInternalUse.qll
+++ b/python/ql/src/semmle/python/dataflow/new/DataFlowOnlyInternalUse.qll
@@ -1,40 +0,0 @@
-/**
- * INTERNAL: Do not use.
- *
- * This copy exists to allow internal non-query usage of global data-flow analyses. If
- * we used the same copy as was used in multiple queries (A, B, C), then all internal
- * non-query configurations would have to be re-evaluated for _each_ query, which is
- * expensive. By having a separate copy, we avoid this re-evaluation.
- *
- * Provides a library for local (intra-procedural) and global (inter-procedural)
- * data flow analysis: deciding whether data can flow from a _source_ to a
- * _sink_.
- *
- * Unless configured otherwise, _flow_ means that the exact value of
- * the source may reach the sink. We do not track flow across pointer
- * dereferences or array indexing. To track these types of flow, where the
- * exact value may not be preserved, import
- * `semmle.python.dataflow.new.TaintTracking`.
- *
- * To use global (interprocedural) data flow, extend the class
- * `DataFlow::Configuration` as documented on that class. To use local
- * (intraprocedural) data flow, call `DataFlow::localFlow` or
- * `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
- */
-
-private import python
-
-/**
- * INTERNAL: Do not use.
- *
- * This copy exists to allow internal non-query usage of global data-flow analyses. If
- * we used the same copy as was used in multiple queries (A, B, C), then all internal
- * non-query configurations would have to be re-evaluated for _each_ query, which is
- * expensive. By having a separate copy, we avoid this re-evaluation.
- *
- * Provides classes for performing local (intra-procedural) and
- * global (inter-procedural) data flow analyses.
- */
-module DataFlowOnlyInternalUse {
-  import semmle.python.dataflow.new.internal.DataFlowImplOnlyInternalUse
-}
--- a/python/ql/src/semmle/python/dataflow/new/internal/DataFlowImplOnlyInternalUse.qll
+++ b/python/ql/src/semmle/python/dataflow/new/internal/DataFlowImplOnlyInternalUse.qll