Python: Use type-tracking for integer literal tracking

This mirrors what we've done for pretty much everything else. It is an experiment
to see what this means for query performance.
This commit is contained in:
Rasmus Wriedt Larsen
2021-02-23 17:16:49 +01:00
parent 27987717dc
commit c195c64982
4 changed files with 28 additions and 4212 deletions

View File

@@ -6,7 +6,6 @@
import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.DataFlowOnlyInternalUse
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Frameworks
@@ -563,19 +562,34 @@ module Cryptography {
/** Provides classes for modeling new key-pair generation APIs. */
module KeyGeneration {
/**
* A data-flow configuration for tracking integer literals.
*/
private class IntegerLiteralTrackerConfiguration extends DataFlowOnlyInternalUse::Configuration {
IntegerLiteralTrackerConfiguration() { this = "IntegerLiteralTrackerConfiguration" }
/**
 * Gets a reference to an integer literal, as well as the origin of the integer literal.
 *
 * `t` is the type-tracker state associated with the result, `keySize` is the value of
 * the integer literal, and `origin` is the data-flow node of the literal itself.
 */
private DataFlow::Node keysizeTracker(
DataFlow::TypeTracker t, int keySize, DataFlow::Node origin
) {
// Base case: the node is an integer literal, so it is its own origin.
t.start() and
result.asExpr().(IntegerLiteral).getValue() = keySize and
origin = result
or
// Recursive case: `keySize` and `origin` are carried over unchanged from an
// already-tracked node, and `t` is extended with the summary of the step taken.
// Due to bad performance when using the normal setup, we have inlined that code
// and forced a join (see `keysizeTracker_first_join`).
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
keysizeTracker_first_join(t2, keySize, origin, result, summary) and
t = t2.append(summary)
)
)
}
override predicate isSource(DataFlow::Node source) {
source = DataFlow::exprNode(any(IntegerLiteral size))
}
/**
 * Holds if `res` is reachable in a single step, described by `summary`, from a node
 * returned by `keysizeTracker(t2, keySize, origin)`.
 *
 * Helper for the recursive case of `keysizeTracker`.
 * NOTE(review): `pragma[nomagic]` is presumably what forces the join mentioned in the
 * comment inside `keysizeTracker` -- confirm against the QL annotation reference.
 */
pragma[nomagic]
private predicate keysizeTracker_first_join(
DataFlow::TypeTracker t2, int keySize, DataFlow::Node origin, DataFlow::Node res,
DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(keysizeTracker(t2, keySize, origin), res, summary)
}
override predicate isSink(DataFlow::Node sink) {
sink = any(KeyGeneration::Range kg).getKeySizeArg()
}
/**
 * Gets a reference to an integer literal, as well as the origin of the integer literal.
 *
 * This is the variant without a type-tracker parameter: it only yields results of the
 * type-tracker-based `keysizeTracker` whose state is `DataFlow::TypeTracker::end()`.
 * `keySize` is the value of the literal and `origin` is the node of the literal itself.
 */
private DataFlow::Node keysizeTracker(int keySize, DataFlow::Node origin) {
result = keysizeTracker(DataFlow::TypeTracker::end(), keySize, origin)
}
/**
@@ -596,11 +610,7 @@ module Cryptography {
* explains how we obtained this specific key size.
*/
int getKeySizeWithOrigin(DataFlow::Node origin) {
exists(IntegerLiteral size, IntegerLiteralTrackerConfiguration config |
origin.asExpr() = size and
config.hasFlow(origin, this.getKeySizeArg()) and
result = size.getValue()
)
this.getKeySizeArg() = keysizeTracker(result, origin)
}
/** Gets the minimum key size (in bits) for this algorithm to be considered secure. */

View File

@@ -1,40 +0,0 @@
/**
* INTERNAL: Do not use.
*
* This copy exists to allow internal non-query usage of global data-flow analyses. If
* we used the same copy as was used in multiple queries (A, B, C), then all internal
* non-query configurations would have to be re-evaluated for _each_ query, which is
* expensive. By having a separate copy, we avoid this re-evaluation.
*
* Provides a library for local (intra-procedural) and global (inter-procedural)
* data flow analysis: deciding whether data can flow from a _source_ to a
* _sink_.
*
* Unless configured otherwise, _flow_ means that the exact value of
* the source may reach the sink. We do not track flow across pointer
* dereferences or array indexing. To track these types of flow, where the
* exact value may not be preserved, import
* `semmle.python.dataflow.new.TaintTracking`.
*
* To use global (interprocedural) data flow, extend the class
* `DataFlow::Configuration` as documented on that class. To use local
* (intraprocedural) data flow, call `DataFlow::localFlow` or
* `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
*/
private import python
/**
 * INTERNAL: Do not use.
 *
 * This copy exists to allow internal non-query usage of global data-flow analyses. If
 * we used the same copy as was used in multiple queries (A, B, C), then all internal
 * non-query configurations would have to be re-evaluated for _each_ query, which is
 * expensive. By having a separate copy, we avoid this re-evaluation.
 *
 * Provides classes for performing local (intra-procedural) and
 * global (inter-procedural) data flow analyses.
 */
module DataFlowOnlyInternalUse {
// Non-private import: the contents of the dedicated internal-use implementation copy
// are made available as members of this module.
import semmle.python.dataflow.new.internal.DataFlowImplOnlyInternalUse
}