From 67b45164eb86fa7ca9895506f86ca8814c7e0a71 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 22 Jul 2020 15:19:57 +0200 Subject: [PATCH] Python: CG trace: Partial matching of BytecodeExpr and AST not safe --- .../ql/RecordedCalls.qll | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/python/tools/recorded-call-graph-metrics/ql/RecordedCalls.qll b/python/tools/recorded-call-graph-metrics/ql/RecordedCalls.qll index 5117d13f831..6c4d28b301d 100644 --- a/python/tools/recorded-call-graph-metrics/ql/RecordedCalls.qll +++ b/python/tools/recorded-call-graph-metrics/ql/RecordedCalls.qll @@ -77,14 +77,17 @@ class XMLCall extends XMLElement { bytecode.(XMLBytecodeAttribute).get_object_data()) or matchBytecodeExpr(expr.(Call).getFunc(), bytecode.(XMLBytecodeCall).get_function_data()) - // I experimented with allowing partial matches with - // ``` - // or - // bytecode instanceof XMLBytecodeUnknown - // ``` - // but that only gave 1% improvement for Identified calls with approx 4200 calls - // in total (only supporting BytecodeVariableName/BytecodeAttribute/BytecodeCall). - // Since it's a potential performance problem, I did not enable. + // I considered allowing a partial match as well. That is, if the bytecode + // expression information only tells us `.foo()`, and we find an AST + // expression that matches on `.foo()`, that is good enough. + // + // However, we cannot assume that all calls are recorded (such as `range(10)`), + // and we cannot assume that for all recorded calls there exists a corresponding + // AST call (such as for list-comprehensions). + // + // So allowing partial matches is not safe, since we might end up matching a + // recorded call not in the AST together with an unrecorded call visible in the + // AST. ) } }