From a8b976b38963fa6c7dfe1f968f7112a2bfbec0cb Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 9 Jul 2024 13:46:37 +0200 Subject: [PATCH 1/6] Python: Always log errors before writing diagnostics So we have the info in the logs if the diagnostics processing fails --- python/extractor/semmle/worker.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/extractor/semmle/worker.py b/python/extractor/semmle/worker.py index 1207caf6727..ebc35ae97d6 100644 --- a/python/extractor/semmle/worker.py +++ b/python/extractor/semmle/worker.py @@ -274,16 +274,16 @@ def _extract_loop(proc_id, queue, trap_dir, archive, options, reply_queue, logge # Syntax errors have already been handled in extractor.py reply_queue.put(("FAILURE", unit, None)) except RecursionError as ex: + logger.error("Failed to extract %s: %s", unit, ex) + logger.traceback(WARN) error = recursion_error_message(ex, unit) diagnostics_writer.write(error) - logger.error("Failed to extract %s: %s", unit, ex) - logger.traceback(WARN) reply_queue.put(("FAILURE", unit, None)) except Exception as ex: - error = internal_error_message(ex, unit) - diagnostics_writer.write(error) logger.error("Failed to extract %s: %s", unit, ex) logger.traceback(WARN) + error = internal_error_message(ex, unit) + diagnostics_writer.write(error) reply_queue.put(("FAILURE", unit, None)) else: reply_queue.put(("SUCCESS", unit, None)) From c1da2c1d2fc2b7766a75860266cb66993e329500 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 9 Jul 2024 13:55:02 +0200 Subject: [PATCH 2/6] Python: Gracefully handle exceptions in diagnostics writing --- python/extractor/semmle/worker.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/python/extractor/semmle/worker.py b/python/extractor/semmle/worker.py index ebc35ae97d6..726c2c90c3b 100644 --- a/python/extractor/semmle/worker.py +++ b/python/extractor/semmle/worker.py @@ -276,14 +276,22 @@ def _extract_loop(proc_id, queue, trap_dir, archive, options, reply_queue, logge except RecursionError as ex: logger.error("Failed to extract %s: %s", unit, ex) logger.traceback(WARN) - error = recursion_error_message(ex, unit) - diagnostics_writer.write(error) + try: + error = recursion_error_message(ex, unit) + diagnostics_writer.write(error) + except Exception as ex: + logger.warning("Failed to write diagnostics: %s", ex) + logger.traceback(WARN) reply_queue.put(("FAILURE", unit, None)) except Exception as ex: logger.error("Failed to extract %s: %s", unit, ex) logger.traceback(WARN) - error = internal_error_message(ex, unit) - diagnostics_writer.write(error) + try: + error = internal_error_message(ex, unit) + diagnostics_writer.write(error) + except Exception as ex: + logger.warning("Failed to write diagnostics: %s", ex) + logger.traceback(WARN) reply_queue.put(("FAILURE", unit, None)) else: reply_queue.put(("SUCCESS", unit, None)) From 6b3625e24e4a1d8b50d64122ad6f4143ea8cd05b Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 9 Jul 2024 13:55:28 +0200 Subject: [PATCH 3/6] Python: Handle diagnostics writing for BuiltinModuleExtractable --- python/extractor/semmle/logging.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/extractor/semmle/logging.py b/python/extractor/semmle/logging.py index fd2dc6a1916..cd83f72864f 100644 --- a/python/extractor/semmle/logging.py +++ b/python/extractor/semmle/logging.py @@ -373,7 +373,8 @@ def syntax_error_message(exception, unit): return error def recursion_error_message(exception, unit): - l = Location(file=unit.path) + # if unit is a BuiltinModuleExtractable, there will be no path attribute + l = Location(file=getattr(unit, "path", str(unit))) return (DiagnosticMessage(Source("py/diagnostics/recursion-error", "Recursion error in Python extractor"), Severity.ERROR) .with_location(l) .text(exception.args[0]) @@ -383,7 +384,8 @@ def recursion_error_message(exception, unit): ) def internal_error_message(exception, unit): - l = Location(file=unit.path) + # if unit is a BuiltinModuleExtractable, there will be no path attribute + l = Location(file=getattr(unit, "path", str(unit))) return (DiagnosticMessage(Source("py/diagnostics/internal-error", "Internal error in Python extractor"), Severity.ERROR) .with_location(l) .text("Internal error") From 60d1dc8af85af2f7eae3f4a516e6c2a2da4f2423 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 9 Jul 2024 13:55:51 +0200 Subject: [PATCH 4/6] Python: Bump extractor version --- python/extractor/semmle/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/extractor/semmle/util.py b/python/extractor/semmle/util.py index 85868a79351..8e1a371fc71 100644 --- a/python/extractor/semmle/util.py +++ b/python/extractor/semmle/util.py @@ -10,7 +10,7 @@ from io import BytesIO #Semantic version of extractor. #Update this if any changes are made -VERSION = "6.1.1" +VERSION = "6.1.2" PY_EXTENSIONS = ".py", ".pyw" From 04f173d14e0aa36b56dac0e99307bcb09440ad03 Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Thu, 11 Jul 2024 21:35:48 +0200 Subject: [PATCH 5/6] SSA: Reduce caching in data-flow integration layer --- shared/ssa/codeql/ssa/Ssa.qll | 50 +++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/shared/ssa/codeql/ssa/Ssa.qll b/shared/ssa/codeql/ssa/Ssa.qll index 87811b005be..fd785540a37 100644 --- a/shared/ssa/codeql/ssa/Ssa.qll +++ b/shared/ssa/codeql/ssa/Ssa.qll @@ -1322,31 +1322,27 @@ module Make Input> { } } - private module Cached { - cached - newtype TNode = - TParamNode(DfInput::Parameter p) { DfInput::ssaDefInitializesParam(_, p) } or - TExprNode(DfInput::Expr e, Boolean isPost) { - e = DfInput::getARead(_) - or - DfInput::ssaDefAssigns(_, e) and - isPost = false - } or - TSsaDefinitionNode(DefinitionExt def) or - TSsaInputNode(SsaInputDefinitionExt def, BasicBlock input) { - def.hasInputFromBlock(_, _, _, _, input) - } - - cached - Definition getAPhiInputDef(SsaInputNode n) { - exists(SsaInputDefinitionExt phi, BasicBlock bb | - phi.hasInputFromBlock(result, _, _, _, bb) and - n.isInputInto(phi, bb) - ) - } + cached + private DefinitionExt getAPhiInputDef(SsaInputDefinitionExt phi, BasicBlock bb) { + phi.hasInputFromBlock(result, _, _, _, bb) } - private import Cached + private newtype TNode = + TParamNode(DfInput::Parameter p) { + exists(WriteDefinition def | DfInput::ssaDefInitializesParam(def, p)) + } or + TExprNode(DfInput::Expr e, Boolean isPost) { + e = DfInput::getARead(_) + or + exists(DefinitionExt def | + DfInput::ssaDefAssigns(def, e) and + isPost = false + ) + } or + TSsaDefinitionNode(DefinitionExt def) or + TSsaInputNode(SsaInputDefinitionExt phi, BasicBlock input) { + exists(getAPhiInputDef(phi, input)) + } /** * A data flow node that we need to reference in the value step relation. @@ -1627,6 +1623,14 @@ module Make Input> { */ signature predicate guardChecksSig(DfInput::Guard g, DfInput::Expr e, boolean branch); + pragma[nomagic] + private Definition getAPhiInputDef(SsaInputNode n) { + exists(SsaInputDefinitionExt phi, BasicBlock bb | + result = getAPhiInputDef(phi, bb) and + n.isInputInto(phi, bb) + ) + } + /** * Provides a set of barrier nodes for a guard that validates an expression. * From 354394d4c2958144296652c86b4b53d07429c897 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 12 Jul 2024 13:36:41 +0200 Subject: [PATCH 6/6] Python: Don't use fake locations in diagnostics Some of the internal tooling would not be too happy about this :D --- python/extractor/semmle/logging.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/extractor/semmle/logging.py b/python/extractor/semmle/logging.py index cd83f72864f..0e0b173a4d7 100644 --- a/python/extractor/semmle/logging.py +++ b/python/extractor/semmle/logging.py @@ -374,7 +374,7 @@ def syntax_error_message(exception, unit): def recursion_error_message(exception, unit): # if unit is a BuiltinModuleExtractable, there will be no path attribute - l = Location(file=getattr(unit, "path", str(unit))) + l = Location(file=unit.path) if hasattr(unit, "path") else None return (DiagnosticMessage(Source("py/diagnostics/recursion-error", "Recursion error in Python extractor"), Severity.ERROR) .with_location(l) .text(exception.args[0]) @@ -385,7 +385,7 @@ def recursion_error_message(exception, unit): def internal_error_message(exception, unit): # if unit is a BuiltinModuleExtractable, there will be no path attribute - l = Location(file=getattr(unit, "path", str(unit))) + l = Location(file=unit.path) if hasattr(unit, "path") else None return (DiagnosticMessage(Source("py/diagnostics/internal-error", "Internal error in Python extractor"), Severity.ERROR) .with_location(l) .text("Internal error")