From f0aa815a9aab51a38fa08c0dd5f0143bd91d1d2f Mon Sep 17 00:00:00 2001
From: Michael Hohn <hohn@github.com>
Date: Sun, 21 Nov 2021 16:42:11 -0800
Subject: [PATCH] Fix encoding read error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a read using
: with open(fname, 'r') as file:
hits the accented letter á in Vrána in the file
: data/wxWidgets-small/src/stc/scintilla/lexers/LexCSS.cxx
it results in a
: UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe1 in position 119: invalid continuation byte

We are reading source code, so we likely don't care about decoding non-ASCII
characters exactly; using
: with codecs.open(fname, 'r', encoding="latin-1") as file:
avoids this problem, because latin-1 maps every byte to a character and so
decoding never fails.
---
 sarif_cli/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sarif_cli/__init__.py b/sarif_cli/__init__.py
index 4ad726d..90927f9 100644
--- a/sarif_cli/__init__.py
+++ b/sarif_cli/__init__.py
@@ -1,6 +1,7 @@
 import sys
 import os
 import re
+import codecs
 
 MIN_PYTHON = (3, 7)
 if sys.version_info < MIN_PYTHON:
@@ -97,7 +98,7 @@ def load_lines(root, path, line_from, line_to):
     if not os.path.exists(fname):
         dbg("Missing file: %s" % fname)
         return []
-    with open(fname, 'r') as file:
+    with codecs.open(fname, 'r', encoding="latin-1") as file:
         lines = file.readlines()
         return [line.rstrip("\n\r").replace("\t", " ")
                 for line in lines[line_from-1 : line_to-1+1]]