From dba469e9515f9dbbe4d7dcad1574098e0ede8b02 Mon Sep 17 00:00:00 2001
From: Ad Timmering
Date: Mon, 6 May 2024 20:10:01 +0900
Subject: [PATCH] Fixes #99: Add option to set file encoding to CSV reader.

An importer may define a `file_encoding` attribute. Its value is passed
as `encoding=` to file.head() in deep_identify() and to etl.fromcsv() in
read_raw(). When the attribute is not defined, initialize_reader() sets
it to None.

initialize_reader() resolves `file_encoding` *before* calling
deep_identify(), because deep_identify() reads self.file_encoding and
would otherwise raise AttributeError on importers that never set it.

The last hunk only re-wraps the `rdr.skip(...)` call; no behavior change.
---
NOTE(review): line breaks, indentation and blank context lines in this
patch were reconstructed from a whitespace-collapsed copy. Verify the
context lines against csvreader.py (or apply with --ignore-whitespace).
The commit and index hashes are those of the original patch.

 beancount_reds_importers/libreader/csvreader.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/beancount_reds_importers/libreader/csvreader.py b/beancount_reds_importers/libreader/csvreader.py
index 92f8611..d57c35f 100644
--- a/beancount_reds_importers/libreader/csvreader.py
+++ b/beancount_reds_importers/libreader/csvreader.py
@@ -67,6 +67,7 @@ def initialize_reader(self, file):
         if getattr(self, "file", None) != file:
             self.file = file
+            self.file_encoding = getattr(self, "file_encoding", None)
             self.reader_ready = self.deep_identify(file)
             if self.reader_ready:
                 self.file_read_done = False
             # else:
@@ -74,7 +75,7 @@ def initialize_reader(self, file):
             #     print(self.header_identifier, file.head())
 
     def deep_identify(self, file):
-        return re.match(self.header_identifier, file.head())
+        return re.match(self.header_identifier, file.head(encoding=self.file_encoding))
 
     def file_date(self, file):
         "Get the maximum date from the file."
@@ -135,7 +136,7 @@ def convert_date(d):
         return rdr
 
     def read_raw(self, file):
-        return etl.fromcsv(file.name)
+        return etl.fromcsv(file.name, encoding=self.file_encoding)
 
     def skip_until_main_table(self, rdr, col_labels=None):
         """Skip csv lines until the header line is found."""
@@ -186,7 +187,9 @@ def read_file(self, file):
             rdr = self.prepare_raw_file(rdr)
 
             # extract main table
-            rdr = rdr.skip(getattr(self, "skip_head_rows", 0))  # chop unwanted header rows
+            rdr = rdr.skip(
+                getattr(self, "skip_head_rows", 0)
+            )  # chop unwanted header rows
             rdr = rdr.head(
                 len(rdr) - getattr(self, "skip_tail_rows", 0) - 1
             )  # chop unwanted footer rows