Skip to content

Commit

Permalink
Fixes #99: Add option to set file encoding to CSV reader.
Browse files: browse the repository at this point in the history
  • Loading branch information
awtimmering committed May 6, 2024
1 parent 246427f commit dba469e
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions beancount_reds_importers/libreader/csvreader.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -67,14 +67,15 @@ def initialize_reader(self, file):
if getattr(self, "file", None) != file:
self.file = file
self.reader_ready = self.deep_identify(file)
self.file_encoding = getattr(self, "file_encoding", None)
if self.reader_ready:
self.file_read_done = False
# else:
# print("header_identifier failed---------------:")
# print(self.header_identifier, file.head())

def deep_identify(self, file):
return re.match(self.header_identifier, file.head())
return re.match(self.header_identifier, file.head(encoding=self.file_encoding))

def file_date(self, file):
"Get the maximum date from the file."
Expand Down Expand Up @@ -135,7 +136,7 @@ def convert_date(d):
return rdr

def read_raw(self, file):
return etl.fromcsv(file.name)
return etl.fromcsv(file.name, encoding=self.file_encoding)

def skip_until_main_table(self, rdr, col_labels=None):
"""Skip csv lines until the header line is found."""
Expand Down Expand Up @@ -186,7 +187,9 @@ def read_file(self, file):
rdr = self.prepare_raw_file(rdr)

# extract main table
rdr = rdr.skip(getattr(self, "skip_head_rows", 0)) # chop unwanted header rows
rdr = rdr.skip(
getattr(self, "skip_head_rows", 0)
) # chop unwanted header rows
rdr = rdr.head(
len(rdr) - getattr(self, "skip_tail_rows", 0) - 1
) # chop unwanted footer rows
Expand Down

0 comments on commit dba469e

Please sign in to comment.