Skip to content
This repository has been archived by the owner on May 22, 2024. It is now read-only.

tQ, tW, tN - txt2md converter (fixer) #207

Open
wants to merge 16 commits into
base: develop
Choose a base branch
from
65 changes: 65 additions & 0 deletions libraries/client/converters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import io
import json
import os
import re

from libraries.app.app import App


def _convert_json_txt(filepath):
    """
    Converts a single JSON-formatted txt file to a markdown file next to it.

    The file is treated as JSON when its content starts with "[" or "{".
    Every dict entry that has both "title" and "body" keys becomes a
    "# title" heading followed by the body text.

    :param filepath: path of the .txt file to convert
    :return: True if the .md file was written, False if the file is not JSON
             or could not be read, parsed or written (the error is logged)
    """
    try:
        # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM, which
        # would otherwise hide the opening bracket from the check below.
        with io.open(filepath, "r", encoding="utf-8-sig") as data_file:
            content = data_file.read()

        # if content of the file starts from the valid json character
        # then it's a json file
        if not content.startswith((u"[", u"{")):
            return False

        data = json.loads(content)
        parts = []
        for elm in data:
            # Only dict entries are real title/body records; on a string,
            # `in` would be a substring test and indexing would fail.
            if isinstance(elm, dict) and "title" in elm and "body" in elm:
                parts.append(u"# " + elm["title"] + u"\n\n")
                parts.append(elm["body"] + u"\n\n")

        md_filepath = os.path.splitext(filepath)[0] + ".md"
        # Write explicitly as UTF-8 so non-ASCII text does not depend on the
        # interpreter's default encoding.
        with io.open(md_filepath, "w", encoding="utf-8") as md_file:
            md_file.write(u"".join(parts))
    except Exception as e:
        # Best effort: a broken file is logged and skipped. KeyboardInterrupt
        # and SystemExit are deliberately not caught here.
        App.logger.debug('Error: {0}'.format(e))
        return False

    return True


def txt2md(rootdir="."):
    """
    Converts txt files to markdown

    Walks rootdir recursively and converts every JSON-formatted .txt file
    into a .md file with the same base name. A source .txt file is removed
    only after its .md file was written; files that are not JSON or that
    fail to convert are left in place.

    :param rootdir: directory to scan (defaults to the current directory)
    :return: True if at least one file was converted, otherwise False
    """
    processed = False
    for dirpath, _subdirs, files in os.walk(rootdir):
        for fname in files:
            if os.path.splitext(fname)[1] != ".txt":
                continue

            filepath = os.path.join(dirpath, fname)
            if _convert_json_txt(filepath):
                processed = True
                if os.path.isfile(filepath):
                    os.remove(filepath)

    return processed


def txt2usfm(rootdir="."):
    """
    Converts txt files to usfm

    Walks rootdir recursively and renames every .txt file whose content
    starts (after optional whitespace) with a usfm chapter or verse tag to
    the same base name with a .usfm extension. Each file is judged on its
    own content; other .txt files are left untouched.

    :param rootdir: directory to scan (defaults to the current directory)
    :return: True if at least one file was renamed, otherwise False
    """
    processed = False
    for dirpath, _subdirs, files in os.walk(rootdir):
        for fname in files:
            if os.path.splitext(fname)[1] != ".txt":
                continue

            filepath = os.path.join(dirpath, fname)
            # The content is only inspected, never written back, so undecodable
            # bytes are replaced instead of aborting the walk. "utf-8-sig"
            # drops a leading BOM that would otherwise defeat the ^\s* match.
            with io.open(filepath, "r", encoding="utf-8-sig",
                         errors="replace") as data_file:
                # if content of the file starts from the valid usfm chapter or verse tag
                # then it's a usfm file
                is_usfm = re.match(r"^[\s]*\\c|^[\s]*\\v", data_file.read()) is not None

            # Rename after the file is closed, and only for this file: the
            # decision must not leak from one file to the next.
            if is_usfm:
                usfm_filepath = os.path.splitext(filepath)[0] + ".usfm"
                os.rename(filepath, usfm_filepath)
                processed = True

    return processed
22 changes: 22 additions & 0 deletions libraries/client/preprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from libraries.door43_tools.bible_books import BOOK_NUMBERS, BOOK_NAMES, BOOK_CHAPTER_VERSES
from libraries.general_tools.file_utils import write_file, read_file
from libraries.resource_container.ResourceContainer import RC
from converters import txt2md


def do_preprocess(rc, repo_dir, output_dir):
Expand Down Expand Up @@ -465,6 +466,13 @@ def run(self):
index_json['chapters'][html_file].append(link)
markdown += '## <a id="{0}"/> {1} {2}\n\n'.format(link, name, chapter.lstrip('0'))
chunk_files = sorted(glob(os.path.join(chapter_dir, '*.md')))

chunk_files_txt = sorted(glob(os.path.join(chapter_dir, '*.txt')))
# If there are txt files in chapter folders, convert them to md format
if len(chunk_files_txt):
if txt2md(chapter_dir):
return self.run()

for chunk_idx, chunk_file in enumerate(chunk_files):
start_verse = os.path.splitext(os.path.basename(chunk_file))[0].lstrip('0')
if chunk_idx < len(chunk_files)-1:
Expand Down Expand Up @@ -515,6 +523,13 @@ def run(self):
index_json['chapters'][key] = {}
index_json['book_codes'][key] = section
term_files = sorted(glob(os.path.join(section_dir, '*.md')))

term_files_txt = sorted(glob(os.path.join(section_dir, '*.txt')))
# If there are txt files in section folders, convert them to md format
if len(term_files_txt):
if txt2md(section_dir):
return self.run()

for term_file in term_files:
term = os.path.splitext(os.path.basename(term_file))[0]
text = read_file(term_file)
Expand Down Expand Up @@ -620,6 +635,13 @@ def run(self):
index_json['chapters'][html_file].append(link)
markdown += '## <a id="{0}"/> {1} {2}\n\n'.format(link, name, chapter.lstrip('0'))
chunk_files = sorted(glob(os.path.join(chapter_dir, '*.md')))

chunk_files_txt = sorted(glob(os.path.join(chapter_dir, '*.txt')))
# If there are txt files in chapter folders, convert them to md format
if len(chunk_files_txt):
if txt2md(chapter_dir):
return self.run()

for move_str in ['front', 'intro']:
self.move_to_front(chunk_files, move_str)
for chunk_idx, chunk_file in enumerate(chunk_files):
Expand Down