unfoldingWord-dev · mXaln · Mar 1, 2018 · Mar 1, 2018 · Mar 2, 2018 · Mar 2, 2018
diff --git a/libraries/client/converters.py b/libraries/client/converters.py
@@ -0,0 +1,65 @@
+import os
+import json
+import re
+from libraries.app.app import App
+
+
+def txt2md(rootdir="."):
+    """
+    Convert JSON-formatted .txt files under rootdir to markdown (.md) files.
+
+    A .txt file whose UTF-8 content starts with "[" or "{" is parsed as JSON;
+    each object having "title" and "body" keys becomes a "# title" heading plus
+    its body. NOTE: every .txt file found is deleted, converted or not.
+
+    :param rootdir: directory to walk recursively
+    :return: True if at least one file was converted, otherwise False
+    """
+    import io  # local import: io.open reads/writes UTF-8 text on Python 2 and 3
+    processed = False
+    for dirpath, _, files in os.walk(rootdir):
+        for fname in (f for f in files if os.path.splitext(f)[1] == ".txt"):
+            filepath = os.path.join(dirpath, fname)
+            try:
+                with io.open(filepath, "r", encoding="utf-8") as data_file:
+                    content = data_file.read()
+                if content.startswith((u"[", u"{")):
+                    data = json.loads(content)
+                    # a lone JSON object is handled like a one-element list
+                    elms = [data] if isinstance(data, dict) else data
+                    md = u"".join(
+                        u"# {0}\n\n{1}\n\n".format(elm["title"], elm["body"])
+                        for elm in elms
+                        if isinstance(elm, dict) and "title" in elm and "body" in elm)
+                    md_filepath = re.sub(r"\.txt$", ".md", filepath)
+                    with io.open(md_filepath, "w", encoding="utf-8") as md_file:
+                        md_file.write(md)
+                    processed = True
+            except Exception as e:  # log and skip a bad file; don't abort the walk
+                App.logger.debug('Error: {0}'.format(e))
+            if os.path.isfile(filepath):
+                os.remove(filepath)
+    return processed
+
+
+def txt2usfm(rootdir="."):
+    """
+    Rename .txt files under rootdir that contain USFM markup to .usfm.
+
+    A file counts as USFM when it starts (after optional whitespace) with a
+    chapter or verse tag; any other .txt file is left untouched.
+
+    :param rootdir: directory to walk recursively
+    :return: True if at least one file was renamed, otherwise False
+    """
+    processed = False
+    for dirpath, _, files in os.walk(rootdir):
+        for fname in (f for f in files if os.path.splitext(f)[1] == ".txt"):
+            filepath = os.path.join(dirpath, fname)
+            with open(filepath, "r") as data_file:
+                is_usfm = re.match(r"^[\s]*\\c|^[\s]*\\v", data_file.read()) is not None
+            # decide per file: a running flag would also rename later non-USFM files
+            if is_usfm and os.path.isfile(filepath):
+                os.rename(filepath, re.sub(r"\.txt$", ".usfm", filepath))
+                processed = True
+    return processed
diff --git a/libraries/client/preprocessors.py b/libraries/client/preprocessors.py
@@ -7,6 +7,7 @@
 from libraries.door43_tools.bible_books import BOOK_NUMBERS, BOOK_NAMES, BOOK_CHAPTER_VERSES
 from libraries.general_tools.file_utils import write_file, read_file
 from libraries.resource_container.ResourceContainer import RC
+from converters import txt2md
 
 
 def do_preprocess(rc, repo_dir, output_dir):
@@ -465,6 +466,13 @@ def run(self):
                     index_json['chapters'][html_file].append(link)
                     markdown += '## <a id="{0}"/> {1} {2}\n\n'.format(link, name, chapter.lstrip('0'))
                     chunk_files = sorted(glob(os.path.join(chapter_dir, '*.md')))
+
+                    chunk_files_txt = sorted(glob(os.path.join(chapter_dir, '*.txt')))
+                    # If there are txt files in chapter folders, convert them to md format
+                    if len(chunk_files_txt):
+                        if txt2md(chapter_dir):
+                            return self.run()
+
                     for chunk_idx, chunk_file in enumerate(chunk_files):
                         start_verse = os.path.splitext(os.path.basename(chunk_file))[0].lstrip('0')
                         if chunk_idx < len(chunk_files)-1:
@@ -515,6 +523,13 @@ def run(self):
                 index_json['chapters'][key] = {}
                 index_json['book_codes'][key] = section
                 term_files = sorted(glob(os.path.join(section_dir, '*.md')))
+
+                term_files_txt = sorted(glob(os.path.join(section_dir, '*.txt')))
+                # If there are txt files in section folders, convert them to md format
+                if len(term_files_txt):
+                    if txt2md(section_dir):
+                        return self.run()
+
                 for term_file in term_files:
                     term = os.path.splitext(os.path.basename(term_file))[0]
                     text = read_file(term_file)
@@ -620,6 +635,13 @@ def run(self):
                     index_json['chapters'][html_file].append(link)
                     markdown += '## <a id="{0}"/> {1} {2}\n\n'.format(link, name, chapter.lstrip('0'))
                     chunk_files = sorted(glob(os.path.join(chapter_dir, '*.md')))
+
+                    chunk_files_txt = sorted(glob(os.path.join(chapter_dir, '*.txt')))
+                    # If there are txt files in chapter folders, convert them to md format
+                    if len(chunk_files_txt):
+                        if txt2md(chapter_dir):
+                            return self.run()
+
                     for move_str in ['front', 'intro']:
                         self.move_to_front(chunk_files, move_str)
                     for chunk_idx, chunk_file in enumerate(chunk_files):