diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 62165446..1fccc7dd 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -121,5 +121,5 @@ jobs:
         # This test will fail if the output encoding is cp1252
         # Warning: the diff line below is PowerShell syntax, not bash!
         run: |
-          echo ćś | readalongs make-xml -l fra - - > cs.readalong
+          echo ćś | readalongs make-xml -l fra - - | findstr /v meta > cs.readalong
           if (diff (cat cs.readalong) (cat test/data/cs-ref.readalong)) { throw "Output did not match reference" }
diff --git a/.gitignore b/.gitignore
index 60c0f607..cb3111ba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -208,3 +208,7 @@ $RECYCLE.BIN/
 *.lnk
 
 # End of https://www.gitignore.io/api/linux,macos,python,windows,visualstudiocode
+
+# Temporary files
+.tmp
+.conda
diff --git a/docs/cli-guide.md b/docs/cli-guide.md
index 5c0f1583..b683431d 100644
--- a/docs/cli-guide.md
+++ b/docs/cli-guide.md
@@ -67,7 +67,7 @@ The format of the generated XML is based on [TEI
 Lite](https://tei-c.org/guidelines/customization/lite/) but is
 considerably simplified.  The DTD (document type definition) can be
 found in the ReadAlong Studio source code under
-`readalongs/static/read-along-1.0.dtd`.
+`readalongs/static/read-along-1.1.dtd`.
 
 (dna)=
 
diff --git a/readalongs/_version.py b/readalongs/_version.py
index 2a3eb2f3..a70b1685 100644
--- a/readalongs/_version.py
+++ b/readalongs/_version.py
@@ -1 +1,3 @@
 VERSION = "1.1.0"
+
+READALONG_FILE_FORMAT_VERSION = "1.1"
diff --git a/readalongs/align.py b/readalongs/align.py
index 5e2259aa..60237483 100644
--- a/readalongs/align.py
+++ b/readalongs/align.py
@@ -18,6 +18,7 @@
 from pympi.Praat import TextGrid
 from webvtt import Caption, WebVTT
 
+from readalongs._version import READALONG_FILE_FORMAT_VERSION, VERSION
 from readalongs.audio_utils import (
     extract_section,
     mute_section,
@@ -175,7 +176,7 @@ def parse_and_make_xml(
     """Parse XML input and run tokenization and G2P.
 
     Args:
-        xml_path (str): Path to input in ReadAlong XML format (see static/read-along-1.0.dtd)
+        xml_path (str): Path to input in ReadAlong XML format (see static/read-along-1.1.dtd)
         config (dict): Optional; ReadAlong-Studio configuration to use
         save_temps (str): Optional; Save temporary files, by default None
         verbose_g2p_warnings (boolean): Optional; display all g2p errors and warnings
@@ -568,7 +569,7 @@ def align_audio(
     """Align an XML input file to an audio file.
 
     Args:
-        xml_path (str): Path to input file in ReadAlong XML format (see static/read-along-1.0.dtd)
+        xml_path (str): Path to input file in ReadAlong XML format (see static/read-along-1.1.dtd)
         audio_path (str): Path to audio input. Must be in a format supported by ffmpeg
         unit (str): Optional; Element to create alignments for, by default 'w'
         bare (boolean): Optional;
@@ -1156,7 +1157,8 @@ def convert_to_xhtml(tokenized_xml, title="Book"):
 
 # TODO: add this <!-- DO NOT USE THIS DATA WITHOUT EXPLICIT PERMISSION --> to template
 RAS_TEMPLATE = """<?xml version='1.0' encoding='utf-8'?>
-<read-along version="1.0">
+<read-along version="{{format_version}}">
+    <meta name="generator" content="@readalongs/studio (cli) {{studio_version}}" />
     <text xml:lang="{{main_lang}}" fallback-langs="{{fallback_langs}}">
         <body>
         {{#pages}}
@@ -1177,7 +1179,7 @@ def convert_to_xhtml(tokenized_xml, title="Book"):
 
 
 def create_ras_from_text(lines: Iterable[str], text_languages=Sequence[str]) -> str:
-    """Create input xml in ReadAlong XML format (see static/read-along-1.0.dtd)
+    """Create input xml in ReadAlong XML format (see static/read-along-1.1.dtd)
         Uses the line sequence to infer paragraph and sentence structure from plain text:
         Assumes a double blank line marks a page break, and a single blank line
         marks a paragraph break.
@@ -1194,6 +1196,8 @@ def create_ras_from_text(lines: Iterable[str], text_languages=Sequence[str]) ->
     kwargs = {
         "main_lang": text_languages[0],
         "fallback_langs": ",".join(text_languages[1:]),
+        "studio_version": VERSION,
+        "format_version": READALONG_FILE_FORMAT_VERSION,
     }
     pages: List[dict] = []
     paragraphs: List[dict] = []
@@ -1223,7 +1227,7 @@ def create_ras_from_text(lines: Iterable[str], text_languages=Sequence[str]) ->
 
 
 def create_input_ras(**kwargs):
-    """Create input xml in ReadAlong XML format (see static/read-along-1.0.dtd)
+    """Create input xml in ReadAlong XML format (see static/read-along-1.1.dtd)
         Uses readlines to infer paragraph and sentence structure from plain text.
         Assumes a double blank line marks a page break, and a single blank line
         marks a paragraph break.
diff --git a/readalongs/static/read-along-1.1.dtd b/readalongs/static/read-along-1.1.dtd
new file mode 100644
index 00000000..8202e169
--- /dev/null
+++ b/readalongs/static/read-along-1.1.dtd
@@ -0,0 +1,103 @@
+<!-- VERSION: 1.1 -->
+<!ELEMENT read-along (meta|text|body|div|span|anchor|silence|graphic|p|s|w)*>
+<!ATTLIST read-along
+ use-assets-folder CDATA #IMPLIED
+ href CDATA #IMPLIED
+ audio CDATA #IMPLIED
+ xml:lang CDATA #IMPLIED
+ language CDATA #IMPLIED
+ lang CDATA #IMPLIED
+ version CDATA #IMPLIED>
+
+<!ELEMENT text (body|div|span|anchor|silence|graphic|p|s|w)*>
+<!ATTLIST text
+ xml:lang CDATA #IMPLIED
+ lang CDATA #IMPLIED
+ fallback-langs CDATA #IMPLIED
+ id CDATA #IMPLIED>
+
+<!ELEMENT body (div|span|anchor|silence|graphic|p|s|w)*>
+<!ATTLIST body
+ xml:lang CDATA #IMPLIED
+ lang CDATA #IMPLIED
+ id CDATA #IMPLIED>
+
+<!ELEMENT anchor EMPTY>
+<!ATTLIST anchor time CDATA #REQUIRED>
+
+<!ELEMENT silence EMPTY>
+<!ATTLIST silence dur CDATA #REQUIRED>
+
+<!ELEMENT graphic EMPTY>
+<!ATTLIST graphic
+ url CDATA #REQUIRED
+ id CDATA #IMPLIED>
+
+<!ELEMENT div (#PCDATA|div|span|anchor|silence|graphic|p|s|w)*>
+<!ATTLIST div
+ xml:lang CDATA #IMPLIED
+ lang CDATA #IMPLIED
+ id CDATA #IMPLIED
+ class CDATA #IMPLIED
+ type CDATA #IMPLIED
+ do-not-align CDATA #IMPLIED
+ time CDATA #IMPLIED
+ dur CDATA #IMPLIED>
+
+<!ELEMENT span (#PCDATA|div|span|anchor|silence|graphic|p|s|w)*>
+<!ATTLIST span
+ xml:lang CDATA #IMPLIED
+ lang CDATA #IMPLIED
+ id CDATA #IMPLIED
+ class CDATA #IMPLIED
+ type CDATA #IMPLIED
+ do-not-align CDATA #IMPLIED
+ time CDATA #IMPLIED
+ dur CDATA #IMPLIED>
+
+<!ELEMENT p (#PCDATA|span|anchor|silence|s|w)*>
+<!ATTLIST p
+ xml:lang CDATA #IMPLIED
+ lang CDATA #IMPLIED
+ id CDATA #IMPLIED
+ class CDATA #IMPLIED
+ do-not-align CDATA #IMPLIED
+ time CDATA #IMPLIED
+ dur CDATA #IMPLIED>
+
+<!ELEMENT s (#PCDATA|span|anchor|silence|w)*>
+<!ATTLIST s
+ xml:lang CDATA #IMPLIED
+ lang CDATA #IMPLIED
+ id CDATA #IMPLIED
+ class CDATA #IMPLIED
+ do-not-align CDATA #IMPLIED
+ time CDATA #IMPLIED
+ dur CDATA #IMPLIED>
+
+<!ELEMENT w (#PCDATA|span|syl)*>
+<!ATTLIST w
+ xml:lang CDATA #IMPLIED
+ effective-g2p-lang CDATA #IMPLIED
+ lang CDATA #IMPLIED
+ id CDATA #IMPLIED
+ class CDATA #IMPLIED
+ do-not-align CDATA #IMPLIED
+ ARPABET CDATA #IMPLIED
+ time CDATA #IMPLIED
+ dur CDATA #IMPLIED>
+
+<!ELEMENT syl (#PCDATA|span)*>
+<!ATTLIST syl
+ xml:lang CDATA #IMPLIED
+ lang CDATA #IMPLIED
+ id CDATA #IMPLIED
+ class CDATA #IMPLIED
+ do-not-align CDATA #IMPLIED
+ ARPABET CDATA #IMPLIED
+ time CDATA #IMPLIED
+ dur CDATA #IMPLIED>
+
+<!ELEMENT meta EMPTY>
+<!ATTLIST meta name CDATA #REQUIRED
+ content CDATA #REQUIRED>
diff --git a/readalongs/text/make_package.py b/readalongs/text/make_package.py
index 7482ad80..312f8ba4 100644
--- a/readalongs/text/make_package.py
+++ b/readalongs/text/make_package.py
@@ -37,7 +37,7 @@
   <meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=1.0, maximum-scale=5.0">
   <meta name="application-name" content="read along">
   <meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=1.0, maximum-scale=5.0">
-  <meta name="generator" content="@readalongs/studio-cli {studio_version}">
+  <meta name="generator" content="@readalongs/studio (cli) {studio_version}">
   <title>{title}</title>
   <script>{js}</script>
   <style attribution="See https://fonts.google.com/attribution for copyrights and font attribution">{fonts}</style>
diff --git a/readalongs/text/util.py b/readalongs/text/util.py
index f283693a..6243f4f6 100644
--- a/readalongs/text/util.py
+++ b/readalongs/text/util.py
@@ -230,7 +230,7 @@ def copy_file_to_zip(zip_path, origin_path, destination_path):
         <meta charset="UTF-8">
         <meta name="application-name" content="read along">
         <meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=1.0, maximum-scale=5.0">
-        <meta name="generator" content="@readalongs/studio-cli {studio_version}">
+        <meta name="generator" content="@readalongs/studio (cli) {studio_version}">
         <title>{title}</title>
         <!-- Import fonts. Material Icons are needed by the web component -->
         <link href="https://fonts.googleapis.com/css?family=Lato%7CMaterial+Icons%7CMaterial+Icons+Outlined" rel="stylesheet">
diff --git a/readalongs/web_api.py b/readalongs/web_api.py
index 9e30bc63..29b2e5e8 100644
--- a/readalongs/web_api.py
+++ b/readalongs/web_api.py
@@ -41,6 +41,7 @@
 from pydantic import BaseModel, Field
 from starlette.background import BackgroundTask
 
+from readalongs._version import READALONG_FILE_FORMAT_VERSION, VERSION
 from readalongs.align import create_ras_from_text, save_label_files, save_subtitles
 from readalongs.log import LOGGER, capture_logs
 from readalongs.text.add_ids_to_xml import add_ids
@@ -77,7 +78,7 @@
 # Call get_langs() when the server loads to load the languages into memory
 LANGS = get_langs()
 # Get the DTD
-DTDPATH = os.path.join(os.path.dirname(__file__), "static", "read-along-1.0.dtd")
+DTDPATH = os.path.join(os.path.dirname(__file__), "static", "read-along-1.1.dtd")
 with open(DTDPATH) as dtdfh:
     DTD = etree.DTD(dtdfh)
 
@@ -323,7 +324,8 @@ class ConvertRequest(BaseModel):
             dedent(
                 """\
                 <?xml version='1.0' encoding='utf-8'?>
-                <read-along version="1.0">
+                <read-along version="%s">
+                    <meta name="generator" content="@readalongs/studio (cli) %s"/>
                     <text xml:lang="dan" fallback-langs="und" id="t0">
                         <body id="t0b0">
                             <div type="page" id="t0b0d0">
@@ -337,6 +339,7 @@ class ConvertRequest(BaseModel):
                         </body>
                     </text>
                 </read-along>"""
+                % (READALONG_FILE_FORMAT_VERSION, VERSION)
             )
         ],
     )
diff --git a/test/data/cs-ref.readalong b/test/data/cs-ref.readalong
index 71f3d659..b5d09e82 100644
--- a/test/data/cs-ref.readalong
+++ b/test/data/cs-ref.readalong
@@ -1,5 +1,5 @@
 <?xml version='1.0' encoding='utf-8'?>
-<read-along version="1.0">
+<read-along version="1.1">
     <text xml:lang="fra" fallback-langs="und">
         <body>
             <div type="page">
diff --git a/test/data/ej-fra-anchors.readalong b/test/data/ej-fra-anchors.readalong
index bef3896c..8f593076 100644
--- a/test/data/ej-fra-anchors.readalong
+++ b/test/data/ej-fra-anchors.readalong
@@ -1,5 +1,6 @@
 <?xml version='1.0' encoding='utf-8'?>
-<read-along version="1.0">
+<read-along version="{{format_version}}">
+    <meta name="generator" content="@readalongs/studio (cli) {{studio_version}}"/>
     <text xml:lang="fra">
         <body>
             <div type="page">
diff --git a/test/data/ej-fra-anchors2.readalong b/test/data/ej-fra-anchors2.readalong
index b929b7e9..f2595346 100644
--- a/test/data/ej-fra-anchors2.readalong
+++ b/test/data/ej-fra-anchors2.readalong
@@ -1,5 +1,6 @@
 <?xml version='1.0' encoding='utf-8'?>
-<read-along version="1.0">
+<read-along version="{{format_version}}">
+    <meta name="generator" content="@readalongs/studio (cli) {{studio_version}}"/>
     <text xml:lang="fra">
         <anchor time=".5s"/>
         <body>
diff --git a/test/data/ej-fra-dna.readalong b/test/data/ej-fra-dna.readalong
index bf885889..73bdbb93 100644
--- a/test/data/ej-fra-dna.readalong
+++ b/test/data/ej-fra-dna.readalong
@@ -1,5 +1,6 @@
 <?xml version='1.0' encoding='utf-8'?>
-<read-along version="1.0">
+<read-along version="{{format_version}}">
+    <meta name="generator" content="@readalongs/studio (cli) {{studio_version}}"/>
     <text xml:lang="fra">
         <body>
             <div type="page">
diff --git a/test/data/ej-fra-package.readalong b/test/data/ej-fra-package.readalong
index ae1f9f70..1ecda7e9 100644
--- a/test/data/ej-fra-package.readalong
+++ b/test/data/ej-fra-package.readalong
@@ -1,5 +1,6 @@
 <?xml version='1.0' encoding='utf-8'?>
-<read-along version="1.0">
+<read-along version="{{format_version}}">
+    <meta name="generator" content="@readalongs/studio (cli) {{studio_version}}"/>
     <text xml:lang="fra">
         <body>
             <div type="page">
diff --git a/test/data/ej-fra-silence-bad.readalong b/test/data/ej-fra-silence-bad.readalong
index f40b4a80..d8bbdd5c 100644
--- a/test/data/ej-fra-silence-bad.readalong
+++ b/test/data/ej-fra-silence-bad.readalong
@@ -1,5 +1,6 @@
 <?xml version='1.0' encoding='utf-8'?>
-<read-along version="1.0">
+<read-along version="{{format_version}}">
+    <meta name="generator" content="@readalongs/studio (cli) {{studio_version}}"/>
     <text xml:lang="fra">
         <body>
             <div type="page">
diff --git a/test/data/ej-fra-silence.readalong b/test/data/ej-fra-silence.readalong
index a24dd15b..d9de6e72 100644
--- a/test/data/ej-fra-silence.readalong
+++ b/test/data/ej-fra-silence.readalong
@@ -1,5 +1,6 @@
 <?xml version='1.0' encoding='utf-8'?>
-<read-along version="1.0">
+<read-along version="{{format_version}}">
+    <meta name="generator" content="@readalongs/studio (cli) {{studio_version}}"/>
     <text xml:lang="fra">
         <body>
             <div type="page">
diff --git a/test/data/ej-fra-subword.readalong b/test/data/ej-fra-subword.readalong
index 173811aa..070ceb13 100644
--- a/test/data/ej-fra-subword.readalong
+++ b/test/data/ej-fra-subword.readalong
@@ -1,5 +1,6 @@
 <?xml version='1.0' encoding='utf-8'?>
-<read-along version="1.0">
+<read-along version="{{format_version}}">
+    <meta name="generator" content="@readalongs/studio (cli) {{studio_version}}"/>
     <!-- To exclude any element from alignment, add the do-not-align="true" attribute to
          it, e.g., <p do-not-align="true">...</p>, or
          <s>Some text <foo do-not-align="true">do not align this</foo> more text</s> -->
diff --git a/test/data/ej-fra-translated.readalong b/test/data/ej-fra-translated.readalong
index 63ffb0ab..7f8bca50 100644
--- a/test/data/ej-fra-translated.readalong
+++ b/test/data/ej-fra-translated.readalong
@@ -1,5 +1,6 @@
 <?xml version='1.0' encoding='utf-8'?>
-<read-along version="1.0">
+<read-along version="{{format_version}}">
+    <meta name="generator" content="@readalongs/studio (cli) {{studio_version}}"/>
     <!-- To exclude any element from alignment, add the do-not-align="true" attribute to
          it, e.g., <p do-not-align="true">...</p>, or
          <s>Some text <foo do-not-align="true">do not align this</foo> more text</s> -->
diff --git a/test/data/ej-fra.readalong b/test/data/ej-fra.readalong
index 5c78c992..7531c352 100644
--- a/test/data/ej-fra.readalong
+++ b/test/data/ej-fra.readalong
@@ -1,5 +1,6 @@
 <?xml version='1.0' encoding='utf-8'?>
-<read-along version="1.0">
+<read-along version="{{format_version}}">
+    <meta name="generator" content="@readalongs/studio (cli) {{studio_version}}"/>
     <text xml:lang="fra">
         <body>
             <div type="page">
diff --git a/test/data/fra-prepared.readalong b/test/data/fra-prepared.readalong
index a7b8bc6f..898f01a4 100644
--- a/test/data/fra-prepared.readalong
+++ b/test/data/fra-prepared.readalong
@@ -1,5 +1,6 @@
 <?xml version='1.0' encoding='utf-8'?>
-<read-along version="1.0">
+<read-along version="{{format_version}}">
+    <meta name="generator" content="@readalongs/studio (cli) {{studio_version}}" />
     <text xml:lang="fra" fallback-langs="und">
         <body>
             <div type="page">
diff --git a/test/data/fra-tokenized.readalong b/test/data/fra-tokenized.readalong
index 2c5d597a..058b883e 100644
--- a/test/data/fra-tokenized.readalong
+++ b/test/data/fra-tokenized.readalong
@@ -1,5 +1,6 @@
 <?xml version='1.0' encoding='utf-8'?>
-<read-along version="1.0">
+<read-along version="{{format_version}}">
+    <meta name="generator" content="@readalongs/studio (cli) {{studio_version}}"/>
     <text xml:lang="fra">
         <body>
             <div type="page">
diff --git a/test/data/mixed-langs.readalong b/test/data/mixed-langs.readalong
index 17c6554e..af345dea 100644
--- a/test/data/mixed-langs.readalong
+++ b/test/data/mixed-langs.readalong
@@ -1,5 +1,6 @@
 <?xml version='1.0' encoding='utf-8'?>
-<read-along version="1.0">
+<read-along version="{{format_version}}">
+    <meta name="generator" content="@readalongs/studio (cli) {{studio_version}}"/>
     <text>
         <body>
             <div type="page">
diff --git a/test/data/patrickxtlan.readalong b/test/data/patrickxtlan.readalong
index 75ebf312..4b8a8560 100644
--- a/test/data/patrickxtlan.readalong
+++ b/test/data/patrickxtlan.readalong
@@ -1,5 +1,6 @@
 <?xml version='1.0' encoding='utf-8'?>
-<read-along version="1.0">
+<read-along version="{{format_version}}">
+    <meta name="generator" content="@readalongs/studio (cli) {{studio_version}}"/>
   <text>
     <body>
     <p>
diff --git a/test/test_align_cli.py b/test/test_align_cli.py
index c10cb640..868d7c85 100755
--- a/test/test_align_cli.py
+++ b/test/test_align_cli.py
@@ -1,629 +1,633 @@
-#!/usr/bin/env python
-
-"""
-Unit test suite for the readalongs align CLI command
-"""
-
-import os
-import pathlib
-import tempfile
-from os.path import exists, join
-from unittest import main
-
-from basic_test_case import BasicTestCase
-from lxml.html import fromstring
-from sound_swallower_stub import SoundSwallowerStub
-
-from readalongs.cli import align, langs
-
-
-def write_file(filename: str, file_contents: str) -> str:
-    """Write file_contents to file filename, and return its name (filename)"""
-    with open(filename, mode="w", encoding="utf8") as f:
-        f.write(file_contents)
-    return filename
-
-
-class TestAlignCli(BasicTestCase):
-    """Unit test suite for the readalongs align CLI command"""
-
-    def test_invoke_align(self):
-        """Basic readalongs align invocation and some variants"""
-        output = join(self.tempdir, "output")
-        with open("image-for-page1.jpg", "wb"):
-            pass
-        # Run align from plain text
-        results = self.runner.invoke(
-            align,
-            [
-                "-s",
-                "-o",
-                "vtt",
-                "-o",  # tests that we can use -o more than once
-                "srt:TextGrid,eaf",  # tests that we can give -o multiple values, separated by : or ,
-                "-l",
-                "fra",
-                "--align-mode",
-                "auto",
-                "--config",
-                join(self.data_dir, "sample-config.json"),
-                self.add_bom(join(self.data_dir, "ej-fra.txt")),
-                join(self.data_dir, "ej-fra.m4a"),
-                output,
-            ],
-        )
-        # print(results.output)
-        self.assertEqual(results.exit_code, 0)
-        expected_output_files = [
-            "output.readalong",
-            "output.m4a",
-            "index.html",
-            "output.TextGrid",
-            "output.eaf",
-            "output_sentences.srt",
-            "output_sentences.vtt",
-            "output_words.srt",
-            "output_words.vtt",
-            "readme.txt",
-        ]
-        for f in expected_output_files:
-            self.assertTrue(
-                exists(join(output, f)), f"successful alignment should have created {f}"
-            )
-        with open(join(output, "index.html"), encoding="utf8") as f:
-            self.assertIn(
-                '<read-along href="output.readalong" audio="output.m4a"',
-                f.read(),
-            )
-        self.assertTrue(
-            exists(join(output, "tempfiles", "output.tokenized.readalong")),
-            "alignment with -s should have created tempfiles/output.tokenized.readalong",
-        )
-        with open(
-            join(output, "tempfiles", "output.tokenized.readalong"),
-            "r",
-            encoding="utf-8",
-        ) as f:
-            self.assertNotIn("\ufeff", f.read())
-        self.assertTrue(
-            exists(join(output, "assets", "image-for-page1.jpg")),
-            "alignment with image files should have copied image-for-page1.jpg to assets",
-        )
-        self.assertIn("image-for-page2.jpg is accessible ", results.stdout)
-        os.unlink("image-for-page1.jpg")
-        self.assertFalse(exists("image-for-page1.jpg"))
-        self.assertIn("Align mode strict succeeded for sequence 0.", results.stdout)
-        # print(results.stdout)
-
-        # Move the alignment output to compare with further down
-        # We cannot just output to a different name because changing the output file name
-        # changes the contents of the output.
-        output1 = output + "1"
-        os.rename(output, output1)
-        self.assertFalse(exists(output), "os.rename() should have moved dir")
-
-        # Run align again, but on an XML input file with various added DNA text
-        results_dna = self.runner.invoke(
-            align,
-            [
-                "-o",
-                "xhtml",
-                "--align-mode",
-                "moderate",
-                "-s",
-                "--config",
-                join(self.data_dir, "sample-config.json"),
-                self.add_bom(join(self.data_dir, "ej-fra-dna.readalong")),
-                join(self.data_dir, "ej-fra.m4a"),
-                output,
-            ],
-        )
-        self.assertEqual(results_dna.exit_code, 0)
-        # print(results_dna.stdout)
-        self.assertTrue(
-            exists(join(output, "output.readalong")),
-            "successful alignment with DNA should have created output.readalong",
-        )
-        self.assertTrue(
-            exists(join(output, "output.xhtml")),
-            "successful alignment with -o xhtml should have created output.xhtml",
-        )
-        self.assertIn("Please copy image-for-page1.jpg to ", results_dna.stdout)
-        self.assertFalse(
-            exists(join(output, "assets", "image-for-page1.jpg")),
-            "image-for-page1.jpg was not on disk, cannot have been copied",
-        )
-        self.assertIn(
-            "Align mode moderate succeeded for sequence 0.", results_dna.stdout
-        )
-
-        # We test error situations in the same test case, since we reuse the same outputs
-        results_output_exists = self.runner.invoke(
-            align,
-            [
-                join(self.data_dir, "ej-fra-dna.readalong"),
-                join(self.data_dir, "ej-fra.m4a"),
-                output,
-            ],
-        )
-        self.assertNotEqual(results_output_exists.exit_code, 0)
-        self.assertIn(
-            "already exists, use -f to overwrite", results_output_exists.output
-        )
-
-        # Output path exists as a regular file
-        results_output_is_regular_file = self.runner.invoke(
-            align,
-            [
-                join(self.data_dir, "ej-fra-dna.readalong"),
-                join(self.data_dir, "ej-fra.m4a"),
-                join(output, "output.readalong"),
-            ],
-        )
-        self.assertNotEqual(results_output_is_regular_file, 0)
-        self.assertIn(
-            "already exists but is a not a directory",
-            results_output_is_regular_file.output,
-        )
-
-    def test_align_with_package(self):
-        """Test creating a single-file package, with -o html"""
-
-        output = join(self.tempdir, "html")
-        with SoundSwallowerStub("t0b0d0p0s0w0:920:1620", "t0b0d0p0s1w0:1620:1690"):
-            results_html = self.runner.invoke(
-                align,
-                [
-                    join(self.data_dir, "ej-fra-package.readalong"),
-                    join(self.data_dir, "ej-fra.m4a"),
-                    output,
-                    "-o",
-                    "html",
-                    "--config",
-                    self.add_bom(self.data_dir / "sample-config.json"),
-                ],
-            )
-        # print(results_html.output)
-        self.assertEqual(results_html.exit_code, 0)
-        self.assertTrue(
-            exists(join(output, "html.html")),
-            "succesful html alignment should have created html/html.html",
-        )
-
-        with open(join(output, "html.html"), "rb") as fhtml:
-            path_bytes = fhtml.read()
-        htmldoc = fromstring(path_bytes)
-        b64_pattern = r"data:[\w\/\-\+]*;base64,\w*"
-        self.assertRegex(
-            htmldoc.body.xpath("//read-along")[0].attrib["href"], b64_pattern
-        )
-        self.assertRegex(
-            htmldoc.body.xpath("//read-along")[0].attrib["audio"], b64_pattern
-        )
-
-    def not_test_permission_denied(self):
-        """Non-portable test to make sure denied permission triggers an error -- disabled"""
-        # This test is not stable, just disable it.
-        # It apparently also does not work correctly on M1 Macs either, even in Docker.
-
-        import platform
-
-        if platform.system() == "Windows" or "WSL2" in platform.release():
-            # Cannot change the permission on a directory in Windows though
-            # os.mkdir() or os.chmod(), so skip this test
-            return
-        dirname = join(self.tempdir, "permission_denied")
-        os.mkdir(dirname, mode=0o444)
-        results = self.runner.invoke(
-            align,
-            [
-                "-f",
-                join(self.data_dir, "ej-fra-dna.readalong"),
-                join(self.data_dir, "ej-fra.m4a"),
-                dirname,
-            ],
-        )
-        self.assertNotEqual(results, 0)
-        self.assertIn("Cannot write into output folder", results.output)
-
-    def test_langs_cmd(self):
-        """Validates that readalongs langs lists all in-langs that can map to eng-arpabet"""
-        results = self.runner.invoke(langs)
-        self.assertEqual(results.exit_code, 0)
-        self.assertIn("crg-tmd", results.stdout)
-        self.assertIn("crg-dv ", results.stdout)
-        self.assertNotIn("crg ", results.stdout)
-        self.assertNotIn("fn-unicode", results.stdout)
-
-    def test_align_english(self):
-        """Validates that the lexicon-based g2p works for English language alignment"""
-
-        input_filename = write_file(
-            join(self.tempdir, "input"),
-            "This is some text that we will run through the English lexicon "
-            "grapheme to morpheme approach.",
-        )
-        output_dir = join(self.tempdir, "eng-output")
-        # Run align from plain text
-        with SoundSwallowerStub("word:0:1000"):
-            self.runner.invoke(
-                align,
-                [
-                    "-s",
-                    "-l",
-                    "eng",
-                    input_filename,
-                    join(self.data_dir, "ej-fra.m4a"),
-                    output_dir,
-                ],
-            )
-
-        g2p_ref = "".join(
-            (
-                '<s id="t0b0d0p0s0">',
-                '<w id="t0b0d0p0s0w0" ARPABET="DH IH S">This</w> ',
-                '<w id="t0b0d0p0s0w1" ARPABET="IH Z">is</w> ',
-                '<w id="t0b0d0p0s0w2" ARPABET="S AH M">some</w> ',
-                '<w id="t0b0d0p0s0w3" ARPABET="T EH K S T">text</w> ',
-                '<w id="t0b0d0p0s0w4" ARPABET="DH AE T">that</w> ',
-                '<w id="t0b0d0p0s0w5" ARPABET="W IY">we</w> ',
-                '<w id="t0b0d0p0s0w6" ARPABET="W IH L">will</w> ',
-                '<w id="t0b0d0p0s0w7" ARPABET="R AH N">run</w> ',
-                '<w id="t0b0d0p0s0w8" ARPABET="TH R UW">through</w> ',
-                '<w id="t0b0d0p0s0w9" ARPABET="DH AH">the</w> ',
-                '<w id="t0b0d0p0s0w10" ARPABET="IH NG G L IH SH">English</w> ',
-                '<w id="t0b0d0p0s0w11" ARPABET="L EH K S IH K AA N">lexicon</w> ',
-                '<w id="t0b0d0p0s0w12" effective-g2p-lang="und" ARPABET="G D AA P HH EY M EY">grapheme</w> ',
-                '<w id="t0b0d0p0s0w13" ARPABET="T UW">to</w> ',
-                '<w id="t0b0d0p0s0w14" effective-g2p-lang="und" ARPABET="M OW D P HH EY M EY">morpheme</w> ',
-                '<w id="t0b0d0p0s0w15" ARPABET="AH P R OW CH">approach</w>',
-                ".</s>",
-            )
-        )
-
-        tokenized_file = join(
-            self.tempdir, "eng-output", "tempfiles", "eng-output.g2p.readalong"
-        )
-        with open(tokenized_file, "r", encoding="utf8") as f:
-            tok_output = f.read()
-
-        self.assertIn(g2p_ref, tok_output)
-
-    def test_invalid_config(self):
-        """unit testing for invalid config specifications"""
-
-        # --config parameter needs to be <somefile>.json, text with .txt instead
-        result = self.runner.invoke(
-            align,
-            [
-                "--config",
-                join(self.data_dir, "fra.txt"),
-                join(self.data_dir, "fra.txt"),
-                join(self.data_dir, "noise.mp3"),
-                join(self.tempdir, "out-invalid-config-1"),
-            ],
-        )
-        self.assertIn("must be in JSON format", result.stdout)
-
-        # --config parameters needs to contain valid json, test with garbage
-        config_file = join(self.tempdir, "bad-config.json")
-        with open(config_file, "w", encoding="utf8") as f:
-            print("not valid json", file=f)
-        result = self.runner.invoke(
-            align,
-            [
-                "--config",
-                config_file,
-                join(self.data_dir, "fra.txt"),
-                join(self.data_dir, "noise.mp3"),
-                join(self.tempdir, "out-invalid-config-2"),
-            ],
-        )
-        self.assertIn("is not in valid JSON format", result.stdout)
-
-    def test_bad_anchors(self):
-        """Make sure invalid anchors yield appropriate errors"""
-
-        xml_text = """<?xml version='1.0' encoding='utf-8'?>
-            <read-along version="1.0"><text xml:lang="fra"><body><p>
-            <anchor /><s>Bonjour.</s><anchor time="invalid"/>
-            </p></body></text></read-along>
-        """
-        xml_file = join(self.tempdir, "bad-anchor.readalong")
-        with open(xml_file, "w", encoding="utf8") as f:
-            print(xml_text, file=f)
-        bad_anchors_result = self.runner.invoke(
-            align,
-            [
-                xml_file,
-                join(self.data_dir, "noise.mp3"),
-                join(self.tempdir, "out-bad-anchors"),
-            ],
-        )
-        for msg in [
-            'missing "time" attribute',
-            'invalid "time" attribute "invalid"',
-            "Could not parse all anchors",
-            "Aborting.",
-        ]:
-            self.assertIn(msg, bad_anchors_result.stdout)
-
-    def test_misc_align_errors(self):
-        """Test calling readalongs align with misc CLI errors"""
-        results = self.runner.invoke(
-            align,
-            [
-                join(self.data_dir, "ej-fra.txt"),
-                join(self.data_dir, "ej-fra.m4a"),
-                join(self.tempdir, "out-missing-l"),
-            ],
-        )
-        self.assertNotEqual(results.exit_code, 0)
-        self.assertIn("No input language specified", results.output)
-
-        with SoundSwallowerStub("[NOISE]:0:1"):
-            results = self.runner.invoke(
-                align,
-                [
-                    join(self.data_dir, "fra-prepared.readalong"),
-                    join(self.data_dir, "noise.mp3"),
-                    join(self.tempdir, "noise-only"),
-                ],
-            )
-        self.assertNotEqual(results.exit_code, 0)
-        self.assertIn("produced 0 segments", results.output)
-
-        with SoundSwallowerStub(
-            "[NOISE]:0:1", "w0:1:1000", "<sil>:1000:1100", "w1:1100:2000"
-        ):
-            results = self.runner.invoke(
-                align,
-                [
-                    join(self.data_dir, "ej-fra.readalong"),
-                    join(self.data_dir, "ej-fra.m4a"),
-                    join(self.tempdir, "two-words"),
-                ],
-            )
-        # print(results.output)
-        # We don't check results.exit_code since that's a soft warning, not a hard error
-        self.assertIn("produced 2 segments", results.output)
-        self.assertIn(
-            "Alignment produced a different number of segments and tokens than were in the input.",
-            results.output,
-        )
-
-    def test_infer_plain_text_or_xml(self):
-        """align -i is obsolete, now we infer plain text vs XML; test that!"""
-
-        # plain text with guess by contents
-        infile1 = write_file(join(self.tempdir, "infile1"), "some plain text")
-        with SoundSwallowerStub("word:0:1"):
-            results = self.runner.invoke(
-                align,
-                [
-                    infile1,
-                    join(self.data_dir, "noise.mp3"),
-                    join(self.tempdir, "outdir1"),
-                ],
-            )
-        self.assertNotEqual(results.exit_code, 0)
-        # This error message confirms it's being processed as plain text
-        self.assertIn("No input language specified for plain text", results.output)
-
-        # plain text by extension
-        infile2 = write_file(join(self.tempdir, "infile2.txt"), "<?xml but .txt")
-        with SoundSwallowerStub("word:0:1"):
-            results = self.runner.invoke(
-                align,
-                [
-                    infile2,
-                    join(self.data_dir, "noise.mp3"),
-                    join(self.tempdir, "outdir2"),
-                ],
-            )
-        self.assertNotEqual(results.exit_code, 0)
-        # This error message confirms it's being processed as plain text
-        self.assertIn("No input language specified for plain text", results.output)
-
-        # XML with guess by contents
-        infile3 = self.add_bom(
-            write_file(
-                join(self.tempdir, "infile3"),
-                "<?xml version='1.0' encoding='utf-8'?><text>blah blah</text>",
-            )
-        )
-        with SoundSwallowerStub("word:0:1"):
-            results = self.runner.invoke(
-                align,
-                [
-                    infile3,
-                    join(self.data_dir, "noise.mp3"),
-                    join(self.tempdir, "outdir3"),
-                ],
-            )
-        self.assertEqual(results.exit_code, 0)
-
-        # XML with guess by contents, but with content error
-        infile4 = write_file(
-            join(self.tempdir, "infile4"),
-            "<?xml version='1.0' encoding='utf-8'?><text>blah blah</bad_tag>",
-        )
-        with SoundSwallowerStub("word:0:1"):
-            results = self.runner.invoke(
-                align,
-                [
-                    infile4,
-                    join(self.data_dir, "noise.mp3"),
-                    join(self.tempdir, "outdir4"),
-                ],
-            )
-        self.assertNotEqual(results.exit_code, 0)
-        self.assertIn("Error parsing XML", results.output)
-
-        # XML by file extension
-        infile5 = write_file(join(self.tempdir, "infile5.readalong"), "Not XML!")
-        with SoundSwallowerStub("word:0:1"):
-            results = self.runner.invoke(
-                align,
-                [
-                    infile5,
-                    join(self.data_dir, "noise.mp3"),
-                    join(self.tempdir, "outdir5"),
-                ],
-            )
-        self.assertNotEqual(results.exit_code, 0)
-        self.assertIn("Error parsing XML", results.output)
-
-    def test_obsolete_switches(self):
-        # Giving -i switch generates an obsolete-switch error message
-        with SoundSwallowerStub("word:0:1"):
-            results = self.runner.invoke(
-                align,
-                [
-                    "-i",
-                    join(self.data_dir, "fra.txt"),
-                    join(self.data_dir, "noise.mp3"),
-                    join(self.tempdir, "outdir6"),
-                ],
-            )
-        self.assertNotEqual(results.exit_code, 0)
-        self.assertIn("is obsolete.", results.output)
-
-        # Giving --g2p-verbose switch generates an obsolete-switch error message
-        with SoundSwallowerStub("word:0:1"):
-            results = self.runner.invoke(
-                align,
-                [
-                    "--g2p-verbose",
-                    join(self.data_dir, "fra.txt"),
-                    join(self.data_dir, "noise.mp3"),
-                    join(self.tempdir, "outdir7"),
-                ],
-            )
-        self.assertNotEqual(results.exit_code, 0)
-        self.assertIn("is obsolete.", results.output)
-
-        # Giving --g2p-fallback switch generates an obsolete-switch error message
-        with SoundSwallowerStub("word:0:1"):
-            results = self.runner.invoke(
-                align,
-                [
-                    "--g2p-fallback",
-                    "fra:end:und",
-                    join(self.data_dir, "fra.txt"),
-                    join(self.data_dir, "noise.mp3"),
-                    join(self.tempdir, "outdir8"),
-                ],
-            )
-        self.assertNotEqual(results.exit_code, 0)
-        self.assertIn("is obsolete.", results.output)
-
-    def test_oo_option(self):
-        """Exercise the hidden -oo / --output-orth option"""
-        with SoundSwallowerStub("word:0:1"):
-            results = self.runner.invoke(
-                align,
-                [
-                    "-oo",
-                    "eng-arpabet",
-                    join(self.data_dir, "ej-fra.readalong"),
-                    join(self.data_dir, "noise.mp3"),
-                    join(self.tempdir, "outdir9"),
-                ],
-            )
-        self.assertEqual(results.exit_code, 0)
-
-        with SoundSwallowerStub("word:0:1"):
-            results = self.runner.invoke(
-                align,
-                [
-                    "-oo",
-                    "not-an-alphabet",
-                    join(self.data_dir, "ej-fra.readalong"),
-                    join(self.data_dir, "noise.mp3"),
-                    join(self.tempdir, "outdir10"),
-                ],
-            )
-        self.assertNotEqual(results.exit_code, 0)
-        self.assertIn("Could not g2p", results.output)
-        self.assertIn("not-an-alphabet", results.output)
-
-        with SoundSwallowerStub("word:0:1"):
-            results = self.runner.invoke(
-                align,
-                [
-                    "-oo",
-                    "dan-ipa",
-                    join(self.data_dir, "ej-fra.readalong"),
-                    join(self.data_dir, "noise.mp3"),
-                    join(self.tempdir, "outdir11"),
-                ],
-            )
-        self.assertNotEqual(results.exit_code, 0)
-        self.assertIn("Could not g2p", results.output)
-        self.assertIn("no path", results.output)
-
-        with SoundSwallowerStub("word:0:1"):
-            results = self.runner.invoke(
-                align,
-                [
-                    "-oo",
-                    "dan-ipa",
-                    "-l",
-                    "eng",
-                    join(self.data_dir, "fra.txt"),
-                    join(self.data_dir, "noise.mp3"),
-                    join(self.tempdir, "outdir12"),
-                ],
-            )
-        self.assertNotEqual(results.exit_code, 0)
-        self.assertIn("Could not g2p", results.output)
-        self.assertIn('no path from "eng" to ', results.output)
-
-    def add_bom(self, filename):
-        """Create a temporary copy of filename with the a BOM in it, in self.tempdir"""
-        # We pepper calls to add_bom() around the test suite, to make sure all
-        # different kinds of input files are accepted with and without a BOM
-        output_file = tempfile.NamedTemporaryFile(
-            mode="wb",
-            dir=self.tempdir,
-            delete=False,
-            prefix="bom_",
-            suffix=os.path.basename(filename),
-        )
-        output_file.write(b"\xef\xbb\xbf")
-        with open(filename, "rb") as file_binary:
-            output_file.write(file_binary.read())
-        output_file.close()
-        return output_file.name
-
-    def test_add_bom(self):
-        """Make sure add_bom does what we mean it to, i.e., test the test harness."""
-
-        def slurp_bin(filename):
-            with open(filename, "rb") as f:
-                return f.read()
-
-        def slurp_text(filename, encoding):
-            with open(filename, "r", encoding=encoding) as f:
-                return f.read()
-
-        base_file = write_file(self.tempdir / "add-bom-input.txt", "Random Text été")
-        bom_file = self.add_bom(base_file)
-        self.assertEqual(
-            slurp_text(base_file, "utf-8"), slurp_text(bom_file, "utf-8-sig")
-        )
-        self.assertEqual(
-            slurp_text(bom_file, "utf-8"), "\ufeff" + slurp_text(base_file, "utf-8")
-        )
-        self.assertNotEqual(slurp_bin(base_file), slurp_bin(bom_file))
-        self.assertEqual(b"\xef\xbb\xbf" + slurp_bin(base_file), slurp_bin(bom_file))
-
-        bom_file_pathlib = self.add_bom(pathlib.Path(base_file))
-        self.assertEqual(
-            slurp_text(base_file, "utf-8"), slurp_text(bom_file_pathlib, "utf-8-sig")
-        )
-
-
-if __name__ == "__main__":
+#!/usr/bin/env python
+
+"""
+Unit test suite for the readalongs align CLI command
+"""
+
+import os
+import pathlib
+import tempfile
+from os.path import exists, join
+from unittest import main
+
+from basic_test_case import BasicTestCase
+from lxml.html import fromstring
+from sound_swallower_stub import SoundSwallowerStub
+
+from readalongs._version import READALONG_FILE_FORMAT_VERSION, VERSION
+from readalongs.cli import align, langs
+
+
+def write_file(filename: str, file_contents: str) -> str:
+    """Write file_contents to file filename, and return its name (filename)"""
+    with open(filename, mode="w", encoding="utf8") as f:
+        f.write(file_contents)
+    return filename
+
+
+class TestAlignCli(BasicTestCase):
+    """Unit test suite for the readalongs align CLI command"""
+
+    def test_invoke_align(self):
+        """Basic readalongs align invocation and some variants"""
+        output = join(self.tempdir, "output")
+        with open("image-for-page1.jpg", "wb"):
+            pass
+        # Run align from plain text
+        results = self.runner.invoke(
+            align,
+            [
+                "-s",
+                "-o",
+                "vtt",
+                "-o",  # tests that we can use -o more than once
+                "srt:TextGrid,eaf",  # tests that we can give -o multiple values, separated by : or ,
+                "-l",
+                "fra",
+                "--align-mode",
+                "auto",
+                "--config",
+                join(self.data_dir, "sample-config.json"),
+                self.add_bom(join(self.data_dir, "ej-fra.txt")),
+                join(self.data_dir, "ej-fra.m4a"),
+                output,
+            ],
+        )
+        # print(results.output)
+        self.assertEqual(results.exit_code, 0)
+        expected_output_files = [
+            "output.readalong",
+            "output.m4a",
+            "index.html",
+            "output.TextGrid",
+            "output.eaf",
+            "output_sentences.srt",
+            "output_sentences.vtt",
+            "output_words.srt",
+            "output_words.vtt",
+            "readme.txt",
+        ]
+        for f in expected_output_files:
+            self.assertTrue(
+                exists(join(output, f)), f"successful alignment should have created {f}"
+            )
+        with open(join(output, "index.html"), encoding="utf8") as f:
+            self.assertIn(
+                '<read-along href="output.readalong" audio="output.m4a"',
+                f.read(),
+            )
+        self.assertTrue(
+            exists(join(output, "tempfiles", "output.tokenized.readalong")),
+            "alignment with -s should have created tempfiles/output.tokenized.readalong",
+        )
+        with open(
+            join(output, "tempfiles", "output.tokenized.readalong"),
+            "r",
+            encoding="utf-8",
+        ) as f:
+            self.assertNotIn("\ufeff", f.read())
+        self.assertTrue(
+            exists(join(output, "assets", "image-for-page1.jpg")),
+            "alignment with image files should have copied image-for-page1.jpg to assets",
+        )
+        self.assertIn("image-for-page2.jpg is accessible ", results.stdout)
+        os.unlink("image-for-page1.jpg")
+        self.assertFalse(exists("image-for-page1.jpg"))
+        self.assertIn("Align mode strict succeeded for sequence 0.", results.stdout)
+        # print(results.stdout)
+
+        # Move the alignment output to compare with further down
+        # We cannot just output to a different name because changing the output file name
+        # changes the contents of the output.
+        output1 = output + "1"
+        os.rename(output, output1)
+        self.assertFalse(exists(output), "os.rename() should have moved dir")
+
+        # Run align again, but on an XML input file with various added DNA text
+        results_dna = self.runner.invoke(
+            align,
+            [
+                "-o",
+                "xhtml",
+                "--align-mode",
+                "moderate",
+                "-s",
+                "--config",
+                join(self.data_dir, "sample-config.json"),
+                self.add_bom(join(self.data_dir, "ej-fra-dna.readalong")),
+                join(self.data_dir, "ej-fra.m4a"),
+                output,
+            ],
+        )
+        self.assertEqual(results_dna.exit_code, 0)
+        # print(results_dna.stdout)
+        self.assertTrue(
+            exists(join(output, "output.readalong")),
+            "successful alignment with DNA should have created output.readalong",
+        )
+        self.assertTrue(
+            exists(join(output, "output.xhtml")),
+            "successful alignment with -o xhtml should have created output.xhtml",
+        )
+        self.assertIn("Please copy image-for-page1.jpg to ", results_dna.stdout)
+        self.assertFalse(
+            exists(join(output, "assets", "image-for-page1.jpg")),
+            "image-for-page1.jpg was not on disk, cannot have been copied",
+        )
+        self.assertIn(
+            "Align mode moderate succeeded for sequence 0.", results_dna.stdout
+        )
+
+        # We test error situations in the same test case, since we reuse the same outputs
+        results_output_exists = self.runner.invoke(
+            align,
+            [
+                join(self.data_dir, "ej-fra-dna.readalong"),
+                join(self.data_dir, "ej-fra.m4a"),
+                output,
+            ],
+        )
+        self.assertNotEqual(results_output_exists.exit_code, 0)
+        self.assertIn(
+            "already exists, use -f to overwrite", results_output_exists.output
+        )
+
+        # Output path exists as a regular file
+        results_output_is_regular_file = self.runner.invoke(
+            align,
+            [
+                join(self.data_dir, "ej-fra-dna.readalong"),
+                join(self.data_dir, "ej-fra.m4a"),
+                join(output, "output.readalong"),
+            ],
+        )
+        self.assertNotEqual(results_output_is_regular_file.exit_code, 0)
+        self.assertIn(
+            "already exists but is a not a directory",
+            results_output_is_regular_file.output,
+        )
+
+    def test_align_with_package(self):
+        """Test creating a single-file package, with -o html"""
+
+        output = join(self.tempdir, "html")
+        with SoundSwallowerStub("t0b0d0p0s0w0:920:1620", "t0b0d0p0s1w0:1620:1690"):
+            results_html = self.runner.invoke(
+                align,
+                [
+                    join(self.data_dir, "ej-fra-package.readalong"),
+                    join(self.data_dir, "ej-fra.m4a"),
+                    output,
+                    "-o",
+                    "html",
+                    "--config",
+                    self.add_bom(self.data_dir / "sample-config.json"),
+                ],
+            )
+        # print(results_html.output)
+        self.assertEqual(results_html.exit_code, 0)
+        self.assertTrue(
+            exists(join(output, "html.html")),
+            "succesful html alignment should have created html/html.html",
+        )
+
+        with open(join(output, "html.html"), "rb") as fhtml:
+            path_bytes = fhtml.read()
+        htmldoc = fromstring(path_bytes)
+        b64_pattern = r"data:[\w\/\-\+]*;base64,\w*"
+        self.assertRegex(
+            htmldoc.body.xpath("//read-along")[0].attrib["href"], b64_pattern
+        )
+        self.assertRegex(
+            htmldoc.body.xpath("//read-along")[0].attrib["audio"], b64_pattern
+        )
+
+    def not_test_permission_denied(self):
+        """Non-portable test to make sure denied permission triggers an error -- disabled"""
+        # This test is not stable, just disable it.
+        # It apparently also does not work correctly on M1 Macs either, even in Docker.
+
+        import platform
+
+        if platform.system() == "Windows" or "WSL2" in platform.release():
+            # Cannot change the permission on a directory in Windows through
+            # os.mkdir() or os.chmod(), so skip this test
+            return
+        dirname = join(self.tempdir, "permission_denied")
+        os.mkdir(dirname, mode=0o444)
+        results = self.runner.invoke(
+            align,
+            [
+                "-f",
+                join(self.data_dir, "ej-fra-dna.readalong"),
+                join(self.data_dir, "ej-fra.m4a"),
+                dirname,
+            ],
+        )
+        self.assertNotEqual(results.exit_code, 0)
+        self.assertIn("Cannot write into output folder", results.output)
+
+    def test_langs_cmd(self):
+        """Validates that readalongs langs lists all in-langs that can map to eng-arpabet"""
+        results = self.runner.invoke(langs)
+        self.assertEqual(results.exit_code, 0)
+        self.assertIn("crg-tmd", results.stdout)
+        self.assertIn("crg-dv ", results.stdout)
+        self.assertNotIn("crg ", results.stdout)
+        self.assertNotIn("fn-unicode", results.stdout)
+
+    def test_align_english(self):
+        """Validates that the lexicon-based g2p works for English language alignment"""
+
+        input_filename = write_file(
+            join(self.tempdir, "input"),
+            "This is some text that we will run through the English lexicon "
+            "grapheme to morpheme approach.",
+        )
+        output_dir = join(self.tempdir, "eng-output")
+        # Run align from plain text
+        with SoundSwallowerStub("word:0:1000"):
+            self.runner.invoke(
+                align,
+                [
+                    "-s",
+                    "-l",
+                    "eng",
+                    input_filename,
+                    join(self.data_dir, "ej-fra.m4a"),
+                    output_dir,
+                ],
+            )
+
+        g2p_ref = "".join(
+            (
+                '<s id="t0b0d0p0s0">',
+                '<w id="t0b0d0p0s0w0" ARPABET="DH IH S">This</w> ',
+                '<w id="t0b0d0p0s0w1" ARPABET="IH Z">is</w> ',
+                '<w id="t0b0d0p0s0w2" ARPABET="S AH M">some</w> ',
+                '<w id="t0b0d0p0s0w3" ARPABET="T EH K S T">text</w> ',
+                '<w id="t0b0d0p0s0w4" ARPABET="DH AE T">that</w> ',
+                '<w id="t0b0d0p0s0w5" ARPABET="W IY">we</w> ',
+                '<w id="t0b0d0p0s0w6" ARPABET="W IH L">will</w> ',
+                '<w id="t0b0d0p0s0w7" ARPABET="R AH N">run</w> ',
+                '<w id="t0b0d0p0s0w8" ARPABET="TH R UW">through</w> ',
+                '<w id="t0b0d0p0s0w9" ARPABET="DH AH">the</w> ',
+                '<w id="t0b0d0p0s0w10" ARPABET="IH NG G L IH SH">English</w> ',
+                '<w id="t0b0d0p0s0w11" ARPABET="L EH K S IH K AA N">lexicon</w> ',
+                '<w id="t0b0d0p0s0w12" effective-g2p-lang="und" ARPABET="G D AA P HH EY M EY">grapheme</w> ',
+                '<w id="t0b0d0p0s0w13" ARPABET="T UW">to</w> ',
+                '<w id="t0b0d0p0s0w14" effective-g2p-lang="und" ARPABET="M OW D P HH EY M EY">morpheme</w> ',
+                '<w id="t0b0d0p0s0w15" ARPABET="AH P R OW CH">approach</w>',
+                ".</s>",
+            )
+        )
+
+        tokenized_file = join(
+            self.tempdir, "eng-output", "tempfiles", "eng-output.g2p.readalong"
+        )
+        with open(tokenized_file, "r", encoding="utf8") as f:
+            tok_output = f.read()
+
+        self.assertIn(g2p_ref, tok_output)
+
+    def test_invalid_config(self):
+        """unit testing for invalid config specifications"""
+
+        # --config parameter needs to be <somefile>.json, test with .txt instead
+        result = self.runner.invoke(
+            align,
+            [
+                "--config",
+                join(self.data_dir, "fra.txt"),
+                join(self.data_dir, "fra.txt"),
+                join(self.data_dir, "noise.mp3"),
+                join(self.tempdir, "out-invalid-config-1"),
+            ],
+        )
+        self.assertIn("must be in JSON format", result.stdout)
+
+        # --config parameter needs to contain valid JSON, test with garbage
+        config_file = join(self.tempdir, "bad-config.json")
+        with open(config_file, "w", encoding="utf8") as f:
+            print("not valid json", file=f)
+        result = self.runner.invoke(
+            align,
+            [
+                "--config",
+                config_file,
+                join(self.data_dir, "fra.txt"),
+                join(self.data_dir, "noise.mp3"),
+                join(self.tempdir, "out-invalid-config-2"),
+            ],
+        )
+        self.assertIn("is not in valid JSON format", result.stdout)
+
+    def test_bad_anchors(self):
+        """Make sure invalid anchors yield appropriate errors"""
+
+        xml_text = """<?xml version='1.0' encoding='utf-8'?>
+            <read-along version="%s"><meta name="generator" content="@readalongs/studio (cli) %s"/><text xml:lang="fra"><body><p>
+            <anchor /><s>Bonjour.</s><anchor time="invalid"/>
+            </p></body></text></read-along>
+        """ % (
+            READALONG_FILE_FORMAT_VERSION,
+            VERSION,
+        )
+        xml_file = join(self.tempdir, "bad-anchor.readalong")
+        with open(xml_file, "w", encoding="utf8") as f:
+            print(xml_text, file=f)
+        bad_anchors_result = self.runner.invoke(
+            align,
+            [
+                xml_file,
+                join(self.data_dir, "noise.mp3"),
+                join(self.tempdir, "out-bad-anchors"),
+            ],
+        )
+        for msg in [
+            'missing "time" attribute',
+            'invalid "time" attribute "invalid"',
+            "Could not parse all anchors",
+            "Aborting.",
+        ]:
+            self.assertIn(msg, bad_anchors_result.stdout)
+
+    def test_misc_align_errors(self):
+        """Test calling readalongs align with misc CLI errors"""
+        results = self.runner.invoke(
+            align,
+            [
+                join(self.data_dir, "ej-fra.txt"),
+                join(self.data_dir, "ej-fra.m4a"),
+                join(self.tempdir, "out-missing-l"),
+            ],
+        )
+        self.assertNotEqual(results.exit_code, 0)
+        self.assertIn("No input language specified", results.output)
+
+        with SoundSwallowerStub("[NOISE]:0:1"):
+            results = self.runner.invoke(
+                align,
+                [
+                    join(self.data_dir, "fra-prepared.readalong"),
+                    join(self.data_dir, "noise.mp3"),
+                    join(self.tempdir, "noise-only"),
+                ],
+            )
+        self.assertNotEqual(results.exit_code, 0)
+        self.assertIn("produced 0 segments", results.output)
+
+        with SoundSwallowerStub(
+            "[NOISE]:0:1", "w0:1:1000", "<sil>:1000:1100", "w1:1100:2000"
+        ):
+            results = self.runner.invoke(
+                align,
+                [
+                    join(self.data_dir, "ej-fra.readalong"),
+                    join(self.data_dir, "ej-fra.m4a"),
+                    join(self.tempdir, "two-words"),
+                ],
+            )
+        # print(results.output)
+        # We don't check results.exit_code since that's a soft warning, not a hard error
+        self.assertIn("produced 2 segments", results.output)
+        self.assertIn(
+            "Alignment produced a different number of segments and tokens than were in the input.",
+            results.output,
+        )
+
+    def test_infer_plain_text_or_xml(self):
+        """align -i is obsolete, now we infer plain text vs XML; test that!"""
+
+        # plain text with guess by contents
+        infile1 = write_file(join(self.tempdir, "infile1"), "some plain text")
+        with SoundSwallowerStub("word:0:1"):
+            results = self.runner.invoke(
+                align,
+                [
+                    infile1,
+                    join(self.data_dir, "noise.mp3"),
+                    join(self.tempdir, "outdir1"),
+                ],
+            )
+        self.assertNotEqual(results.exit_code, 0)
+        # This error message confirms it's being processed as plain text
+        self.assertIn("No input language specified for plain text", results.output)
+
+        # plain text by extension
+        infile2 = write_file(join(self.tempdir, "infile2.txt"), "<?xml but .txt")
+        with SoundSwallowerStub("word:0:1"):
+            results = self.runner.invoke(
+                align,
+                [
+                    infile2,
+                    join(self.data_dir, "noise.mp3"),
+                    join(self.tempdir, "outdir2"),
+                ],
+            )
+        self.assertNotEqual(results.exit_code, 0)
+        # This error message confirms it's being processed as plain text
+        self.assertIn("No input language specified for plain text", results.output)
+
+        # XML with guess by contents
+        infile3 = self.add_bom(
+            write_file(
+                join(self.tempdir, "infile3"),
+                "<?xml version='1.0' encoding='utf-8'?><text>blah blah</text>",
+            )
+        )
+        with SoundSwallowerStub("word:0:1"):
+            results = self.runner.invoke(
+                align,
+                [
+                    infile3,
+                    join(self.data_dir, "noise.mp3"),
+                    join(self.tempdir, "outdir3"),
+                ],
+            )
+        self.assertEqual(results.exit_code, 0)
+
+        # XML with guess by contents, but with content error
+        infile4 = write_file(
+            join(self.tempdir, "infile4"),
+            "<?xml version='1.0' encoding='utf-8'?><text>blah blah</bad_tag>",
+        )
+        with SoundSwallowerStub("word:0:1"):
+            results = self.runner.invoke(
+                align,
+                [
+                    infile4,
+                    join(self.data_dir, "noise.mp3"),
+                    join(self.tempdir, "outdir4"),
+                ],
+            )
+        self.assertNotEqual(results.exit_code, 0)
+        self.assertIn("Error parsing XML", results.output)
+
+        # XML by file extension
+        infile5 = write_file(join(self.tempdir, "infile5.readalong"), "Not XML!")
+        with SoundSwallowerStub("word:0:1"):
+            results = self.runner.invoke(
+                align,
+                [
+                    infile5,
+                    join(self.data_dir, "noise.mp3"),
+                    join(self.tempdir, "outdir5"),
+                ],
+            )
+        self.assertNotEqual(results.exit_code, 0)
+        self.assertIn("Error parsing XML", results.output)
+
+    def test_obsolete_switches(self):
+        # Giving -i switch generates an obsolete-switch error message
+        with SoundSwallowerStub("word:0:1"):
+            results = self.runner.invoke(
+                align,
+                [
+                    "-i",
+                    join(self.data_dir, "fra.txt"),
+                    join(self.data_dir, "noise.mp3"),
+                    join(self.tempdir, "outdir6"),
+                ],
+            )
+        self.assertNotEqual(results.exit_code, 0)
+        self.assertIn("is obsolete.", results.output)
+
+        # Giving --g2p-verbose switch generates an obsolete-switch error message
+        with SoundSwallowerStub("word:0:1"):
+            results = self.runner.invoke(
+                align,
+                [
+                    "--g2p-verbose",
+                    join(self.data_dir, "fra.txt"),
+                    join(self.data_dir, "noise.mp3"),
+                    join(self.tempdir, "outdir7"),
+                ],
+            )
+        self.assertNotEqual(results.exit_code, 0)
+        self.assertIn("is obsolete.", results.output)
+
+        # Giving --g2p-fallback switch generates an obsolete-switch error message
+        with SoundSwallowerStub("word:0:1"):
+            results = self.runner.invoke(
+                align,
+                [
+                    "--g2p-fallback",
+                    "fra:end:und",
+                    join(self.data_dir, "fra.txt"),
+                    join(self.data_dir, "noise.mp3"),
+                    join(self.tempdir, "outdir8"),
+                ],
+            )
+        self.assertNotEqual(results.exit_code, 0)
+        self.assertIn("is obsolete.", results.output)
+
+    def test_oo_option(self):
+        """Exercise the hidden -oo / --output-orth option"""
+        with SoundSwallowerStub("word:0:1"):
+            results = self.runner.invoke(
+                align,
+                [
+                    "-oo",
+                    "eng-arpabet",
+                    join(self.data_dir, "ej-fra.readalong"),
+                    join(self.data_dir, "noise.mp3"),
+                    join(self.tempdir, "outdir9"),
+                ],
+            )
+        self.assertEqual(results.exit_code, 0)
+
+        with SoundSwallowerStub("word:0:1"):
+            results = self.runner.invoke(
+                align,
+                [
+                    "-oo",
+                    "not-an-alphabet",
+                    join(self.data_dir, "ej-fra.readalong"),
+                    join(self.data_dir, "noise.mp3"),
+                    join(self.tempdir, "outdir10"),
+                ],
+            )
+        self.assertNotEqual(results.exit_code, 0)
+        self.assertIn("Could not g2p", results.output)
+        self.assertIn("not-an-alphabet", results.output)
+
+        with SoundSwallowerStub("word:0:1"):
+            results = self.runner.invoke(
+                align,
+                [
+                    "-oo",
+                    "dan-ipa",
+                    join(self.data_dir, "ej-fra.readalong"),
+                    join(self.data_dir, "noise.mp3"),
+                    join(self.tempdir, "outdir11"),
+                ],
+            )
+        self.assertNotEqual(results.exit_code, 0)
+        self.assertIn("Could not g2p", results.output)
+        self.assertIn("no path", results.output)
+
+        with SoundSwallowerStub("word:0:1"):
+            results = self.runner.invoke(
+                align,
+                [
+                    "-oo",
+                    "dan-ipa",
+                    "-l",
+                    "eng",
+                    join(self.data_dir, "fra.txt"),
+                    join(self.data_dir, "noise.mp3"),
+                    join(self.tempdir, "outdir12"),
+                ],
+            )
+        self.assertNotEqual(results.exit_code, 0)
+        self.assertIn("Could not g2p", results.output)
+        self.assertIn('no path from "eng" to ', results.output)
+
+    def add_bom(self, filename):
+        """Create a temporary copy of filename with a BOM in it, in self.tempdir"""
+        # We pepper calls to add_bom() around the test suite, to make sure all
+        # different kinds of input files are accepted with and without a BOM
+        # delete=False: the copy must stay on disk after the handle is closed
+        with tempfile.NamedTemporaryFile(
+            mode="wb",
+            dir=self.tempdir,
+            delete=False,
+            prefix="bom_",
+            suffix=os.path.basename(filename),
+        ) as output_file:
+            output_file.write(b"\xef\xbb\xbf")
+            with open(filename, "rb") as file_binary:
+                output_file.write(file_binary.read())
+        return output_file.name
+
+    def test_add_bom(self):
+        """Make sure add_bom does what we mean it to, i.e., test the test harness."""
+
+        def slurp_bin(filename):
+            with open(filename, "rb") as f:
+                return f.read()
+
+        def slurp_text(filename, encoding):
+            with open(filename, "r", encoding=encoding) as f:
+                return f.read()
+
+        base_file = write_file(self.tempdir / "add-bom-input.txt", "Random Text été")
+        bom_file = self.add_bom(base_file)
+        self.assertEqual(
+            slurp_text(base_file, "utf-8"), slurp_text(bom_file, "utf-8-sig")
+        )
+        self.assertEqual(
+            slurp_text(bom_file, "utf-8"), "\ufeff" + slurp_text(base_file, "utf-8")
+        )
+        self.assertNotEqual(slurp_bin(base_file), slurp_bin(bom_file))
+        self.assertEqual(b"\xef\xbb\xbf" + slurp_bin(base_file), slurp_bin(bom_file))
+
+        bom_file_pathlib = self.add_bom(pathlib.Path(base_file))
+        self.assertEqual(
+            slurp_text(base_file, "utf-8"), slurp_text(bom_file_pathlib, "utf-8-sig")
+        )
+
+
+if __name__ == "__main__":
     main()
diff --git a/test/test_dtd.py b/test/test_dtd.py
index fde55957..3e36e749 100644
--- a/test/test_dtd.py
+++ b/test/test_dtd.py
@@ -9,7 +9,7 @@
 from lxml import etree
 
 DTDPATH = os.path.join(
-    dirname(__file__), "..", "readalongs", "static", "read-along-1.0.dtd"
+    dirname(__file__), "..", "readalongs", "static", "read-along-1.1.dtd"
 )
 
 VALID_RAS = """
diff --git a/test/test_g2p_cli.py b/test/test_g2p_cli.py
index 953c1a09..830ce549 100755
--- a/test/test_g2p_cli.py
+++ b/test/test_g2p_cli.py
@@ -9,6 +9,7 @@
 from basic_test_case import BasicTestCase
 from lxml import etree
 from sound_swallower_stub import SoundSwallowerStub
+from test_make_xml_cli import updateFormatVersion, updateStudioVersion
 
 from readalongs.align import align_audio
 from readalongs.cli import align, g2p, make_xml, tokenize
@@ -76,9 +77,13 @@ def test_mixed_langs(self):
             ref_file, encoding="utf8"
         ) as ref_f:
             self.maxDiff = None
+            # update version info
+            ref_list = list(ref_f)
+            ref_list[1] = updateFormatVersion(ref_list[1])
+            ref_list[2] = updateStudioVersion(ref_list[2])
             self.assertListEqual(
                 list(output_f),
-                list(ref_f),
+                ref_list,
                 f"output {g2p_file} and reference {ref_file} differ.",
             )
 
diff --git a/test/test_make_xml_cli.py b/test/test_make_xml_cli.py
index a36a23db..9a448532 100755
--- a/test/test_make_xml_cli.py
+++ b/test/test_make_xml_cli.py
@@ -10,10 +10,18 @@
 
 from basic_test_case import BasicTestCase
 
+# from readalongs.log import LOGGER
+from readalongs._version import READALONG_FILE_FORMAT_VERSION, VERSION
 from readalongs.align import create_input_ras, create_ras_from_text
 from readalongs.cli import align, make_xml
 
-# from readalongs.log import LOGGER
+
+def updateFormatVersion(input):  # swap the placeholder for the current format version
+    return input.replace("{{format_version}}", READALONG_FILE_FORMAT_VERSION)
+
+
+def updateStudioVersion(input):  # swap the placeholder for the current Studio VERSION
+    return input.replace("{{studio_version}}", VERSION)
 
 
 class TestMakeXMLCli(BasicTestCase):
@@ -90,9 +98,13 @@ def test_output_correct(self):
             ref_file, encoding="utf8"
         ) as ref_f:
             self.maxDiff = None
+            # update version info
+            ref_list = list(ref_f)
+            ref_list[1] = updateFormatVersion(ref_list[1])
+            ref_list[2] = updateStudioVersion(ref_list[2])
             self.assertListEqual(
                 list(output_f),
-                list(ref_f),
+                ref_list,
                 f"output {xml_file} and reference {ref_file} differ.",
             )
 
diff --git a/test/test_misc.py b/test/test_misc.py
index 340ea948..9ce219ec 100755
--- a/test/test_misc.py
+++ b/test/test_misc.py
@@ -13,7 +13,7 @@
 from pep440 import is_canonical
 from test_dna_utils import segments_from_pairs
 
-from readalongs._version import VERSION
+from readalongs._version import READALONG_FILE_FORMAT_VERSION, VERSION
 from readalongs.align import split_silences
 from readalongs.log import LOGGER, capture_logs
 from readalongs.text.util import (
@@ -91,7 +91,8 @@ def test_split_silences(self):
         self.assertEqual(words, ref)
 
     def test_get_attrib_recursive(self):
-        raw_xml = """<read-along version="1.0">
+        raw_xml = """<read-along version="%s">
+    <meta name="generator" content="@readalongs/studio (cli) %s"/>
             <text lang="text">
             <p lang="p1"><s>stuff</s><s lang="p1s2">nonsense</s></p>
             <p><s lang="p2s1">stuff</s><s>nonsense</s></p>
@@ -103,8 +104,12 @@ def test_get_attrib_recursive(self):
             <p><s xml:lang="p4s1" lang="not:xml:lang">stuff</s><s>nonsense<s xml:lang="p4p2c">!</s></s></p>
             </text>
             </read-along>
-        """
+        """ % (
+            READALONG_FILE_FORMAT_VERSION,
+            VERSION,
+        )
         xml = parse_xml(raw_xml)
+
         for i, s, lang in zip(
             itertools.count(),
             xml.xpath(".//s"),
diff --git a/test/test_web_api.py b/test/test_web_api.py
index dab9ba4f..9d92e3f1 100755
--- a/test/test_web_api.py
+++ b/test/test_web_api.py
@@ -7,6 +7,7 @@
 
 from basic_test_case import BasicTestCase
 
+from readalongs._version import READALONG_FILE_FORMAT_VERSION, VERSION
 from readalongs.log import LOGGER
 from readalongs.text.add_ids_to_xml import add_ids
 from readalongs.text.convert_xml import convert_xml
@@ -31,7 +32,12 @@ def API_CLIENT(self):
     def slurp_data_file(self, filename: str) -> str:
         """Convenience function to slurp a whole file in self.data_dir"""
         with open(os.path.join(self.data_dir, filename), encoding="utf8") as f:
-            return f.read().strip()
+            return (
+                f.read()
+                .strip()
+                .replace("{{format_version}}", READALONG_FILE_FORMAT_VERSION)
+                .replace("{{studio_version}}", VERSION)
+            )
 
     def test_assemble_from_plain_text(self):
         # Test the assemble endpoint with plain text
@@ -198,9 +204,9 @@ def test_debug(self):
         self.assertIsNone(content["g2ped"])
 
     hej_verden_xml = dedent(
-        """\
-        <?xml version='1.0' encoding='utf-8'?>
-        <read-along version="1.0">
+        """<?xml version='1.0' encoding='utf-8'?>
+        <read-along version="%s">
+    <meta name="generator" content="@readalongs/studio (cli) %s"/>
             <text xml:lang="dan" fallback-langs="und" id="t0">
                 <body id="t0b0">
                     <div type="page" id="t0b0d0">
@@ -215,6 +221,7 @@ def test_debug(self):
             </text>
         </read-along>
         """
+        % (READALONG_FILE_FORMAT_VERSION, VERSION)
     )
 
     def test_convert_to_TextGrid(self):
@@ -431,9 +438,9 @@ def test_cleanup_even_if_error(self):
         # that exception in a sane way, with a 422 status code, while
         # also making sure the temporary directory gets deleted.
         overlap_xml = dedent(
-            """\
-        <?xml version='1.0' encoding='utf-8'?>
-        <read-along version="1.0">
+            """<?xml version='1.0' encoding='utf-8'?>
+        <read-along version="%s">
+    <meta name="generator" content="@readalongs/studio (cli) %s"/>
             <text xml:lang="dan" fallback-langs="und" id="t0">
                 <body id="t0b0">
                     <div type="page" id="t0b0d0">
@@ -448,6 +455,7 @@ def test_cleanup_even_if_error(self):
             </text>
         </read-along>
             """
+            % (READALONG_FILE_FORMAT_VERSION, VERSION)
         )
         request = {
             "dur": 83.1,