diff --git a/src/modm_data/html/document.py b/src/modm_data/html/document.py index 3371819..4335fb5 100644 --- a/src/modm_data/html/document.py +++ b/src/modm_data/html/document.py @@ -1,7 +1,7 @@ # Copyright 2022, Niklas Hauser # SPDX-License-Identifier: MPL-2.0 -import re +import re, os import logging from pathlib import Path from functools import cached_property @@ -13,7 +13,7 @@ class Document: def __init__(self, path: str): self.path = Path(path) - self.relpath = self.path.relative_to(Path().cwd()) + self.relpath = os.path.relpath(self.path, Path().cwd()) self.fullname = self.path.stem self.name = self.fullname.split("-")[0] self.version = self.fullname.split("-")[1] @@ -41,6 +41,8 @@ def chapter(self, pattern: str) -> Chapter: LOGGER.error(f"Cannot find chapter with pattern '{pattern}'!") if len(chapters) > 1: LOGGER.error(f"Found multiple chapters with pattern '{pattern}'!") + for chapter in chapters: + LOGGER.error(f" - {chapter.name}") assert len(chapters) == 1 return chapters[0] diff --git a/src/modm_data/html/stmicro/__init__.py b/src/modm_data/html/stmicro/__init__.py index b70414f..bca0b82 100644 --- a/src/modm_data/html/stmicro/__init__.py +++ b/src/modm_data/html/stmicro/__init__.py @@ -1,7 +1,8 @@ # Copyright 2022, Niklas Hauser # SPDX-License-Identifier: MPL-2.0 -from .datasheet import DatasheetMicro, DatasheetSensor +from .datasheet_sensor import DatasheetSensor +from .datasheet_stm32 import DatasheetStm32 from .reference import ReferenceManual from .document import load_documents, load_document_devices from .document import datasheet_for_device, reference_manual_for_device diff --git a/src/modm_data/html/stmicro/datasheet_sensor.py b/src/modm_data/html/stmicro/datasheet_sensor.py new file mode 100644 index 0000000..adbb57e --- /dev/null +++ b/src/modm_data/html/stmicro/datasheet_sensor.py @@ -0,0 +1,25 @@ +# Copyright 2022, Niklas Hauser +# SPDX-License-Identifier: MPL-2.0 + +import re +import itertools +from pathlib import Path +from functools import cached_property, cache +from collections import defaultdict + +from .helper import split_device_filter, split_package +from ...html.text import ReDict + +import modm_data.html as html + + +class DatasheetSensor(html.Document): + def __init__(self, path: str): + super().__init__(path) + + def __repr__(self) -> str: + return f"DSsensor({self.fullname})" + + @cache + def register_map(self, assert_table=True): + pass diff --git a/src/modm_data/html/stmicro/datasheet.py b/src/modm_data/html/stmicro/datasheet_stm32.py similarity index 97% rename from src/modm_data/html/stmicro/datasheet.py rename to src/modm_data/html/stmicro/datasheet_stm32.py index 5fe4147..c77f16d 100644 --- a/src/modm_data/html/stmicro/datasheet.py +++ b/src/modm_data/html/stmicro/datasheet_stm32.py @@ -14,14 +14,14 @@ import modm_data.html as html -class DatasheetMicro(html.Document): +class DatasheetStm32(html.Document): def __init__(self, path: str): super().__init__(path) self._id = {} self._devices = {} def __repr__(self) -> str: - return f"DSµC({self.fullname})" + return f"DSstm32({self.fullname})" @cached_property def device_family(self) -> str: @@ -247,11 +247,3 @@ def packages_pins(self): data_pin["alternate"][af].extend(signals) return data_packages, data_pins - - -class DatasheetSensor(html.Document): - def __init__(self, path: str): - super().__init__(path) - - def __repr__(self) -> str: - return f"DSsens({self.fullname})" diff --git a/src/modm_data/html/stmicro/document.py b/src/modm_data/html/stmicro/document.py index ade7d16..3fa7649 100644 --- 
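Note on the Document.relpath change above: pathlib's Path.relative_to() only accepts true subpaths of the base directory, while os.path.relpath() can step upwards with "..", presumably why it was swapped in for documents that live outside the current working directory. A standalone sketch with illustrative paths, not project code:

    import os
    from pathlib import Path

    base = Path("/work/project")
    inside = Path("/work/project/docs/DS1234-v1.html")
    outside = Path("/work/external/DS1234-v1.html")

    print(inside.relative_to(base))        # docs/DS1234-v1.html
    print(os.path.relpath(outside, base))  # ../external/DS1234-v1.html

    try:
        outside.relative_to(base)          # raises ValueError: not a subpath of base
    except ValueError as error:
        print(error)
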
a/src/modm_data/html/stmicro/document.py +++ b/src/modm_data/html/stmicro/document.py @@ -5,7 +5,8 @@ from collections import defaultdict from ...html import Document from ...utils import cache_path, ext_path -from .datasheet import DatasheetMicro, DatasheetSensor +from .datasheet_stm32 import DatasheetStm32 +from .datasheet_sensor import DatasheetSensor from .reference import ReferenceManual from ...owl import DeviceIdentifier from ...owl.stmicro import did_from_string @@ -27,7 +28,7 @@ def load_documents() -> list: # FIXME: Better detection that DS13252 is a STM32WB55 module, not a chip! if any("STM32" in h.html for h in chap[0].headings()) and \ "DS13252" not in doc.name and "DS14096" not in doc.name: - documents[doc.name][doc.version] = DatasheetMicro(path) + documents[doc.name][doc.version] = DatasheetStm32(path) else: documents[doc.name][doc.version] = DatasheetSensor(path) elif "RM" in doc.name: @@ -35,7 +36,7 @@ def load_documents() -> list: return documents -def load_document_devices(use_cached=True) -> tuple[dict[DeviceIdentifier, DatasheetMicro], +def load_document_devices(use_cached=True) -> tuple[dict[DeviceIdentifier, DatasheetStm32], dict[DeviceIdentifier, ReferenceManual]]: global DOCUMENT_CACHE if DOCUMENT_CACHE is not None: @@ -48,7 +49,7 @@ def load_document_devices(use_cached=True) -> tuple[dict[DeviceIdentifier, Datas docs = {} for path in set(json_data["ds"].values()): - docs[path] = DatasheetMicro(path) + docs[path] = DatasheetStm32(path) for path in set(json_data["rm"].values()): docs[path] = ReferenceManual(path) datasheets = {did_from_string(did): docs[path] @@ -63,7 +64,7 @@ def load_document_devices(use_cached=True) -> tuple[dict[DeviceIdentifier, Datas doc = list(versions.values())[-1] # print(doc.path_pdf.relative_to(Path().cwd()), doc.path.relative_to(Path().cwd())) # print(doc.devices) - if isinstance(doc, DatasheetMicro): + if isinstance(doc, DatasheetStm32): if not doc.devices: raise ValueError(f"{doc} has no associated devices!") for dev in doc.devices: @@ -120,7 +121,7 @@ def _document_for_device(did: DeviceIdentifier, documents): return None -def datasheet_for_device(did: DeviceIdentifier) -> DatasheetMicro: +def datasheet_for_device(did: DeviceIdentifier) -> DatasheetStm32: datasheets, _ = load_document_devices() return _document_for_device(did, datasheets) diff --git a/src/modm_data/html2owl/stmicro/__main__.py b/src/modm_data/html2owl/stmicro/__main__.py index bea1104..0d11c32 100644 --- a/src/modm_data/html2owl/stmicro/__main__.py +++ b/src/modm_data/html2owl/stmicro/__main__.py @@ -9,7 +9,7 @@ from collections import defaultdict from multiprocessing.pool import ThreadPool -from modm_data.html.stmicro import DatasheetMicro, ReferenceManual, load_documents +from modm_data.html.stmicro import DatasheetStm32, ReferenceManual, load_documents from modm_data.owl import Store from modm_data.py2owl.stmicro import owl_from_doc @@ -25,7 +25,7 @@ def main(): for name, versions in load_documents().items(): # always use latest version for now doc = list(versions.values())[-1] - if isinstance(doc, DatasheetMicro): + if isinstance(doc, DatasheetStm32): docs.append(doc) elif isinstance(doc, ReferenceManual): docs.append(doc) @@ -40,7 +40,7 @@ def main(): path = Path(args.document).absolute() if path.stem.startswith("DS"): - doc = DatasheetMicro(path) + doc = DatasheetStm32(path) elif path.stem.startswith("RM"): doc = ReferenceManual(path) diff --git a/src/modm_data/html2svd/stmicro/__init__.py b/src/modm_data/html2svd/stmicro/__init__.py index 22e4fbf..ee08e88 
100644 --- a/src/modm_data/html2svd/stmicro/__init__.py +++ b/src/modm_data/html2svd/stmicro/__init__.py @@ -2,3 +2,4 @@ # SPDX-License-Identifier: MPL-2.0 from .reference import memory_map_from_reference_manual +from .datasheet import memory_map_from_datasheet diff --git a/src/modm_data/html2svd/stmicro/__main__.py b/src/modm_data/html2svd/stmicro/__main__.py index 80347d1..9f2270c 100644 --- a/src/modm_data/html2svd/stmicro/__main__.py +++ b/src/modm_data/html2svd/stmicro/__main__.py @@ -8,8 +8,8 @@ from pathlib import Path from multiprocessing.pool import ThreadPool -from modm_data.html.stmicro import ReferenceManual, load_documents -from modm_data.html2svd.stmicro import memory_map_from_reference_manual +from modm_data.html.stmicro import ReferenceManual, DatasheetSensor, load_documents +from modm_data.html2svd.stmicro import memory_map_from_reference_manual, memory_map_from_datasheet from modm_data.svd import format_svd, write_svd from modm_data.utils import ext_path from anytree import RenderTree @@ -17,7 +17,8 @@ def main(): parser = argparse.ArgumentParser() - parser.add_argument("--document", type=str, default="") + parser.add_argument("--stm32", type=Path) + parser.add_argument("--sensor", type=Path) parser.add_argument("--all", action="store_true", default=False) args = parser.parse_args() @@ -30,7 +31,7 @@ def main(): docs.append(doc) Path("log/stmicro/svd").mkdir(exist_ok=True, parents=True) - calls = [f"python3 -m modm_data.html2svd.stmicro --document {doc.path} " + calls = [f"python3 -m modm_data.html2svd.stmicro --stm32 {doc.path} " f"> log/stmicro/svd/html_{doc.name}.txt 2>&1" for doc in docs] with ThreadPool() as pool: retvals = list(tqdm.tqdm(pool.imap(lambda c: subprocess.run(c, shell=True), calls), total=len(calls))) @@ -38,12 +39,17 @@ def main(): if retval.returncode != 0: print(call) return all(r.returncode == 0 for r in retvals) - path = Path(args.document).absolute() - doc = ReferenceManual(path) + if args.stm32: + doc = ReferenceManual(args.stm32.absolute()) + elif args.sensor: + doc = DatasheetSensor(args.sensor.absolute()) print(doc.path_pdf.relative_to(Path().cwd()), doc.path.relative_to(Path().cwd())) - mmaptrees = memory_map_from_reference_manual(doc) + if args.stm32: + mmaptrees = memory_map_from_reference_manual(doc) + elif args.sensor: + mmaptrees = memory_map_from_datasheet(doc) for mmaptree in mmaptrees: print(RenderTree(mmaptree, maxlevel=2)) svd = format_svd(mmaptree) diff --git a/src/modm_data/html2svd/stmicro/datasheet.py b/src/modm_data/html2svd/stmicro/datasheet.py new file mode 100644 index 0000000..35e93f4 --- /dev/null +++ b/src/modm_data/html2svd/stmicro/datasheet.py @@ -0,0 +1,379 @@ +# Copyright 2022, Niklas Hauser +# SPDX-License-Identifier: MPL-2.0 + +import re +from functools import cached_property +from collections import defaultdict +from anytree import RenderTree + +from ...html.stmicro.helper import split_device_filter +from ...svd import * +from ...header2svd.stmicro.tree import _normalize_order +from ...cubemx import cubemx_device_list +from ...html import replace as html_replace + + +def _deduplicate_bit_fields(bit_fields): + named_fields = defaultdict(set) + for field in sorted(bit_fields, key=lambda f: f.position): + named_fields[field.name].add(field.position) + + new_fields = [] + for name, positions in named_fields.items(): + position = min(positions) + width = max(positions) + 1 - position + new_fields.append(BitField(name, position, width)) + + return new_fields + + +def _peripheral_map_to_tree(chapter, peripheral_maps): + 
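The new _deduplicate_bit_fields above collapses per-bit table entries that share a name into one field spanning the minimum to the maximum position (any gap in between is absorbed into the width). A standalone illustration, using a simple dataclass as a stand-in for the project's svd BitField node:

    from collections import defaultdict
    from dataclasses import dataclass

    @dataclass
    class Field:                 # stand-in for modm_data.svd.BitField
        name: str
        position: int
        width: int = 1

    def deduplicate(fields):
        named = defaultdict(set)
        for field in sorted(fields, key=lambda f: f.position):
            named[field.name].add(field.position)
        # one field per name, spanning min..max of the collected positions
        return [Field(name, min(pos), max(pos) + 1 - min(pos))
                for name, pos in named.items()]

    bits = [Field("EN", 0), Field("DATA", 8), Field("DATA", 9), Field("DATA", 10)]
    print(deduplicate(bits))
    # [Field(name='EN', position=0, width=1), Field(name='DATA', position=8, width=3)]
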
cap_replace = {"STM32F415/417xx": "STM32F415/417"} + + peripheral_trees = [] + for caption, (heading, register_map) in peripheral_maps.items(): + print(caption) + if match := re.search(f"OTG_[FH]S", caption): + replace_name = peripheral_name = "OTG" + elif match := re.search(f"JPEG", caption): + replace_name = peripheral_name = "JPEG" + elif match := re.search(f"CCU ", caption): + peripheral_name = "CANCCU" + replace_name = "FDCAN_CCU" + else: + peripheral_names = {n.split("_")[0] for n in register_map.keys()} + replace_name = peripheral_name = list(sorted(peripheral_names))[-1] + if all(p.startswith("COMP") for p in peripheral_names): + peripheral_name = "COMP" + replace_name = "" + if all(p.startswith("OPAMP") for p in peripheral_names): + peripheral_name = "OPAMP" + replace_name = "" + elif len(peripheral_names) > 1: + print(f"Multiple peripheral names detected: {peripheral_names}") + + if peripheral_name == "M7": continue + # Some chapters have multiple tables for multiple instances + filters = defaultdict(set) + instances = set() + if peripheral_name.startswith("LPTIM"): + replace_name = peripheral_name = "LPTIM" + elif peripheral_name.startswith("DLYB"): + instances.add("DLYB") + elif peripheral_name.startswith("TIM"): + peripheral_name = "TIM" + if match := re.search(r"TIM(\d+) +to +TIM(\d+)", caption): + irange = list(sorted([int(match.group(1)), int(match.group(2))])) + irange = range(irange[0], irange[1] + 1) + instances.add(f"TIM({'|'.join(map(str, irange))})") + for pfilter in re.findall(r"TIM\d+(?:/\d+)*", caption): + if "/" in pfilter: + pfilter = f"TIM({pfilter[3:].replace('/', '|')})" + instances.add(f"^{pfilter}$") + elif "GPIOx" in peripheral_name: + peripheral_name = "GPIO" + for pfilter in re.findall(r"GPIO[A-Z](?:[/A-Z]+]+)?", caption): + if "/" in pfilter: + pfilter = f"GPIO({pfilter[4:].replace('/', '|')})" + instances.add(pfilter) + if instances: + filters["instances"].update(instances) + + devices = set() + for pfilter in re.findall(r"STM32[\w/]+", html_replace(caption, **cap_replace)): + devices.update(split_device_filter(pfilter) if "/" in pfilter else [pfilter]) + if devices: + filters["devices"].update(d.replace("x", ".") for d in devices) + + if "connectivity line" in chapter.name: + filters["devices"].add("STM32F10[57]") + elif "low medium high and xl density" in chapter.name: + filters["devices"].add("STM32F10[123]") + + peripheral_type = PeripheralType(peripheral_name, _chapter=chapter, + filters=dict(filters), section=heading) + for rname, (offset, bitfields) in register_map.items(): + filters = {} + if replace_name: + if replace_name == "OTG" and (match := re.match("^OTG_[FH]S", rname)): + filters["instances"] = {match.group(0)} + nrname = rname.replace(match.group(0) + "_", "") + else: + nrname = rname.replace(replace_name + "_", "") + if len(rname) == len(nrname) and "_" in rname: + instance = rname.split("_")[0] + filters["instances"] = {instance+"$"} + nrname = rname.replace(instance + "_", "") + print(instance, nrname) + rname = nrname + if match := re.match("(.*?)connectivitylinedevices", rname): + rname = match.group(1) + filters["devices"] = {r"STM32F10[57]"} + elif match := re.match("(.*?)low,medium,highandXLdensitydevices", rname): + rname = match.group(1) + filters["devices"] = {r"STM32F10[123]"} + try: offset = int(offset, 16) + except: pass + register_type = Register(rname, offset, filters=filters, parent=peripheral_type) + fields = [BitField(field, bit) for bit, field in bitfields.items()] + register_type.children = 
_deduplicate_bit_fields(fields) + + peripheral_trees.append(peripheral_type) + + return peripheral_trees + + +def _expand_register_offsets(peripheral_trees): + for peripheral in peripheral_trees: + unexpanded = defaultdict(list) + for register in peripheral.children: + if (isinstance(register.offset, str) or + ("CAN" in peripheral.name and "F1R2" in register.name) or + ("GFXMMU" in peripheral.name and "LUT0L" in register.name) or + ("GFXMMU" in peripheral.name and "LUT0H" in register.name) or + ("HSEM" in peripheral.name and "R1" in register.name)): + unexpanded[str(register.offset)].append(register) + for offsets, registers in unexpanded.items(): + print(offsets, registers) + + conv = lambda i: int(i, 16) + # if match := re.search(r"x=([\d,]+)", registers[0].name): + # offsets = [offsets] * len(match.group(1).split(",")) + if any(pat in offsets for pat in ["x=", "channelnumber"]): + if matches := re.findall(r"(0x[\dA-Fa-f]+)\(x=\w+\)", offsets): + orange = enumerate(map(conv, matches)) + formula = "x" + elif "channelnumber" in offsets: + orange = enumerate(range(0, 16)) + formula = offsets.replace("channelnumber", "x") + elif "moni-ringunitnumber" in offsets: + orange = [(i, i) for i in range(1, 6)] + formula = offsets.split("(x=")[0] + else: + match = re.search(r"\(x=(\d+)(?:-\.?|\.\.)(\d+)", offsets) + orange = [(i, i) for i in range(int(match.group(1)), int(match.group(2)) + 1)] + formula = re.split(r"\(x=|,", offsets)[0] + offsets = [(ii, eval(formula, None, {"x": x})) for ii, x in orange] + print(formula, offsets, orange) + elif "-" in offsets: + omin, omax = list(map(conv, offsets.split("-"))) + offsets = enumerate(range(omin, omax+1, 4)) + elif "or" in offsets: + offsets = enumerate(list(map(conv, offsets.split("or")))) + elif "F1R2" in registers[0].name: + offsets = enumerate(range(int(offsets), int(offsets)+4*25*2+1, 4)) + elif "LUT0" in registers[0].name: + offsets = enumerate(range(int(offsets), int(offsets)+4*2044+1, 8)) + elif "HSEM" in peripheral.name: + print(offsets) + offsets = enumerate(range(int(offsets), int(offsets)+4*29+1, 4)) + else: + print(f"Unknown expansion format for {offsets}!") + return False + + fields = registers[0].children + if all(re.match(r"BKP\d+R", r.name) for r in registers): + name_template = lambda i: f"BKP{i}R" + elif "SAI" in peripheral.name: + name_template = lambda i: f"{registers[0].name[1:]}{chr(i+ord('A'))}" + elif "HRTIM" in peripheral.name: + name_template = lambda i: registers[0].name.replace("x", chr(i+ord('A'))) + elif "CAN" in peripheral.name: + name_template = lambda i: f"F{(i+3)//2}R{(i+1)%2+1}" + elif "GFXMMU" in peripheral.name: + name_template = lambda i: f"LUT{i}{registers[0].name[-1]}" + elif "HSEM" in peripheral.name: + name_template = lambda i: f"{registers[0].name[:-1]}{i+1}" + elif len(registers) == 1: + # if "x=" in registers[0].name: + # name_template = lambda i: f"{registers[0].name.split('x=')[0]}.{i}" + if "x" in registers[0].name: + name_template = lambda i: registers[0].name.replace("x", str(i)) + else: + name_template = lambda i: f"{registers[0].name}.{i}" + else: + print(f"Unknown expansion pattern for {registers}!") + return False + + for ii, offset in offsets: + nreg = Register(name_template(ii), offset, filters=registers[0].filters, parent=peripheral) + nreg.children = [BitField(f.name, f.position, f.width) for f in fields] + for register in registers: + register.parent = None + + return True + + +def _link_instance_to_type(ds, peripheral_types, instance_offsets): + cap_replace = {} + peripherals = set() + for 
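_expand_register_offsets turns the textual offset specifications from the register-map tables into concrete per-register offsets. A standalone sketch of its simplest case, a hexadecimal "min-max" range expanded in 4-byte steps; the BKPxR name template mirrors the lambda used for the backup registers in the real code:

    def expand_range(offsets: str, name_template):
        # "0x50-0x5C" -> one 32-bit register every 4 bytes, named by the template
        omin, omax = (int(o, 16) for o in offsets.split("-"))
        return [(name_template(i), offset)
                for i, offset in enumerate(range(omin, omax + 1, 4))]

    print(expand_range("0x50-0x5C", lambda i: f"BKP{i}R"))
    # [('BKP0R', 80), ('BKP1R', 84), ('BKP2R', 88), ('BKP3R', 92)]
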
caption, locations in ds.peripherals.items(): + filters = defaultdict(set) + devices = set() + for pfilter in re.findall(r"STM32[\w/]+", html_replace(caption, **cap_replace)): + devices.update(split_device_filter(pfilter) if "/" in pfilter else [pfilter]) + if "Low and medium-density device" in caption: + devices.add("STM32F10..[468B]") + elif "High-density device" in caption: + devices.add("STM32F10..[CDE]") + if devices: + filters["devices"].update(d.replace("x", ".") for d in devices) + + for (names, amin, amax, bus, sections) in locations: + for name in names: + ptypes = [t for tname, types in peripheral_types.items() for t in types if tname == name] + if not ptypes: + ptypes = [t for tname, types in peripheral_types.items() for t in types if tname in name] + if not ptypes: + ptypes = [t for tname, types in peripheral_types.items() + for t in types if t.section in sections] + if not ptypes and name.startswith("UART"): + ptypes = [t for tname, types in peripheral_types.items() for t in types if tname == "USART"] + if not ptypes and "BKP" == name: + ptypes = [t for tname, types in peripheral_types.items() for t in types if tname == "RTC"] + if not ptypes: + print(f"Cannot find peripheral type for instance {name} in section {sections}!") + nsections = list(sorted({t.section for types in peripheral_types.values() for t in types})) + print(f"Available sections are {nsections}.") + exit(1) + offsets = [v for k, v in instance_offsets.items() if re.search(k, name)] + if offsets: amin += offsets[0] + p = Peripheral(name, ptypes, amin, filters=dict(filters), sections=sections) + peripherals.add(p) + return peripherals + + +def _resolve_filters(filters, **kw): + keys = [] + for key, value in kw.items(): + if values := filters.get(key): + keys.append(key) + if any(re.search(pat, value, flags=re.IGNORECASE) for pat in values): + return True + return not keys + + +def _normalize_instances(memtree, peripherals, device): + for peripheral in peripherals: + if not _resolve_filters(peripheral.filters, devices=device.string): + continue + ptypes = peripheral.type + if len(ptypes) > 1: + ptypes = [ptype for ptype in sorted(peripheral.type, key=lambda p: -len(p.filters)) + if _resolve_filters(ptype.filters, instances=peripheral.name, devices=device.string)] + if len(ptypes) > 1 and any(p.filters for p in ptypes): + ptypes = [p for p in ptypes if p.filters] + if len(ptypes) > 1: + nptypes = [p for p in ptypes if any(p.section.startswith(per) or per.startswith(p.section) + for per in peripheral.sections)] + if nptypes: ptypes = nptypes + for pname in ["DMAMUX", "BDMA", "OCTOSPI"]: + if len(ptypes) > 1 and pname in peripheral.name: + ptypes = [p for p in ptypes if pname in p.name] + + if len(ptypes) != 1: + print(f"Unknown peripheral type {device} {peripheral} {ptypes}!") + continue + ptype = ptypes[0] + + nper = Peripheral(peripheral.name, ptype, peripheral.address, + filters=peripheral.filters, parent=memtree) + rmap = defaultdict(list) + for treg in ptype.children: + rmap[treg.name].append(treg) + + for name, tregs in rmap.items(): + regs = [reg for reg in sorted(tregs, key=lambda p: -len(p.filters)) + if _resolve_filters(reg.filters, instances=peripheral.name, devices=device.string)] + if len(regs) > 1 and any(r.filters for r in regs): + regs = [r for r in regs if r.filters] + if len(regs) != 1: + if len(regs) > 1: + print(f"Unsuccessful register filtering {peripheral.name} {device}: {tregs}!") + continue + treg = regs[0] + if _resolve_filters(treg.filters, devices=device.string, instances=nper.name): + 
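_resolve_filters is easiest to read from its call sites: a keyword whose key has no filter patterns always passes, while a key with patterns passes only if at least one regex matches. The same logic reproduced standalone with example calls:

    import re

    def resolve_filters(filters, **kw):
        keys = []
        for key, value in kw.items():
            if values := filters.get(key):
                keys.append(key)
                if any(re.search(pat, value, flags=re.IGNORECASE) for pat in values):
                    return True
        return not keys

    print(resolve_filters({}, devices="STM32F103RB"))                             # True: no filter for "devices"
    print(resolve_filters({"devices": {"STM32F10[57]"}}, devices="STM32F105VC"))  # True: pattern matches
    print(resolve_filters({"devices": {"STM32F10[57]"}}, devices="STM32F103RB"))  # False: pattern present, no match
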
preg = Register(treg.name, offset=treg.offset, width=treg.width, + filters=treg.filters, parent=nper) + for tbit in treg.children: + BitField(tbit.name, tbit.position, tbit.width, parent=preg) + + +def _build_device_trees(ds, peripheral_types, instance_offsets): + devices = ds.filter_devices(modm_device_list()) + memtrees = [] + + for device in devices: + memtree = Device(device) + peripherals = _link_instance_to_type(ds, peripheral_types, instance_offsets) + _normalize_instances(memtree, peripherals, device) + memtrees.append(memtree) + return memtrees + + +def _compactify_device_trees(memtrees): + memtree_hashes = defaultdict(list) + for memtree in memtrees: + memtree_hashes[hash(memtree)].append(memtree) + + new_memtrees = [] + for memtrees in memtree_hashes.values(): + memtree = memtrees[0] + for mtree in memtrees[1:]: + memtree.compatible.extend(mtree.compatible) + memtree.compatible.sort(key=lambda d: d.string) + memtree.name = memtree.compatible[0] + new_memtrees.append(memtree) + + return new_memtrees + + +def memory_map_from_datasheet(ds): + register = ds.chapter(r"chapter +\d+ +register +mapping") + table = register.tables("register")[0] + print(table) + registers = {} + for row in table.cell_rows(): + cname = row.match_value("name")[0].text() + ctype = row.match_value("type")[0].text() + caddr = row.match_value(r"address.*?hex")[0].text() + cvalue = row.match_value(r"default")[0].text() + ccomment = row.match_value(r"comment")[0].text() + if not ctype: continue + cvalue = int(cvalue, 2) if cvalue.isdigit() else None + print(cname, ctype, int(caddr, 16), cvalue, ccomment) + + + + + + exit(1) + + peripheral_types = defaultdict(set) + instance_offsets = {} + for chapter in all_chapters: + print() + peripheral_maps, peripheral_offsets = ds.peripheral_maps(chapter, assert_table=chapter in type_chapters) + instance_offsets.update(peripheral_offsets) + peripheral_maps = _peripheral_map_to_tree(chapter, peripheral_maps) + if not _expand_register_offsets(peripheral_maps): + exit(1) + for pmap in peripheral_maps: + print(pmap) + # print(RenderTree(pmap, maxlevel=2)) + peripheral_types[pmap.name].add(pmap) + + for name, pmaps in peripheral_types.items(): + print(name) + for pmap in pmaps: + print(pmap.section, pmap._chapter._relpath) + print(RenderTree(pmap, maxlevel=2)) + + + memtrees = _build_device_trees(ds, peripheral_types, instance_offsets) + # for tree in memtrees: + # print(RenderTree(tree, maxlevel=2)) + # exit(1) + memtrees = _compactify_device_trees(memtrees) + memtrees = [_normalize_order(memtree) for memtree in memtrees] + return memtrees diff --git a/src/modm_data/pdf/__init__.py b/src/modm_data/pdf/__init__.py index ed2f441..aa8d6b4 100644 --- a/src/modm_data/pdf/__init__.py +++ b/src/modm_data/pdf/__init__.py @@ -16,5 +16,6 @@ from .page import Page from .character import Character from .link import ObjLink, WebLink -from .graphics import Path, Image +from .path import Path +from .image import Image from .render import render_page_pdf diff --git a/src/modm_data/pdf/document.py b/src/modm_data/pdf/document.py index 00c7c4d..58917af 100644 --- a/src/modm_data/pdf/document.py +++ b/src/modm_data/pdf/document.py @@ -21,7 +21,7 @@ from collections import defaultdict from .page import Page -LOGGER = logging.getLogger(__name__) +_LOGGER = logging.getLogger(__name__) # We cannot monkey patch this class, since it's a named tuple. 
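_compactify_device_trees groups equal memory trees (via their hash) and merges their compatible device lists, so that only one SVD per unique register layout is emitted. A simplified sketch of the idea using plain dicts instead of the project's Device/anytree nodes:

    from collections import defaultdict

    def compactify(trees, key):
        groups = defaultdict(list)
        for tree in trees:
            groups[key(tree)].append(tree)     # bucket identical layouts together
        merged = []
        for same in groups.values():
            tree = same[0]
            for other in same[1:]:
                tree["compatible"].extend(other["compatible"])
            tree["compatible"].sort()
            merged.append(tree)
        return merged

    trees = [{"layout": "A", "compatible": ["stm32f103c8"]},
             {"layout": "A", "compatible": ["stm32f103cb"]},
             {"layout": "B", "compatible": ["stm32f107vc"]}]
    print(compactify(trees, key=lambda t: t["layout"]))
    # two trees remain: layout A listing both F103 variants, layout B with the F107
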
:-( @@ -48,11 +48,11 @@ def __init__(self, path: Path, autoclose: bool = False): """ path = Path(path) self.name: str = path.stem - super().__init__(path, autoclose=autoclose) """Stem of the document file name""" + super().__init__(path, autoclose=autoclose) self._path = path self._bbox_cache = defaultdict(dict) - LOGGER.debug(f"Loading: {path}") + _LOGGER.debug(f"Loading: {path}") @cached_property def metadata(self) -> dict[str, str]: @@ -84,7 +84,7 @@ def toc(self) -> list[pp.PdfOutlineItem]: outline = _OutlineItem(toc.level, toc.title, toc.is_closed, toc.n_kids, toc.page_index or last_page_index, toc.view_mode, toc.view_pos) - last_page_index = toc.page_index + last_page_index = toc.page_index or last_page_index tocs.add(outline) return list(sorted(list(tocs), key=lambda o: (o.page_index, o.level, o.title))) diff --git a/src/modm_data/pdf/image.py b/src/modm_data/pdf/image.py new file mode 100644 index 0000000..24a4041 --- /dev/null +++ b/src/modm_data/pdf/image.py @@ -0,0 +1,86 @@ +# Copyright 2022, Niklas Hauser +# SPDX-License-Identifier: MPL-2.0 + +""" +# PDF Images + +Images support bitmap data. +""" + +from functools import cached_property +import pypdfium2 as pp +from ..utils import Point, Rectangle, Line + + +class Image(pp.PdfImage): + """ + This class extends `pypdfium2.PdfImage` to align it with the interface of + the `Path` class so that it can be used in the same + algorithms without filtering. + + You must construct the images by calling `modm_data.pdf.page.Page.images`. + + .. note:: Images are currently ignored. + """ + # Overwrite the PdfPageObject.__new__ function + def __new__(cls, *args, **kwargs): + return object.__new__(cls) + + def __init__(self, obj): + """ + :param obj: Page object of the image. + """ + super().__init__(obj.raw, obj.page, obj.pdf, obj.level) + assert pp.raw.FPDFPageObj_GetType(obj.raw) == pp.raw.FPDF_PAGEOBJ_IMAGE + self.type = pp.raw.FPDF_PAGEOBJ_IMAGE + + self.count: int = 4 + """Number of segments. Always 4 due to rectangular image form. + (For compatibility with `Path.count`.)""" + self.stroke: int = 0 + """The border stroke color. Always 0. + (For compatibility with `Path.stroke`.)""" + self.fill: int = 0 + """The image fill color. Always 0. + (For compatibility with `Path.fill`.)""" + self.width: float = 0 + """The border line width. Always 0. + (For compatibility with `Path.width`.)""" + + @cached_property + def matrix(self) -> pp.PdfMatrix: + """The transformation matrix.""" + return self.get_matrix() + + @cached_property + def bbox(self) -> Rectangle: + """The bounding box of the image.""" + bbox = Rectangle(*self.get_pos()) + if self.page.rotation: + bbox = Rectangle(bbox.p0.y, self.page.height - bbox.p1.x, + bbox.p1.y, self.page.height - bbox.p0.x) + return bbox + + @cached_property + def points(self) -> list[Point]: + """ + The 4 points of the bounding box. + (For compatibility with `Path.points`.) + """ + points = self.bbox.points + if self.page.rotation: + points = [Point(p.y, self.page.height - p.x, p.type) for p in points] + return points + + @cached_property + def lines(self) -> list[Line]: + """ + The 4 lines of the bounding box. + (For compatibility with `Path.lines`.) 
+ """ + p = self.points + return [Line(p[0], p[1], p[1].type, 0), Line(p[1], p[2], p[2].type, 0), + Line(p[2], p[3], p[3].type, 0), Line(p[3], p[0], p[0].type, 0)] + + def __repr__(self) -> str: + return f"I{self.bbox}" diff --git a/src/modm_data/pdf/page.py b/src/modm_data/pdf/page.py index 3d86f1d..2beb50c 100644 --- a/src/modm_data/pdf/page.py +++ b/src/modm_data/pdf/page.py @@ -19,10 +19,11 @@ from ..utils import Rectangle, Region from .character import Character from .link import ObjLink, WebLink -from .graphics import Path, Image +from .path import Path +from .image import Image from .structure import Structure -LOGGER = logging.getLogger(__name__) +_LOGGER = logging.getLogger(__name__) class Page(pp.PdfPage): @@ -46,7 +47,7 @@ def __init__(self, document: "modm_data.pdf.Document", index: int): self._weblinks = None self._linked = False - LOGGER.debug(f"Loading: {index}") + _LOGGER.debug(f"Loading: {index}") self._text = self.get_textpage() self._linkpage = pp.raw.FPDFLink_LoadWebLinks(self._text) @@ -177,9 +178,8 @@ def images(self) -> list[Image]: """All images.""" return [Image(o) for o in self.get_objects([pp.raw.FPDF_PAGEOBJ_IMAGE])] - def graphic_clusters(self, predicate: Callable[[Path|Image], bool] = None, - absolute_tolerance: float = None) -> \ - list[tuple[Rectangle, list[Path]]]: + def graphic_clusters(self, predicate: Callable[[Path | Image], bool] = None, + absolute_tolerance: float = None) -> list[tuple[Rectangle, list[Path]]]: if absolute_tolerance is None: absolute_tolerance = min(self.width, self.height) * 0.01 @@ -287,4 +287,4 @@ def _key(char): bbox = bbox.rotated(-self.rotation - char._rotation).translated(char.origin) char._bbox = bbox elif char.unicode not in {0x20, 0xa, 0xd}: - LOGGER.debug(f"Unable to fix bbox for {char.descr()}!") + _LOGGER.debug(f"Unable to fix bbox for {char.descr()}!") diff --git a/src/modm_data/pdf/graphics.py b/src/modm_data/pdf/path.py similarity index 66% rename from src/modm_data/pdf/graphics.py rename to src/modm_data/pdf/path.py index aca3f32..bf59f28 100644 --- a/src/modm_data/pdf/graphics.py +++ b/src/modm_data/pdf/path.py @@ -7,8 +7,6 @@ PDF uses a subset of the PostScript graphics language, which draws vector paths with various rendering options. We are only interested in the basic properties, in particular, for recognizing table cell borders. - -In addition, images support bitmap data. """ import ctypes @@ -148,77 +146,3 @@ def lines(self) -> list[Line]: def __repr__(self) -> str: points = ",".join(repr(p) for p in self.points) return f"P{self.count}={points}" - - -class Image(pp.PdfImage): - """ - This class extends `pypdfium2.PdfImage` to align it with the interface of - the `Path` class so that it can be used in the same - algorithms without filtering. - - You must construct the images by calling `modm_data.pdf.page.Page.images`. - - .. note:: Images are currently ignored. - """ - # Overwrite the PdfPageObject.__new__ function - def __new__(cls, *args, **kwargs): - return object.__new__(cls) - - def __init__(self, obj): - """ - :param obj: Page object of the image. - """ - super().__init__(obj.raw, obj.page, obj.pdf, obj.level) - assert pp.raw.FPDFPageObj_GetType(obj.raw) == pp.raw.FPDF_PAGEOBJ_IMAGE - self.type = pp.raw.FPDF_PAGEOBJ_IMAGE - - self.count: int = 4 - """Number of segments. Always 4 due to rectangular image form. - (For compatibility with `Path.count`.)""" - self.stroke: int = 0 - """The border stroke color. Always 0. - (For compatibility with `Path.stroke`.)""" - self.fill: int = 0 - """The image fill color. 
Always 0. - (For compatibility with `Path.fill`.)""" - self.width: float = 0 - """The border line width. Always 0. - (For compatibility with `Path.width`.)""" - - @cached_property - def matrix(self) -> pp.PdfMatrix: - """The transformation matrix.""" - return self.get_matrix() - - @cached_property - def bbox(self) -> Rectangle: - """The bounding box of the image.""" - bbox = Rectangle(*self.get_pos()) - if self.page.rotation: - bbox = Rectangle(bbox.p0.y, self.page.height - bbox.p1.x, - bbox.p1.y, self.page.height - bbox.p0.x) - return bbox - - @cached_property - def points(self) -> list[Point]: - """ - The 4 points of the bounding box. - (For compatibility with `Path.points`.) - """ - points = self.bbox.points - if self.page.rotation: - points = [Point(p.y, self.page.height - p.x, p.type) for p in points] - return points - - @cached_property - def lines(self) -> list[Line]: - """ - The 4 lines of the bounding box. - (For compatibility with `Path.lines`.) - """ - p = self.points - return [Line(p[0], p[1], p[1].type, 0), Line(p[1], p[2], p[2].type, 0), - Line(p[2], p[3], p[3].type, 0), Line(p[3], p[0], p[0].type, 0)] - - def __repr__(self) -> str: - return f"I{self.bbox}" diff --git a/src/modm_data/pdf2html/__init__.py b/src/modm_data/pdf2html/__init__.py index bf28123..c272980 100644 --- a/src/modm_data/pdf2html/__init__.py +++ b/src/modm_data/pdf2html/__init__.py @@ -7,5 +7,5 @@ from . import stmicro from .render import render_page_pdf -from .line import CharCluster, CharLine -from .figure import Figure +from .convert import convert, patch +from .html import format_document, write_html diff --git a/src/modm_data/pdf2html/stmicro/ast.py b/src/modm_data/pdf2html/ast.py similarity index 51% rename from src/modm_data/pdf2html/stmicro/ast.py rename to src/modm_data/pdf2html/ast.py index 226c0c9..ee252c4 100644 --- a/src/modm_data/pdf2html/stmicro/ast.py +++ b/src/modm_data/pdf2html/ast.py @@ -2,17 +2,16 @@ # SPDX-License-Identifier: MPL-2.0 import logging -from lxml import etree import anytree -from anytree import RenderTree +from anytree import RenderTree, Node from collections import defaultdict -from ...utils import list_strip, Rectangle, ReversePreOrderIter +from ..utils import Rectangle, ReversePreOrderIter from .table import VirtualTable, TableCell -LOGGER = logging.getLogger(__name__) +_LOGGER = logging.getLogger(__name__) -def _normalize_area(area): +def _normalize_area(area: Node) -> Node: for child in ReversePreOrderIter(area): if child.name.startswith("list"): # We need to normalize the xpos back to the first character @@ -24,13 +23,13 @@ def _normalize_area(area): return area -def merge_area(document, area, debug=False): +def merge_area(document: Node, area: Node, debug: bool = False) -> Node: if document is None: - document = anytree.Node("document", xpos=0, _page=area.page, _doc=area.page.pdf, _end=None) + document = Node("document", xpos=0, _page=area.page, _doc=area.page.pdf, _end=None) document._end = document if not area.children: return document - if debug: print() + if debug: _LOGGER.debug() def _find_end(node): # Find the last leaf node but skip lines, paragraphs, captions/tables/figures @@ -43,7 +42,7 @@ def _find_ancestor(filter_): if filter_(c)), document.root) area = _normalize_area(area) - if debug: print(RenderTree(area)) + if debug: _LOGGER.debug(RenderTree(area)) children = area.children # All area nodes up to the next top-level element must now be # xpos-aligned with the previous area's last leaf node @@ -51,7 +50,7 @@ def _find_ancestor(filter_): if 
c.name.startswith("head")), len(children)) x_em = area.page._spacing["x_em"] - if debug: print("area=", area, "connect_index=", connect_index) + if debug: _LOGGER.debug("area=", area, "connect_index=", connect_index) # Align these children with the last leaf node xpos for child in children[:connect_index]: if any(child.name.startswith(name) for name in {"list"}): @@ -68,10 +67,10 @@ def _find_ancestor(filter_): child.parent = host document._end = _find_end(document) if debug: - print("child=", child) - print("host=", host) - print("end=", document._end) - print() + _LOGGER.debug(f"{child=}", ) + _LOGGER.debug(f"{host=}") + _LOGGER.debug(f"end={document._end}") + _LOGGER.debug() # Add the remaining top-level children to connect index node if connect_index < len(children): @@ -82,19 +81,19 @@ def _find_ancestor(filter_): document._end = _find_end(document) if debug: - print() - print() + _LOGGER.debug() + _LOGGER.debug() return document -def _normalize_lists(node): +def normalize_lists(node: Node) -> Node: lists = [] current = [] current_name = None for child in node.children: # Normalize the lists from the leaves up - _normalize_lists(child) + normalize_lists(child) # then split the children based on their names if current_name is None or child.name == current_name: current.append(child) @@ -110,7 +109,7 @@ def _normalize_lists(node): for llist in lists: # Insert a new list group node and redirect all children to it if llist[0].name.startswith("list"): - nlist = anytree.Node(llist[0].name, obj=llist[0].obj, + nlist = Node(llist[0].name, obj=llist[0].obj, start=llist[0].value, xpos=llist[0].xpos) for lnode in llist: lnode.name = "element" @@ -125,7 +124,7 @@ def _normalize_lists(node): return node -def _normalize_paragraphs(document): +def normalize_paragraphs(document: Node) -> Node: paras = anytree.search.findall(document, filter_=lambda n: n.name == "para") parents = set(p.parent for p in paras if p.parent.name in {"element", "caption", "document", "cell"}) for parent in parents: @@ -144,17 +143,17 @@ def _normalize_paragraphs(document): return document -def _normalize_lines(document): +def normalize_lines(document: Node) -> Node: paras = anytree.search.findall(document, filter_=lambda n: n.name == "para") for para in paras: - text = anytree.Node("text") + text = Node("text") for line in para.children: line.parent = text para.children = [text] return document -def _normalize_captions(document): +def normalize_captions(document: Node) -> Node: captions = anytree.search.findall(document, filter_=lambda n: n.name == "caption") for caption in captions: cindex = caption.parent.children.index(caption) @@ -165,12 +164,12 @@ def _normalize_captions(document): sibling.number = caption.number break else: - LOGGER.error(f"Discarding caption {caption}!\n{RenderTree(caption)}") + _LOGGER.error(f"Discarding caption {caption}!\n{RenderTree(caption)}") caption.parent = None return document -def _normalize_headings(document): +def normalize_headings(document: Node) -> Node: headings = anytree.search.findall(document, filter_=lambda n: n.name.startswith("head")) for heading in headings: para = heading.children[0] @@ -185,7 +184,7 @@ def _normalize_headings(document): return document -def _normalize_registers(document): +def normalize_registers(document: Node) -> Node: bits_list = [] sections = anytree.search.findall(document, filter_=lambda n: n.name == "section") for section in (sections + (document,)): @@ -195,7 +194,7 @@ def _normalize_registers(document): if child.name == "bit": # Insert a new bits 
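normalize_lists wraps each run of consecutive list* siblings in a single group node and demotes the originals to elements. A small anytree sketch of that regrouping, simplified and without the xpos/start bookkeeping of the real function:

    from anytree import Node, RenderTree
    from itertools import groupby

    flat = [Node("para"), Node("listb"), Node("listb"), Node("listb"), Node("para")]

    root = Node("area")
    for name, run in groupby(flat, key=lambda n: n.name):
        run = list(run)
        if name.startswith("list"):
            group = Node(name, parent=root)   # one group node per run of list items
            for node in run:
                node.name = "element"         # originals become elements of the group
                node.parent = group
        else:
            for node in run:
                node.parent = root

    print(RenderTree(root))
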
group node and redirect all children to it if bits is None or bits._page != child._page: - bits = anytree.Node("table", xpos=child.xpos, obj=None, + bits = Node("table", xpos=child.xpos, obj=None, _type="bits", _width=1, _page=child._page) new_children.append(bits) bits_list.append(bits) @@ -229,7 +228,7 @@ def _normalize_registers(document): return document -def _normalize_tables(document): +def normalize_tables(document: Node) -> Node: content_tables = defaultdict(list) register_tables = [] bits_tables = [] @@ -298,7 +297,7 @@ def _push(): return document -def _normalize_chapters(document) -> list: +def normalize_chapters(document: Node) -> Node: headings = anytree.search.findall(document, filter_=lambda n: n.name in ["head1", "head2"], maxlevel=3) idxs = [document.children.index(h.parent) for h in headings] + [len(document.children)] if idxs[0] != 0: @@ -321,300 +320,8 @@ def _normalize_chapters(document) -> list: chapters.append( (chapter_name, filename, document.children[idx0:idx1 + 1]) ) for title, filename, nodes in chapters: - chapter = anytree.Node("chapter", title=title, _filename=filename, parent=document) + chapter = Node("chapter", title=title, _filename=filename, parent=document) for node in nodes: node.parent = chapter return document - - -def normalize_document(document): - def _debug(func, indata, debug=0): - print(func.__name__[1:]) - if debug == -1: - print(RenderTree(indata)) - print() - outdata = func(indata) - if debug == 1: - print(RenderTree(outdata)) - print() - return outdata - - document = _debug(_normalize_lines, document) - document = _debug(_normalize_captions, document) - document = _debug(_normalize_lists, document) - document = _debug(_normalize_paragraphs, document) - document = _debug(_normalize_headings, document) - document = _debug(_normalize_registers, document) - document = _debug(_normalize_tables, document) - # document = _debug(_normalize_chapters, document) - return document - - -def _format_html_figure(xmlnode, figurenode): - tnode = etree.Element("table") - tnode.set("width", f"{int(figurenode._width * 50)}%") - xmlnode.append(tnode) - - captionnode = next((c for c in figurenode.children if c.name == "caption"), None) - if captionnode is not None: - tnode.set("id", f"figure{captionnode.number}") - caption = etree.Element("caption") - tnode.append(caption) - _format_html(caption, captionnode, with_newlines=True) - - ynode = etree.Element("tr") - tnode.append(ynode) - - xynode = etree.Element("td") - ynode.append(xynode) - xynode.text = "(omitted)" - - -def _format_html_table(xmlnode, tablenode): - tnode = etree.Element("table") - xmlnode.append(tnode) - # Format the caption - captionnode = next((c for c in tablenode.children if c.name == "caption"), None) - if captionnode is not None: - tnode.set("id", f"table{captionnode.number}") - caption = etree.Element("caption") - tnode.append(caption) - _format_html(caption, captionnode, with_newlines=True) - if tablenode.obj._type == "register": - tnode.set("class", "rt") - if tablenode.obj._type == "bitfield": - tnode.set("class", "bt") - - # Cells are ordered (y, x) positions - ypos = -1 - ynode = None - header_rows = tablenode.obj.header_rows - for cell in tablenode.obj.cells: - # Add another row to the table - if ypos != cell.y or ynode is None: - ypos = cell.y - ynode = etree.Element("tr") - tnode.append(ynode) - - # Add the right cell with spans and style - xynodespan = xynode = etree.Element("th" if cell.is_header else "td") - ynode.append(xynode) - if cell.xspan > 1: - xynode.set("colspan", 
str(cell.xspan)) - if cell.yspan > 1: - xynode.set("rowspan", str(cell.yspan)) - if not cell.rotation and tablenode.obj._type != "register" and cell.left_aligned: - xynode.set("class", "tl") - if cell.rotation: - xynodespan = etree.Element("span") - xynodespan.set("class", "tv") - xynode.append(xynodespan) - if (cell.y + cell.yspan) == header_rows: - if cl := xynode.get("class"): - xynode.set("class", "thb " + cl) - else: - xynode.set("class", "thb") - - if cell._is_simple: - xynodespan.text = cell.content.strip() - else: - cell_doc = anytree.Node("document", _page=cell.ast.page) - cell.ast.parent = cell_doc - cell_doc = _normalize_lines(cell_doc) - cell_doc = _normalize_lists(cell_doc) - cell_doc = _normalize_paragraphs(cell_doc) - # print(RenderTree(cell_doc)) - _format_html(xynodespan, cell_doc, with_newlines=True, - ignore_formatting={"bold"} if cell.is_header else None) - - -def _format_char(node, state, chars, ignore): - NOFMT = { - "superscript": False, - "subscript": False, - "italic": False, - "bold": False, - "underline": False, - } - if state is None: state = NOFMT - char = chars[0] - if char["char"] in {'\r'}: - return (True, node, state) - - # print(node, state, char["char"]) - diffs = {} - for key in NOFMT: - if state[key] != char[key] and key not in ignore: - diffs[key] = char[key] - # if diffs: print(diffs) - if not diffs: - prev_name = node.children[-1].name if node.children else None - # print(node) - if prev_name != "newline" and char["char"] == '\n': - # if not (prev_name == "chars" and node.children[-1].chars[-1] == " "): - anytree.Node("newline", parent=node) - elif prev_name != "chars": - anytree.Node("chars", parent=node, chars=char["char"]) - else: - node.children[-1].chars += char["char"] - return (True, node, state) - else: - disable = [key for key, value in diffs.items() if not value] - if disable: - state[node.name] = False - return (False, node.parent, state) - else: - enable = [key for key, value in diffs.items() if value][0] - fmtnode = anytree.Node(enable, parent=node) - state[enable] = True - return (False, fmtnode, state) - - -def _format_lines(textnode, ignore, with_newlines, with_start): - char_props = textnode.root._page._char_properties - formatn = anytree.Node("format") - chars = [] - for line in textnode.children: - if line.name == "line": - for char in line.obj.chars[0 if with_start else line.start:]: - if not with_newlines and char.unicode in {0xa, 0xd}: - continue - chars.append(char_props(line.obj, char)) - if with_newlines and chars[-1]["char"] not in {'\n'}: - char = char_props(line.obj, line.obj.chars[-1]) - char["char"] = '\n' - chars.append(char) - - chars = list_strip(chars, lambda c: c["char"] in {' ', '\n'}) - state = None - node = formatn - while chars: - popchar, node, state = _format_char(node, state, chars, ignore) - if popchar: chars.pop(0) - return formatn - - -def _format_html_fmt(xmlnode, treenode, tail=False): - CONV = { - "superscript": "sup", - "subscript": "sub", - "italic": "i", - "bold": "b", - "underline": "u", - "newline": "br", - } - # print(xmlnode, treenode) - if treenode.name == "chars": - # print(f"{'tail' if tail else 'text'} char={treenode.chars}") - if tail: - xmlnode.tail = (xmlnode.tail or "") + treenode.chars - else: - xmlnode.text = (xmlnode.text or "") + treenode.chars - return (tail, xmlnode) - else: - # print(f"sub {treenode.name}") - if tail: xmlnode = xmlnode.getparent() - subnode = etree.SubElement(xmlnode, CONV[treenode.name]) - tail = False - iternode = subnode - for child in treenode.children: - tail, 
iternode = _format_html_fmt(iternode, child, tail) - return (True, subnode) - - -def _format_html_text(xmlnode, treenode, ignore=None, with_newlines=False, with_start=True): - fmttree = _format_lines(treenode, ignore or set(), with_newlines, with_start) - tail = False - fmtnode = xmlnode - for child in fmttree.children: - tail, fmtnode = _format_html_fmt(fmtnode, child, tail) - - # print(RenderTree(fmttree)) - # print(etree.tostring(xmlnode, pretty_print=True).decode("utf-8")) - - -def _format_html(xmlnode, treenode, ignore_formatting=None, - with_newlines=False, with_start=True): - if ignore_formatting is None: - ignore_formatting = set() - # print(xmlnode, treenode.name) - current = xmlnode - if treenode.name.startswith("head"): - current = etree.Element(f"h{treenode.name[4]}") - if treenode.marker: - current.set("id", f"section{treenode.marker}") - xmlnode.append(current) - ignore_formatting = ignore_formatting | {"bold", "italic", "underline"} - - elif treenode.name in {"para"}: - current = etree.Element("p") - xmlnode.append(current) - - elif treenode.name in {"note"}: - current = etree.Element("div") - current.set("class", "nt") - xmlnode.append(current) - - elif treenode.name == "text": - _format_html_text(xmlnode, treenode, ignore_formatting, with_newlines, with_start) - - elif treenode.name == "page": - if not current.get("id"): - current.set("id", f"page{treenode.number}") - print(f"{treenode.number}.", end="", flush=True) - return - - elif treenode.name == "table": - _format_html_table(xmlnode, treenode) - return - - elif treenode.name == "figure": - _format_html_figure(xmlnode, treenode) - return - - elif treenode.name == "bits": - _format_html_bits(xmlnode, treenode) - return - - elif treenode.name.startswith("list"): - if treenode.name[4] in {"b", "s"}: - current = etree.Element("ul") - else: - current = etree.Element("ol") - xmlnode.append(current) - - elif treenode.name == "element": - current = etree.Element("li") - if xmlnode.tag == "ol": - current.set("value", str(treenode.value)) - xmlnode.append(current) - with_start = False - - for child in treenode.children: - _format_html(current, child, ignore_formatting, with_newlines, with_start) - - -def format_document(document): - html = etree.Element("html") - - head = etree.Element("head") - html.append(head) - - link = etree.Element("link") - link.set("rel", "stylesheet") - link.set("href", "../style.css") - head.append(link) - - body = etree.Element("body") - html.append(body) - - _format_html(body, document, with_newlines=True) - - html = etree.ElementTree(html) - return html - - -def write_html(html, path, pretty=True): - with open(path, "wb") as f: - html.write(f, pretty_print=pretty, doctype="") diff --git a/src/modm_data/pdf2html/cell.py b/src/modm_data/pdf2html/cell.py new file mode 100644 index 0000000..2c051eb --- /dev/null +++ b/src/modm_data/pdf2html/cell.py @@ -0,0 +1,125 @@ +# Copyright 2022, Niklas Hauser +# SPDX-License-Identifier: MPL-2.0 + +from functools import cached_property +from anytree import Node +from ..utils import Rectangle +from .line import CharLine + + +class TableCell: + class Borders: + """The four borders of a Cell""" + def __init__(self, l, b, r, t): + self.l = l + self.b = b + self.r = r + self.t = t + + def __init__(self, table, position, bbox, borders, is_simple=False): + self._table = table + self._bboxes = [bbox] + self.b = borders + """Borders of the cell""" + self.positions = [position] + """Index positions of the cell""" + self.is_header = False + """Is this cell a header?""" + 
self._is_simple = is_simple + + def _merge(self, other): + self.positions.extend(other.positions) + self.positions.sort() + self._bboxes.append(other.bbox) + self._invalidate() + + def _move(self, x, y): + self.positions = [(py + y, px + x) for (py, px) in self.positions] + self.positions.sort() + self._invalidate() + + def _expand(self, dx, dy): + ymax, xmax = self.positions[-1] + for yi in range(ymax, ymax + dy + 1): + for xi in range(xmax, xmax + dx + 1): + self.positions.append((yi, xi)) + self.positions.sort() + self._invalidate() + + def _invalidate(self): + for key, value in self.__class__.__dict__.items(): + if isinstance(value, cached_property): + self.__dict__.pop(key, None) + + @cached_property + def x(self) -> int: + """The horizontal position of the cell.""" + return self.positions[0][1] + + @cached_property + def y(self) -> int: + """The vertical position of the cell.""" + return self.positions[0][0] + + @cached_property + def xspan(self) -> int: + """The horizontal span of the cell.""" + return self.positions[-1][1] - self.positions[0][1] + 1 + + @cached_property + def yspan(self) -> int: + """The vertical span of the cell.""" + return self.positions[-1][0] - self.positions[0][0] + 1 + + @cached_property + def rotation(self) -> int: + """The rotation of the cell text.""" + if not self.lines: return 0 + return self.lines[0].rotation + + @cached_property + def bbox(self) -> Rectangle: + """The tight bounding box of this cell.""" + return Rectangle(min(bbox.left for bbox in self._bboxes), + min(bbox.bottom for bbox in self._bboxes), + max(bbox.right for bbox in self._bboxes), + max(bbox.top for bbox in self._bboxes)) + + @cached_property + def lines(self) -> list[CharLine]: + """The character lines in this cell.""" + return self._table._page.charlines_in_area(self.bbox) + + @cached_property + def content(self): + """The concatenated text content of the table cell.""" + return "".join(c.char for line in self.lines for c in line.chars) + + @cached_property + def is_left_aligned(self) -> bool: + """Is the text in the cell left aligned?""" + x_em = self._table._page._spacing["x_em"] + for line in self.lines: + if (line.bbox.left - self.bbox.left + x_em) < (self.bbox.right - line.bbox.right): + return True + return False + + @cached_property + def ast(self) -> Node: + """The abstract syntax tree of the cell without graphics.""" + ast = self._table._page.ast_in_area(self.bbox, with_graphics=False, + ignore_xpos=not self.is_left_aligned, + with_bits=False, with_notes=False) + ast.name = "cell" + return ast + + def __repr__(self) -> str: + positions = ",".join(f"({p[1]},{p[0]})" for p in self.positions) + borders = "" + if self.b.l: borders += "[" + if self.b.b: borders += "_" + if self.b.t: borders += "^" + if self.b.r: borders += "]" + start = "CellH" if self.is_header else "Cell" + return start + f"[{positions}] {borders}" + diff --git a/src/modm_data/pdf2html/stmicro/convert.py b/src/modm_data/pdf2html/convert.py similarity index 81% rename from src/modm_data/pdf2html/stmicro/convert.py rename to src/modm_data/pdf2html/convert.py index 1f5ed3b..62504f7 100644 --- a/src/modm_data/pdf2html/stmicro/convert.py +++ b/src/modm_data/pdf2html/convert.py @@ -3,10 +3,11 @@ from anytree import RenderTree -from .ast import merge_area, normalize_document -from .ast import format_document, write_html -from ..render import render_page_pdf -from ...utils import pkg_apply_patch, pkg_file_exists +from .html import format_document, write_html +from .render import render_page_pdf +from ..utils import 
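TableCell._invalidate works because functools.cached_property stores its result in the instance __dict__ under the property's name; popping that entry forces a recomputation the next time the attribute is read, which is needed after a cell was merged, moved, or expanded. A standalone demonstration:

    from functools import cached_property

    class Cell:
        def __init__(self, positions):
            self.positions = positions

        @cached_property
        def span(self) -> int:
            return self.positions[-1] - self.positions[0] + 1

        def invalidate(self):
            # drop every cached_property value stored on this instance
            for name, value in self.__class__.__dict__.items():
                if isinstance(value, cached_property):
                    self.__dict__.pop(name, None)

    c = Cell([0, 1])
    print(c.span)            # 2, computed and cached
    c.positions.append(4)
    print(c.span)            # still 2: the stale cached value
    c.invalidate()
    print(c.span)            # 5, recomputed after invalidation
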
pkg_apply_patch, pkg_file_exists +from .ast import merge_area +from pathlib import Path import pypdfium2 as pp import subprocess @@ -19,7 +20,7 @@ def convert(doc, page_range, output_path, format_chapters=False, pretty=True, debug_doc = None debug_index = 0 for page in doc.pages(page_range): - if not render_all and any(c in page.top for c in {"Contents", "List of ", "Index"}): + if not render_all and not page.is_relevant: continue print(f"\n\n=== {page.top} #{page.number} ===\n") @@ -50,7 +51,7 @@ def convert(doc, page_range, output_path, format_chapters=False, pretty=True, print("No pages parsed, empty document!") return True - document = normalize_document(document) + document = doc._normalize(document) if show_tree: print(RenderTree(document)) @@ -72,15 +73,14 @@ def convert(doc, page_range, output_path, format_chapters=False, pretty=True, return True -def patch(doc, output_path, patch_file=None) -> bool: +def patch(doc, data_module, output_path: Path, patch_file: Path = None) -> bool: if patch_file is None: - from . import data # First try the patch file for the specific version patch_file = f"{doc.name}.patch" - if not pkg_file_exists(data, patch_file): + if not pkg_file_exists(data_module, patch_file): # Then try the patch file shared between versions patch_file = f"{doc.name.split('-')[0]}.patch" - if not pkg_file_exists(data, patch_file): + if not pkg_file_exists(data_module, patch_file): return True - return pkg_apply_patch(data, patch_file, output_path) + return pkg_apply_patch(data_module, patch_file, output_path) return apply_patch(patch_file, output_path) diff --git a/src/modm_data/pdf2html/html.py b/src/modm_data/pdf2html/html.py new file mode 100644 index 0000000..8db89a8 --- /dev/null +++ b/src/modm_data/pdf2html/html.py @@ -0,0 +1,279 @@ +# Copyright 2022, Niklas Hauser +# SPDX-License-Identifier: MPL-2.0 + +import logging +from lxml import etree +import anytree +from anytree import RenderTree +from ..utils import list_strip +from .ast import normalize_lines, normalize_lists, normalize_paragraphs + +_LOGGER = logging.getLogger(__name__) + +def _format_html_figure(xmlnode, figurenode): + tnode = etree.Element("table") + tnode.set("width", f"{int(figurenode._width * 50)}%") + xmlnode.append(tnode) + + captionnode = next((c for c in figurenode.children if c.name == "caption"), None) + if captionnode is not None: + tnode.set("id", f"figure{captionnode.number}") + caption = etree.Element("caption") + tnode.append(caption) + _format_html(caption, captionnode, with_newlines=True) + + ynode = etree.Element("tr") + tnode.append(ynode) + + xynode = etree.Element("td") + ynode.append(xynode) + xynode.text = "(omitted)" + + +def _format_html_table(xmlnode, tablenode): + tnode = etree.Element("table") + xmlnode.append(tnode) + # Format the caption + captionnode = next((c for c in tablenode.children if c.name == "caption"), None) + if captionnode is not None: + tnode.set("id", f"table{captionnode.number}") + caption = etree.Element("caption") + tnode.append(caption) + _format_html(caption, captionnode, with_newlines=True) + if tablenode.obj._type == "register": + tnode.set("class", "rt") + if tablenode.obj._type == "bitfield": + tnode.set("class", "bt") + + # Cells are ordered (y, x) positions + ypos = -1 + ynode = None + header_rows = tablenode.obj.header_rows + for cell in tablenode.obj.cells: + # Add another row to the table + if ypos != cell.y or ynode is None: + ypos = cell.y + ynode = etree.Element("tr") + tnode.append(ynode) + + # Add the right cell with spans and style + 
xynodespan = xynode = etree.Element("th" if cell.is_header else "td") + ynode.append(xynode) + if cell.xspan > 1: + xynode.set("colspan", str(cell.xspan)) + if cell.yspan > 1: + xynode.set("rowspan", str(cell.yspan)) + if not cell.rotation and tablenode.obj._type != "register" and cell.is_left_aligned: + xynode.set("class", "tl") + if cell.rotation: + xynodespan = etree.Element("span") + xynodespan.set("class", "tv") + xynode.append(xynodespan) + if (cell.y + cell.yspan) == header_rows: + if cl := xynode.get("class"): + xynode.set("class", "thb " + cl) + else: + xynode.set("class", "thb") + + if cell._is_simple: + xynodespan.text = cell.content.strip() + else: + cell_doc = anytree.Node("document", _page=cell.ast.page) + cell.ast.parent = cell_doc + cell_doc = normalize_lines(cell_doc) + cell_doc = normalize_lists(cell_doc) + cell_doc = normalize_paragraphs(cell_doc) + # _LOGGER.debug(RenderTree(cell_doc)) + _format_html(xynodespan, cell_doc, with_newlines=True, + ignore_formatting={"bold"} if cell.is_header else None) + + +def _format_char(node, state, chars, ignore): + NOFMT = { + "superscript": False, + "subscript": False, + "italic": False, + "bold": False, + "underline": False, + } + if state is None: state = NOFMT + char = chars[0] + if char["char"] in {'\r'}: + return (True, node, state) + + # print(node, state, char["char"]) + diffs = {} + for key in NOFMT: + if state[key] != char[key] and key not in ignore: + diffs[key] = char[key] + # if diffs: print(diffs) + if not diffs: + prev_name = node.children[-1].name if node.children else None + # print(node) + if prev_name != "newline" and char["char"] == '\n': + # if not (prev_name == "chars" and node.children[-1].chars[-1] == " "): + anytree.Node("newline", parent=node) + elif prev_name != "chars": + anytree.Node("chars", parent=node, chars=char["char"]) + else: + node.children[-1].chars += char["char"] + return (True, node, state) + else: + disable = [key for key, value in diffs.items() if not value] + if disable: + state[node.name] = False + return (False, node.parent, state) + else: + enable = [key for key, value in diffs.items() if value][0] + fmtnode = anytree.Node(enable, parent=node) + state[enable] = True + return (False, fmtnode, state) + + +def _format_lines(textnode, ignore, with_newlines, with_start): + char_props = textnode.root._page._char_properties + formatn = anytree.Node("format") + chars = [] + for line in textnode.children: + if line.name == "line": + for char in line.obj.chars[0 if with_start else line.start:]: + if not with_newlines and char.unicode in {0xa, 0xd}: + continue + chars.append(char_props(line.obj, char)) + if with_newlines and chars[-1]["char"] not in {'\n'}: + char = char_props(line.obj, line.obj.chars[-1]) + char["char"] = '\n' + chars.append(char) + + chars = list_strip(chars, lambda c: c["char"] in {' ', '\n'}) + state = None + node = formatn + while chars: + popchar, node, state = _format_char(node, state, chars, ignore) + if popchar: chars.pop(0) + return formatn + + +def _format_html_fmt(xmlnode, treenode, tail=False): + CONV = { + "superscript": "sup", + "subscript": "sub", + "italic": "i", + "bold": "b", + "underline": "u", + "newline": "br", + } + # print(xmlnode, treenode) + if treenode.name == "chars": + # print(f"{'tail' if tail else 'text'} char={treenode.chars}") + if tail: + xmlnode.tail = (xmlnode.tail or "") + treenode.chars + else: + xmlnode.text = (xmlnode.text or "") + treenode.chars + return (tail, xmlnode) + else: + # print(f"sub {treenode.name}") + if tail: xmlnode = 
xmlnode.getparent() + subnode = etree.SubElement(xmlnode, CONV[treenode.name]) + tail = False + iternode = subnode + for child in treenode.children: + tail, iternode = _format_html_fmt(iternode, child, tail) + return (True, subnode) + + +def _format_html_text(xmlnode, treenode, ignore=None, with_newlines=False, with_start=True): + fmttree = _format_lines(treenode, ignore or set(), with_newlines, with_start) + tail = False + fmtnode = xmlnode + for child in fmttree.children: + tail, fmtnode = _format_html_fmt(fmtnode, child, tail) + + # print(RenderTree(fmttree)) + # print(etree.tostring(xmlnode, pretty_print=True).decode("utf-8")) + + +def _format_html(xmlnode, treenode, ignore_formatting=None, + with_newlines=False, with_start=True): + if ignore_formatting is None: + ignore_formatting = set() + # print(xmlnode, treenode.name) + current = xmlnode + if treenode.name.startswith("head"): + current = etree.Element(f"h{treenode.name[4]}") + if treenode.marker: + current.set("id", f"section{treenode.marker}") + xmlnode.append(current) + ignore_formatting = ignore_formatting | {"bold", "italic", "underline"} + + elif treenode.name in {"para"}: + current = etree.Element("p") + xmlnode.append(current) + + elif treenode.name in {"note"}: + current = etree.Element("div") + current.set("class", "nt") + xmlnode.append(current) + + elif treenode.name == "text": + _format_html_text(xmlnode, treenode, ignore_formatting, with_newlines, with_start) + + elif treenode.name == "page": + if not current.get("id"): + current.set("id", f"page{treenode.number}") + print(f"{treenode.number}.", end="", flush=True) + return + + elif treenode.name == "table": + _format_html_table(xmlnode, treenode) + return + + elif treenode.name == "figure": + _format_html_figure(xmlnode, treenode) + return + + elif treenode.name == "bits": + _format_html_bits(xmlnode, treenode) + return + + elif treenode.name.startswith("list"): + if treenode.name[4] in {"b", "s"}: + current = etree.Element("ul") + else: + current = etree.Element("ol") + xmlnode.append(current) + + elif treenode.name == "element": + current = etree.Element("li") + if xmlnode.tag == "ol": + current.set("value", str(treenode.value)) + xmlnode.append(current) + with_start = False + + for child in treenode.children: + _format_html(current, child, ignore_formatting, with_newlines, with_start) + + +def format_document(document): + html = etree.Element("html") + + head = etree.Element("head") + html.append(head) + + link = etree.Element("link") + link.set("rel", "stylesheet") + link.set("href", "../style.css") + head.append(link) + + body = etree.Element("body") + html.append(body) + + _format_html(body, document, with_newlines=True) + + html = etree.ElementTree(html) + return html + + +def write_html(html, path, pretty=True): + with open(path, "wb") as f: + html.write(f, pretty_print=pretty, doctype="") diff --git a/src/modm_data/pdf2html/line.py b/src/modm_data/pdf2html/line.py index 5b0eb88..31d6e0e 100644 --- a/src/modm_data/pdf2html/line.py +++ b/src/modm_data/pdf2html/line.py @@ -3,6 +3,7 @@ from functools import cached_property from ..utils import Rectangle +from ..pdf import Character class CharCluster: @@ -12,7 +13,7 @@ class CharCluster: character stream of the PDF page. 
""" - def __init__(self, line, chars: list): + def __init__(self, line: "CharLine", chars: list[Character]): self._line = line self.chars = chars @@ -49,16 +50,19 @@ def __init__(self, page, chars: list, bottom: float, @cached_property def bbox(self) -> Rectangle: + """Bounding box of the character line""" return Rectangle(min(c.bbox.left for c in self.chars), min(c.bbox.bottom for c in self.chars), max(c.bbox.right for c in self.chars), max(c.bbox.top for c in self.chars)) @cached_property - def fonts(self) -> set: + def fonts(self) -> set[str]: + """All font names in this character line""" return set(c.font for c in self.chars if c.font) - def contains_font(self, *fragments) -> bool: + def contains_font(self, *fragments: str) -> bool: + """:return: True if any fragment is part of the font names""" for fragment in fragments: if any(fragment in font for font in self.fonts): return True @@ -66,22 +70,23 @@ def contains_font(self, *fragments) -> bool: @cached_property def content(self) -> str: + """Text contained in the character line""" return "".join(c.char for c in self.chars) - def clusters(self, atol: float = None) -> list[CharCluster]: - # Find clusters of characters in a line incl. whitespace chars + def clusters(self, absolute_tolerance: float = None) -> list[CharCluster]: + """Find clusters of characters in a line separated by `absolute_tolerance`.""" def _cluster(clusters, chars): if chars: clusters.append(CharCluster(self, chars)) # We want to group the chars if the space between them is > 1em - if atol is None: - atol = self._page._spacing["x_em"] * 1 + if absolute_tolerance is None: + absolute_tolerance = self._page._spacing["x_em"] * 1 clusters = [] current_chars = [self.chars[0]] last_char = current_chars[0] for next_char in self.chars[1:]: - if next_char.bbox.left - last_char.bbox.right < atol: + if next_char.bbox.left - last_char.bbox.right < absolute_tolerance: # Keep this char in the current cluster current_chars.append(next_char) if next_char.unicode not in {0x20, 0xa, 0xd}: diff --git a/src/modm_data/pdf2html/page.py b/src/modm_data/pdf2html/page.py new file mode 100644 index 0000000..33f687a --- /dev/null +++ b/src/modm_data/pdf2html/page.py @@ -0,0 +1,380 @@ +# Copyright 2022, Niklas Hauser +# SPDX-License-Identifier: MPL-2.0 + +import re +import math +import logging +import textwrap +import statistics +from typing import Callable +from functools import cached_property, cache, reduce +from collections import defaultdict +from .table import Table +from .figure import Figure +from .line import CharLine +from ..utils import HLine, VLine, Rectangle, Region +from ..pdf import Path, Image, Page as PdfPage, Character +from anytree import Node + + +_LOGGER = logging.getLogger(__name__) + + +class Page(PdfPage): + def __init__(self, document, index: int): + super().__init__(document, index) + self._template = "default" + self.is_relevant: bool = True + """Is this page relevant for the conversion?""" + + def _unicode_filter(self, code: int) -> int: + return code + + @cached_property + def _spacing(self) -> dict[str, float]: + content = 0.1 + return { + # Horizontal spacing: left->right + "x_em": 0.01 * self.width, + "x_left": content * self.width, + "x_right": (1 - content) * self.width, + "x_content": 0.2 * self.width, + # Vertical spacing: bottom->top + "y_em": 0.01 * self.height, + # Max table line thickness + "y_tline": 0.005 * self.height, + # Max line height distance to detect paragraphs + "lh": 0.9, + # Max line height distance to detect super-/subscript + "sc": 0.3, + # 
Table header cell bold text threshold + "th": 0.3, + } + + def _line_size(self, line: CharLine) -> str: + rsize = line.height + if rsize >= 17.5: return "h1" + elif rsize >= 15.5: return "h2" + elif rsize >= 13.5: return "h3" + elif rsize >= 11.4: return "h4" + elif rsize >= 8.5: return "n" + else: return "fn" + + def _colors(self, color: int) -> str: + if 0xff <= color <= 0xff: return "black" + if 0xffffffff <= color <= 0xffffffff: return "white" + return "unknown" + + @cached_property + def _areas(self) -> dict[str, list[Rectangle] | Rectangle]: + content = Rectangle(0.1, 0.1, 0.9, 0.9) + areas = {"content": [content]} + scaled_areas = {} + def _s(r): + return Rectangle(r.left * self.width, r.bottom * self.height, + r.right * self.width, r.top * self.height) + for name, area in areas.items(): + scaled_areas[name] = [_s(r) for r in area] if isinstance(area, list) else _s(area) + return scaled_areas + + def _char_properties(self, line, char): + cp = { + "superscript": False, + "subscript": False, + "bold": any(frag in char.font for frag in {"Bold"}), + "italic": any(frag in char.font for frag in {"Italic", "Oblique"}), + "underline": (char.objlink or char.weblink) is not None, + "size": round(line.height), + "relsize": self._line_size(line), + "char": chr(char.unicode), + } + if line.rotation: + if char.origin.x < (line.origin - 0.25 * line.height): + cp["superscript"] = True + elif char.origin.x > (line.origin + 0.15 * line.height): + cp["subscript"] = True + elif char.origin.y > (line.origin + 0.25 * line.height): + cp["superscript"] = True + elif char.origin.y < (line.origin - 0.15 * line.height): + cp["subscript"] = True + return cp + + def text_in_named_area(self, name: str, check_length: bool = True) -> str | None: + """ + Find all text in the named area. + + :param name: the name of the area(s) to query. + :param check_length: assert that the text has a length. + :return: the concatenated text of the named area(s) or `None` if area not found. + """ + if name not in self._areas: return None + text = "" + areas = self._areas[name] + if not isinstance(areas, list): areas = [areas] + for area in areas: text += self.text_in_area(area) + if check_length: assert text + return text + + def charlines_in_area(self, area: Rectangle, + predicate: Callable[[Character], bool] = None, + rtol: float = None) -> list[CharLine]: + """ + Coalesce the characters in the area and predicate into lines. + + 1. Every character in the area is filtered by the `predicate`. + 2. Character orientation is split into horizontal (left->right) and + vertical (bottom->top) character lines sorted by x or y position. + Lines containing only whitespace are discarded. + 3. Overlapping character lines are merged into sub- and superscript + using `rtol * max(current_line.height, next_line.height)` as the + tolerance for checking if the lines overlap. + 4. The characters in the merged lines are re-sorted by origin. + + :param area: Area to search for characters. + :param predicate: Function to discard characters in the area or include all by default. + :param rtol: Relative tolerance to separate lines vertically or use `sc` spacing by default. + :return: A list of character lines sorted by x or y position. 
+ """ + if rtol is None: rtol = self._spacing["sc"] + # Split all chars into lines based on rounded origin + origin_lines_y = defaultdict(list) + origin_lines_x = defaultdict(list) + for char in self.chars_in_area(area): + # Ignore all characters we don't want + if predicate is not None and not predicate(char): + continue + cunicode = self._unicode_filter(char.unicode) + if cunicode is None: continue + char.unicode = cunicode + if char.unicode < 32 and char.unicode not in {0xa}: + continue + # Ignore characters without width that are not spaces + if not char.width and char.unicode not in {0xa, 0xd, 0x20}: + _LOGGER.error(f"Unknown char width for {char}: {char.bbox}") + # Split up the chars depending on the orientation + if 45 < char.rotation <= 135 or 225 < char.rotation <= 315: + origin_lines_x[round(char.origin.x, 1)].append(char) + elif char.rotation <= 45 or 135 < char.rotation <= 225 or 315 < char.rotation: + origin_lines_y[round(char.origin.y, 1)].append(char) + else: + _LOGGER.error("Unknown char rotation:", char, char.rotation) + + # Convert characters into lines + bbox_lines_y = [] + for chars in origin_lines_y.values(): + # Remove lines with whitespace only + if all(c.unicode in {0xa, 0xd, 0x20} for c in chars): + continue + origin = statistics.fmean(c.origin.y for c in chars) + line = CharLine(self, chars, + min(c.bbox.bottom for c in chars), + origin, + max(c.bbox.top for c in chars), + max(c.height for c in chars), + sort_origin=self.height - origin) + bbox_lines_y.append(line) + # print(line, line.top, line.origin, line.bottom, line.height) + bbox_lines = sorted(bbox_lines_y, key=lambda l: l._sort_origin) + + bbox_lines_x = [] + for chars in origin_lines_x.values(): + # Remove lines with whitespace only + if all(c.unicode in {0xa, 0xd, 0x20} for c in chars): + continue + line = CharLine(self, chars, + min(c.bbox.left for c in chars), + statistics.fmean(c.origin.x for c in chars), + max(c.bbox.right for c in chars), + max(c.width for c in chars), + 270 if sum(c.rotation for c in chars) <= 135 * len(chars) else 90) + bbox_lines_x.append(line) + bbox_lines += sorted(bbox_lines_x, key=lambda l: l._sort_origin) + + if not bbox_lines: + return [] + + # Merge lines that have overlapping bbox_lines + # FIXME: This merges lines that "collide" vertically like in formulas + merged_lines = [] + current_line = bbox_lines[0] + for next_line in bbox_lines[1:]: + height = max(current_line.height, next_line.height) + # Calculate overlap via normalize origin (increasing with line index) + if ((current_line._sort_origin + rtol * height) > + (next_line._sort_origin - rtol * height)): + # if line.rotation or self.rotation: + # # The next line overlaps this one, we merge the shorter line + # # (typically super- and subscript) into taller line + # use_current = len(current_line.chars) >= len(next_line.chars) + # else: + use_current = current_line.height >= next_line.height + line = current_line if use_current else next_line + current_line = CharLine(self, current_line.chars + next_line.chars, + line.bottom, line.origin, line.top, + height, line.rotation, + sort_origin=line._sort_origin) + else: + # The next line does not overlap the current line + merged_lines.append(current_line) + current_line = next_line + # append last line + merged_lines.append(current_line) + + # Sort all lines horizontally based on character origin + sorted_lines = [] + for line in merged_lines: + if line.rotation == 90: + def sort_key(char): + if char.unicode in {0xa, 0xd}: + return char.tbbox.midpoint.y - 1e9 + return 
char.tbbox.midpoint.y + elif line.rotation == 270: + def sort_key(char): + if char.unicode in {0xa, 0xd}: + return -char.tbbox.midpoint.y + 1e9 + return -char.tbbox.midpoint.y + else: + def sort_key(char): + if char.unicode in {0xa, 0xd}: + return char.origin.x + 1e9 + return char.origin.x + sorted_lines.append(CharLine(self, sorted(line.chars, key=sort_key), + line.bottom, line.origin, + line.top, line.height, + line.rotation, area.left, + sort_origin=line._sort_origin)) + + return sorted_lines + + def graphic_bboxes_in_area(self, area: Rectangle, with_graphics: bool = True) -> list[tuple[Rectangle, Table | Figure | None]]: + """ + Coalesce the graphics in the area into full width bounding boxes. + + 1. Group vertically overlapping graphics. + 2. Widen the overlapped graphics bounding boxes to the edges of the area. + + :param area: area to search for content. + :param with_graphics: search for graphics in the area. + :return: list of tuples (bounding box, graphic objects or `None`). + """ + if with_graphics: + graphics = self.graphics_in_area(area) + regions = [] + # Check if graphics bounding boxes overlap vertically and group them + for graphic in sorted(graphics, key=lambda g: (-g.bbox.top, g.bbox.x)): + gbbox = graphic.bbox.joined(graphic.cbbox) if graphic.cbbox else graphic.bbox + for reg in regions: + if reg.overlaps(gbbox.bottom, gbbox.top): + # They overlap, so merge them + reg.v0 = min(reg.v0, gbbox.bottom) + reg.v1 = max(reg.v1, gbbox.top) + reg.objs.append(graphic) + break + else: + regions.append(Region(gbbox.bottom, gbbox.top, graphic)) + + # print(regions) + # Coalesce all overlapped graphics objects into full width areas + areas = [] + ypos = area.top + for reg in regions: + if ypos - reg.v1 > self._spacing["y_em"]: + areas.append((Rectangle(area.left, reg.v1, area.right, ypos), None)) + for obj in reg.objs: + oarea = obj.bbox.joined(obj.cbbox) if obj.cbbox else obj.bbox + areas.append((oarea, obj)) + ypos = reg.v0 + areas.append((Rectangle(area.left, area.bottom, area.right, ypos), None)) + else: + areas = [(area, None)] + return areas + + def objects_in_area(self, area: Rectangle, with_graphics: bool = True) -> list[CharLine | Table | Figure]: + """ + Find all content objects in this area. + + :param area: area to search for content. + :param with_graphics: search for graphics in the area. + :return: list of content objects sorted top to bottom. + """ + self._link_characters() + areas = self.graphic_bboxes_in_area(area, with_graphics) + objects = [] + for narea, obj in areas: + if obj is None: + objects += self.charlines_in_area(narea) + else: + oarea = obj.bbox.joined(obj.cbbox) if obj.cbbox else obj.bbox + predicate = lambda c: not obj.bbox.contains(c.origin) + lines = self.charlines_in_area(oarea, predicate) + # print(obj, oarea, lines, [line.content for line in lines]) + objects += list(sorted(lines + [obj], key=lambda o: (-o.bbox.y, o.bbox.x))) + return objects + + def graphics_in_area(self, area: Rectangle) -> list[Table | Figure]: + """ + Find all tables and figures in this area. + + :param area: area to search for graphics. + :return: list of tables and figures. + """ + return [] + + def ast_in_area(self, area: Rectangle, with_graphics: bool = True) -> Node: + """ + Convert the area content into an abstract syntax tree. + + :param area: area to search for content. + :param with_graphics: including graphics in the area. + :return: An abstract syntax tree including the content formatting. 
+ """ + return Node("area", obj=area, xpos=int(area.left), page=self) + + @property + def content_ast(self) -> list[Node]: + """The abstract syntax trees in the content area.""" + ast = [] + with_graphics = True + for area in self._areas["content"]: + ast.append(self.ast_in_area(area, with_graphics=with_graphics)) + # Add a page node to the first leaf to keep track of where a page starts + first_leaf = next((n for n in iter(ast[0].descendants) if n.is_leaf), ast[0]) + Node("page", parent=first_leaf, xpos=first_leaf.xpos, number=self.number) + return ast + + @property + def content_objects(self) -> list[CharLine | Table | Figure]: + """All objects in the content areas.""" + objs = [] + for area in self._areas["content"]: + objs.extend(self.objects_in_area(area)) + return objs + + @property + def content_graphics(self) -> list[Table | Figure]: + """All graphics in the content areas.""" + objs = [] + for area in self._areas["content"]: + objs.extend(self.graphics_in_area(area)) + return objs + + @property + def content_lines(self) -> list[CharLine]: + """All lines in the content areas.""" + objs = [] + for area in self._areas["content"]: + objs.extend(self.charlines_in_area(area)) + return objs + + @property + def content_tables(self) -> list[Table]: + """All tables in the content areas.""" + return [o for o in self.content_graphics if isinstance(o, Table)] + + @property + def content_figures(self) -> list[Figure]: + """All figures in the content areas.""" + return [o for o in self.content_graphics if isinstance(o, Figure)] + + def __repr__(self) -> str: + return f"Page({self.number})" diff --git a/src/modm_data/pdf2html/render.py b/src/modm_data/pdf2html/render.py index 0717bd4..526eb61 100644 --- a/src/modm_data/pdf2html/render.py +++ b/src/modm_data/pdf2html/render.py @@ -11,7 +11,7 @@ def render_page_pdf(doc, page, new_doc = None, index = 0): """ - Test doc string + :param doc: PDF document :param page: PDF page diff --git a/src/modm_data/pdf2html/stmicro/__init__.py b/src/modm_data/pdf2html/stmicro/__init__.py index 4adcde6..fd9ce58 100644 --- a/src/modm_data/pdf2html/stmicro/__init__.py +++ b/src/modm_data/pdf2html/stmicro/__init__.py @@ -1,7 +1,5 @@ # Copyright 2022, Niklas Hauser # SPDX-License-Identifier: MPL-2.0 -from .page import Page, is_compatible -from .ast import normalize_document, merge_area, format_document, write_html -from .convert import convert, patch + from .document import Document diff --git a/src/modm_data/pdf2html/stmicro/__main__.py b/src/modm_data/pdf2html/stmicro/__main__.py index 40d2ef0..208e2f6 100644 --- a/src/modm_data/pdf2html/stmicro/__main__.py +++ b/src/modm_data/pdf2html/stmicro/__main__.py @@ -3,15 +3,16 @@ import re import tqdm +import logging import argparse import subprocess from pathlib import Path from multiprocessing.pool import ThreadPool -import modm_data -from . import convert, patch +from .. 
import convert, patch def main(): + import modm_data parser = argparse.ArgumentParser() parser.add_argument("--document", type=Path) parser.add_argument("--output", type=str, default="") @@ -25,12 +26,14 @@ def main(): parser.add_argument("--chapters", action="store_true") parser.add_argument("--tags", action="store_true") parser.add_argument("--all", action="store_true") + parser.add_argument("-v", dest="verbose", action="count", default=0) args = parser.parse_args() + logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO) doc = modm_data.pdf2html.stmicro.Document(args.document) - # if doc.page_count == 0 or not doc.page(1).width: - # print("Corrupt PDF!") - # exit(1) + if doc.page_count == 0 or not doc.page(1).width: + print("Corrupt PDF!") + exit(1) if args.page or args.range: page_range = list(map(lambda p: p - 1, args.page or [])) @@ -79,7 +82,8 @@ def main(): for retval, call in zip(retvals, calls): if retval.returncode != 0: print(call) if all(r.returncode == 0 for r in retvals): - return patch(doc, output_dir) + from . import data + return patch(doc, data, output_dir) return False return convert(doc, page_range, output_path, format_chapters=args.chapters, diff --git a/src/modm_data/pdf2html/stmicro/document.py b/src/modm_data/pdf2html/stmicro/document.py index fdecf9b..3931033 100644 --- a/src/modm_data/pdf2html/stmicro/document.py +++ b/src/modm_data/pdf2html/stmicro/document.py @@ -1,13 +1,44 @@ # Copyright 2023, Niklas Hauser # SPDX-License-Identifier: MPL-2.0 +import logging from .page import Page as StmPage from ...pdf import Document as PdfDocument +from ..ast import normalize_lines, normalize_captions, normalize_lists +from ..ast import normalize_paragraphs, normalize_headings, normalize_registers +from ..ast import normalize_tables, normalize_chapters + +_LOGGER = logging.getLogger(__name__) + +def _debug(func, indata, debug=0): + _LOGGER.debug(func.__name__) + if debug == -1: + _LOGGER.debug(RenderTree(indata)) + _LOGGER.debug() + outdata = func(indata) + if debug == 1: + _LOGGER.debug(RenderTree(outdata)) + _LOGGER.debug() + return outdata + + +def _normalize_document(document): + document = _debug(normalize_lines, document) + document = _debug(normalize_captions, document) + document = _debug(normalize_lists, document) + document = _debug(normalize_paragraphs, document) + document = _debug(normalize_headings, document) + document = _debug(normalize_registers, document) + document = _debug(normalize_tables, document) + # document = _debug(normalize_chapters, document) + return document + class Document(PdfDocument): def __init__(self, path: str): super().__init__(path) + self._normalize = _normalize_document def page(self, index: int) -> StmPage: assert index < self.page_count diff --git a/src/modm_data/pdf2html/stmicro/page.py b/src/modm_data/pdf2html/stmicro/page.py index b56b56e..68cec3c 100644 --- a/src/modm_data/pdf2html/stmicro/page.py +++ b/src/modm_data/pdf2html/stmicro/page.py @@ -8,15 +8,17 @@ import statistics from functools import cached_property, cache, reduce from collections import defaultdict -from .table import Table +from ..table import Table from ..figure import Figure from ..line import CharLine from ...utils import HLine, VLine, Rectangle, Region from ...pdf import Path, Image, Page as PdfPage +from ..page import Page as BasePage from anytree import Node -LOGGER = logging.getLogger(__name__) +_LOGGER = logging.getLogger(__name__) + def is_compatible(document) -> bool: if "stmicro" in document.metadata.get("Author", "").lower(): @@ 
-24,7 +26,7 @@ def is_compatible(document) -> bool: return False -def areas_black_white(page) -> dict: +def _areas_black_white(page) -> dict: def _scale(r): if page.rotation: return Rectangle(r.bottom * page.width, (1 - r.right) * page.height, @@ -94,7 +96,7 @@ def _scale(r): return scaled_areas -def areas_blue_gray(page) -> dict: +def _areas_blue_gray(page) -> dict: def _scale(r): return Rectangle(r.left * page.width, r.bottom * page.height, r.right * page.width, r.top * page.height) @@ -146,7 +148,7 @@ def _scale(r): return scaled_areas -def spacing_black_white(page) -> dict: +def _spacing_black_white(page) -> dict: content = 0.1125 spacing = { # Horizontal spacing: left->right @@ -177,10 +179,10 @@ def spacing_black_white(page) -> dict: "lh": 1.2, "sc": 0.4, }) - return spacing + return spacing | _spacing_special(page) -def spacing_blue_gray(page) -> dict: +def _spacing_blue_gray(page) -> dict: content = 0.07 spacing = { # Horizontal spacing: left->right @@ -210,10 +212,25 @@ def spacing_blue_gray(page) -> dict: "lh": 1.6, "sc": 0.2, }) - return spacing + return spacing | _spacing_special(page) + + +def _spacing_special(page) -> dict: + # Patches to detect the header cells correctly + if ((page.pdf.name == "DS12930-v1" and page.index in range(90, 106)) or + (page.pdf.name == "DS12931-v1" and page.index in range(89, 105))): + return {"th": 0.1} + if ((page.pdf.name == "RM0453-v2" and page.index in [1354]) or + (page.pdf.name == "RM0456-v2" and page.index in [2881]) or + (page.pdf.name == "RM0456-v3" and page.index in [2880]) or + (page.pdf.name == "RM0461-v4" and page.index in [1246])): + return {"th": 0.5} + if ((page.pdf.name == "RM0456-v2" and page.index in [3005])): + return {"th": 0.52} + return {} -def linesize_black_white(line: float) -> str: +def _linesize_black_white(line: CharLine) -> str: rsize = line.height if rsize >= 17.5: return "h1" elif rsize >= 15.5: return "h2" @@ -223,7 +240,7 @@ def linesize_black_white(line: float) -> str: else: return "fn" -def linesize_blue_gray(line: float) -> str: +def _linesize_blue_gray(line: CharLine) -> str: rsize = round(line.height) if rsize >= 16: return "h1" elif rsize >= 14: return "h2" @@ -233,7 +250,7 @@ def linesize_blue_gray(line: float) -> str: else: return "fn" -def colors_black_white(color: int) -> str: +def _colors_black_white(color: int) -> str: if 0xff <= color <= 0xff: return "black" if 0xffffffff <= color <= 0xffffffff: @@ -241,7 +258,7 @@ def colors_black_white(color: int) -> str: return "unknown" -def colors_blue_gray(color: int) -> str: +def _colors_blue_gray(color: int) -> str: if 0xff <= color <= 0xff: return "black" if 0xffffffff <= color <= 0xffffffff: @@ -257,230 +274,53 @@ def colors_blue_gray(color: int) -> str: return "unknown" -class Page(PdfPage): - +class Page(BasePage): def __init__(self, document, index: int): super().__init__(document, index) - self._template = "black_white" producer = self.pdf.metadata.get("Producer", "").lower() - if "acrobat" in producer: - pass # default + self._template = "black_white" + if "acrobat" in producer or "adobe" in producer: + pass elif "antenna" in producer: self._template = "blue_gray" else: - LOGGER.error(f"Unknown page template! Defaulting to Black/White template. '{producer}'") + _LOGGER.error(f"Unknown page template! Defaulting to Black/White template. 
'{producer}'") if "blue_gray" in self._template: - self._areas = areas_blue_gray(self) - self._spacing = spacing_blue_gray(self) - self._colors = colors_blue_gray - self._line_size = linesize_blue_gray + self._areas = _areas_blue_gray(self) + self._spacing = _spacing_blue_gray(self) + self._colors = _colors_blue_gray + self._line_size = _linesize_blue_gray elif "black_white" in self._template: - self._areas = areas_black_white(self) - self._spacing = spacing_black_white(self) - self._colors = colors_black_white - self._line_size = linesize_black_white - - # Patches to detect the header cells correctly - if ((self.pdf.name == "DS12930-v1" and self.index in range(90, 106)) or - (self.pdf.name == "DS12931-v1" and self.index in range(89, 105))): - self._spacing["th"] = 0.1 - if ((self.pdf.name == "RM0453-v2" and self.index in [1354]) or - (self.pdf.name == "RM0456-v2" and self.index in [2881]) or - (self.pdf.name == "RM0456-v3" and self.index in [2880]) or - (self.pdf.name == "RM0461-v4" and self.index in [1246])): - self._spacing["th"] = 0.5 - if ((self.pdf.name == "RM0456-v2" and self.index in [3005])): - self._spacing["th"] = 0.52 - - def _text_in_area(self, name, check_length=True) -> str: - if name not in self._areas: return "" - text = "" - areas = self._areas[name] - if not isinstance(areas, list): areas = [areas] - for area in areas: - text += self.text_in_area(area) - if check_length: assert text - return text + self._areas = _areas_black_white(self) + self._spacing = _spacing_black_white(self) + self._colors = _colors_black_white + self._line_size = _linesize_black_white + + def _unicode_filter(self, code: int) -> int: + # Ignore Carriage Return characters and ® (superscript issues) + if code in {0xd, ord("®")}: return None + # Correct some weird unicode stuffing choices + if code in {2}: return ord("-") + if code in {61623, 61664}: return ord("•") + return code @cached_property def identifier(self) -> str: - return self._text_in_area("id", check_length=False) + return self.text_in_named_area("id", check_length=False) @cached_property def top(self) -> str: if self.index == 0: return "Cover" - return self._text_in_area("top", check_length=False) + return self.text_in_named_area("top", check_length=False) + @cached_property def is_relevant(self) -> bool: if any(c in self.top for c in {"Contents", "List of ", "Index"}): return False return True - def _charlines_filtered(self, area, predicate = None, rtol = None) -> list[CharLine]: - if rtol is None: rtol = self._spacing["sc"] - # Split all chars into lines based on rounded origin - origin_lines_y = defaultdict(list) - origin_lines_x = defaultdict(list) - for char in self.chars_in_area(area): - # Ignore all characters we don't want - if predicate is not None and not predicate(char): - continue - # Ignore Carriage Return characters and ® (superscript issues) - if char.unicode in {0xd, ord("®")}: - continue - # Correct some weird unicode stuffing choices - if char.unicode in {2}: - char.unicode = ord("-") - if char.unicode in {61623, 61664}: - char.unicode = ord("•") - if char.unicode < 32 and char.unicode not in {0xa}: - continue - # Ignore characters without width that are not spaces - if not char.width and char.unicode not in {0xa, 0xd, 0x20}: - LOGGER.error(f"Unknown char width for {char}: {char.bbox}") - # Split up the chars depending on the orientation - if 45 < char.rotation <= 135 or 225 < char.rotation <= 315: - origin_lines_x[round(char.origin.x, 1)].append(char) - elif char.rotation <= 45 or 135 < char.rotation <= 225 or 315 < 
char.rotation: - origin_lines_y[round(char.origin.y, 1)].append(char) - else: - LOGGER.error("Unknown char rotation:", char, char.rotation) - - # Convert characters into lines - bbox_lines_y = [] - for chars in origin_lines_y.values(): - # Remove lines with whitespace only - if all(c.unicode in {0xa, 0xd, 0x20} for c in chars): - continue - origin = statistics.fmean(c.origin.y for c in chars) - line = CharLine(self, chars, - min(c.bbox.bottom for c in chars), - origin, - max(c.bbox.top for c in chars), - max(c.height for c in chars), - sort_origin=self.height - origin) - bbox_lines_y.append(line) - # print(line, line.top, line.origin, line.bottom, line.height) - bbox_lines = sorted(bbox_lines_y, key=lambda l: l._sort_origin) - - bbox_lines_x = [] - for chars in origin_lines_x.values(): - # Remove lines with whitespace only - if all(c.unicode in {0xa, 0xd, 0x20} for c in chars): - continue - line = CharLine(self, chars, - min(c.bbox.left for c in chars), - statistics.fmean(c.origin.x for c in chars), - max(c.bbox.right for c in chars), - max(c.width for c in chars), - 270 if sum(c.rotation for c in chars) <= 135 * len(chars) else 90) - bbox_lines_x.append(line) - bbox_lines += sorted(bbox_lines_x, key=lambda l: l._sort_origin) - - if not bbox_lines: - return [] - - # Merge lines that have overlapping bbox_lines - # FIXME: This merges lines that "collide" vertically like in formulas - merged_lines = [] - current_line = bbox_lines[0] - for next_line in bbox_lines[1:]: - height = max(current_line.height, next_line.height) - # Calculate overlap via normalize origin (increasing with line index) - if ((current_line._sort_origin + rtol * height) > - (next_line._sort_origin - rtol * height)): - # if line.rotation or self.rotation: - # # The next line overlaps this one, we merge the shorter line - # # (typically super- and subscript) into taller line - # use_current = len(current_line.chars) >= len(next_line.chars) - # else: - use_current = current_line.height >= next_line.height - line = current_line if use_current else next_line - current_line = CharLine(self, current_line.chars + next_line.chars, - line.bottom, line.origin, line.top, - height, line.rotation, - sort_origin=line._sort_origin) - else: - # The next line does not overlap the current line - merged_lines.append(current_line) - current_line = next_line - # append last line - merged_lines.append(current_line) - - # Sort all lines horizontally based on character origin - sorted_lines = [] - for line in merged_lines: - if line.rotation == 90: - def sort_key(char): - if char.unicode in {0xa, 0xd}: - return char.tbbox.midpoint.y - 1e9 - return char.tbbox.midpoint.y - elif line.rotation == 270: - def sort_key(char): - if char.unicode in {0xa, 0xd}: - return -char.tbbox.midpoint.y + 1e9 - return -char.tbbox.midpoint.y - else: - def sort_key(char): - if char.unicode in {0xa, 0xd}: - return char.origin.x + 1e9 - return char.origin.x - sorted_lines.append(CharLine(self, sorted(line.chars, key=sort_key), - line.bottom, line.origin, - line.top, line.height, - line.rotation, area.left, - sort_origin=line._sort_origin)) - - return sorted_lines - - def _content_areas(self, area: Rectangle, with_graphics: bool = True) -> list: - if with_graphics: - graphics = self._graphics_filtered(area) - regions = [] - for graphic in sorted(graphics, key=lambda g: (-g.bbox.top, g.bbox.x)): - gbbox = graphic.bbox.joined(graphic.cbbox) if graphic.cbbox else graphic.bbox - for reg in regions: - if reg.overlaps(gbbox.bottom, gbbox.top): - # They overlap, so merge them - 
reg.v0 = min(reg.v0, gbbox.bottom) - reg.v1 = max(reg.v1, gbbox.top) - reg.objs.append(graphic) - break - else: - regions.append(Region(gbbox.bottom, gbbox.top, graphic)) - - # print(regions) - areas = [] - ypos = area.top - for reg in regions: - if ypos - reg.v1 > self._spacing["y_em"]: - areas.append((Rectangle(area.left, reg.v1, area.right, ypos), None)) - for obj in reg.objs: - oarea = obj.bbox.joined(obj.cbbox) if obj.cbbox else obj.bbox - areas.append((oarea, obj)) - ypos = reg.v0 - areas.append((Rectangle(area.left, area.bottom, area.right, ypos), None)) - else: - areas = [(area, None)] - return areas - - def _objects_filtered(self, area: Rectangle, with_graphics: bool = True) -> list: - self._link_characters() - areas = self._content_areas(area, with_graphics) - objects = [] - for narea, obj in areas: - if obj is None: - objects += self._charlines_filtered(narea) - else: - oarea = obj.bbox.joined(obj.cbbox) if obj.cbbox else obj.bbox - predicate = lambda c: not obj.bbox.contains(c.origin) - lines = self._charlines_filtered(oarea, predicate) - # print(obj, oarea, lines, [line.content for line in lines]) - objects += list(sorted(lines + [obj], key=lambda o: (-o.bbox.y, o.bbox.x))) - return objects - @property def content_ast(self) -> list: ast = [] @@ -492,13 +332,13 @@ def content_ast(self) -> list: re.search("ordering +information|part +numbering", item.title, re.IGNORECASE)), -1) with_graphics = (order_page != self.index) for area in self._areas["content"]: - ast.append(self._ast_filtered(area, with_graphics=with_graphics)) + ast.append(self.ast_in_area(area, with_graphics=with_graphics)) # Add a page node to the first leaf to keep track of where a page starts first_leaf = next((n for n in iter(ast[0].descendants) if n.is_leaf), ast[0]) Node("page", parent=first_leaf, xpos=first_leaf.xpos, number=self.number) return ast - def _graphics_filtered(self, area) -> list: + def graphics_in_area(self, area: Rectangle) -> list[Table | Figure]: # Find all graphic clusters in this area em = self._spacing["y_em"] large_area = area.offset_x(em/2) @@ -511,7 +351,7 @@ def _graphics_filtered(self, area) -> list: # Find the captions and group them by y origin to catch side-by-side figures ycaptions = defaultdict(list) - for line in self._charlines_filtered(area, lambda c: "Bold" in c.font): + for line in self.charlines_in_area(area, lambda c: "Bold" in c.font): for cluster in line.clusters(): for phrase in [r"Figure \d+\.", r"Table \d+\."]: if re.match(phrase, cluster.content): @@ -531,7 +371,7 @@ def _graphics_filtered(self, area) -> list: if b.bottom <= bottom and left <= b.left and b.right <= right), None) if graphic is None: - LOGGER.error(f"Graphic cluster not found for caption {''.join(c.char for c in chars)}") + _LOGGER.error(f"Graphic cluster not found for caption {''.join(c.char for c in chars)}") continue if self._template == "blue_gray": @@ -545,7 +385,7 @@ def _graphics_filtered(self, area) -> list: break cbbox = nbbox cchars = nchars - elif self._template == "black_white": + else: cbbox = Rectangle(left, min(graphic[0].top, bottom), right, top) otype = phrase.split(" ")[0].lower() @@ -583,6 +423,7 @@ def _graphics_filtered(self, area) -> list: for gbbox, paths in graphic_clusters: if gbbox.width < self._spacing["x_em"] or gbbox.height < self._spacing["y_em"]: continue + category = "" if any(isinstance(p, Image) for p in paths): category = "figure" elif self._template == "blue_gray": @@ -643,9 +484,9 @@ def _graphics_filtered(self, area) -> list: elif line.direction == 
line.Direction.HORIZONTAL: ylines.append(line.specialize()) else: - LOGGER.warn(f"Line not vertical or horizontal: {line}") + _LOGGER.warn(f"Line not vertical or horizontal: {line}") else: - LOGGER.warn(f"Path too long: {path}") + _LOGGER.warn(f"Path too long: {path}") elif self._colors(path.fill) == "darkblue": # Add the bottom line of the dark blue header box as a very thick line line = HLine(path.bbox.bottom, path.bbox.left, path.bbox.right, 5) @@ -681,58 +522,9 @@ def _graphics_filtered(self, area) -> list: return objects - @property - def content_objects(self) -> list: - objs = [] - for area in self._areas["content"]: - objs.extend(self._objects_filtered(area)) - return objs - - @property - def content_graphics(self) -> list: - objs = [] - for area in self._areas["content"]: - objs.extend(self._graphics_filtered(area)) - return objs - - @property - def content_lines(self) -> list: - return [o for o in self.content_objects if isinstance(o, CharLine)] - - @property - def content_tables(self) -> list: - return [o for o in self.content_graphics if isinstance(o, Table)] - - @property - def content_figures(self) -> list: - return [o for o in self.content_graphics if isinstance(o, Figure)] - - def _char_properties(self, line, char): - cp = { - "superscript": False, - "subscript": False, - "bold": any(frag in char.font for frag in {"Bold"}), - "italic": any(frag in char.font for frag in {"Italic", "Oblique"}), - "underline": (char.objlink or char.weblink) is not None, - "size": round(line.height), - "relsize": self._line_size(line), - "char": chr(char.unicode), - } - - if line.rotation: - if char.origin.x < (line.origin - 0.25 * line.height): - cp["superscript"] = True - elif char.origin.x > (line.origin + 0.15 * line.height): - cp["subscript"] = True - elif char.origin.y > (line.origin + 0.25 * line.height): - cp["superscript"] = True - elif char.origin.y < (line.origin - 0.15 * line.height): - cp["subscript"] = True - - return cp - - def _ast_filtered(self, area: Rectangle, with_graphics=True, - ignore_xpos=False, with_bits=True, with_notes=True) -> list: + def ast_in_area(self, area: Rectangle, with_graphics: bool = True, + ignore_xpos: bool = False, with_bits: bool = True, + with_notes: bool = True) -> Node: x_em = self._spacing["x_em"] spacing_content = self._spacing["x_content"] lh_factor = self._spacing["lh"] @@ -753,8 +545,9 @@ def parent_name(current): current = root ypos = area.top - for obj in self._objects_filtered(area, with_graphics): + for obj in self.objects_in_area(area, with_graphics): xpos = round(obj.bbox.left) + # Tables should remain in their current hierarchy regardless of indentation if isinstance(obj, (Table, Figure)): current = next((c for c in current.iter_path_reverse() @@ -763,6 +556,7 @@ def parent_name(current): Node(name, parent=current, obj=obj, xpos=xpos, number=-1, _width=obj.bbox.width / area.width, _type=obj._type) ypos = obj.bbox.bottom + # Lines of text need to be carefully checked for indentation elif isinstance(obj, CharLine): newlines = round((ypos - obj.origin) / (lh_factor * obj.height)) @@ -783,6 +577,7 @@ def parent_name(current): current = current.parent.parent # print(obj.fonts, ypos, xpos, current.xpos, f"{obj.height:.2f}", content) + # Check if line is a heading, which may be multi-line, so we must # be careful not to nest them, but group them properly # Headings are always inserted into the root note! 
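
Editor's note on the `ast_in_area` hunks above: text lines are nested into the abstract syntax tree purely by their horizontal position (`xpos`), while tables and figures stay attached to the current hierarchy level regardless of indentation. The following is a minimal, self-contained sketch of that indentation-to-tree idea using `anytree`; the node names, the `x_em` threshold and the input tuples are hypothetical and deliberately much simpler than the real spacing heuristics in this file.

    # Illustrative sketch only: nest text lines by indentation (xpos) into an
    # anytree hierarchy, keeping "graphic" nodes attached to the current parent
    # regardless of their indentation. Thresholds and node names are hypothetical.
    from anytree import Node, RenderTree

    def build_tree(items, x_em=5):
        root = Node("area", xpos=0)
        current = root
        for kind, xpos, payload in items:
            if kind == "graphic":
                # Tables/figures do not change the indentation level
                Node("table", parent=current, xpos=current.xpos, obj=payload)
                continue
            if xpos > current.xpos + x_em:
                # Indented further than the current level -> nest deeper
                current = Node("list", parent=current, xpos=xpos)
            while xpos < current.xpos - x_em and current.parent is not None:
                # Dedented -> climb back up towards the root
                current = current.parent
            Node("para", parent=current, xpos=xpos, obj=payload)
        return root

    if __name__ == "__main__":
        tree = build_tree([("text", 50, "left-aligned line"),
                           ("text", 60, "indented bullet"),
                           ("graphic", 80, "Table 1."),
                           ("text", 50, "back at the left margin")])
        for pre, _, node in RenderTree(tree):
            print(f"{pre}{node.name} @ {node.xpos}")

Running the sketch prints the nested structure, which is roughly what `print(RenderTree(document))` shows when the `show_tree` debugging option of `convert()` is enabled.
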
@@ -853,15 +648,15 @@ def parent_name(current): else: # Default back to the regex if "Reserved" not in content: - LOGGER.warning(f"Fallback to Regex length for Bit pattern '{content}'!\nFonts: {obj.fonts}") + _LOGGER.warning(f"Fallback to Regex length for Bit pattern '{content}'!\nFonts: {obj.fonts}") content_start = re.match(r" *([Bb]ytes? *.+? *)?(B[uio]t)( *\d+:?|s *(\d+ *([:-] *\d+ *)? *,? *)+) *", content) if content_start is None: - LOGGER.error(f"Unable to match Bit regex at all! '{content}'!") + _LOGGER.error(f"Unable to match Bit regex at all! '{content}'!") content_start = 0 else: content_start = len(content_start.group(0)) if not content_start: - LOGGER.error(f"Missing content start (=0)! '{content}'!") + _LOGGER.error(f"Missing content start (=0)! '{content}'!") content_start = min(content_start, len(obj.chars) - 1) current = next((c for c in current.iter_path_reverse() @@ -895,4 +690,4 @@ def parent_name(current): return root def __repr__(self) -> str: - return f"StPage({self.number})" + return f"StmPage({self.number})" diff --git a/src/modm_data/pdf2html/stmicro/table.py b/src/modm_data/pdf2html/table.py similarity index 82% rename from src/modm_data/pdf2html/stmicro/table.py rename to src/modm_data/pdf2html/table.py index e0744b5..6aa0995 100644 --- a/src/modm_data/pdf2html/stmicro/table.py +++ b/src/modm_data/pdf2html/table.py @@ -5,112 +5,10 @@ import statistics from functools import cached_property from collections import defaultdict -from ...utils import HLine, VLine, Rectangle - -LOGGER = logging.getLogger(__name__) - - -class TableCell: - class Borders: - def __init__(self, l, b, r, t): - self.l = l - self.b = b - self.r = r - self.t = t - - def __init__(self, table, position, bbox, borders, is_simple=False): - self._table = table - self._bboxes = [bbox] - self.b = borders - self.positions = [position] - self.is_header = False - self._is_simple = is_simple - self._bbox = None - self._lines = None - - def _merge(self, other): - self.positions.extend(other.positions) - self.positions.sort() - self._bboxes.append(other.bbox) - self._bbox = None - self._lines = None - - def _move(self, x, y): - self.positions = [(py + y, px + x) for (py, px) in self.positions] - self.positions.sort() - - def _expand(self, dx, dy): - ymax, xmax = self.positions[-1] - for yi in range(ymax, ymax + dy + 1): - for xi in range(xmax, xmax + dx + 1): - self.positions.append((yi, xi)) - self.positions.sort() +from ..utils import HLine, VLine, Rectangle +from .cell import TableCell - @property - def x(self) -> int: - return self.positions[0][1] - - @property - def y(self) -> int: - return self.positions[0][0] - - @property - def xspan(self) -> int: - return self.positions[-1][1] - self.positions[0][1] + 1 - - @property - def yspan(self) -> int: - return self.positions[-1][0] - self.positions[0][0] + 1 - - @property - def rotation(self) -> int: - if not self.lines: return 0 - return self.lines[0].rotation - - @property - def bbox(self) -> Rectangle: - if self._bbox is None: - self._bbox = Rectangle(min(bbox.left for bbox in self._bboxes), - min(bbox.bottom for bbox in self._bboxes), - max(bbox.right for bbox in self._bboxes), - max(bbox.top for bbox in self._bboxes)) - return self._bbox - - @property - def lines(self): - if self._lines is None: - self._lines = self._table._page._charlines_filtered(self.bbox) - return self._lines - - @property - def content(self): - return "".join(c.char for line in self.lines for c in line.chars) - - @property - def left_aligned(self): - x_em = 
self._table._page._spacing["x_em"] - for line in self.lines: - if (line.bbox.left - self.bbox.left + x_em) < (self.bbox.right - line.bbox.right): - return True - return False - - @property - def ast(self): - ast = self._table._page._ast_filtered(self.bbox, with_graphics=False, - ignore_xpos=not self.left_aligned, - with_bits=False, with_notes=False) - ast.name = "cell" - return ast - - def __repr__(self) -> str: - positions = ",".join(f"({p[1]},{p[0]})" for p in self.positions) - borders = "" - if self.b.l: borders += "[" - if self.b.b: borders += "_" - if self.b.t: borders += "^" - if self.b.r: borders += "]" - start = "CellH" if self.is_header else "Cell" - return start + f"[{positions}] {borders}" +_LOGGER = logging.getLogger(__name__) class Table: @@ -143,26 +41,26 @@ def _cluster(lines, key): # Find the positions of the top numbers clusters = [] - if lines := self._page._charlines_filtered(cbbox): + if lines := self._page.charlines_in_area(cbbox): if len(cluster := lines[0].clusters(self._page._spacing["x_em"] / 2)): clusters.append((cluster, cbbox)) else: self.grid = (0, 0) - LOGGER.error(f"Cannot find any bit position clusters! {self} ({self._page})") + _LOGGER.error(f"Cannot find any bit position clusters! {self} ({self._page})") # Find the positions of the second row of numbers if len(ygrid) > 2: for yi, (ypos0, ypos1) in enumerate(zip(sorted(ygrid), sorted(ygrid)[1:])): nbbox = Rectangle(self.bbox.left, ygrid[ypos0][0].p0.y, self.bbox.right, ygrid[ypos1][0].p0.y) - if lines := self._page._charlines_filtered(nbbox): + if lines := self._page.charlines_in_area(nbbox): if all(c.char.isnumeric() or c.unicode in {0x20, 0xa, 0xd} for c in lines[0].chars): if not len(cluster := lines[0].clusters(self._page._spacing["x_em"] / 2)) % 16: clusters.append((cluster, nbbox)) self._bit_headers = len(ygrid) - yi - 1 else: self.grid = (len(cluster), 0) - LOGGER.warning(f"Second bit pattern does not have 16 or 32 clusters! {self} ({self._page})") + _LOGGER.warning(f"Second bit pattern does not have 16 or 32 clusters! {self} ({self._page})") break # Merge these clusters to find their positions @@ -235,7 +133,7 @@ def _fix_borders(self, cells, x: int, y: int): r = cells[(x + 1, y)].b if cells[(x + 1, y)] is not None else TableCell.Borders(0, 0, 1, 0) t = cells[(x, y + 1)].b if cells[(x, y + 1)] is not None else TableCell.Borders(0, 1, 0, 0) - # if (not c.t and c.l and c.r and c.b) and "Reset value" in cell.content: + # if (not c.t and csand c.r and c.b) and "Reset value" in cell.content: # c.t = 1 # Open at the top into a span @@ -401,7 +299,7 @@ def append_bottom(self, other, merge_headers=True) -> bool: print(len(merged_xheaders), merged_xheaders) # If they are not equal length the table layouts are not compatible at all! 
if len(self_heads) != len(other_heads): - LOGGER.error(f"Failure to append table {other} ({other._page}) onto table {self} ({self._page})") + _LOGGER.error(f"Failure to append table {other} ({other._page}) onto table {self} ({self._page})") return False # We want to stuff/move the cell positions inplace, therefore we start @@ -444,6 +342,7 @@ def _insert_cells(cell, src, dsts, insert_only): assert new_positions assert len(new_positions) == len(set(new_positions)) cell.positions = sorted(new_positions) + cell._invalidate() def _move_cells(cells, own_xpos): if debug: @@ -497,7 +396,7 @@ def _move_cells(cells, own_xpos): def append_side(self, other, expand=False) -> bool: if self.grid[1] != other.grid[1]: if expand: - LOGGER.debug(f"Expanding bottom cells to match height: {self} ({self._page}) + {other} ({other._page})") + _LOGGER.debug(f"Expanding bottom cells to match height: {self} ({self._page}) + {other} ({other._page})") ymin = min(self.grid[1], other.grid[1]) ymax = max(self.grid[1], other.grid[1]) etable = other if self.grid[1] > other.grid[1] else self @@ -506,7 +405,7 @@ def append_side(self, other, expand=False) -> bool: cell._expand(0, ymax - ymin) etable.grid = (etable.grid[0], ymax) else: - LOGGER.error(f"Unable to append table at side: {self} ({self._page}) + {other} ({other._page})") + _LOGGER.error(f"Unable to append table at side: {self} ({self._page}) + {other} ({other._page})") return False # We must move all cells to the right now diff --git a/tools/make/arm.mk b/tools/make/arm.mk index c561bc6..f8afce9 100644 --- a/tools/make/arm.mk +++ b/tools/make/arm.mk @@ -14,6 +14,6 @@ clone-sources-arm: ext/arm/cmsis/ .PHONY: update-sources-arm ## Update all ARM related repositories to the latest version. update-sources-arm: - @(cd ext/arm/cmsis && git pull) & + @(cd ext/arm/cmsis && git fetch && git reset --hard origin/master) & @wait diff --git a/tools/make/common.mk b/tools/make/common.mk index 9f6ffa8..164af89 100644 --- a/tools/make/common.mk +++ b/tools/make/common.mk @@ -1,7 +1,7 @@ # Copyright 2023, Niklas Hauser # SPDX-License-Identifier: MPL-2.0 -### @Utils Utilities \1000 +### @Utils Utilities \1010 log/%: @mkdir -p $@ @@ -34,7 +34,7 @@ venv: $(MAKE) pip-install-frozen .PHONY: clean-venv -# Remove the virtual environment +## Remove the virtual environment clean-venv: @rm -rf .venv @@ -55,13 +55,13 @@ build-homepage: serve-api-docs: @pdoc --mermaid modm_data - +### @Tests Testing \1009 # ================================== Testing ================================== ext/test/regression/: @git clone --depth=1 git@github.com:modm-ext/modm-data-test-docs.git $@ .PHONY: run-regression-tests -## Convert some PDF pages and check against their known HTML. +## @Tests Convert some PDF pages and check against their known HTML. run-regression-tests: ext/test/regression/ @test/convert_html.sh @git diff --exit-code -- test/data/html diff --git a/tools/make/stmicro.mk b/tools/make/stmicro.mk index 8d35f3c..e89eaf6 100644 --- a/tools/make/stmicro.mk +++ b/tools/make/stmicro.mk @@ -27,11 +27,11 @@ clone-sources-stmicro: clone-sources-arm ext/stmicro/cubehal/ ext/stmicro/header .PHONY: update-sources-stmicro ## Update all STMicro related repositories to the latest version. 
update-sources-stmicro: update-sources-arm - @(cd ext/stmicro/cubehal && git pull) & - @(cd ext/stmicro/header && git pull) & - @(cd ext/stmicro/svd && git pull) & - @(cd ext/stmicro/owl-archive && git pull) & - @(cd ext/stmicro/svd-archive && git pull) & + @(cd ext/stmicro/cubehal && git fetch && git reset --hard origin/main) & + @(cd ext/stmicro/header && git fetch && git reset --hard origin/master) & + @(cd ext/stmicro/svd && git fetch && git reset --hard origin/main) & + @(cd ext/stmicro/owl-archive && git fetch && git reset --hard origin/main) & + @(cd ext/stmicro/svd-archive && git fetch && git reset --hard origin/master) & @wait @@ -71,9 +71,9 @@ clone-sources-stmicro-private: clone-sources-stmicro ext/stmicro/cubemx/ \ .PHONY: update-sources-stmicro-private update-sources-stmicro-private: update-sources-stmicro - @(cd ext/stmicro/cubemx && git pull) & - @(cd ext/stmicro/html-archive && git pull) & - @(cd ext/stmicro/pdf && git pull) & + @(cd ext/stmicro/cubemx && git fetch && git reset --hard origin/main) & + @(cd ext/stmicro/html-archive && git fetch && git reset --hard origin/main) & + @(cd ext/stmicro/pdf && git fetch && git reset --hard origin/main) & @wait @@ -87,18 +87,18 @@ ext/stmicro/html-archive/%: ext/stmicro/pdf/%.pdf log/stmicro/html/ ## archive. The log will be placed in log/stmicro/html/%.txt. convert-stmicro-html-%: ext/stmicro/html-archive/% -stmicro_pdf2html = $(sort $(1:ext/stmicro/pdf/%.pdf=ext/stmicro/html-archive/%)) +stmicro_pdf2html = $(sort $(foreach path,$1,$(path:ext/stmicro/pdf/%.pdf=ext/stmicro/html-archive/%))) .PHONY: convert-stmicro-html-rm ## Convert all STMicro Reference Manual PDFs into HTML. -convert-stmicro-html-rm: $(stmicro_pdf2html $(wildcard ext/stmicro/pdf/RM*.pdf)) +convert-stmicro-html-rm: $(call stmicro_pdf2html,$(wildcard ext/stmicro/pdf/RM*.pdf)) .PHONY: convert-stmicro-html-ds ## Convert all STMicro Datasheet PDFs into HTML. -convert-stmicro-html-ds: $(stmicro_pdf2html $(wildcard ext/stmicro/pdf/DS*.pdf)) +convert-stmicro-html-ds: $(call stmicro_pdf2html,$(wildcard ext/stmicro/pdf/DS*.pdf)) .PHONY: convert-stmicro-html ## Convert all STMicro PDFs into HTML. -convert-stmicro-html: $(stmicro_pdf2html $(wildcard ext/stmicro/pdf/*.pdf)) +convert-stmicro-html: $(call stmicro_pdf2html,$(wildcard ext/stmicro/pdf/*.pdf)) .PHONY: clean-stmicro-html-% ## Remove all STMicro HTML folders of a specific document number. diff --git a/tools/scripts/makefile_help.py b/tools/scripts/makefile_help.py index a70eeeb..d96e2e4 100644 --- a/tools/scripts/makefile_help.py +++ b/tools/scripts/makefile_help.py @@ -14,19 +14,20 @@ def parse_makefiles(makefiles: list[str]): for path in makefiles: content = Path(path).read_text() fcategory = "General" - if (cdoc := re.search(r"### *@([\w-]+) *(.*?) *\\(\d+)\n", content)): - fcategory = cdoc.group(1) - cdocs[fcategory] = (cdoc.group(2), int(cdoc.group(3) or 0)) - - rawdocs = re.findall(r"((?:##.+\n)+)(.+):", content, flags=re.MULTILINE) - for doc, rule in rawdocs: - doc = doc.replace("##", "") - if (category := re.search(r"@([\w-]+)", doc)): - doc = doc.replace(category.group(0), "") - category = category.group(1) - else: - category = fcategory - docs[category][rule] = [l.strip() for l in doc.splitlines()] + for groupcontent in re.split(r"### *@", content): + if (cdoc := re.search(r"^([\w-]+) *(.*?) 
*\\(\d+)\n", groupcontent)): + fcategory = cdoc.group(1) + cdocs[fcategory] = (cdoc.group(2), int(cdoc.group(3) or 0)) + + rawdocs = re.findall(r"((?:##.+\n)+)(.+):", groupcontent, flags=re.MULTILINE) + for doc, rule in rawdocs: + doc = doc.replace("##", "") + if (category := re.search(r"@([\w-]+)", doc)): + doc = doc.replace(category.group(0), "") + category = category.group(1) + else: + category = fcategory + docs[category][rule] = [l.strip() for l in doc.splitlines()] return dict(docs), cdocs diff --git a/tools/scripts/search_html.py b/tools/scripts/search_html.py index 8c9337b..a89b519 100644 --- a/tools/scripts/search_html.py +++ b/tools/scripts/search_html.py @@ -1,11 +1,5 @@ -# Copyright (c) 2022, Niklas Hauser -# -# This file is part of the modm-data project. -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. -# ----------------------------------------------------------------------------- +# Copyright 2022, Niklas Hauser +# SPDX-License-Identifier: MPL-2.0 import re import sys @@ -15,7 +9,6 @@ from pathlib import Path sys.path.append(".") -from modm_data.utils import ext_path from modm_data.html import Document def _format_html(xmlnode, treenode): @@ -93,7 +86,7 @@ def format_document(document): link = etree.Element("link") link.set("rel", "stylesheet") - link.set("href", "ext/stmicro/html/style.css") + link.set("href", "ext/stmicro/html-archive/style.css") head.append(link) body = etree.Element("body") @@ -113,8 +106,8 @@ def main(): parser.add_argument("--html", type=str) args = parser.parse_args() - documents = ext_path("stmicro/html").glob(args.document) - documents = [Document(d) for d in documents] + documents = (Path(__file__).parents[2] / "ext/stmicro/html-archive").absolute() + documents = [Document(d) for d in documents.glob(args.document)] rootnode = anytree.Node("root", document=args.document, chapter=args.chapter, table=args.table)