diff --git a/phylogenetic/build-configs/inrb/auspice_config.json b/phylogenetic/build-configs/inrb/auspice_config.json
index 4c711e6..5f5c796 100644
--- a/phylogenetic/build-configs/inrb/auspice_config.json
+++ b/phylogenetic/build-configs/inrb/auspice_config.json
@@ -15,6 +15,15 @@
   ],
   "build_url": "https://github.com/nextstrain/mpox",
   "colorings": [
+    {
+      "key": "clade_membership",
+      "title": "Clade",
+      "type": "categorical",
+      "scale": [
+        ["Ia", "#4C90C0"],
+        ["Ib", "#CBB742"]
+      ]
+    },
     {
       "key": "region",
       "title": "Region",
diff --git a/phylogenetic/build-configs/inrb/config.yaml b/phylogenetic/build-configs/inrb/config.yaml
index 91513fe..e8e958f 100644
--- a/phylogenetic/build-configs/inrb/config.yaml
+++ b/phylogenetic/build-configs/inrb/config.yaml
@@ -13,3 +13,9 @@ private_metadata: "data/metadata-private.tsv"
 traits:
   columns: region country division location
   sampling_bias_correction: 3
+
+# Private INRB data doesn't have clade annotations so allow empty clade fields
+# (i.e. we're assuming all INRB data is clade I)
+subsample:
+  everything:
+    query: "'clade in [\"I\", \"Ia\", \"Ib\", \"\"]'"
diff --git a/phylogenetic/build-configs/inrb/curate_private_data.py b/phylogenetic/build-configs/inrb/curate_private_data.py
index d4febcf..9c8a869 100644
--- a/phylogenetic/build-configs/inrb/curate_private_data.py
+++ b/phylogenetic/build-configs/inrb/curate_private_data.py
@@ -5,6 +5,7 @@
 from datetime import datetime
 from os import path, mkdir
 from sys import exit
+import csv
 
 Sequences = dict[str, SeqIO.SeqRecord]
 Metadata = dict[str, dict[str, str]]
@@ -238,10 +239,11 @@ def write_sequences(sequences: Sequences) -> None:
 def write_metadata(metadata: Metadata, header: MetadataHeader) -> None:
     fname = fname_in_data_dir("metadata-private.tsv")
     print(f"Writing metadata to {fname}")
-    with open(fname, "w") as fh:
-        print("\t".join(header), file=fh)
-        for _, value in metadata.items():
-            print("\t".join([value[field] for field in header]), file=fh)
+    with open(fname, "w", newline='') as fh:
+        writer = csv.DictWriter(fh, header, extrasaction='ignore', delimiter='\t', lineterminator='\n')
+        writer.writeheader()
+        for line in metadata.values():
+            writer.writerow(line)
 
 def parse_remap_columns(arg: list[str]) -> list[tuple[str, str]]:
     try:
diff --git a/phylogenetic/scripts/assign-clades-via-metadata.py b/phylogenetic/scripts/assign-clades-via-metadata.py
index bc8738e..ef467ce 100644
--- a/phylogenetic/scripts/assign-clades-via-metadata.py
+++ b/phylogenetic/scripts/assign-clades-via-metadata.py
@@ -1,24 +1,30 @@
 """
-Use provided metadata to assign clade to terminal nodes. If the metadata is complete (i.e. all tips are assigned a clade)
-then for each clade we assign the clade to internal nodes and set a label for the MRCA node, iff the tips are monophyletic.
+Use provided metadata to assign clades to internal nodes and those with missing metadata.
+For each valid clades (i.e. those which match a hardcoded list) as long as the tips are monophyletic
+in the tree (ignoring missing data) then we label all internal nodes and missing tips with that clade
+as well as labelling the MRCA branch.
 """
 
 import argparse
-from sys import stderr, stdout, exit
+from sys import exit
 from Bio import Phylo
 import json
 from augur.io import read_metadata
 from augur.argparse_ import ExtendOverwriteDefault
 from collections import defaultdict
 
+VALID_CLADES = set(['Ia', 'Ib', 'I'])
+MISSING = ''
 
-def assign_internal_nodes(clade_name, terminal_nodes, node_data):
-    print(f"Assigning {clade_name} to internal nodes")
-    mrca = t.is_monophyletic(terminal_nodes)
-    if not mrca:
-        print(f"WARNING: {clade_name} wasn't monophyletic! Clades will not be assigned to internal nodes")
+def assign_internal_nodes(t, clade_name, terminal_nodes, missing_nodes, node_data):
+    print(f"[clade metadata] Assigning {clade_name} to internal nodes & labelling MRCA branch")
+    mrca = t.common_ancestor(terminal_nodes)
+
+    if not all([(n in terminal_nodes or n in missing_nodes) for n in mrca.get_terminals()]):
+        print(f"[clade metadata] ERROR {clade_name} wasn't monophyloetic (after accounting for nodes missing clade values).  Clades will not be assigned to internal nodes.")
         return
-    for node in mrca.get_nonterminals():
+
+    for node in (n for n in mrca.find_clades() if n not in terminal_nodes): # skip ones we have already assigned
         node_data['nodes'][node.name] = {'clade_membership': clade_name}
     node_data['branches'][mrca.name] = {'labels': {'clade': clade_name}}
 
@@ -41,23 +47,29 @@ def assign_internal_nodes(clade_name, terminal_nodes, node_data):
     t = Phylo.read(args.tree, "newick")
 
     node_data = {'nodes': {}, 'branches': {}}
-    counts = [0,0]
+    counts = {'missing': 0, 'valid': 0, 'invalid': 0}
     terminals = defaultdict(list)
+    missing_nodes = []
+
 
+    print(f"[clade metadata] Pass 1/2 - assigning clades from metadata to nodes")
     for node in t.get_terminals():
-        counts[0]+=1
-        if node.name in clades:
-            counts[1]+=1
-            node_data['nodes'][node.name] = {'clade_membership': clades[node.name]}
-            terminals[clades[node.name]].append(node)
-
-    print(f"Metadata defined clades for {counts[1]}/{counts[0]} tips in tree", file=stderr)
-
-    if counts[0]==counts[1]:
-        for clade_name, clade_terminals in terminals.items():
-            assign_internal_nodes(clade_name, clade_terminals, node_data)
-    else:
-        print(f"WARNING: incomplete metadata. Assignment of clade labels is uncertain and thus we are skipping it")
+        clade_value = clades[node.name]
+        if clade_value == MISSING:
+            counts['missing']+=1
+            missing_nodes.append(node)
+        elif clade_value in VALID_CLADES:
+            counts['valid']+=1
+            node_data['nodes'][node.name] = {'clade_membership': clade_value}
+            terminals[clade_value].append(node)
+        else:
+            counts['invalid']+=1
+    print(f"[clade metadata] {sum(counts.values())} tips: {counts['valid']} valid, {counts['invalid']} invalid, {counts['missing']} missing clade values")
+
+
+    print(f"[clade metadata] Pass 2/2 - if (valid) clades are monophyletic then label internal nodes (and missing tips)")
+    for clade_name, clade_terminals in terminals.items():
+        assign_internal_nodes(t, clade_name, clade_terminals, missing_nodes, node_data)
 
     with open(args.output_node_data, 'w') as fh:
         json.dump(node_data, fh)
diff --git a/phylogenetic/scripts/combine_data_sources.py b/phylogenetic/scripts/combine_data_sources.py
index 4ec9d49..667dd7d 100644
--- a/phylogenetic/scripts/combine_data_sources.py
+++ b/phylogenetic/scripts/combine_data_sources.py
@@ -30,7 +30,7 @@ def parse_tsv(fname: str) -> Metadata:
     assert list(fname).count('=')<=1, f"Too many '=' characters in argument {fname!r}"
     if '=' in fname:
         source_name, fname = fname.split('=')
-    with open(fname, "r") as fh:
+    with open(fname, "r", newline='') as fh:
         reader = csv.DictReader(fh, delimiter="\t")
         metadata = [row for row in reader]
     if source_name:
@@ -77,10 +77,11 @@ def write_sequences(fname: str, sequences: Sequences) -> None:
 
 def write_metadata(fname: str, metadata: Metadata, header: MetadataHeader) -> None:
     print(f"Writing metadata to {fname}")
-    with open(fname, "w") as fh:
-        print("\t".join(header), file=fh)
+    with open(fname, "w", newline='') as fh:
+        writer = csv.DictWriter(fh, header, extrasaction='ignore', delimiter='\t', lineterminator='\n')
+        writer.writeheader()
         for row in metadata:
-            print("\t".join([row.get(field, '') for field in header]), file=fh)
+            writer.writerow(row)
 
 if __name__=="__main__":
     args = parse_args()