diff --git a/scripts/sanitize_metadata.py b/scripts/sanitize_metadata.py
index 0485f133a..73711f1c3 100644
--- a/scripts/sanitize_metadata.py
+++ b/scripts/sanitize_metadata.py
@@ -20,6 +20,58 @@
     "Virus name",
 )
 
+def parse_location_string(location_string, location_fields):
+    """Parse a GISAID location string into separate geographic scales.
+
+    The string is split on "/" (with optional surrounding whitespace) and the
+    parts are assigned to ``location_fields`` in order. Parts beyond the given
+    fields are dropped; missing or empty parts are reported as "?".
+
+    Parameters
+    ----------
+    location_string : str
+        ``None`` and NaN count as missing; other non-strings go through ``str``.
+    location_fields : list
+        Field names, in the order their values appear in the location string.
+
+    Returns
+    -------
+    dict :
+        dictionary of geographic fields parsed from the given string
+
+    >>> location_fields = ["region", "country", "division", "location"]
+    >>> parse_location_string("Asia / Japan", location_fields)
+    {'region': 'Asia', 'country': 'Japan', 'division': '?', 'location': '?'}
+    >>> parse_location_string("Europe / Iceland / Reykjavik", location_fields)
+    {'region': 'Europe', 'country': 'Iceland', 'division': 'Reykjavik', 'location': '?'}
+    >>> parse_location_string("North America / USA / Washington / King County", location_fields)
+    {'region': 'North America', 'country': 'USA', 'division': 'Washington', 'location': 'King County'}
+
+    Extra entries, trailing delimiters, and outer whitespace are stripped.
+
+    >>> parse_location_string("North America / USA / Washington / King County / Extra field", location_fields)
+    {'region': 'North America', 'country': 'USA', 'division': 'Washington', 'location': 'King County'}
+    >>> parse_location_string(" Europe / Iceland / ", location_fields)
+    {'region': 'Europe', 'country': 'Iceland', 'division': '?', 'location': '?'}
+
+    Handle inconsistently delimited strings and missing values.
+
+    >>> parse_location_string("North America/USA/New York/New York", location_fields)
+    {'region': 'North America', 'country': 'USA', 'division': 'New York', 'location': 'New York'}
+    >>> parse_location_string("Europe/ Lithuania", location_fields)
+    {'region': 'Europe', 'country': 'Lithuania', 'division': '?', 'location': '?'}
+    >>> parse_location_string(float("nan"), location_fields)
+    {'region': '?', 'country': '?', 'division': '?', 'location': '?'}
+    """
+    # Missing values may arrive as None or float NaN (the only value unequal to itself).
+    is_nan = isinstance(location_string, float) and location_string != location_string
+    location_string = "" if location_string is None or is_nan else str(location_string)
+    # Start from all-missing defaults, then fill in each non-empty parsed value.
+    values = re.split(r"\s*/\s*", location_string.strip())
+    locations = {field: "?" for field in location_fields}
+    locations.update({field: value for field, value in zip(location_fields, values) if value})
+    return locations
+
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
@@ -48,11 +100,10 @@
         # scales. Replace missing field values with "?".
         locations = pd.DataFrame(
             (
-                str(location).split(" / ", maxsplit=len(LOCATION_FIELDS) - 1)
+                parse_location_string(location, LOCATION_FIELDS)
                 for location in metadata[args.parse_location_field].values
-            ),
-            columns=LOCATION_FIELDS
-        ).fillna("?")
+            )
+        )
 
         # Combine new location columns with original metadata and drop the
         # original location column.