From 454e58c52c235464120e05b0a578231f66296167 Mon Sep 17 00:00:00 2001 From: David Waterman Date: Fri, 8 Mar 2024 15:06:50 +0000 Subject: [PATCH] Use a single `#` character in a filename template to represent a non-zero-padded incremental number. --- src/dxtbx/imageset.py | 15 +++++++++++---- src/dxtbx/model/experiment_list.py | 10 +++++++--- src/dxtbx/sequence_filenames.py | 26 +++++++++++++++++++------- 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/src/dxtbx/imageset.py b/src/dxtbx/imageset.py index 564ced681..190122dc3 100644 --- a/src/dxtbx/imageset.py +++ b/src/dxtbx/imageset.py @@ -1,5 +1,7 @@ from __future__ import annotations +import natsort + import boost_adaptbx.boost.python import dxtbx.format.image # noqa: F401, import dependency for unpickling @@ -55,7 +57,12 @@ def _expand_template(template: str, indices: Iterable[int]) -> list[str]: pfx = template.split("#")[0] sfx = template.split("#")[-1] count = template.count("#") - return [f"{pfx}{index:0{count}}{sfx}" for index in indices] + if count == 1: + # https://github.com/cctbx/dxtbx/issues/646 + filenames = [f"{pfx}{index}{sfx}" for index in indices] + else: + filenames = [f"{pfx}{index:0{count}}{sfx}" for index in indices] + return natsort.natsorted(filenames) class MemReader: @@ -486,7 +493,7 @@ def _create_imageset(filelist, check_headers): # Get the template format if "#" in template: - filenames = sorted(_expand_template(template, indices)) + filenames = _expand_template(template, indices) else: filenames = [template] @@ -503,7 +510,7 @@ def _create_sequence(filelist, check_headers): # Expand the template if necessary if "#" in template: - filenames = sorted(_expand_template(template, indices)) + filenames = _expand_template(template, indices) else: filenames = [template] @@ -564,7 +571,7 @@ def make_sequence( # Get the template format if "#" in template: - filenames = sorted(_expand_template(template, indices)) + filenames = _expand_template(template, indices) else: filenames = [template] diff --git a/src/dxtbx/model/experiment_list.py b/src/dxtbx/model/experiment_list.py index 257515649..299831335 100644 --- a/src/dxtbx/model/experiment_list.py +++ b/src/dxtbx/model/experiment_list.py @@ -915,8 +915,6 @@ def from_templates(templates, **kwargs): f"Image file {filenames[0]} appears to be a '{type(format_class).__name__}', but this is an abstract Format" ) else: - index = slice(*template_string_number_index(template)) - image_range = kwargs.get("image_range") if image_range: first, last = image_range @@ -926,7 +924,13 @@ def from_templates(templates, **kwargs): if not kwargs.get("allow_incomplete_sequences", False): if "#" in template: # Check all images in range are present - if allowed - all_numbers = {int(f[index]) for f in filenames} + i0, i1 = template_string_number_index(template) + prefix = template[:i0] + suffix = template[i1:] + all_numbers = { + int(f.replace(prefix, "").replace(suffix, "")) + for f in filenames + } missing = set(range(first, last + 1)) - all_numbers if missing: raise ValueError( diff --git a/src/dxtbx/sequence_filenames.py b/src/dxtbx/sequence_filenames.py index d6c056408..1b7f4df01 100644 --- a/src/dxtbx/sequence_filenames.py +++ b/src/dxtbx/sequence_filenames.py @@ -5,6 +5,8 @@ from collections import defaultdict from glob import glob +import natsort + def template_regex(filename): """Works out a template from a filename. @@ -181,6 +183,9 @@ def replace_template_format_with_hash(match): def template_string_to_glob_expr(template): """Convert the template to a glob expression.""" + if template.count("#") == 1: + # https://github.com/cctbx/dxtbx/issues/646 + return template.replace("#", "*") return template.replace("#", "[0-9]") @@ -191,7 +196,14 @@ def template_string_number_index(template): def locate_files_matching_template_string(template): """Return all files matching template.""" - return glob(template_string_to_glob_expr(template)) + matches = glob(template_string_to_glob_expr(template)) + if template.count("#") != 1: + return matches + i0, i1 = template_string_number_index(template) + prefix = template[:i0] + suffix = template[i1:] + patt = re.compile(prefix + "([^0]*)([0-9]+)" + suffix) + return [m for m in matches if patt.match(m)] def template_image_range(template): @@ -199,19 +211,19 @@ def template_image_range(template): # Find the files matching the template filenames = locate_files_matching_template_string(template) - filenames = sorted(filenames) + filenames = natsort.natsorted(filenames) # Check that the template matches some files if len(filenames) == 0: raise ValueError(f"Template {template} doesn't match any files.") - # Get the templete format - index = slice(*template_string_number_index(template)) - # Get the first and last indices if "#" in template: - first = int(filenames[0][index]) - last = int(filenames[-1][index]) + i0, i1 = template_string_number_index(template) + prefix = template[:i0] + suffix = template[i1:] + first = int(filenames[0].replace(prefix, "").replace(suffix, "")) + last = int(filenames[-1].replace(prefix, "").replace(suffix, "")) else: # template is one file first, last = 0, 0