Skip to content

Commit

Permalink
First attempt at h5 file format
Browse files Browse the repository at this point in the history
  • Loading branch information
jbeilstenedmands committed Feb 6, 2024
1 parent bf73681 commit ff3e1d3
Show file tree
Hide file tree
Showing 6 changed files with 249 additions and 1 deletion.
2 changes: 2 additions & 0 deletions src/dials/array_family/boost_python/flex_ext.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ namespace dials { namespace af { namespace boost_python {
void export_flex_unit_cell();
void export_flex_shoebox_extractor();
void export_flex_binner();
void export_shoebox_extract();

template <typename FloatType>
std::string get_real_type();
Expand All @@ -55,6 +56,7 @@ namespace dials { namespace af { namespace boost_python {
export_flex_unit_cell();
export_flex_shoebox_extractor();
export_flex_binner();
export_shoebox_extract();

def("get_real_type", &get_real_type<ProfileFloatType>);

Expand Down
26 changes: 26 additions & 0 deletions src/dials/array_family/boost_python/flex_reflection_table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,31 @@ namespace dials { namespace af { namespace boost_python {
return data_bytes;
}

boost::python::list get_shoebox_data_arrays(reflection_table self) {
boost::python::list result;
af::shared<Shoebox<> > sbox = self["shoebox"];
size_t n = 0;
for (int i = 0; i < self.size(); ++i) {
const Shoebox<> &s1 = sbox[i];
n += s1.data.size();
}
size_t ntot = 0;
af::shared<float> data_array(n, 0);
af::shared<float> bg_array(n, 0);
af::shared<size_t> mask_array(n, 0);
for (int i = 0; i < self.size(); ++i) {
const Shoebox<> &s1 = sbox[i];
std::copy(s1.data.begin(), s1.data.end(), data_array.begin() + ntot);
std::copy(s1.background.begin(), s1.background.end(), bg_array.begin() + ntot);
std::copy(s1.mask.begin(), s1.mask.end(), mask_array.begin() + ntot);
ntot += s1.data.size();
}
result.append(data_array);
result.append(bg_array);
result.append(mask_array);
return result;
}

/**
* Pack the reflection table in msgpack format into a streambuf object
* @param self The reflection table
Expand Down Expand Up @@ -938,6 +963,7 @@ namespace dials { namespace af { namespace boost_python {
.def("as_msgpack_to_file", &reflection_table_as_msgpack_to_file)
.def("from_msgpack", &reflection_table_from_msgpack)
.staticmethod("from_msgpack")
.def("get_shoebox_data_arrays", &get_shoebox_data_arrays)
.def("experiment_identifiers", &T::experiment_identifiers)
.def("select", &reflection_table_suite::select_rows_index<flex_table_type>)
.def("select", &reflection_table_suite::select_rows_flags<flex_table_type>)
Expand Down
4 changes: 4 additions & 0 deletions src/dials/array_family/boost_python/flex_shoebox_extractor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,8 @@ namespace dials { namespace af { namespace boost_python {
.def("npanals", &ShoeboxExtractor::npanels);
}

void export_shoebox_extract() {
def("ShoeboxExtractFromData", &ShoeboxExtractFromData);
}

}}} // namespace dials::af::boost_python
29 changes: 28 additions & 1 deletion src/dials/array_family/flex_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,8 @@ def as_file(self, filename):
"""
if os.getenv("DIALS_USE_PICKLE"):
self.as_pickle(filename)
elif os.getenv("DIALS_USE_H5"):
self.as_new_h5(filename)
else:
self.as_msgpack_file(filename)

Expand All @@ -243,7 +245,14 @@ def from_file(filename):
filename
)
except RuntimeError:
return dials_array_family_flex_ext.reflection_table.from_pickle(filename)
try:
return dials_array_family_flex_ext.reflection_table.from_new_h5(
filename
)
except OSError:
return dials_array_family_flex_ext.reflection_table.from_pickle(
filename
)

@staticmethod
def empty_standard(nrows):
Expand Down Expand Up @@ -375,6 +384,24 @@ def as_h5(self, filename):
handle.set_reflections(self)
handle.close()

def as_new_h5(self, filename):
from dials.util.nexus.nexus_new import NewNexusFile

handle = NewNexusFile(filename, "w")
# Clean up any removed experiments from the identifiers map
self.clean_experiment_identifiers_map()
handle.set_reflections(self)
handle.close()

@staticmethod
def from_new_h5(filename):
from dials.util.nexus.nexus_new import NewNexusFile

handle = NewNexusFile(filename, "r")
table = handle.get_reflections()
handle.close()
return table

def as_miller_array(self, experiment, intensity="sum"):
"""Return a miller array with the chosen intensities.
Expand Down
24 changes: 24 additions & 0 deletions src/dials/array_family/shoebox_extractor.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,30 @@ namespace dials { namespace af {
using model::Shoebox;
using model::Valid;

void ShoeboxExtractFromData(af::reflection_table data,
af::shared<float> shoebox_data,
af::shared<float> background_data,
af::shared<size_t> mask_data) {
af::shared<Shoebox<> > shoebox_ = data["shoebox"];
std::vector<int> sizes(data.size());
for (int i = 0; i < data.size(); ++i) {
sizes[i] = shoebox_[i].data.size();
}
int n = 0;
for (int i = 0; i < data.size(); ++i) {
size_t size_ = sizes[i];
std::copy(shoebox_data.begin() + n,
shoebox_data.begin() + n + size_,
shoebox_[i].data.begin());
std::copy(background_data.begin() + n,
background_data.begin() + n + size_,
shoebox_[i].background.begin());
std::copy(
mask_data.begin() + n, mask_data.begin() + n + size_, shoebox_[i].mask.begin());
n += size_;
}
}

/**
* A class to extract shoebox pixels from images
*/
Expand Down
165 changes: 165 additions & 0 deletions src/dials/util/nexus/nexus_new.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
from __future__ import annotations

import numpy as np

from dxtbx import flumpy


class ReflectionListEncoder(object):
"""Encoder for the reflection data."""

def encode(self, reflections, handle):
"""Encode the reflection data."""

# Create the reflection data group
group = handle.create_group("entry/data_processing")
group.attrs["num_reflections"] = reflections.size()
identifiers_group = handle.create_group("entry/experiment_identifiers")
identifiers = np.array(
[str(i) for i in reflections.experiment_identifiers().values()],
dtype="S",
)
identifiers_group.create_dataset(
"identifiers", data=identifiers, dtype=identifiers.dtype
)
ids = np.array(list(reflections.experiment_identifiers().keys()), dtype=int)
identifiers_group.create_dataset("ids", data=ids, dtype=ids.dtype)

self.encode_columns(group, reflections)

def encode_columns(self, group, table):
"""Encode a column of data."""
from dials.array_family import flex

for key, data in table.cols():
if isinstance(data, flex.shoebox):
self.encode_shoebox(group, table)
elif isinstance(data, flex.int6):
s = data.size()
this_data = flumpy.to_numpy(data.as_int()).reshape(s, 6)
group.create_dataset(
key, data=this_data, shape=this_data.shape, dtype=this_data.dtype
)
else:
this_data = flumpy.to_numpy(data)
group.create_dataset(
key, data=this_data, shape=this_data.shape, dtype=this_data.dtype
)

def encode_shoebox(self, group, table):
"""Encode a column of shoeboxes."""
sbdata, bg, mask = table.get_shoebox_data_arrays()
data = flumpy.to_numpy(sbdata)
group.create_dataset(
"shoebox_data", data=data, shape=data.shape, dtype=data.dtype
)
data = flumpy.to_numpy(bg)
group.create_dataset(
"shoebox_background", data=data, shape=data.shape, dtype=data.dtype
)
data = flumpy.to_numpy(mask)
group.create_dataset(
"shoebox_mask", data=data, shape=data.shape, dtype=data.dtype
)


class ReflectionListDecoder(object):
"""Decoder for the reflection data."""

def decode(self, handle):
"""Decode the reflection data."""
import dials_array_family_flex_ext
from dials.array_family import flex

# Get the group containing the reflection data
g = handle["entry/data_processing"]

# Create the list of reflections
table = flex.reflection_table(int(g.attrs["num_reflections"]))
identifiers = handle["entry"]["experiment_identifiers"]["identifiers"][()]
ids = handle["entry"]["experiment_identifiers"]["ids"][()]
for id_, identifier in zip(ids, identifiers):
table.experiment_identifiers()[int(id_)] = str(identifier.decode())

# Decode all the columns
shoebox_data = None
shoebox_background = None
shoebox_mask = None
for key in g:
item = g[key]
if key == "shoebox_data":
shoebox_data = flumpy.from_numpy(np.array(g[key]))
elif key == "shoebox_background":
shoebox_background = flumpy.from_numpy(np.array(g[key]))
elif key == "shoebox_mask":
shoebox_mask = flumpy.from_numpy(np.array(g[key]))
else:
val = self._convert(key, item)
if val:
table[str(key)] = val

if "panel" in table and "bbox" in table:
table["shoebox"] = flex.shoebox(
table["panel"], table["bbox"], allocate=True
)
if shoebox_mask and shoebox_background and shoebox_data:
dials_array_family_flex_ext.ShoeboxExtractFromData(
table, shoebox_data, shoebox_background, shoebox_mask
)

# Return the list of reflections
return table

def _convert(self, key, data):
# Must allow that the data were written by a program outside of DIALS, so no special
# knowledge of flex types should be assumed.
from dials.array_family import flex

if key == "miller_index": # special
assert len(data.shape) == 2 and data.shape[1] == 3
new = flumpy.miller_index_from_numpy(np.array(data))
elif len(data.shape) == 2:
if data.shape[1] == 3 or data.shape[1] == 2: # vec3 or vec2 double
new = flumpy.vec_from_numpy(np.array(data))
elif data.shape[1] == 6:
new = flex.int6(flumpy.from_numpy(np.array(data).flatten()))
# N.B. flumpy could support this - but int6 only currently defined in dials, so would have to
# move that to dxtbx first.
else:
raise RuntimeError(
"Unrecognised 2D data dimensions (expected shape (N,m)) where m is 2,3 or 6"
)
else:
new = flumpy.from_numpy(np.array(data))
return new


class NewNexusFile:
"""Interface to Nexus file."""

def __init__(self, filename, mode="a"):
"""Open the file with the given mode."""
import h5py

self._handle = h5py.File(filename, mode)

def close(self):
"""Close the file."""
self._handle.close()
del self._handle

def set_data(self, data, encoder):
"""Set the model data using the supplied encoder."""
encoder.encode(data, self._handle)

def get_data(self, decoder):
"""Get the model data using the supplied decoder."""
return decoder.decode(self._handle)

def set_reflections(self, reflections):
"""Set the reflection data."""
self.set_data(reflections, ReflectionListEncoder())

def get_reflections(self):
"""Get the reflection data."""
return self.get_data(ReflectionListDecoder())

0 comments on commit ff3e1d3

Please sign in to comment.