Skip to content

Commit

Permalink
Merge branch 'chunk_size' into 'master'
Browse files Browse the repository at this point in the history
[MeL/IO] HDF5: Specify chunk size in bytes

See merge request ogs/ogs!4777
  • Loading branch information
TobiasMeisel committed Nov 13, 2023
2 parents 0e18d5b + 35a404a commit 1bf154a
Show file tree
Hide file tree
Showing 23 changed files with 239 additions and 531 deletions.
3 changes: 2 additions & 1 deletion Applications/Utils/FileConverter/PVD2XDMF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,8 @@ int main(int argc, char* argv[])
mesh_xdmf_hdf_writer = std::make_unique<MeshLib::IO::XdmfHdfWriter>(
std::vector{std::cref(*main_mesh)}, output_file_path,
0 /*timestep*/, time, variable_output_names,
true /*output_file.compression*/, 1 /*output_file.n_files*/);
true /*output_file.compression*/, 1 /*output_file.n_files*/,
1048576 /*chunk_size_bytes*/);
}

for (std::size_t timestep = 1; timestep < timeseries.size(); ++timestep)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Group of parameters when XDMF/HDF writer is used.
Type of output in time_loop must be XDMF
Type of output must be XDMF
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
\copydoc ProcessLib::OutputXDMFHDF5Format::chunk_size_bytes
29 changes: 20 additions & 9 deletions MeshLib/IO/XDMF/HdfData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,21 +45,34 @@ static hid_t meshPropertyType2HdfType(MeshPropertyDataType const ogs_data_type)
HdfData::HdfData(void const* data_start, std::size_t const size_partitioned_dim,
std::size_t const size_tuple, std::string const& name,
MeshPropertyDataType const mesh_property_data_type,
unsigned int const n_files)
unsigned int const n_files,
unsigned int const chunk_size_bytes)
: data_start(data_start), name(name)
{
data_type = meshPropertyType2HdfType(mesh_property_data_type);

auto const& partition_info =
getPartitionInfo(size_partitioned_dim, n_files);
auto const& offset_partitioned_dim = partition_info.local_offset;
offsets = {offset_partitioned_dim, 0};

std::size_t unified_length = partition_info.local_length;
std::size_t const unified_length = partition_info.local_length;
int const type_size = H5Tget_size(data_type);
std::size_t const space =
(chunk_size_bytes > 0)
? std::min(std::size_t(std::lround(float(chunk_size_bytes) /
(size_tuple * type_size) +
0.5)),
partition_info.global_length)
: partition_info.longest_local_length;

chunk_space =
(size_tuple > 1)
? std::vector<Hdf5DimType>{partition_info.longest_local_length,
size_tuple}
: std::vector<Hdf5DimType>{partition_info.longest_local_length};
if (chunk_size_bytes > 0 && space == partition_info.global_length)
{
INFO("HDF5: Using a single chunk for dataset {:s} .", name);
}

chunk_space = (size_tuple > 1) ? std::vector<Hdf5DimType>{space, size_tuple}
: std::vector<Hdf5DimType>{space};

data_space = (size_tuple > 1)
? std::vector<Hdf5DimType>{unified_length, size_tuple}
Expand All @@ -69,8 +82,6 @@ HdfData::HdfData(void const* data_start, std::size_t const size_partitioned_dim,
? std::vector<Hdf5DimType>{partition_info.global_length, size_tuple}
: std::vector<Hdf5DimType>{partition_info.global_length};

data_type = meshPropertyType2HdfType(mesh_property_data_type);

DBUG(
"HDF: dataset name: {:s}, offset: {:d}, data_space: {:d}, chunk_space "
"{:d}, file_space: {:d}, tuples: {:d}",
Expand Down
3 changes: 2 additions & 1 deletion MeshLib/IO/XDMF/HdfData.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ struct HdfData final
{
HdfData(void const* data_start, std::size_t size_partitioned_dim,
std::size_t size_tuple, std::string const& name,
MeshPropertyDataType mesh_property_data_type, unsigned int n_files);
MeshPropertyDataType mesh_property_data_type, unsigned int n_files,
unsigned int chunk_size_bytes);
void const* data_start;
std::vector<Hdf5DimType> data_space;
std::vector<Hdf5DimType> offsets;
Expand Down
23 changes: 13 additions & 10 deletions MeshLib/IO/XDMF/XdmfHdfWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ XdmfHdfWriter::XdmfHdfWriter(
std::filesystem::path const& filepath, unsigned long long const time_step,
double const initial_time,
std::set<std::string> const& variable_output_names,
bool const use_compression, unsigned int const n_files)
bool const use_compression, unsigned int const n_files,
unsigned int const chunk_size_bytes)
{
// ogs meshes to vector of Xdmf/HDF meshes (we keep Xdmf and HDF together
// because XDMF depends on HDF) to meta
Expand All @@ -81,12 +82,12 @@ XdmfHdfWriter::XdmfHdfWriter(
// Transform the data to be written into a format conforming with the rules
// of xdmf topology and geometry
auto const transform_ogs_mesh_data_to_xdmf_conforming_data =
[&n_files](auto const& mesh)
[&n_files, &chunk_size_bytes](auto const& mesh)
{
auto flattened_geometry_values = transformToXDMFGeometry(mesh);
// actually this line is only needed to calculate the offset
XdmfHdfData const& geometry =
transformGeometry(mesh, flattened_geometry_values.data(), n_files);
XdmfHdfData const& geometry = transformGeometry(
mesh, flattened_geometry_values.data(), n_files, chunk_size_bytes);
auto const flattened_topology_values =
transformToXDMFTopology(mesh, geometry.hdf.offsets[0]);
return std::make_unique<TransformedMeshData>(
Expand All @@ -96,19 +97,21 @@ XdmfHdfWriter::XdmfHdfWriter(

// create metadata for transformed data and original ogs mesh data
auto const transform_to_meta_data =
[&transform_ogs_mesh_data_to_xdmf_conforming_data,
&n_files](auto const& mesh)
[&transform_ogs_mesh_data_to_xdmf_conforming_data, &n_files,
&chunk_size_bytes](auto const& mesh)
{
// Important: the transformed data must stay alive and be uniquely owned,
// because raw pointers into its memory are handed to the meta data below.
std::unique_ptr<TransformedMeshData> xdmf_conforming_data =
transform_ogs_mesh_data_to_xdmf_conforming_data(mesh);
auto const geometry = transformGeometry(
mesh, xdmf_conforming_data->flattened_geometry_values.data(),
n_files);
auto const topology = transformTopology(
xdmf_conforming_data->flattened_topology_values, n_files);
auto const attributes = transformAttributes(mesh, n_files);
n_files, chunk_size_bytes);
auto const topology =
transformTopology(xdmf_conforming_data->flattened_topology_values,
n_files, chunk_size_bytes);
auto const attributes =
transformAttributes(mesh, n_files, chunk_size_bytes);
return XdmfHdfMesh{std::move(geometry), std::move(topology),
std::move(attributes), mesh.get().getName(),
std::move(xdmf_conforming_data)};
Expand Down
5 changes: 4 additions & 1 deletion MeshLib/IO/XDMF/XdmfHdfWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,15 @@ class XdmfHdfWriter final
* @param use_compression if true, zlib compression in HDFWriter component
* is used
* @param n_files number of hdf5 output files
* @param chunk_size_bytes Data will be split into chunks. The parameter
* specifies the size (in bytes) of the largest chunk.
*/
XdmfHdfWriter(
std::vector<std::reference_wrapper<const MeshLib::Mesh>> const& meshes,
std::filesystem::path const& filepath, unsigned long long time_step,
double initial_time, std::set<std::string> const& variable_output_names,
bool use_compression, unsigned int n_files);
bool use_compression, unsigned int n_files,
unsigned int chunk_size_bytes);

/**
* \brief Adds data for either lazy (xdmf) or eager (hdf) writing algorithm
Expand Down
2 changes: 2 additions & 0 deletions MeshLib/IO/XDMF/mpi/fileIO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ hid_t createFile(std::filesystem::path const& filepath,
hid_t const plist_id = H5Pcreate(H5P_FILE_ACCESS);

H5Pset_fapl_mpio(plist_id, comm, info);
H5Pset_coll_metadata_write(plist_id, true);

std::filesystem::path const partition_filename =
partitionFilename(filepath, communicator.color);
hid_t file = H5Fcreate(partition_filename.string().c_str(), H5F_ACC_TRUNC,
Expand Down
32 changes: 19 additions & 13 deletions MeshLib/IO/XDMF/transformData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ constexpr auto cellTypeOGS2XDMF(MeshLib::CellType const& cell_type)

std::optional<XdmfHdfData> transformAttribute(
std::pair<std::string, PropertyVectorBase*> const& property_pair,
unsigned int const n_files)
unsigned int const n_files, unsigned int const chunk_size_bytes)
{
// 3 data that will be captured and written by lambda f below
MeshPropertyDataType data_type = MeshPropertyDataType::unknown;
Expand Down Expand Up @@ -189,8 +189,8 @@ std::optional<XdmfHdfData> transformAttribute(

std::string const& name = property_base->getPropertyName();

HdfData hdf = {data_ptr, num_of_tuples, ui_global_components,
name, data_type, n_files};
HdfData hdf = {data_ptr, num_of_tuples, ui_global_components, name,
data_type, n_files, chunk_size_bytes};

XdmfData xdmf = {num_of_tuples, ui_global_components, data_type,
name, mesh_item_type, 0,
Expand All @@ -199,8 +199,9 @@ std::optional<XdmfHdfData> transformAttribute(
return XdmfHdfData{std::move(hdf), std::move(xdmf)};
}

std::vector<XdmfHdfData> transformAttributes(MeshLib::Mesh const& mesh,
unsigned int const n_files)
std::vector<XdmfHdfData> transformAttributes(
MeshLib::Mesh const& mesh, unsigned int const n_files,
unsigned int const chunk_size_bytes)
{
MeshLib::Properties const& properties = mesh.getProperties();

Expand All @@ -220,7 +221,10 @@ std::vector<XdmfHdfData> transformAttributes(MeshLib::Mesh const& mesh,
}

if (auto const attribute = transformAttribute(
std::pair(std::string(name), property_base), n_files))

std::pair(std::string(name), property_base), n_files,
chunk_size_bytes))

{
attributes.push_back(attribute.value());
}
Expand Down Expand Up @@ -248,9 +252,9 @@ std::vector<double> transformToXDMFGeometry(MeshLib::Mesh const& mesh)
return values;
}

XdmfHdfData transformGeometry(MeshLib::Mesh const& mesh,
double const* data_ptr,
unsigned int const n_files)
XdmfHdfData transformGeometry(MeshLib::Mesh const& mesh, double const* data_ptr,
unsigned int const n_files,
unsigned int const chunk_size_bytes)
{
std::string const name = "geometry";
std::vector<MeshLib::Node*> const& nodes = mesh.getNodes();
Expand All @@ -263,7 +267,8 @@ XdmfHdfData transformGeometry(MeshLib::Mesh const& mesh,
point_size,
name,
MeshPropertyDataType::float64,
n_files};
n_files,
chunk_size_bytes};
XdmfData const xdmf = {
partition_dim, point_size, MeshPropertyDataType::float64,
name, std::nullopt, 2,
Expand Down Expand Up @@ -300,12 +305,13 @@ std::vector<int> transformToXDMFTopology(MeshLib::Mesh const& mesh,
}

XdmfHdfData transformTopology(std::vector<int> const& values,
unsigned int const n_files)
unsigned int const n_files,
unsigned int const chunk_size_bytes)
{
std::string const name = "topology";
HdfData const hdf = {
values.data(), values.size(), 1, name, MeshPropertyDataType::int32,
n_files};
values.data(), values.size(), 1, name, MeshPropertyDataType::int32,
n_files, chunk_size_bytes};
XdmfData const xdmf = {
values.size(), 1, MeshPropertyDataType::int32, name, std::nullopt, 3,
n_files};
Expand Down
17 changes: 13 additions & 4 deletions MeshLib/IO/XDMF/transformData.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,29 +28,38 @@ namespace MeshLib::IO
* \param mesh OGS mesh can be mesh or partitionedMesh
* \param n_files specifies the number of files. If greater than 1 it groups the
* data of each process to n_files
* \return vector of meta data
* \param chunk_size_bytes Data will be split into chunks. The parameter
* specifies the size (in bytes) of the largest chunk.
* \return vector of meta data
*/
std::vector<XdmfHdfData> transformAttributes(MeshLib::Mesh const& mesh,
unsigned int n_files);
unsigned int n_files,
unsigned int chunk_size_bytes);
/**
* \brief Create meta data for geometry used for hdf5 and xdmf
* \param mesh OGS mesh can be mesh or partitionedMesh
* \param data_ptr Memory location of geometry values.
* \param n_files specifies the number of files. If greater than 1 it groups the
* data of each process to n_files
* \param chunk_size_bytes Data will be split into chunks. The parameter
* specifies the size (in bytes) of the largest chunk.
* \return Geometry meta data
*/
XdmfHdfData transformGeometry(MeshLib::Mesh const& mesh, double const* data_ptr,
unsigned int n_files);
unsigned int n_files,
unsigned int chunk_size_bytes);
/**
* \brief Create meta data for topology used for HDF5 and XDMF
* \param values actual topology values to get size and memory location
* \param n_files specifies the number of files. If greater than 1 it groups the
* data of each process to n_files
* \param chunk_size_bytes Data will be split into chunks. The parameter
* specifies the size (in bytes) of the largest chunk.
* \return Topology meta data
*/
XdmfHdfData transformTopology(std::vector<int> const& values,
unsigned int n_files);
unsigned int n_files,
unsigned int chunk_size_bytes);
/**
* \brief Copies all node points into a new vector. Contiguous data used for
* writing. Conform with XDMF standard in
Expand Down
2 changes: 1 addition & 1 deletion MeshLib/IO/writeMeshToFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ int writeMeshToFile(const MeshLib::Mesh& mesh,
const std::reference_wrapper<const MeshLib::Mesh> mr = mesh;
meshes.push_back(mr);
MeshLib::IO::XdmfHdfWriter(std::move(meshes), file_path, 0, 0.0,
variable_output_names, true, 1);
variable_output_names, true, 1, 1048576);
return 0;
}
ERR("writeMeshToFile(): Unknown file extension '{:s}'. Can not write file "
Expand Down
7 changes: 4 additions & 3 deletions ProcessLib/Output/CreateOutput.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ namespace ProcessLib
std::unique_ptr<OutputFormat> createOutputFormat(
std::string const& output_directory, OutputType const output_type,
std::string prefix, std::string suffix, std::string const& data_mode,
bool const compress_output, unsigned int const number_of_files)
bool const compress_output, unsigned int const number_of_files,
unsigned int const chunk_size_bytes)
{
switch (output_type)
{
Expand All @@ -72,7 +73,7 @@ std::unique_ptr<OutputFormat> createOutputFormat(
case OutputType::xdmf:
return std::make_unique<OutputXDMFHDF5Format>(
output_directory, std::move(prefix), std::move(suffix),
compress_output, number_of_files);
compress_output, number_of_files, chunk_size_bytes);
default:
OGS_FATAL(
"No supported file type provided. Read '{}' from "
Expand All @@ -87,7 +88,7 @@ Output createOutput(OutputConfig&& oc, std::string const& output_directory,
auto output_format = createOutputFormat(
output_directory, oc.output_type, std::move(oc.prefix),
std::move(oc.suffix), oc.data_mode, oc.compress_output,
oc.number_of_files);
oc.number_of_files, oc.chunk_size_bytes);

OutputDataSpecification output_data_specification{
std::move(oc.output_variables), std::move(oc.fixed_output_times),
Expand Down
10 changes: 10 additions & 0 deletions ProcessLib/Output/CreateOutputConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,16 @@ OutputConfig createOutputConfig(
}
return 1;
}();
// Select the HDF5 chunk size (in bytes) stored in the output configuration.
// If the `hdf` handle is set, the value is read from its "chunk_size_bytes"
// entry; otherwise a fixed default is used.
// NOTE(review): getConfigParameter is called without a fallback value here, so
// presumably "chunk_size_bytes" becomes required whenever the hdf group is
// given -- confirm against the config-tree API.
output_config.chunk_size_bytes = [&hdf]() -> unsigned int
{
if (hdf)
{
//! \ogs_file_param{prj__time_loop__output__hdf__chunk_size_bytes}
return hdf->getConfigParameter<unsigned int>("chunk_size_bytes");
}
return 1048576; // default chunk size in bytes (2^20 = 1 MiB) according to
// https://www.hdfgroup.org/2022/10/improve-hdf5-performance-using-caching/
}();

output_config.data_mode =
//! \ogs_file_param{prj__time_loop__output__data_mode}
Expand Down
1 change: 1 addition & 0 deletions ProcessLib/Output/OutputConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ struct OutputConfig
std::string suffix;
bool compress_output;
unsigned int number_of_files;
unsigned int chunk_size_bytes;
std::string data_mode;
/// A list of repeat/step-count pairs. If the list is empty, and no
/// fixed_output_times were specified, a default pair 1/1 will be inserted
Expand Down
3 changes: 2 additions & 1 deletion ProcessLib/Output/OutputFormat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ void OutputXDMFHDF5Format::outputMeshXdmf(
iteration);
std::filesystem::path path(BaseLib::joinPaths(directory, name));
mesh_xdmf_hdf_writer = std::make_unique<MeshLib::IO::XdmfHdfWriter>(
meshes, path, timestep, t, output_variables, compression, n_files);
meshes, path, timestep, t, output_variables, compression, n_files,
chunk_size_bytes);
}
else
{
Expand Down
8 changes: 6 additions & 2 deletions ProcessLib/Output/OutputFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,12 @@ struct OutputXDMFHDF5Format final : public OutputFormat
{
OutputXDMFHDF5Format(std::string const& directory, std::string prefix,
std::string suffix, bool const compression,
unsigned int const n_files)
unsigned int const n_files,
unsigned int const chunk_size_bytes)
: OutputFormat(directory, std::move(prefix), std::move(suffix),
compression),
n_files(n_files)
n_files(n_files),
chunk_size_bytes(chunk_size_bytes)
{
}

Expand All @@ -117,6 +119,8 @@ struct OutputXDMFHDF5Format final : public OutputFormat
mutable std::unique_ptr<MeshLib::IO::XdmfHdfWriter> mesh_xdmf_hdf_writer;
//! Specifies the number of hdf5 output files.
unsigned int n_files;
//! Specifies the chunk size in bytes per hdf5 output file.
unsigned int const chunk_size_bytes;

void outputMeshXdmf(
std::set<std::string> const& output_variables,
Expand Down
Loading

0 comments on commit 1bf154a

Please sign in to comment.