diff --git a/include/chopper/layout/input.hpp b/include/chopper/layout/input.hpp new file mode 100644 index 00000000..6b194b1a --- /dev/null +++ b/include/chopper/layout/input.hpp @@ -0,0 +1,90 @@ +// --------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md +// --------------------------------------------------------------------------------------------------- + +#pragma once + +#include +#include +#include + +#include + +#include + +#include + +namespace chopper::layout +{ + +/* Reads the chopper configuration block from `stream` into `config`. Lines are consumed until one equals "@CHOPPER_CONFIG"; every following line up to "@CHOPPER_CONFIG_END" is appended, with its first character dropped, to a string stream that is then deserialised into `config` through a cereal::JSONInputArchive. The presence of both markers, the minimum line length of 2 and the meta_header prefix are verified with assert only, so they are unchecked when NDEBUG is defined. The stream is left positioned after the end marker. */ inline void read_config_from(configuration & config, std::istream & stream) +{ + std::string line; + std::stringstream config_str; + + while (std::getline(stream, line) && line != "@CHOPPER_CONFIG") + ; + + assert(line == "@CHOPPER_CONFIG"); + + // TODO ##CONFIG: as prefix + while (std::getline(stream, line) && line != "@CHOPPER_CONFIG_END") + { + assert(line.size() >= 2); + assert(std::string_view{line}.substr(0, 1) == seqan::hibf::prefix::meta_header); + config_str << line.substr(1); // remove seqan::hibf::prefix::meta_header + } + + assert(line == "@CHOPPER_CONFIG_END"); + + cereal::JSONInputArchive iarchive(config_str); + iarchive(config); +} + +/* Reads the user-bin section from `stream` and returns one inner container of filenames per line. Lines are consumed until one equals "@CHOPPER_USER_BINS"; for every following line up to "@CHOPPER_USER_BINS_END" the text after the first space is split at each ' ' and every piece is appended to a freshly added inner container. When NDEBUG is not defined, the number between the first character and the first space is parsed with std::from_chars and asserted to count up from 0 line by line; the markers, line length, prefix and presence of a space are likewise checked with assert only. NOTE(review): the return type reads `std::vector>` in this copy, so its template arguments appear to have been lost; presumably std::vector<std::vector<std::string>> - confirm against the repository. */ inline std::vector> read_filenames_from(std::istream & stream) +{ + std::vector> filenames{}; + std::string line; + + while (std::getline(stream, line) && line != "@CHOPPER_USER_BINS") + ; + + assert(line == "@CHOPPER_USER_BINS"); + +#ifndef NDEBUG + size_t counter{}; +#endif + // TODO ##CONFIG: as prefix + while (std::getline(stream, line) && line != "@CHOPPER_USER_BINS_END") + { + assert(line.size() >= 2); + assert(std::string_view{line}.substr(0, 1) == 
seqan::hibf::prefix::meta_header); + + // @0 file1.fa file2.fa + auto const bin_idx_pos = line.find(' '); + assert(bin_idx_pos != std::string::npos); + +#ifndef NDEBUG + size_t bin_idx{}; + std::from_chars(line.data() + 1, line.data() + bin_idx_pos, bin_idx); + assert(bin_idx == counter++); +#endif + + filenames.emplace_back(); + std::string_view const filename_str{line.begin() + bin_idx_pos + 1, line.end()}; + for (auto const && filename : std::views::split(filename_str, ' ')) + { + auto common_view = std::views::common(filename); + filenames.back().emplace_back(common_view.begin(), common_view.end()); + } + } + + assert(line == "@CHOPPER_USER_BINS_END"); + + return filenames; +} + +} // namespace chopper::layout diff --git a/src/display_layout.cpp b/src/display_layout.cpp index 041cde8c..d71939c9 100644 --- a/src/display_layout.cpp +++ b/src/display_layout.cpp @@ -18,7 +18,7 @@ #include #include -#include // for read_layout_file +#include #include @@ -119,11 +119,19 @@ void process_file(std::string const & filename, } } -int execute(std::filesystem::path const & layout_file) +/* Opens `layout_filename` as a std::ifstream and throws std::logic_error ("Could not open file ... for reading") if the stream is not good or not open. The same stream is then read sequentially, in this order: user-bin filenames via chopper::layout::read_filenames_from, the chopper configuration via chopper::layout::read_config_from, the HIBF configuration via chopper_config.hibf_config.read_from, and finally the layout via hibf_layout.read_from. The remainder of the function lies outside this hunk. */ int execute(std::filesystem::path const & layout_filename) { - std::vector> filenames; chopper::configuration chopper_config; - seqan::hibf::layout::layout hibf_layout = chopper::stats::read_layout_file(chopper_config, filenames, layout_file); + std::ifstream layout_file{layout_filename}; + + if (!layout_file.good() || !layout_file.is_open()) + throw std::logic_error{"Could not open file " + layout_filename.string() + " for reading"}; // GCOVR_EXCL_LINE + + std::vector> filenames = chopper::layout::read_filenames_from(layout_file); + chopper::layout::read_config_from(chopper_config, layout_file); + chopper_config.hibf_config.read_from(layout_file); + seqan::hibf::layout::layout hibf_layout{}; + hibf_layout.read_from(layout_file); auto const & hibf_config = chopper_config.hibf_config; // Fetch all file sizes such that sorting by file size doesn't have to access the filesystem too often. 
@@ -183,7 +191,7 @@ int execute(std::filesystem::path const & layout_file) size_t current_idx{}; // The current top-level technical bin index // Stats file header - std::cout << "# Layout: " << layout_file.c_str() << '\n' // + std::cout << "# Layout: " << layout_filename.c_str() << '\n' // << "tb_index\t" << "size\t" << "shared_size\t"