Skip to content

Commit

Permalink
[MISC] Read input functions
Browse files Browse the repository at this point in the history
  • Loading branch information
eseiler committed Aug 25, 2023
1 parent f9d2618 commit c92be4d
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 5 deletions.
90 changes: 90 additions & 0 deletions include/chopper/layout/input.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// ---------------------------------------------------------------------------------------------------
// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin
// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md
// ---------------------------------------------------------------------------------------------------

#pragma once

#include <cassert>
#include <charconv>
#include <cstddef>
#include <istream>
#include <ranges>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>

#include <cereal/archives/json.hpp>

#include <chopper/configuration.hpp>

#include <hibf/detail/prefixes.hpp>

namespace chopper::layout
{

/*!\brief Extracts the chopper configuration from the `@CHOPPER_CONFIG` section of a stream.
 * \param[out]    config The configuration object that is filled by deserialising the section's JSON content.
 * \param[in,out] stream The stream to read from; it is consumed line by line up to and including the
 *                       `@CHOPPER_CONFIG_END` line (or until reading fails).
 *
 * \details
 * All lines preceding the `@CHOPPER_CONFIG` line are discarded. From every line between the two markers the
 * first character is dropped and the remainder is appended (without a line break) to a buffer, which is then
 * handed to a `cereal::JSONInputArchive`.
 *
 * The presence of both markers and the shape of the section lines are verified via `assert` only, i.e. these
 * checks are inactive when `NDEBUG` is defined.
 */
inline void read_config_from(configuration & config, std::istream & stream)
{
    constexpr std::string_view section_begin{"@CHOPPER_CONFIG"};
    constexpr std::string_view section_end{"@CHOPPER_CONFIG_END"};

    std::string current_line;

    // Discard everything in front of the section.
    while (std::getline(stream, current_line))
    {
        if (current_line == section_begin)
            break;
    }

    assert(current_line == section_begin);

    std::stringstream json_buffer;

    // TODO ##CONFIG: as prefix
    for (;;)
    {
        bool const got_line = static_cast<bool>(std::getline(stream, current_line));
        if (!got_line || current_line == section_end)
            break;

        assert(current_line.size() >= 2);
        assert(std::string_view{current_line}.substr(0, 1) == seqan::hibf::prefix::meta_header);

        // Drop the leading seqan::hibf::prefix::meta_header before buffering the JSON fragment.
        json_buffer << current_line.substr(1);
    }

    assert(current_line == section_end);

    cereal::JSONInputArchive archive(json_buffer);
    archive(config);
}

inline std::vector<std::vector<std::string>> read_filenames_from(std::istream & stream)
{
std::vector<std::vector<std::string>> filenames{};
std::string line;

while (std::getline(stream, line) && line != "@CHOPPER_USER_BINS")
;

assert(line == "@CHOPPER_USER_BINS");

#ifndef NDEBUG
size_t counter{};
#endif
// TODO ##CONFIG: as prefix
while (std::getline(stream, line) && line != "@CHOPPER_USER_BINS_END")
{
assert(line.size() >= 2);
assert(std::string_view{line}.substr(0, 1) == seqan::hibf::prefix::meta_header);

// @0 file1.fa file2.fa
auto const bin_idx_pos = line.find(' ');
assert(bin_idx_pos != std::string::npos);

#ifndef NDEBUG
size_t bin_idx{};
std::from_chars(line.data() + 1, line.data() + bin_idx_pos, bin_idx);
assert(bin_idx == counter++);
#endif

filenames.emplace_back();
std::string_view const filename_str{line.begin() + bin_idx_pos + 1, line.end()};
for (auto const && filename : std::views::split(filename_str, ' '))
{
auto common_view = std::views::common(filename);
filenames.back().emplace_back(common_view.begin(), common_view.end());
}
}

assert(line == "@CHOPPER_USER_BINS_END");

return filenames;
}

} // namespace chopper::layout
18 changes: 13 additions & 5 deletions src/display_layout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include <seqan3/search/views/kmer_hash.hpp>

#include <chopper/layout/hibf_statistics.hpp>
#include <chopper/stats/read_layout_file.hpp> // for read_layout_file
#include <chopper/layout/input.hpp>

#include <hibf/detail/sketch/hyperloglog.hpp>

Expand Down Expand Up @@ -119,11 +119,19 @@ void process_file(std::string const & filename,
}
}

int execute(std::filesystem::path const & layout_file)
int execute(std::filesystem::path const & layout_filename)
{
std::vector<std::vector<std::string>> filenames;
chopper::configuration chopper_config;
seqan::hibf::layout::layout hibf_layout = chopper::stats::read_layout_file(chopper_config, filenames, layout_file);
std::ifstream layout_file{layout_filename};

if (!layout_file.good() || !layout_file.is_open())
throw std::logic_error{"Could not open file " + layout_filename.string() + " for reading"}; // GCOVR_EXCL_LINE

std::vector<std::vector<std::string>> filenames = chopper::layout::read_filenames_from(layout_file);
chopper::layout::read_config_from(chopper_config, layout_file);
chopper_config.hibf_config.read_from(layout_file);
seqan::hibf::layout::layout hibf_layout{};
hibf_layout.read_from(layout_file);
auto const & hibf_config = chopper_config.hibf_config;

// Fetch all file sizes such that sorting by file size doesn't have to access the filesystem too often.
Expand Down Expand Up @@ -183,7 +191,7 @@ int execute(std::filesystem::path const & layout_file)
size_t current_idx{}; // The current top-level technical bin index

// Stats file header
std::cout << "# Layout: " << layout_file.c_str() << '\n' //
std::cout << "# Layout: " << layout_filename.c_str() << '\n' //
<< "tb_index\t"
<< "size\t"
<< "shared_size\t"
Expand Down

0 comments on commit c92be4d

Please sign in to comment.