From 9274f7e4619204399c876df72ac22b97ba80924b Mon Sep 17 00:00:00 2001
From: Marcus Holland-Moritz
Date: Fri, 29 Dec 2023 23:34:20 +0100
Subject: [PATCH] refactor: input stream abstraction and wide char support

Add an `input_stream` interface next to `output_stream` and extend
`file_access` with `open_input()` / `open_input_binary()`. mkdwarfs now
reads the --input-list and --header files through `iol.file` instead of
constructing std::ifstream objects itself, takes both options as
`sys_string`, and reports open failures via std::error_code. The test
`file_access` implementation serves both calls from its in-memory file map.

Review fixes folded into this revision:
- file_input_stream opens the std::ifstream from the std::filesystem::path
  itself rather than from path.string().c_str(), so the native path is not
  narrowed before the file is opened.
- file_input_stream::close() clears the stream state and then tests fail():
  std::ifstream::close() signals failure via failbit, which bad() does not
  report.

---
Notes: the `index` blob ids below are those of the original patch and were
not regenerated for the two review fixes. Template arguments on added lines
are spelled out; context and removed lines are reproduced as received.

 include/dwarfs/file_access.h       | 13 +++++++++
 src/dwarfs/file_access_generic.cpp | 43 ++++++++++++++++++++++++++++
 src/mkdwarfs_main.cpp              | 45 ++++++++++++++++--------------
 test/test_helpers.h                |  5 ++++
 test/test_iolayer.cpp              | 29 +++++++++++++++++++
 5 files changed, 114 insertions(+), 21 deletions(-)

diff --git a/include/dwarfs/file_access.h b/include/dwarfs/file_access.h
index 521d73dfe..9dd002ca4 100644
--- a/include/dwarfs/file_access.h
+++ b/include/dwarfs/file_access.h
@@ -28,6 +28,14 @@
 
 namespace dwarfs {
 
+class input_stream {
+ public:
+  virtual ~input_stream() = default;
+
+  virtual std::istream& is() = 0;
+  virtual void close(std::error_code& ec) = 0;
+};
+
 class output_stream {
  public:
   virtual ~output_stream() = default;
@@ -41,6 +49,11 @@ class file_access {
   virtual ~file_access() = default;
 
   virtual bool exists(std::filesystem::path const& path) const = 0;
+  virtual std::unique_ptr<input_stream>
+  open_input(std::filesystem::path const& path, std::error_code& ec) const = 0;
+  virtual std::unique_ptr<input_stream>
+  open_input_binary(std::filesystem::path const& path,
+                    std::error_code& ec) const = 0;
   virtual std::unique_ptr
   open_output_binary(std::filesystem::path const& path,
                      std::error_code& ec) const = 0;
diff --git a/src/dwarfs/file_access_generic.cpp b/src/dwarfs/file_access_generic.cpp
index 82301c0de..db5106b7e 100644
--- a/src/dwarfs/file_access_generic.cpp
+++ b/src/dwarfs/file_access_generic.cpp
@@ -43,6 +43,30 @@ void assign_error_code(std::error_code& ec) {
 #endif
 }
 
+class file_input_stream : public input_stream {
+ public:
+  file_input_stream(std::filesystem::path const& path, std::error_code& ec,
+                    std::ios_base::openmode mode)
+      : is_{path, mode} {
+    if (is_.bad() || is_.fail() || !is_.is_open()) {
+      assign_error_code(ec);
+    }
+  }
+
+  std::istream& is() override { return is_; }
+
+  void close(std::error_code& ec) override {
+    is_.clear(); // so fail() below reflects only the close() result
+    is_.close();
+    if (is_.fail()) {
+      assign_error_code(ec);
+    }
+  }
+
+ private:
+  std::ifstream is_;
+};
+
 class file_output_stream : public output_stream {
  public:
   file_output_stream(std::filesystem::path const& path, std::error_code& ec)
@@ -71,6 +95,25 @@ class file_access_generic : public file_access {
     return std::filesystem::exists(path);
   }
 
+  std::unique_ptr<input_stream> open_input(std::filesystem::path const& path,
+                                           std::error_code& ec) const override {
+    auto rv = std::make_unique<file_input_stream>(path, ec, std::ios::in);
+    if (ec) {
+      rv.reset();
+    }
+    return rv;
+  }
+
+  std::unique_ptr<input_stream>
+  open_input_binary(std::filesystem::path const& path,
+                    std::error_code& ec) const override {
+    auto rv = std::make_unique<file_input_stream>(path, ec, std::ios::binary);
+    if (ec) {
+      rv.reset();
+    }
+    return rv;
+  }
+
   std::unique_ptr
   open_output_binary(std::filesystem::path const& path,
                      std::error_code& ec) const override {
diff --git a/src/mkdwarfs_main.cpp b/src/mkdwarfs_main.cpp
index 42db62738..0f4bc2594 100644
--- a/src/mkdwarfs_main.cpp
+++ b/src/mkdwarfs_main.cpp
@@ -25,7 +25,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -357,12 +356,12 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
   const size_t num_cpu = std::max(folly::hardware_concurrency(), 1u);
 
   segmenter_factory::config sf_config;
-  sys_string path_str, output_str;
-  std::string memory_limit, script_arg, header, schema_compression,
+  sys_string path_str, input_list_str, output_str, header_str;
+  std::string memory_limit, script_arg, schema_compression,
       metadata_compression, log_level_str, timestamp, time_resolution,
       progress_mode, recompress_opts, pack_metadata, file_hash_algo,
-      debug_filter, max_similarity_size, input_list_str, chmod_str,
-      history_compression, recompress_categories;
+      debug_filter, max_similarity_size, chmod_str, history_compression,
+      recompress_categories;
   std::vector filter;
   std::vector order, max_lookback_blocks, window_size, window_step,
       bloom_filter_size, compression;
@@ -415,7 +414,7 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
         po_sys_value(&path_str),
         "path to root directory or source filesystem")
     ("input-list",
-        po::value(&input_list_str),
+        po_sys_value(&input_list_str),
         "file containing list of paths relative to root directory")
     ("output,o",
         po_sys_value(&output_str),
@@ -490,7 +489,7 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
         po::value(&options.with_specials)->zero_tokens(),
         "include named fifo and sockets")
     ("header",
-        po::value(&header),
+        po_sys_value(&header_str),
         "prepend output filesystem with contents of this file")
     ("remove-header",
         po::value(&remove_header)->zero_tokens(),
@@ -757,20 +756,23 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
       path = std::filesystem::current_path();
     }
 
-    std::unique_ptr ifs;
+    std::filesystem::path input_list_path(input_list_str);
+    std::unique_ptr<input_stream> ifs;
     std::istream* is;
 
-    if (input_list_str == "-") {
+    if (input_list_path == "-") {
       is = &iol.in;
     } else {
-      ifs = std::make_unique(input_list_str);
+      std::error_code ec;
+      ifs = iol.file->open_input(input_list_path, ec);
 
-      if (!ifs->is_open()) {
-        throw std::runtime_error(
-            fmt::format("error opening file: {}", input_list_str));
+      if (ec) {
+        throw std::runtime_error(fmt::format("error opening file '{}': {}",
+                                             input_list_path.string(),
+                                             ec.message()));
       }
 
-      is = ifs.get();
+      is = &ifs->is();
     }
 
     std::string line;
@@ -1014,14 +1016,15 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
   fswopts.remove_header = remove_header;
   fswopts.no_section_index = no_section_index;
 
-  std::unique_ptr header_ifs;
+  std::unique_ptr<input_stream> header_ifs;
 
-  if (!header.empty()) {
-    header_ifs =
-        std::make_unique(header.c_str(), std::ios::binary);
-    if (header_ifs->bad() || !header_ifs->is_open()) {
+  if (!header_str.empty()) {
+    std::filesystem::path header(header_str);
+    std::error_code ec;
+    header_ifs = iol.file->open_input_binary(header, ec);
+    if (ec) {
       iol.err << "error: cannot open header file '" << header
-              << "': " << strerror(errno) << "\n";
+              << "': " << ec.message() << "\n";
       return 1;
     }
   }
@@ -1204,7 +1207,7 @@ int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) {
 
     fsw = std::make_unique(
         fsw_os, lgr, wg_compress, prog, schema_bc, metadata_bc, history_bc,
-        fswopts, header_ifs.get());
+        fswopts, header_ifs ? &header_ifs->is() : nullptr);
 
     categorized_option compression_opt;
     contextual_option_parser cop("--compression", compression_opt, cp,
diff --git a/test/test_helpers.h b/test/test_helpers.h
index e1fc618c1..f8e410ffc 100644
--- a/test/test_helpers.h
+++ b/test/test_helpers.h
@@ -161,6 +161,11 @@ class test_terminal : public terminal {
 class test_file_access : public file_access {
  public:
   bool exists(std::filesystem::path const& path) const override;
+  std::unique_ptr<input_stream> open_input(std::filesystem::path const& path,
+                                           std::error_code& ec) const override;
+  std::unique_ptr<input_stream>
+  open_input_binary(std::filesystem::path const& path,
+                    std::error_code& ec) const override;
   std::unique_ptr
   open_output_binary(std::filesystem::path const& path,
                      std::error_code& ec) const override;
diff --git a/test/test_iolayer.cpp b/test/test_iolayer.cpp
index 3a1d1a7fe..7bf972509 100644
--- a/test/test_iolayer.cpp
+++ b/test/test_iolayer.cpp
@@ -31,6 +31,18 @@ namespace dwarfs::test {
 
 namespace {
 
+class test_input_stream : public input_stream {
+ public:
+  test_input_stream(std::string content) { is_.str(std::move(content)); }
+
+  std::istream& is() override { return is_; }
+
+  void close(std::error_code& /*ec*/) override {}
+
+ private:
+  std::istringstream is_;
+};
+
 class test_output_stream : public output_stream {
  public:
   test_output_stream(std::filesystem::path const& path, std::error_code& ec,
@@ -61,6 +73,23 @@ bool test_file_access::exists(std::filesystem::path const& path) const {
   return files_.find(path) != files_.end();
 }
 
+std::unique_ptr<input_stream>
+test_file_access::open_input(std::filesystem::path const& path,
+                             std::error_code& ec) const {
+  auto it = files_.find(path);
+  if (it != files_.end()) {
+    return std::make_unique<test_input_stream>(it->second);
+  }
+  ec = std::make_error_code(std::errc::no_such_file_or_directory);
+  return nullptr;
+}
+
+std::unique_ptr<input_stream>
+test_file_access::open_input_binary(std::filesystem::path const& path,
+                                    std::error_code& ec) const {
+  return open_input(path, ec);
+}
+
 std::unique_ptr
 test_file_access::open_output_binary(std::filesystem::path const& path,
                                      std::error_code& ec) const {