diff --git a/doc/dwarfs.md b/doc/dwarfs.md index 590c69ef0..b56c9acd9 100644 --- a/doc/dwarfs.md +++ b/doc/dwarfs.md @@ -66,6 +66,11 @@ options: This is only useful for images that have some header located before the actual filesystem data. +- `-o imagesize=`*value*: + Explicitly set the size of the filesystem image in bytes, + starting from the offset. This can be used in cases where + the image is embedded in a larger file. + - `-o mlock=none`|`try`|`must`: Set this to `try` or `must` instead of the default `none` to try or require `mlock()`ing of the file system metadata into diff --git a/include/dwarfs/reader/filesystem_options.h b/include/dwarfs/reader/filesystem_options.h index e1aa62516..a0fccc8b9 100644 --- a/include/dwarfs/reader/filesystem_options.h +++ b/include/dwarfs/reader/filesystem_options.h @@ -21,6 +21,8 @@ #pragma once +#include + #include #include #include @@ -34,6 +36,7 @@ struct filesystem_options { mlock_mode lock_mode{mlock_mode::NONE}; file_off_t image_offset{0}; + file_off_t image_size{std::numeric_limits::max()}; block_cache_options block_cache{}; metadata_options metadata{}; inode_reader_options inode_reader{}; diff --git a/include/dwarfs/reader/internal/filesystem_parser.h b/include/dwarfs/reader/internal/filesystem_parser.h index 007a70650..ed7a514f7 100644 --- a/include/dwarfs/reader/internal/filesystem_parser.h +++ b/include/dwarfs/reader/internal/filesystem_parser.h @@ -45,8 +45,9 @@ class filesystem_parser { public: static file_off_t find_image_offset(mmif& mm, file_off_t image_offset); - explicit filesystem_parser(std::shared_ptr mm, - file_off_t image_offset = 0); + explicit filesystem_parser( + std::shared_ptr mm, file_off_t image_offset = 0, + file_off_t image_size = std::numeric_limits::max()); std::optional next_section(); @@ -74,6 +75,7 @@ class filesystem_parser { std::shared_ptr mm_; file_off_t const image_offset_{0}; + file_off_t const image_size_{std::numeric_limits::max()}; file_off_t offset_{0}; int version_{0}; uint8_t major_{0}; diff --git a/src/reader/filesystem_v2.cpp b/src/reader/filesystem_v2.cpp index e3f2bac83..fe957b8ee 100644 --- a/src/reader/filesystem_v2.cpp +++ b/src/reader/filesystem_v2.cpp @@ -308,7 +308,8 @@ class filesystem_ final : public filesystem_v2::impl { return meta_.get_all_gids(); } std::shared_ptr get_parser() const override { - return std::make_unique(mm_, image_offset_); + return std::make_unique(mm_, image_offset_, + options_.image_size); } std::optional get_block_category(size_t block_no) const override { @@ -341,6 +342,7 @@ class filesystem_ final : public filesystem_v2::impl { mutable std::unique_ptr fsinfo_; history history_; file_off_t const image_offset_; + filesystem_options const options_; PERFMON_CLS_PROXY_DECL PERFMON_CLS_TIMER_DECL(find_path) PERFMON_CLS_TIMER_DECL(find_inode) @@ -380,7 +382,7 @@ filesystem_::get_info(fsinfo_options const& opts) const { std::lock_guard lock(mx_); if (!fsinfo_ || opts.block_access > fsinfo_block_access_level_) { - filesystem_parser parser(mm_, image_offset_); + filesystem_parser parser(mm_, image_offset_, options_.image_size); filesystem_info info; parser.rewind(); @@ -429,14 +431,15 @@ filesystem_::get_info(fsinfo_options const& opts) const { template filesystem_::filesystem_( logger& lgr, os_access const& os, std::shared_ptr mm, - const filesystem_options& options, + filesystem_options const& options, std::shared_ptr perfmon) : LOG_PROXY_INIT(lgr) , os_{os} , mm_{std::move(mm)} , history_({.with_timestamps = true}) - , image_offset_{filesystem_parser::find_image_offset( - *mm_, options.image_offset)} // clang-format off + , image_offset_{filesystem_parser::find_image_offset(*mm_, + options.image_offset)} + , options_{options} // clang-format off PERFMON_CLS_PROXY_INIT(perfmon, "filesystem_v2") PERFMON_CLS_TIMER_INIT(find_path) PERFMON_CLS_TIMER_INIT(find_inode) @@ -465,7 +468,7 @@ filesystem_::filesystem_( PERFMON_CLS_TIMER_INIT(readv_future_ec) // clang-format on { block_cache cache(lgr, os_, mm_, options.block_cache, perfmon); - filesystem_parser parser(mm_, image_offset_); + filesystem_parser parser(mm_, image_offset_, options.image_size); if (parser.has_index()) { LOG_DEBUG << "found valid section index"; @@ -531,7 +534,7 @@ filesystem_::filesystem_( template int filesystem_::check(filesystem_check_level level, size_t num_threads) const { - filesystem_parser parser(mm_, image_offset_); + filesystem_parser parser(mm_, image_offset_, options_.image_size); worker_group wg(LOG_GET_LOGGER, os_, "fscheck", num_threads); std::vector> sections; @@ -593,7 +596,7 @@ int filesystem_::check(filesystem_check_level level, template void filesystem_::dump(std::ostream& os, fsinfo_options const& opts) const { - filesystem_parser parser(mm_, image_offset_); + filesystem_parser parser(mm_, image_offset_, options_.image_size); if (opts.features.has(fsinfo_feature::version)) { os << "DwarFS version " << parser.version(); @@ -662,7 +665,7 @@ std::string filesystem_::dump(fsinfo_options const& opts) const { template nlohmann::json filesystem_::info_as_json(fsinfo_options const& opts) const { - filesystem_parser parser(mm_, image_offset_); + filesystem_parser parser(mm_, image_offset_, options_.image_size); auto info = nlohmann::json::object(); diff --git a/src/reader/internal/filesystem_parser.cpp b/src/reader/internal/filesystem_parser.cpp index 787431e33..2167b8d13 100644 --- a/src/reader/internal/filesystem_parser.cpp +++ b/src/reader/internal/filesystem_parser.cpp @@ -109,10 +109,13 @@ filesystem_parser::find_image_offset(mmif& mm, file_off_t image_offset) { } filesystem_parser::filesystem_parser(std::shared_ptr mm, - file_off_t image_offset) + file_off_t image_offset, + file_off_t image_size) : mm_{std::move(mm)} - , image_offset_{find_image_offset(*mm_, image_offset)} { - if (mm_->size() < image_offset_ + sizeof(file_header)) { + , image_offset_{find_image_offset(*mm_, image_offset)} + , image_size_{ + std::min(image_size, mm_->size() - image_offset_)} { + if (image_size_ < static_cast(sizeof(file_header))) { DWARFS_THROW(runtime_error, "file too small"); } @@ -143,7 +146,7 @@ filesystem_parser::filesystem_parser(std::shared_ptr mm, std::optional filesystem_parser::next_section() { if (index_.empty()) { - if (offset_ < static_cast(mm_->size())) { + if (offset_ < image_offset_ + image_size_) { auto section = fs_section(*mm_, offset_, version_); offset_ = section.end(); return section; @@ -154,7 +157,7 @@ std::optional filesystem_parser::next_section() { uint64_t offset = id & section_offset_mask; uint64_t next_offset = offset_ < static_cast(index_.size()) ? index_[offset_] & section_offset_mask - : mm_->size() - image_offset_; + : image_size_; return fs_section(mm_, static_cast(id >> 48), image_offset_ + offset, next_offset - offset, version_); } @@ -189,7 +192,9 @@ bool filesystem_parser::has_checksums() const { return version_ >= 2; } bool filesystem_parser::has_index() const { return !index_.empty(); } -size_t filesystem_parser::filesystem_size() const { return mm_->size(); } +size_t filesystem_parser::filesystem_size() const { + return image_offset_ + image_size_; +} std::span filesystem_parser::section_data(fs_section const& s) const { @@ -199,14 +204,15 @@ filesystem_parser::section_data(fs_section const& s) const { void filesystem_parser::find_index() { uint64_t index_pos; - ::memcpy(&index_pos, mm_->as(mm_->size() - sizeof(uint64_t)), + ::memcpy(&index_pos, + mm_->as(image_offset_ + image_size_ - sizeof(uint64_t)), sizeof(uint64_t)); if ((index_pos >> 48) == static_cast(section_type::SECTION_INDEX)) { index_pos &= section_offset_mask; index_pos += image_offset_; - if (index_pos < mm_->size()) { + if (index_pos < static_cast(image_offset_ + image_size_)) { auto section = fs_section(*mm_, index_pos, version_); if (section.check_fast(*mm_)) { diff --git a/test/dwarfs_test.cpp b/test/dwarfs_test.cpp index 6a16b5902..182b1e183 100644 --- a/test/dwarfs_test.cpp +++ b/test/dwarfs_test.cpp @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -81,6 +82,8 @@ build_dwarfs(logger& lgr, std::shared_ptr input, std::string const& compression, writer::segmenter::config const& cfg = writer::segmenter::config(), writer::scanner_options const& options = writer::scanner_options(), + writer::filesystem_writer_options const& writer_opts = + writer::filesystem_writer_options(), writer::writer_progress* prog = nullptr, std::shared_ptr ftd = nullptr, std::optional> input_list = @@ -120,7 +123,7 @@ build_dwarfs(logger& lgr, std::shared_ptr input, std::ostringstream oss; block_compressor bc(compression); - writer::filesystem_writer fsw(oss, lgr, pool, *prog); + writer::filesystem_writer fsw(oss, lgr, pool, *prog, writer_opts); fsw.add_default_compressor(bc); s.scan(fsw, std::filesystem::path("/"), *prog, input_list); @@ -187,7 +190,7 @@ void basic_end_to_end_test( auto ftd = std::make_shared(); auto fsimage = - build_dwarfs(lgr, input, compressor, cfg, options, &wprog, ftd); + build_dwarfs(lgr, input, compressor, cfg, options, {}, &wprog, ftd); EXPECT_EQ(14, ftd->filter_calls.size()); EXPECT_EQ(15, ftd->transform_calls.size()); @@ -1046,7 +1049,7 @@ TEST_P(filter_test, filesystem) { writer::scanner_options options; options.remove_empty_dirs = true; - auto fsimage = build_dwarfs(lgr, input, "null", cfg, options, nullptr, + auto fsimage = build_dwarfs(lgr, input, "null", cfg, options, {}, nullptr, nullptr, std::nullopt, std::move(rbf)); auto mm = std::make_shared(std::move(fsimage)); @@ -1136,8 +1139,8 @@ TEST(file_scanner, input_list) { "foo.pl", }; - auto fsimage = build_dwarfs(lgr, input, "null", bmcfg, opts, nullptr, nullptr, - input_list); + auto fsimage = build_dwarfs(lgr, input, "null", bmcfg, opts, {}, nullptr, + nullptr, input_list); auto mm = std::make_shared(std::move(fsimage)); @@ -1926,3 +1929,73 @@ TEST(filesystem, inode_size_cache) { EXPECT_EQ(st.size(), size); } } + +TEST(filesystem, multi_image) { + test::test_logger lgr; + std::string data("header"); + std::vector> images; + + for (std::string str : {"foo", "bar", "baz"}) { + auto input = std::make_shared(); + input->add_dir(""); + input->add_file(str, str); + auto img = build_dwarfs(lgr, input, "null", {}, {}, + {.no_section_index = str == "bar"}); + images.emplace_back(data.size(), img.size()); + data += img; + data += "filler"; + } + + auto mm = std::make_shared(std::move(data)); + auto os = std::make_shared(); + + std::vector fss; + + for (size_t i = 0; i < images.size(); ++i) { + fss.emplace_back( + lgr, *os, mm, + reader::filesystem_options{.image_offset = images[i].first, + .image_size = images[i].second}); + } + + ASSERT_EQ(3, fss.size()); + + { + auto& fs = fss[0]; + auto foo = fs.find("/foo"); + auto bar = fs.find("/bar"); + auto baz = fs.find("/baz"); + + ASSERT_TRUE(foo); + EXPECT_FALSE(bar); + EXPECT_FALSE(baz); + + EXPECT_EQ("foo", fs.read_string(fs.open(foo->inode()))); + } + + { + auto& fs = fss[1]; + auto foo = fs.find("/foo"); + auto bar = fs.find("/bar"); + auto baz = fs.find("/baz"); + + EXPECT_FALSE(foo); + ASSERT_TRUE(bar); + EXPECT_FALSE(baz); + + EXPECT_EQ("bar", fs.read_string(fs.open(bar->inode()))); + } + + { + auto& fs = fss[2]; + auto foo = fs.find("/foo"); + auto bar = fs.find("/bar"); + auto baz = fs.find("/baz"); + + EXPECT_FALSE(foo); + EXPECT_FALSE(bar); + ASSERT_TRUE(baz); + + EXPECT_EQ("baz", fs.read_string(fs.open(baz->inode()))); + } +} diff --git a/tools/src/dwarfs_main.cpp b/tools/src/dwarfs_main.cpp index 6c822ea50..fc9f09c39 100644 --- a/tools/src/dwarfs_main.cpp +++ b/tools/src/dwarfs_main.cpp @@ -164,6 +164,7 @@ struct options { char const* mlock_str{nullptr}; // TODO: const?? -> use string? char const* decompress_ratio_str{nullptr}; // TODO: const?? -> use string? char const* image_offset_str{nullptr}; // TODO: const?? -> use string? + char const* image_size_str{nullptr}; // TODO: const?? -> use string? char const* cache_tidy_strategy_str{nullptr}; // TODO: const?? -> use string? char const* cache_tidy_interval_str{nullptr}; // TODO: const?? -> use string? char const* cache_tidy_max_age_str{nullptr}; // TODO: const?? -> use string? @@ -238,6 +239,7 @@ constexpr struct ::fuse_opt dwarfs_opts[] = { DWARFS_OPT("mlock=%s", mlock_str, 0), DWARFS_OPT("decratio=%s", decompress_ratio_str, 0), DWARFS_OPT("offset=%s", image_offset_str, 0), + DWARFS_OPT("imagesize=%s", image_size_str, 0), DWARFS_OPT("tidy_strategy=%s", cache_tidy_strategy_str, 0), DWARFS_OPT("tidy_interval=%s", cache_tidy_interval_str, 0), DWARFS_OPT("tidy_max_age=%s", cache_tidy_max_age_str, 0), @@ -1203,6 +1205,7 @@ void usage(std::ostream& os, std::filesystem::path const& progname) { << " -o mlock=NAME mlock mode: (none), try, must\n" << " -o decratio=NUM ratio for full decompression (0.8)\n" << " -o offset=NUM|auto filesystem image offset in bytes (0)\n" + << " -o imagesize=NUM filesystem image size in bytes\n" << " -o enable_nlink show correct hardlink numbers\n" << " -o readonly show read-only file system\n" << " -o (no_)cache_image (don't) keep image in kernel cache\n" @@ -1451,6 +1454,10 @@ void load_filesystem(dwarfs_userdata& userdata) { fsopts.image_offset = reader::parse_image_offset(opts.image_offset_str); } + if (opts.image_size_str) { + fsopts.image_size = to(opts.image_size_str); + } + std::unordered_set perfmon_enabled; std::optional perfmon_trace_file; #if DWARFS_PERFMON_ENABLED