Skip to content

Commit

Permalink
feat: allow setting image size in FUSE driver (fixes gh #239)
Browse files Browse the repository at this point in the history
  • Loading branch information
mhx committed Oct 8, 2024
1 parent 7afd0af commit 3bb80bb
Show file tree
Hide file tree
Showing 7 changed files with 123 additions and 24 deletions.
5 changes: 5 additions & 0 deletions doc/dwarfs.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ options:
This is only useful for images that have some header located
before the actual filesystem data.

- `-o imagesize=`*value*:
Explicitly set the size of the filesystem image in bytes,
counted from the image offset (see `-o offset`). This can be
used in cases where the image is embedded in a larger file.
If not set, the image is assumed to extend to the end of the file.

- `-o mlock=none`|`try`|`must`:
Set this to `try` or `must` instead of the default `none` to
try or require `mlock()`ing of the file system metadata into
Expand Down
3 changes: 3 additions & 0 deletions include/dwarfs/reader/filesystem_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

#pragma once

#include <limits>

#include <dwarfs/reader/block_cache_options.h>
#include <dwarfs/reader/inode_reader_options.h>
#include <dwarfs/reader/metadata_options.h>
Expand All @@ -34,6 +36,7 @@ struct filesystem_options {

mlock_mode lock_mode{mlock_mode::NONE};
file_off_t image_offset{0};
file_off_t image_size{std::numeric_limits<file_off_t>::max()};
block_cache_options block_cache{};
metadata_options metadata{};
inode_reader_options inode_reader{};
Expand Down
6 changes: 4 additions & 2 deletions include/dwarfs/reader/internal/filesystem_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,9 @@ class filesystem_parser {
public:
static file_off_t find_image_offset(mmif& mm, file_off_t image_offset);

explicit filesystem_parser(std::shared_ptr<mmif> mm,
file_off_t image_offset = 0);
explicit filesystem_parser(
std::shared_ptr<mmif> mm, file_off_t image_offset = 0,
file_off_t image_size = std::numeric_limits<file_off_t>::max());

std::optional<dwarfs::internal::fs_section> next_section();

Expand Down Expand Up @@ -74,6 +75,7 @@ class filesystem_parser {

std::shared_ptr<mmif> mm_;
file_off_t const image_offset_{0};
file_off_t const image_size_{std::numeric_limits<file_off_t>::max()};
file_off_t offset_{0};
int version_{0};
uint8_t major_{0};
Expand Down
21 changes: 12 additions & 9 deletions src/reader/filesystem_v2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,8 @@ class filesystem_ final : public filesystem_v2::impl {
return meta_.get_all_gids();
}
std::shared_ptr<filesystem_parser> get_parser() const override {
return std::make_unique<filesystem_parser>(mm_, image_offset_);
return std::make_unique<filesystem_parser>(mm_, image_offset_,
options_.image_size);
}
std::optional<std::string>
get_block_category(size_t block_no) const override {
Expand Down Expand Up @@ -341,6 +342,7 @@ class filesystem_ final : public filesystem_v2::impl {
mutable std::unique_ptr<filesystem_info const> fsinfo_;
history history_;
file_off_t const image_offset_;
filesystem_options const options_;
PERFMON_CLS_PROXY_DECL
PERFMON_CLS_TIMER_DECL(find_path)
PERFMON_CLS_TIMER_DECL(find_inode)
Expand Down Expand Up @@ -380,7 +382,7 @@ filesystem_<LoggerPolicy>::get_info(fsinfo_options const& opts) const {
std::lock_guard lock(mx_);

if (!fsinfo_ || opts.block_access > fsinfo_block_access_level_) {
filesystem_parser parser(mm_, image_offset_);
filesystem_parser parser(mm_, image_offset_, options_.image_size);
filesystem_info info;

parser.rewind();
Expand Down Expand Up @@ -429,14 +431,15 @@ filesystem_<LoggerPolicy>::get_info(fsinfo_options const& opts) const {
template <typename LoggerPolicy>
filesystem_<LoggerPolicy>::filesystem_(
logger& lgr, os_access const& os, std::shared_ptr<mmif> mm,
const filesystem_options& options,
filesystem_options const& options,
std::shared_ptr<performance_monitor const> perfmon)
: LOG_PROXY_INIT(lgr)
, os_{os}
, mm_{std::move(mm)}
, history_({.with_timestamps = true})
, image_offset_{filesystem_parser::find_image_offset(
*mm_, options.image_offset)} // clang-format off
, image_offset_{filesystem_parser::find_image_offset(*mm_,
options.image_offset)}
, options_{options} // clang-format off
PERFMON_CLS_PROXY_INIT(perfmon, "filesystem_v2")
PERFMON_CLS_TIMER_INIT(find_path)
PERFMON_CLS_TIMER_INIT(find_inode)
Expand Down Expand Up @@ -465,7 +468,7 @@ filesystem_<LoggerPolicy>::filesystem_(
PERFMON_CLS_TIMER_INIT(readv_future_ec) // clang-format on
{
block_cache cache(lgr, os_, mm_, options.block_cache, perfmon);
filesystem_parser parser(mm_, image_offset_);
filesystem_parser parser(mm_, image_offset_, options.image_size);

if (parser.has_index()) {
LOG_DEBUG << "found valid section index";
Expand Down Expand Up @@ -531,7 +534,7 @@ filesystem_<LoggerPolicy>::filesystem_(
template <typename LoggerPolicy>
int filesystem_<LoggerPolicy>::check(filesystem_check_level level,
size_t num_threads) const {
filesystem_parser parser(mm_, image_offset_);
filesystem_parser parser(mm_, image_offset_, options_.image_size);

worker_group wg(LOG_GET_LOGGER, os_, "fscheck", num_threads);
std::vector<std::future<fs_section>> sections;
Expand Down Expand Up @@ -593,7 +596,7 @@ int filesystem_<LoggerPolicy>::check(filesystem_check_level level,
template <typename LoggerPolicy>
void filesystem_<LoggerPolicy>::dump(std::ostream& os,
fsinfo_options const& opts) const {
filesystem_parser parser(mm_, image_offset_);
filesystem_parser parser(mm_, image_offset_, options_.image_size);

if (opts.features.has(fsinfo_feature::version)) {
os << "DwarFS version " << parser.version();
Expand Down Expand Up @@ -662,7 +665,7 @@ std::string filesystem_<LoggerPolicy>::dump(fsinfo_options const& opts) const {
template <typename LoggerPolicy>
nlohmann::json
filesystem_<LoggerPolicy>::info_as_json(fsinfo_options const& opts) const {
filesystem_parser parser(mm_, image_offset_);
filesystem_parser parser(mm_, image_offset_, options_.image_size);

auto info = nlohmann::json::object();

Expand Down
22 changes: 14 additions & 8 deletions src/reader/internal/filesystem_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,13 @@ filesystem_parser::find_image_offset(mmif& mm, file_off_t image_offset) {
}

filesystem_parser::filesystem_parser(std::shared_ptr<mmif> mm,
file_off_t image_offset)
file_off_t image_offset,
file_off_t image_size)
: mm_{std::move(mm)}
, image_offset_{find_image_offset(*mm_, image_offset)} {
if (mm_->size() < image_offset_ + sizeof(file_header)) {
, image_offset_{find_image_offset(*mm_, image_offset)}
, image_size_{
std::min<file_off_t>(image_size, mm_->size() - image_offset_)} {
if (image_size_ < static_cast<file_off_t>(sizeof(file_header))) {
DWARFS_THROW(runtime_error, "file too small");
}

Expand Down Expand Up @@ -143,7 +146,7 @@ filesystem_parser::filesystem_parser(std::shared_ptr<mmif> mm,

std::optional<fs_section> filesystem_parser::next_section() {
if (index_.empty()) {
if (offset_ < static_cast<file_off_t>(mm_->size())) {
if (offset_ < image_offset_ + image_size_) {
auto section = fs_section(*mm_, offset_, version_);
offset_ = section.end();
return section;
Expand All @@ -154,7 +157,7 @@ std::optional<fs_section> filesystem_parser::next_section() {
uint64_t offset = id & section_offset_mask;
uint64_t next_offset = offset_ < static_cast<file_off_t>(index_.size())
? index_[offset_] & section_offset_mask
: mm_->size() - image_offset_;
: image_size_;
return fs_section(mm_, static_cast<section_type>(id >> 48),
image_offset_ + offset, next_offset - offset, version_);
}
Expand Down Expand Up @@ -189,7 +192,9 @@ bool filesystem_parser::has_checksums() const { return version_ >= 2; }

bool filesystem_parser::has_index() const { return !index_.empty(); }

size_t filesystem_parser::filesystem_size() const { return mm_->size(); }
// Returns the position just past the end of the image, i.e. the image
// offset plus the (clamped) image size.
size_t filesystem_parser::filesystem_size() const {
  auto const image_end = image_offset_ + image_size_;
  return image_end;
}

std::span<uint8_t const>
filesystem_parser::section_data(fs_section const& s) const {
Expand All @@ -199,14 +204,15 @@ filesystem_parser::section_data(fs_section const& s) const {
void filesystem_parser::find_index() {
uint64_t index_pos;

::memcpy(&index_pos, mm_->as<void>(mm_->size() - sizeof(uint64_t)),
::memcpy(&index_pos,
mm_->as<void>(image_offset_ + image_size_ - sizeof(uint64_t)),
sizeof(uint64_t));

if ((index_pos >> 48) == static_cast<uint16_t>(section_type::SECTION_INDEX)) {
index_pos &= section_offset_mask;
index_pos += image_offset_;

if (index_pos < mm_->size()) {
if (index_pos < static_cast<uint64_t>(image_offset_ + image_size_)) {
auto section = fs_section(*mm_, index_pos, version_);

if (section.check_fast(*mm_)) {
Expand Down
83 changes: 78 additions & 5 deletions test/dwarfs_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
#include <dwarfs/vfs_stat.h>
#include <dwarfs/writer/entry_factory.h>
#include <dwarfs/writer/filesystem_writer.h>
#include <dwarfs/writer/filesystem_writer_options.h>
#include <dwarfs/writer/filter_debug.h>
#include <dwarfs/writer/fragment_order_options.h>
#include <dwarfs/writer/rule_based_entry_filter.h>
Expand Down Expand Up @@ -81,6 +82,8 @@ build_dwarfs(logger& lgr, std::shared_ptr<test::os_access_mock> input,
std::string const& compression,
writer::segmenter::config const& cfg = writer::segmenter::config(),
writer::scanner_options const& options = writer::scanner_options(),
writer::filesystem_writer_options const& writer_opts =
writer::filesystem_writer_options(),
writer::writer_progress* prog = nullptr,
std::shared_ptr<test::filter_transformer_data> ftd = nullptr,
std::optional<std::span<std::filesystem::path const>> input_list =
Expand Down Expand Up @@ -120,7 +123,7 @@ build_dwarfs(logger& lgr, std::shared_ptr<test::os_access_mock> input,
std::ostringstream oss;

block_compressor bc(compression);
writer::filesystem_writer fsw(oss, lgr, pool, *prog);
writer::filesystem_writer fsw(oss, lgr, pool, *prog, writer_opts);
fsw.add_default_compressor(bc);

s.scan(fsw, std::filesystem::path("/"), *prog, input_list);
Expand Down Expand Up @@ -187,7 +190,7 @@ void basic_end_to_end_test(
auto ftd = std::make_shared<test::filter_transformer_data>();

auto fsimage =
build_dwarfs(lgr, input, compressor, cfg, options, &wprog, ftd);
build_dwarfs(lgr, input, compressor, cfg, options, {}, &wprog, ftd);

EXPECT_EQ(14, ftd->filter_calls.size());
EXPECT_EQ(15, ftd->transform_calls.size());
Expand Down Expand Up @@ -1046,7 +1049,7 @@ TEST_P(filter_test, filesystem) {
writer::scanner_options options;
options.remove_empty_dirs = true;

auto fsimage = build_dwarfs(lgr, input, "null", cfg, options, nullptr,
auto fsimage = build_dwarfs(lgr, input, "null", cfg, options, {}, nullptr,
nullptr, std::nullopt, std::move(rbf));

auto mm = std::make_shared<test::mmap_mock>(std::move(fsimage));
Expand Down Expand Up @@ -1136,8 +1139,8 @@ TEST(file_scanner, input_list) {
"foo.pl",
};

auto fsimage = build_dwarfs(lgr, input, "null", bmcfg, opts, nullptr, nullptr,
input_list);
auto fsimage = build_dwarfs(lgr, input, "null", bmcfg, opts, {}, nullptr,
nullptr, input_list);

auto mm = std::make_shared<test::mmap_mock>(std::move(fsimage));

Expand Down Expand Up @@ -1926,3 +1929,73 @@ TEST(filesystem, inode_size_cache) {
EXPECT_EQ(st.size(), size);
}
}

// Packs three independently built images into a single buffer, separated
// by non-image bytes, and checks that each image can be opened on its own
// by passing its offset and size through `filesystem_options`.
TEST(filesystem, multi_image) {
  test::test_logger lgr;
  std::string data("header");
  // (offset, size) of every image inside `data`
  std::vector<std::pair<file_off_t, file_off_t>> images;

  for (std::string name : {"foo", "bar", "baz"}) {
    auto input = std::make_shared<test::os_access_mock>();
    input->add_dir("");
    input->add_file(name, name);

    // Only the "bar" image is built with `no_section_index` set.
    auto const image = build_dwarfs(lgr, input, "null", {}, {},
                                    {.no_section_index = name == "bar"});

    images.emplace_back(data.size(), image.size());
    data.append(image).append("filler");
  }

  auto mm = std::make_shared<test::mmap_mock>(std::move(data));
  auto os = std::make_shared<test::os_access_mock>();

  std::vector<reader::filesystem_v2> fss;

  for (auto const& [offset, size] : images) {
    fss.emplace_back(lgr, *os, mm,
                     reader::filesystem_options{.image_offset = offset,
                                                .image_size = size});
  }

  ASSERT_EQ(3, fss.size());

  // Image `i` must contain exactly the i-th file and none of the others.
  static constexpr char const* contents[] = {"foo", "bar", "baz"};

  for (size_t i = 0; i < fss.size(); ++i) {
    auto& fs = fss[i];

    // Perform all three lookups up front, in the same order for every image.
    decltype(fs.find("/foo")) found[] = {
        fs.find("/foo"),
        fs.find("/bar"),
        fs.find("/baz"),
    };

    for (size_t j = 0; j < 3; ++j) {
      if (j == i) {
        // Fatal: the entry is dereferenced below.
        ASSERT_TRUE(found[j]);
      } else {
        EXPECT_FALSE(found[j]);
      }
    }

    EXPECT_EQ(contents[i], fs.read_string(fs.open(found[i]->inode())));
  }
}
7 changes: 7 additions & 0 deletions tools/src/dwarfs_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ struct options {
char const* mlock_str{nullptr}; // TODO: const?? -> use string?
char const* decompress_ratio_str{nullptr}; // TODO: const?? -> use string?
char const* image_offset_str{nullptr}; // TODO: const?? -> use string?
char const* image_size_str{nullptr}; // TODO: const?? -> use string?
char const* cache_tidy_strategy_str{nullptr}; // TODO: const?? -> use string?
char const* cache_tidy_interval_str{nullptr}; // TODO: const?? -> use string?
char const* cache_tidy_max_age_str{nullptr}; // TODO: const?? -> use string?
Expand Down Expand Up @@ -238,6 +239,7 @@ constexpr struct ::fuse_opt dwarfs_opts[] = {
DWARFS_OPT("mlock=%s", mlock_str, 0),
DWARFS_OPT("decratio=%s", decompress_ratio_str, 0),
DWARFS_OPT("offset=%s", image_offset_str, 0),
DWARFS_OPT("imagesize=%s", image_size_str, 0),
DWARFS_OPT("tidy_strategy=%s", cache_tidy_strategy_str, 0),
DWARFS_OPT("tidy_interval=%s", cache_tidy_interval_str, 0),
DWARFS_OPT("tidy_max_age=%s", cache_tidy_max_age_str, 0),
Expand Down Expand Up @@ -1203,6 +1205,7 @@ void usage(std::ostream& os, std::filesystem::path const& progname) {
<< " -o mlock=NAME mlock mode: (none), try, must\n"
<< " -o decratio=NUM ratio for full decompression (0.8)\n"
<< " -o offset=NUM|auto filesystem image offset in bytes (0)\n"
<< " -o imagesize=NUM filesystem image size in bytes\n"
<< " -o enable_nlink show correct hardlink numbers\n"
<< " -o readonly show read-only file system\n"
<< " -o (no_)cache_image (don't) keep image in kernel cache\n"
Expand Down Expand Up @@ -1451,6 +1454,10 @@ void load_filesystem(dwarfs_userdata& userdata) {
fsopts.image_offset = reader::parse_image_offset(opts.image_offset_str);
}

if (opts.image_size_str) {
fsopts.image_size = to<file_off_t>(opts.image_size_str);
}

std::unordered_set<std::string> perfmon_enabled;
std::optional<std::filesystem::path> perfmon_trace_file;
#if DWARFS_PERFMON_ENABLED
Expand Down

0 comments on commit 3bb80bb

Please sign in to comment.