Skip to content

Commit

Permalink
feat(mkdwarfs): improve selection of categorize defaults
Browse files Browse the repository at this point in the history
  • Loading branch information
mhx committed Dec 20, 2023
1 parent 7363006 commit 8b5355e
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 22 deletions.
10 changes: 2 additions & 8 deletions doc/mkdwarfs.md
Original file line number Diff line number Diff line change
Expand Up @@ -433,14 +433,8 @@ is category-dependent. The options that can be configured per category are

The resulting configuration matrix can be quite overwhelming, which is why
`mkdwarfs` will run with a reasonable set of defaults if you specify the
`--categorize` option with no arguments. These defaults are currently:

--categorize=pcmaudio,incompressible
--compression incompressible::null
--compression pcmaudio/waveform::flac
--order pcmaudio/waveform::revpath
--max-lookback-blocks pcmaudio/waveform::0
--window-size pcmaudio/waveform::0
`--categorize` option with no arguments. These defaults are also dependent
on the chosen compression level.

Note that in case of the `pcmaudio` categorizer, you can override each
option per category (in this case `pcmaudio/waveform`).
Expand Down
2 changes: 2 additions & 0 deletions include/dwarfs/contextual_option.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,8 @@ class contextual_option_parser {
return oss.str();
}

// Read-only access to the stored `name_` string. Within this change it is
// used as the lookup key into the categorize defaults tables, so it is
// presumably the option name (e.g. "--order") -- confirm at construction.
std::string const& name() const {
  return name_;
}

private:
void add_contextual(typename option_type::context_type const& ctx,
typename option_type::value_type const& val,
Expand Down
92 changes: 78 additions & 14 deletions src/mkdwarfs_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,54 @@ constexpr std::array<level_defaults, 10> levels{{
// clang-format on
}};

// Table type for the implicit `--categorize` defaults: maps an option name
// (e.g. "--order") to the contextual fallback strings registered for it.
// The strings look like `<category>::<value>`; the exact grammar is defined
// by whichever contextual option parser consumes them.
using categorize_defaults_map =
    std::unordered_map<std::string, std::vector<std::string>>;

// Fallbacks that are registered independently of the compression level.
categorize_defaults_map const categorize_defaults_common{
    // clang-format off
    {"--compression", {"incompressible::null",
                       "pcmaudio/waveform::flac"}},
    // clang-format on
};

// Fallbacks for the "fast" levels (0 through 4).
categorize_defaults_map const categorize_defaults_fast{
    // clang-format off
    {"--order",       {"pcmaudio/waveform::revpath"}},
    {"--window-size", {"pcmaudio/waveform::0"}},
    // clang-format on
};

// Fallbacks for the "medium" levels (5 through 7).
categorize_defaults_map const categorize_defaults_medium{
    // clang-format off
    {"--order",       {"pcmaudio/waveform::revpath"}},
    {"--window-size", {"pcmaudio/waveform::20"}},
    // clang-format on
};

// Fallbacks for the "slow" levels (8 and 9). Note that no `--order`
// fallback is registered here, unlike in the fast/medium tables.
categorize_defaults_map const categorize_defaults_slow{
    // clang-format off
    {"--window-size", {"pcmaudio/waveform::16"}},
    // clang-format on
};

// Per-level lookup: index is the compression level (0..9), value points at
// the level-specific fallback table to register in addition to the common
// one.
constexpr std::array<categorize_defaults_map const*, 10>
    categorize_defaults_level{{
        // clang-format off
        /* 0 */ &categorize_defaults_fast,
        /* 1 */ &categorize_defaults_fast,
        /* 2 */ &categorize_defaults_fast,
        /* 3 */ &categorize_defaults_fast,
        /* 4 */ &categorize_defaults_fast,
        /* 5 */ &categorize_defaults_medium,
        /* 6 */ &categorize_defaults_medium,
        /* 7 */ &categorize_defaults_medium,
        /* 8 */ &categorize_defaults_slow,
        /* 9 */ &categorize_defaults_slow,
        // clang-format on
    }};

// Default compression level; presumably the level used when none is selected
// on the command line -- confirm at the option definition.
constexpr unsigned default_level{7};

class categorize_optval {
Expand All @@ -243,7 +291,27 @@ class categorize_optval {
: value{val}
, is_explicit{expl} {}

// True exactly when `value` is non-empty and `is_explicit` is false.
bool add_implicit_defaults() const {
  return !(value.empty() || is_explicit);
}
// True exactly when `value` is non-empty and `is_explicit` is false, i.e. a
// categorize value is present but was not marked as explicitly given.
bool is_implicit_default() const {
  return !(value.empty() || is_explicit);
}

// Feeds the registered fallback strings for `cop` into `cop`.
//
// Does nothing unless is_implicit_default() holds. Otherwise looks up
// `cop.name()` in `defaults_` and, if an entry exists, passes each of its
// strings to `cop.parse_fallback()` in stored order.
//
// T must provide `name()` (usable as a key for `defaults_`) and
// `parse_fallback(std::string const&)`.
template <typename T>
void add_implicit_defaults(T& cop) const {
  if (!is_implicit_default()) {
    return;
  }

  auto const entry = defaults_.find(cop.name());
  if (entry == defaults_.end()) {
    return;
  }

  // Named `fallback` (not `value`) to avoid shadowing the `value` member.
  for (auto const& fallback : entry->second) {
    cop.parse_fallback(fallback);
  }
}

// Registers additional fallback values, keyed by option name.
//
// Values for an option name that is already registered are appended to the
// existing list instead of being discarded, so several tables (e.g. a common
// one and a level-specific one) may contribute to the same option. Values
// registered earlier stay in front of values registered later.
//
// `defaults` maps an option name to the fallback strings for that option.
void
add_defaults(std::unordered_map<std::string, std::vector<std::string>> const&
                 defaults) {
  for (auto const& [option, fallbacks] : defaults) {
    // operator[] creates an empty list on first sight of `option`, so the
    // same code path handles both new and already-known option names.
    auto& merged = defaults_[option];
    merged.insert(merged.end(), fallbacks.begin(), fallbacks.end());
  }
}

private:
// Fallback strings per option name; filled by add_defaults() and read by
// add_implicit_defaults(T&), which looks entries up via `cop.name()`.
std::unordered_map<std::string, std::vector<std::string>> defaults_;
};

std::ostream& operator<<(std::ostream& os, categorize_optval const& optval) {
Expand Down Expand Up @@ -622,6 +690,9 @@ int mkdwarfs_main(int argc, sys_char** argv) {

auto const& defaults = levels[level];

categorizer_list.add_defaults(categorize_defaults_common);
categorizer_list.add_defaults(*categorize_defaults_level[level]);

if (!vm.count("block-size-bits")) {
sf_config.block_size_bits = defaults.block_size_bits;
}
Expand Down Expand Up @@ -1036,9 +1107,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
order_parser);
cop.parse(defaults.order);
cop.parse(order);
if (categorizer_list.add_implicit_defaults()) {
cop.parse_fallback("pcmaudio/waveform::revpath");
}
categorizer_list.add_implicit_defaults(cop);
LOG_VERBOSE << cop.as_string();
}

Expand All @@ -1048,9 +1117,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
max_lookback_parser);
sf_config.max_active_blocks.set_default(1);
cop.parse(max_lookback_blocks);
if (categorizer_list.add_implicit_defaults()) {
cop.parse_fallback("pcmaudio/waveform::0");
}
categorizer_list.add_implicit_defaults(cop);
LOG_VERBOSE << cop.as_string();
}

Expand All @@ -1060,9 +1127,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
window_size_parser);
sf_config.blockhash_window_size.set_default(defaults.window_size);
cop.parse(window_size);
if (categorizer_list.add_implicit_defaults()) {
cop.parse_fallback("pcmaudio/waveform::0");
}
categorizer_list.add_implicit_defaults(cop);
LOG_VERBOSE << cop.as_string();
}

Expand All @@ -1072,6 +1137,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
window_step_parser);
sf_config.window_increment_shift.set_default(defaults.window_step);
cop.parse(window_step);
categorizer_list.add_implicit_defaults(cop);
LOG_VERBOSE << cop.as_string();
}

Expand All @@ -1081,6 +1147,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
bloom_filter_size_parser);
sf_config.bloom_filter_size.set_default(4);
cop.parse(bloom_filter_size);
categorizer_list.add_implicit_defaults(cop);
LOG_VERBOSE << cop.as_string();
}
} catch (std::exception const& e) {
Expand All @@ -1105,10 +1172,7 @@ int mkdwarfs_main(int argc, sys_char** argv) {
compression_opt.set_default(
block_compressor(std::string(defaults.data_compression)));
cop.parse(compression);
if (categorizer_list.add_implicit_defaults()) {
cop.parse_fallback("incompressible::null");
cop.parse_fallback("pcmaudio/waveform::flac");
}
categorizer_list.add_implicit_defaults(cop);
LOG_VERBOSE << cop.as_string();

fsw->add_default_compressor(compression_opt.get());
Expand Down

0 comments on commit 8b5355e

Please sign in to comment.