diff --git a/cpp/arcticdb/CMakeLists.txt b/cpp/arcticdb/CMakeLists.txt index 91e4c16500..97ea99aa0a 100644 --- a/cpp/arcticdb/CMakeLists.txt +++ b/cpp/arcticdb/CMakeLists.txt @@ -272,12 +272,21 @@ set(arcticdb_srcs storage/library.hpp storage/library_index.hpp storage/library_manager.hpp + storage/storage_mock_client.hpp + storage/azure/azure_client_wrapper.hpp + storage/azure/azure_mock_client.hpp + storage/azure/azure_real_client.hpp storage/azure/azure_storage.hpp + storage/lmdb/lmdb_client_wrapper.hpp + storage/lmdb/lmdb_mock_client.hpp + storage/lmdb/lmdb_real_client.hpp storage/lmdb/lmdb_storage.hpp storage/memory/memory_storage.hpp storage/memory/memory_storage.cpp storage/mongo/mongo_client.hpp storage/mongo/mongo_instance.hpp + storage/mongo/mongo_client_wrapper.hpp + storage/mongo/mongo_mock_client.hpp storage/mongo/mongo_storage.hpp storage/object_store_utils.hpp storage/file/file_store.hpp @@ -433,10 +442,15 @@ set(arcticdb_srcs storage/failure_simulation.cpp storage/library_manager.cpp storage/azure/azure_storage.cpp + storage/azure/azure_real_client.cpp + storage/azure/azure_mock_client.cpp + storage/lmdb/lmdb_mock_client.cpp + storage/lmdb/lmdb_real_client.cpp storage/lmdb/lmdb_storage.cpp storage/file/mapped_file_storage.cpp storage/mongo/mongo_client.cpp storage/mongo/mongo_instance.cpp + storage/mongo/mongo_mock_client.cpp storage/mongo/mongo_storage.cpp storage/s3/nfs_backed_storage.cpp storage/s3/s3_api.cpp @@ -445,15 +459,6 @@ set(arcticdb_srcs storage/s3/s3_storage.cpp storage/s3/s3_storage_tool.cpp storage/storage_factory.cpp - storage/azure/azure_client_wrapper.hpp - storage/azure/azure_real_client.hpp - storage/azure/azure_real_client.cpp - storage/azure/azure_mock_client.hpp - storage/azure/azure_mock_client.cpp - storage/storage_mock_client.hpp - storage/mongo/mongo_client_wrapper.hpp - storage/mongo/mongo_mock_client.hpp - storage/mongo/mongo_mock_client.cpp stream/aggregator.cpp stream/append_map.cpp stream/piloted_clock.cpp diff --git a/cpp/arcticdb/python/python_module.cpp b/cpp/arcticdb/python/python_module.cpp index 8a89dce921..7596ee1583 100644 --- a/cpp/arcticdb/python/python_module.cpp +++ b/cpp/arcticdb/python/python_module.cpp @@ -207,7 +207,7 @@ void register_error_code_ecosystem(py::module& m, py::exception internal_exception(m, "InternalException", compat_exception.ptr()); static py::exception storage_exception(m, "StorageException", compat_exception.ptr()); - static py::exception<::lmdb::map_full_error> lmdb_map_full_error(m, "LmdbMapFullError", storage_exception.ptr()); + static py::exception lmdb_map_full_exception(m, "LmdbMapFullError", storage_exception.ptr()); static py::exception user_input_exception(m, "UserInputException", compat_exception.ptr()); py::register_exception_translator([](std::exception_ptr p) { @@ -219,13 +219,13 @@ void register_error_code_ecosystem(py::module& m, py::exception +#include +#include +#include + +// LMDB++ is using `std::is_pod` in `lmdb++.h`, which is deprecated as of C++20. +// See: https://github.com/drycpp/lmdbxx/blob/0b43ca87d8cfabba392dfe884eb1edb83874de02/lmdb%2B%2B.h#L1068 +// See: https://en.cppreference.com/w/cpp/types/is_pod +// This suppresses the warning. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#ifdef ARCTICDB_USING_CONDA +#include +#else +#include +#endif +#pragma GCC diagnostic pop + + +namespace arcticdb::storage::lmdb { + +class LmdbClientWrapper { +public: + virtual bool exists( + const std::string& db_name, + std::string& path, + ::lmdb::txn& txn, + ::lmdb::dbi& dbi) const = 0; + + virtual std::optional read( + const std::string& db_name, + std::string& path, + ::lmdb::txn& txn, + ::lmdb::dbi& dbi) const = 0; + + virtual void write( + const std::string& db_name, + std::string& path, + Segment&& segment, + ::lmdb::txn& txn, + ::lmdb::dbi& dbi, + int64_t overwrite_flag) = 0; + + virtual bool remove(const std::string& db_name, std::string& path, ::lmdb::txn& txn, ::lmdb::dbi& dbi) = 0; + + virtual std::vector list( + const std::string& db_name, + const std::string& prefix, + ::lmdb::txn& txn, + ::lmdb::dbi& dbi, + KeyType key_type) const = 0; + + virtual ~LmdbClientWrapper() = default; +}; + +} // namespace arcticdb::storage::lmdb diff --git a/cpp/arcticdb/storage/lmdb/lmdb_mock_client.cpp b/cpp/arcticdb/storage/lmdb/lmdb_mock_client.cpp new file mode 100644 index 0000000000..879089797c --- /dev/null +++ b/cpp/arcticdb/storage/lmdb/lmdb_mock_client.cpp @@ -0,0 +1,135 @@ +/* Copyright 2024 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. + */ + +#include + +#include +#include +#include +#include + + +namespace arcticdb::storage::lmdb { + +std::string MockLmdbClient::get_failure_trigger( + const std::string& path, + StorageOperation operation_to_fail, + int error_code) { + return fmt::format("{}#Failure_{}_{}", path, operation_to_string(operation_to_fail), error_code); +} + +std::string_view lmdb_operation_string(StorageOperation operation) { + switch (operation) { + case StorageOperation::READ: + return "mdb_get"; + case StorageOperation::WRITE: + return "mdb_put"; + case StorageOperation::DELETE: + return "mdb_del"; + case StorageOperation::LIST: + return "mdb_cursor_get"; + case StorageOperation::EXISTS: + return "mdb_get"; + default: + return "unknown"; + } +} + +void raise_if_has_failure_trigger(const LmdbKey& key, StorageOperation operation) { + auto path = key.path_; + auto failure_string_for_operation = "#Failure_" + operation_to_string(operation) + "_"; + auto position = path.rfind(failure_string_for_operation); + + if (position == std::string::npos) { + return; + } + + int error_code = 0; + try { + auto start = position + failure_string_for_operation.size(); + error_code = stoi(path.substr(start)); + auto error_message = fmt::format("Simulated Error, message: operation {}, error code {}", + operation_to_string(operation), error_code); + } catch (std::exception&) { + return; + } + + if (error_code != 0) { + ::lmdb::error::raise(lmdb_operation_string(operation).data(), error_code); + } +} + +void raise_key_exists_error(std::string_view lmdb_op) { + ::lmdb::error::raise(lmdb_op.data(), MDB_KEYEXIST); +} + +bool MockLmdbClient::has_key(const LmdbKey& key) const { + return lmdb_contents_.find(key) != lmdb_contents_.end(); +} + +bool MockLmdbClient::exists(const std::string& db_name, std::string& path, ::lmdb::txn&, ::lmdb::dbi&) const { + LmdbKey key = {db_name, path}; + raise_if_has_failure_trigger(key, StorageOperation::EXISTS); + + return has_key(key); +} + +std::optional MockLmdbClient::read(const std::string& db_name, std::string& path, ::lmdb::txn&, ::lmdb::dbi&) const { + LmdbKey key = {db_name, path}; + raise_if_has_failure_trigger(key, StorageOperation::READ); + + if (!has_key(key)) { + return std::nullopt; + } + + return lmdb_contents_.at(key); +} + +void MockLmdbClient::write(const std::string& db_name, std::string& path, arcticdb::Segment&& segment, + ::lmdb::txn&, ::lmdb::dbi&, int64_t) { + LmdbKey key = {db_name, path}; + raise_if_has_failure_trigger(key, StorageOperation::WRITE); + + if(has_key(key)) { + raise_key_exists_error(lmdb_operation_string(StorageOperation::WRITE)); + } else { + lmdb_contents_.insert({key, segment}); + } +} + +bool MockLmdbClient::remove(const std::string& db_name, std::string& path, ::lmdb::txn&, ::lmdb::dbi&) { + LmdbKey key = {db_name, path}; + raise_if_has_failure_trigger(key, StorageOperation::DELETE); + + if (!has_key(key)) { + return false; + } + + lmdb_contents_.erase(key); + return true; +} + +std::vector MockLmdbClient::list(const std::string& db_name, const std::string& prefix, ::lmdb::txn&, + ::lmdb::dbi&, KeyType key_type) const { + std::vector found_keys; + + for (const auto& [key, segment] : lmdb_contents_) { + if (key.db_name_ == db_name && util::string_starts_with(prefix, key.path_)) { + raise_if_has_failure_trigger(key, StorageOperation::LIST); + + auto k = variant_key_from_bytes( + reinterpret_cast(key.path_.data()), + key.path_.size(), + key_type); + found_keys.push_back(k); + } + } + + return found_keys; +} + +} // namespace arcticdb::storage::lmdb diff --git a/cpp/arcticdb/storage/lmdb/lmdb_mock_client.hpp b/cpp/arcticdb/storage/lmdb/lmdb_mock_client.hpp new file mode 100644 index 0000000000..6e81bf6730 --- /dev/null +++ b/cpp/arcticdb/storage/lmdb/lmdb_mock_client.hpp @@ -0,0 +1,78 @@ +/* Copyright 2024 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. + */ + +#pragma once + +#include + +#include +#include +#include +#include +#include + + +namespace arcticdb::storage::lmdb { + +struct LmdbKey { + std::string db_name_; + std::string path_; + + bool operator==(const LmdbKey& other) const { + return std::pair(db_name_, path_) == std::pair(other.db_name_, other.path_); + } +}; + +struct LmdbKeyHash { + std::size_t operator()(const LmdbKey& k) const { + return std::hash>{}(std::pair(k.db_name_, k.path_)); + } +}; + +class MockLmdbClient : public LmdbClientWrapper { +public: + static std::string get_failure_trigger( + const std::string& path, + StorageOperation operation_to_fail, + int error_code); + + bool exists( + const std::string& db_name, + std::string& path, + ::lmdb::txn& txn, + ::lmdb::dbi& dbi) const override; + + std::optional read( + const std::string& db_name, + std::string& path, + ::lmdb::txn& txn, + ::lmdb::dbi& dbi) const override; + + void write( + const std::string& db_name, + std::string& path, + Segment&& segment, + ::lmdb::txn& txn, + ::lmdb::dbi& dbi, + int64_t overwrite_flag) override; + + bool remove(const std::string& db_name, std::string& path, ::lmdb::txn& txn, ::lmdb::dbi& dbi) override; + + std::vector list( + const std::string& db_name, + const std::string& prefix, + ::lmdb::txn& txn, + ::lmdb::dbi& dbi, + KeyType key_type) const override; + +private: + std::unordered_map lmdb_contents_; + + bool has_key(const LmdbKey& key) const; +}; + +} // namespace arcticdb::storage::lmdb diff --git a/cpp/arcticdb/storage/lmdb/lmdb_real_client.cpp b/cpp/arcticdb/storage/lmdb/lmdb_real_client.cpp new file mode 100644 index 0000000000..4eb35fcd92 --- /dev/null +++ b/cpp/arcticdb/storage/lmdb/lmdb_real_client.cpp @@ -0,0 +1,97 @@ +/* Copyright 2024 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. + */ + +#include +#include +#include +#include +#include +#include + + +namespace arcticdb::storage::lmdb { + +bool RealLmdbClient::exists(const std::string&, std::string& path, ::lmdb::txn& txn, ::lmdb::dbi& dbi) const { + if (unsigned int tmp; ::mdb_dbi_flags(txn, dbi, &tmp) == EINVAL) { + return false; + } + + MDB_val mdb_key{path.size(), path.data()}; + MDB_val mdb_val; + + return ::lmdb::dbi_get(txn, dbi.handle(), &mdb_key, &mdb_val); +} + +std::optional RealLmdbClient::read(const std::string&, std::string& path, ::lmdb::txn& txn, ::lmdb::dbi& dbi) const { + MDB_val mdb_key{path.size(), path.data()}; + MDB_val mdb_val; + + ARCTICDB_SUBSAMPLE(LmdbStorageGet, 0) + if(!::lmdb::dbi_get(txn, dbi.handle(), &mdb_key, &mdb_val)) { + return std::nullopt; + } + + auto segment = Segment::from_bytes(reinterpret_cast(mdb_val.mv_data),mdb_val.mv_size); + return segment; +} + +void RealLmdbClient::write(const std::string&, std::string& path, arcticdb::Segment&& seg, + ::lmdb::txn& txn, ::lmdb::dbi& dbi, int64_t overwrite_flag) { + MDB_val mdb_key{path.size(), path.data()}; + + std::size_t hdr_sz = seg.segment_header_bytes_size(); + MDB_val mdb_val; + mdb_val.mv_size = seg.total_segment_size(hdr_sz); + + ARCTICDB_SUBSAMPLE(LmdbPut, 0) + int rc = ::mdb_put(txn.handle(), dbi.handle(), &mdb_key, &mdb_val, MDB_RESERVE | overwrite_flag); + if(rc != MDB_SUCCESS) { + ::lmdb::error::raise("mdb_put", rc); + } + + ARCTICDB_SUBSAMPLE(LmdbMemCpy, 0) + // mdb_val now points to a reserved memory area we must write to + seg.write_to(reinterpret_cast(mdb_val.mv_data), hdr_sz); +} + +bool RealLmdbClient::remove(const std::string&, std::string& path, ::lmdb::txn& txn, ::lmdb::dbi& dbi) { + MDB_val mdb_key{path.size(), path.data()}; + + ARCTICDB_SUBSAMPLE(LmdbStorageDel, 0) + return ::lmdb::dbi_del(txn, dbi.handle(), &mdb_key); +} + +std::vector RealLmdbClient::list(const std::string&, const std::string& prefix, ::lmdb::txn& txn, + ::lmdb::dbi& dbi, KeyType key_type) const { + ARCTICDB_SUBSAMPLE(LmdbStorageOpenCursor, 0) + auto db_cursor = ::lmdb::cursor::open(txn, dbi); + + MDB_val mdb_db_key; + ARCTICDB_SUBSAMPLE(LmdbStorageCursorFirst, 0) + if (!db_cursor.get(&mdb_db_key, nullptr, MDB_cursor_op::MDB_FIRST)) { + return {}; + } + + auto prefix_matcher = stream_id_prefix_matcher(prefix); + std::vector found_keys; + do { + auto k = variant_key_from_bytes( + static_cast(mdb_db_key.mv_data), + mdb_db_key.mv_size, + key_type); + + ARCTICDB_DEBUG(log::storage(), "Iterating key {}: {}", variant_key_type(k), variant_key_view(k)); + if (prefix_matcher(variant_key_id(k))) { + found_keys.push_back(k); + } + ARCTICDB_SUBSAMPLE(LmdbStorageCursorNext, 0) + } while (db_cursor.get(&mdb_db_key, nullptr, MDB_cursor_op::MDB_NEXT)); + + return found_keys; +} + +} // namespace arcticdb::storage::lmdb diff --git a/cpp/arcticdb/storage/lmdb/lmdb_real_client.hpp b/cpp/arcticdb/storage/lmdb/lmdb_real_client.hpp new file mode 100644 index 0000000000..279ed5f006 --- /dev/null +++ b/cpp/arcticdb/storage/lmdb/lmdb_real_client.hpp @@ -0,0 +1,52 @@ +/* Copyright 2024 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. + */ + +#pragma once + +#include + +#include +#include +#include +#include + + +namespace arcticdb::storage::lmdb { + +class RealLmdbClient : public LmdbClientWrapper { +public: + bool exists( + const std::string& db_name, + std::string& path, + ::lmdb::txn& txn, + ::lmdb::dbi& dbi) const override; + + std::optional read( + const std::string& db_name, + std::string& path, + ::lmdb::txn& txn, + ::lmdb::dbi& dbi) const override; + + void write( + const std::string& db_name, + std::string& path, + Segment&& segment, + ::lmdb::txn& txn, + ::lmdb::dbi& dbi, + int64_t overwrite_flag) override; + + bool remove(const std::string& db_name, std::string& path, ::lmdb::txn& txn, ::lmdb::dbi& dbi) override; + + std::vector list( + const std::string& db_name, + const std::string& prefix, + ::lmdb::txn& txn, + ::lmdb::dbi& dbi, + KeyType key_type) const override; +}; + +} // namespace arcticdb::storage::lmdb diff --git a/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp b/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp index 1f66d1c620..cfb6eb6c09 100644 --- a/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp +++ b/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp @@ -6,6 +6,8 @@ */ #include +#include +#include #include @@ -29,6 +31,24 @@ namespace arcticdb::storage::lmdb { namespace fg = folly::gen; +void raise_lmdb_exception(const ::lmdb::error& e) { + auto error_code = e.code(); + + if (error_code == MDB_NOTFOUND) { + throw KeyNotFoundException(fmt::format("Key Not Found Error: LMDBError#{}: {}", error_code, e.what())); + } + + if (error_code == MDB_KEYEXIST) { + throw DuplicateKeyException(fmt::format("Duplicate Key Error: LMDBError#{}: {}", error_code, e.what())); + } + + if (error_code == MDB_MAP_FULL) { + throw LMDBMapFullException(fmt::format("Map Full Error: LMDBError#{}: {}", error_code, e.what())); + } + + raise(fmt::format("Unexpected LMDB Error: LMDBError#{}: {}", error_code, e.what())); +} + void LmdbStorage::do_write_internal(Composite&& kvs, ::lmdb::txn& txn) { auto fmt_db = [](auto &&kv) { return kv.key_type(); }; @@ -43,28 +63,14 @@ void LmdbStorage::do_write_internal(Composite&& kvs, ::lmdb::txn ARCTICDB_DEBUG(log::storage(), "Lmdb storage writing segment with key {}", kv.key_view()); auto k = to_serialized_key(kv.variant_key()); auto &seg = kv.segment(); - MDB_val mdb_key; - mdb_key.mv_data = k.data(); - mdb_key.mv_size = k.size(); - - std::size_t hdr_sz = seg.segment_header_bytes_size(); - - MDB_val mdb_val; - mdb_val.mv_size = seg.total_segment_size(hdr_sz); int64_t overwrite_flag = std::holds_alternative(kv.variant_key()) ? 0 : MDB_NOOVERWRITE; - ARCTICDB_SUBSAMPLE(LmdbPut, 0) - int res = ::mdb_put(txn.handle(), dbi.handle(), &mdb_key, &mdb_val, MDB_RESERVE | overwrite_flag); - if (res == MDB_KEYEXIST) { + try { + lmdb_client_->write(db_name, k, std::move(seg), txn, dbi, overwrite_flag); + } catch (const ::lmdb::key_exist_error&) { throw DuplicateKeyException(kv.variant_key()); - } else if (res == MDB_MAP_FULL) { - throw ::lmdb::map_full_error("mdb_put", res); - } else if (res != 0) { - throw std::runtime_error(fmt::format("Invalid lmdb error code {} while putting key {}", - res, kv.key_view())); + } catch (const ::lmdb::error& ex) { + raise_lmdb_exception(ex); } - ARCTICDB_SUBSAMPLE(LmdbMemCpy, 0) - // mdb_val now points to a reserved memory area we must write to - seg.write_to(reinterpret_cast(mdb_val.mv_data), hdr_sz); } }); } @@ -114,22 +120,25 @@ void LmdbStorage::do_read(Composite&& ks, const ReadVisitor& visitor ::lmdb::dbi& dbi = dbi_by_key_type_.at(db_name); for (auto &k : group.values()) { auto stored_key = to_serialized_key(k); - MDB_val mdb_key{stored_key.size(), stored_key.data()}; - MDB_val mdb_val; - ARCTICDB_SUBSAMPLE(LmdbStorageGet, 0) - - if (::lmdb::dbi_get(*txn, dbi.handle(), &mdb_key, &mdb_val)) { - ARCTICDB_SUBSAMPLE(LmdbStorageVisitSegment, 0) - auto segment = Segment::from_bytes(reinterpret_cast(mdb_val.mv_data), - mdb_val.mv_size); - std::any keepalive; - segment.set_keepalive(std::any(std::move(txn))); - visitor(k, std::move(segment)); - - ARCTICDB_DEBUG(log::storage(), "Read key {}: {}, with {} bytes of data", variant_key_type(k), variant_key_view(k), mdb_val.mv_size); - } else { - ARCTICDB_DEBUG(log::storage(), "Failed to find segment for key {}",variant_key_view(k)); + try { + auto segment = lmdb_client_->read(db_name, stored_key, *txn, dbi); + + if (segment.has_value()) { + ARCTICDB_SUBSAMPLE(LmdbStorageVisitSegment, 0) + std::any keepalive; + segment.value().set_keepalive(std::any(std::move(txn))); + visitor(k, std::move(segment.value())); + ARCTICDB_DEBUG(log::storage(), "Read key {}: {}, with {} bytes of data", variant_key_type(k), + variant_key_view(k), segment.value().total_segment_size()); + } else { + ARCTICDB_DEBUG(log::storage(), "Failed to find segment for key {}", variant_key_view(k)); + failed_reads.push_back(k); + } + } catch (const ::lmdb::not_found_error&) { + ARCTICDB_DEBUG(log::storage(), "Failed to find segment for key {}", variant_key_view(k)); failed_reads.push_back(k); + } catch (const ::lmdb::error& ex) { + raise_lmdb_exception(ex); } } }); @@ -147,17 +156,14 @@ bool LmdbStorage::do_key_exists(const VariantKey&key) { try { ::lmdb::dbi& dbi = dbi_by_key_type_.at(db_name); - if (unsigned int tmp; ::mdb_dbi_flags(txn, dbi, &tmp) == EINVAL) { - return false; - } auto stored_key = to_serialized_key(key); - MDB_val mdb_key{stored_key.size(), stored_key.data()}; - MDB_val mdb_val; - return ::lmdb::dbi_get(txn, dbi.handle(), &mdb_key, &mdb_val); + return lmdb_client_->exists(db_name, stored_key, txn, dbi); } catch ([[maybe_unused]] const ::lmdb::not_found_error &ex) { ARCTICDB_DEBUG(log::storage(), "Caught lmdb not found error: {}", ex.what()); - return false; + } catch (const ::lmdb::error& ex) { + raise_lmdb_exception(ex); } + return false; } std::vector LmdbStorage::do_remove_internal(Composite&& ks, ::lmdb::txn& txn, RemoveOpts opts) @@ -173,10 +179,8 @@ std::vector LmdbStorage::do_remove_internal(Composite&& ::lmdb::dbi& dbi = dbi_by_key_type_.at(db_name); for (auto &k : group.values()) { auto stored_key = to_serialized_key(k); - MDB_val mdb_key{stored_key.size(), stored_key.data()}; - ARCTICDB_SUBSAMPLE(LmdbStorageDel, 0) - if (::lmdb::dbi_del(txn, dbi.handle(), &mdb_key)) { + if (lmdb_client_->remove(db_name, stored_key, txn, dbi)) { ARCTICDB_DEBUG(log::storage(), "Deleted segment for key {}", variant_key_view(k)); } else { if (!opts.ignores_missing_key_) { @@ -185,13 +189,15 @@ std::vector LmdbStorage::do_remove_internal(Composite&& } } } - } catch (const std::exception&) { + } catch (const ::lmdb::not_found_error&) { if (!opts.ignores_missing_key_) { for (auto &k : group.values()) { log::storage().warn("Failed to delete segment for key {}", variant_key_view(k) ); failed_deletes.push_back(k); } } + } catch (const ::lmdb::error& ex) { + raise_lmdb_exception(ex); } }); return failed_deletes; @@ -225,7 +231,11 @@ bool LmdbStorage::do_fast_delete() { ARCTICDB_SUBSAMPLE(LmdbStorageOpenDb, 0) ARCTICDB_DEBUG(log::storage(), "dropping {}", db_name); ::lmdb::dbi& dbi = dbi_by_key_type_.at(db_name); - ::lmdb::dbi_drop(dtxn, dbi); + try { + ::lmdb::dbi_drop(dtxn, dbi); + } catch (const ::lmdb::error& ex) { + raise_lmdb_exception(ex); + } }); dtxn.commit(); @@ -238,28 +248,15 @@ void LmdbStorage::do_iterate_type(KeyType key_type, const IterateTypeVisitor& vi std::string type_db = fmt::format("{}", key_type); ::lmdb::dbi& dbi = dbi_by_key_type_.at(type_db); - ARCTICDB_SUBSAMPLE(LmdbStorageOpenCursor, 0) - auto db_cursor = ::lmdb::cursor::open(txn, dbi); - - MDB_val mdb_db_key; - ARCTICDB_SUBSAMPLE(LmdbStorageCursorFirst, 0) - if (!db_cursor.get(&mdb_db_key, nullptr, MDB_cursor_op::MDB_FIRST)) { - return; - } - auto prefix_matcher = stream_id_prefix_matcher(prefix); - do { - auto k = variant_key_from_bytes( - static_cast(mdb_db_key.mv_data), - mdb_db_key.mv_size, - key_type); - - ARCTICDB_DEBUG(log::storage(), "Iterating key {}: {}", variant_key_type(k), variant_key_view(k)); - if (prefix_matcher(variant_key_id(k))) { + try { + auto keys = lmdb_client_->list(type_db, prefix, txn, dbi, key_type); + for (auto &k: keys) { ARCTICDB_SUBSAMPLE(LmdbStorageVisitKey, 0) visitor(std::move(k)); } - ARCTICDB_SUBSAMPLE(LmdbStorageCursorNext, 0) - } while (db_cursor.get(&mdb_db_key, nullptr, MDB_cursor_op::MDB_NEXT)); + } catch (const ::lmdb::error& ex) { + raise_lmdb_exception(ex); + } } @@ -272,7 +269,12 @@ T or_else(T val, T or_else_val, T def = T()) { LmdbStorage::LmdbStorage(const LibraryPath &library_path, OpenMode mode, const Config &conf) : Storage(library_path, mode) { - + if (conf.use_mock_storage_for_testing()) { + lmdb_client_ = std::make_unique(); + } + else { + lmdb_client_ = std::make_unique(); + } write_mutex_ = std::make_unique(); env_ = std::make_unique<::lmdb::env>(::lmdb::env::create(conf.flags())); dbi_by_key_type_ = std::unordered_map{}; @@ -319,11 +321,16 @@ LmdbStorage::LmdbStorage(const LibraryPath &library_path, OpenMode mode, const C auto txn = ::lmdb::txn::begin(env()); - arcticdb::entity::foreach_key_type([&txn, this](KeyType&& key_type) { - std::string db_name = fmt::format("{}", key_type); - ::lmdb::dbi dbi = ::lmdb::dbi::open(txn, db_name.data(), MDB_CREATE); - dbi_by_key_type_.insert(std::make_pair(std::move(db_name), std::move(dbi))); - }); + try { + + arcticdb::entity::foreach_key_type([&txn, this](KeyType &&key_type) { + std::string db_name = fmt::format("{}", key_type); + ::lmdb::dbi dbi = ::lmdb::dbi::open(txn, db_name.data(), MDB_CREATE); + dbi_by_key_type_.insert(std::make_pair(std::move(db_name), std::move(dbi))); + }); + } catch (const ::lmdb::error& ex) { + raise_lmdb_exception(ex); + } txn.commit(); @@ -353,6 +360,7 @@ LmdbStorage::LmdbStorage(LmdbStorage&& other) noexcept dbi_by_key_type_(std::move(other.dbi_by_key_type_)), lib_dir_(std::move(other.lib_dir_)) { other.lib_dir_ = ""; + lmdb_client_ = std::move(other.lmdb_client_); } LmdbStorage::~LmdbStorage() { diff --git a/cpp/arcticdb/storage/lmdb/lmdb_storage.hpp b/cpp/arcticdb/storage/lmdb/lmdb_storage.hpp index 845cb15287..8d251188f6 100644 --- a/cpp/arcticdb/storage/lmdb/lmdb_storage.hpp +++ b/cpp/arcticdb/storage/lmdb/lmdb_storage.hpp @@ -14,19 +14,7 @@ #include #include - -// LMDB++ is using `std::is_pod` in `lmdb++.h`, which is deprecated as of C++20. -// See: https://github.com/drycpp/lmdbxx/blob/0b43ca87d8cfabba392dfe884eb1edb83874de02/lmdb%2B%2B.h#L1068 -// See: https://en.cppreference.com/w/cpp/types/is_pod -// This suppresses the warning. -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#ifdef ARCTICDB_USING_CONDA - #include -#else - #include -#endif -#pragma GCC diagnostic pop +#include #include @@ -78,6 +66,8 @@ class LmdbStorage final : public Storage { std::filesystem::path lib_dir_; + std::unique_ptr lmdb_client_; + // For log warning only // Number of times an LMDB path has been opened. See also reinit_lmdb_warning. // Opening an LMDB env over the same path twice in the same process is unsafe, so we warn the user about it. diff --git a/cpp/arcticdb/storage/storage.hpp b/cpp/arcticdb/storage/storage.hpp index 515037eb3a..4d1078fe82 100644 --- a/cpp/arcticdb/storage/storage.hpp +++ b/cpp/arcticdb/storage/storage.hpp @@ -17,6 +17,9 @@ using ReadVisitor = std::function; class DuplicateKeyException : public ArcticSpecificException { public: + explicit DuplicateKeyException(std::string message) : + ArcticSpecificException(message) { } + explicit DuplicateKeyException(VariantKey key) : ArcticSpecificException(std::string(variant_key_view(key))), key_(std::move(key)) {} diff --git a/cpp/arcticdb/storage/test/test_storage_exceptions.cpp b/cpp/arcticdb/storage/test/test_storage_exceptions.cpp index 8226046896..fe0618b0bf 100644 --- a/cpp/arcticdb/storage/test/test_storage_exceptions.cpp +++ b/cpp/arcticdb/storage/test/test_storage_exceptions.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #ifdef ARCTICDB_INCLUDE_ROCKSDB #include @@ -43,11 +44,11 @@ class StorageFactory { class LMDBStorageFactory : public StorageFactory { private: uint64_t map_size; - + bool use_mock; public: - LMDBStorageFactory() : map_size(128ULL * (1ULL << 20) /* 128MB */) { } + explicit LMDBStorageFactory(bool use_mock = false) : map_size(128ULL * (1ULL << 20) /* 128MB */), use_mock(use_mock) { } - explicit LMDBStorageFactory(uint64_t map_size) : map_size(map_size) { } + explicit LMDBStorageFactory(uint64_t map_size, bool use_mock = false) : map_size(map_size), use_mock(use_mock) { } std::unique_ptr create() override { arcticdb::proto::lmdb_storage::Config cfg; @@ -56,6 +57,7 @@ class LMDBStorageFactory : public StorageFactory { cfg.set_path((TEST_DATABASES_PATH / db_name).generic_string()); cfg.set_map_size(map_size); cfg.set_recreate_if_exists(true); + cfg.set_use_mock_storage_for_testing(use_mock); arcticdb::storage::LibraryPath library_path{"a", "b"}; @@ -203,6 +205,7 @@ INSTANTIATE_TEST_SUITE_P( GenericStorageTest, ::testing::Values( std::make_shared(), + std::make_shared(true), std::make_shared() ) ); @@ -238,7 +241,7 @@ class LMDBStorageTestBase : public ::testing::Test { TEST_F(LMDBStorageTestBase, WriteMapFullError) { // Create a Storage with 32KB map size - LMDBStorageFactory factory(32ULL * (1ULL << 10)); + LMDBStorageFactory factory(32ULL * (1ULL << 10), false); auto storage = factory.create(); arcticdb::entity::AtomKey k = arcticdb::entity::atom_key_builder().gen_id(0).build("sym"); @@ -248,8 +251,64 @@ TEST_F(LMDBStorageTestBase, WriteMapFullError) { ASSERT_THROW({ storage->write(std::move(kv)); - }, ::lmdb::map_full_error); + }, LMDBMapFullException); + +} + +TEST_F(LMDBStorageTestBase, MockMapFullError) { + LMDBStorageFactory factory( true); + auto storage = factory.create(); + + std::string failureSymbol = storage::lmdb::MockLmdbClient::get_failure_trigger("sym", StorageOperation::WRITE, MDB_MAP_FULL); + + ASSERT_THROW({ + write_in_store(*storage, failureSymbol); + }, LMDBMapFullException); + + write_in_store(*storage, "sym1"); +} + +TEST_F(LMDBStorageTestBase, MockUnexpectedLMDBErrorException) { + LMDBStorageFactory factory( true); + auto storage = factory.create(); + + write_in_store(*storage, "sym1"); + write_in_store(*storage, "sym2"); + + std::set symbols = {"sym1", "sym2"}; + ASSERT_EQ(list_in_store(*storage), symbols); + + std::string failureSymbol = storage::lmdb::MockLmdbClient::get_failure_trigger("sym3", StorageOperation::WRITE, MDB_INVALID); + ASSERT_THROW({ + write_in_store(*storage, failureSymbol); + }, UnexpectedLMDBErrorException); + + failureSymbol = storage::lmdb::MockLmdbClient::get_failure_trigger("symx", StorageOperation::READ, MDB_CORRUPTED); + ASSERT_THROW({ + read_in_store(*storage, failureSymbol); + }, UnexpectedLMDBErrorException); + + failureSymbol = storage::lmdb::MockLmdbClient::get_failure_trigger("sym1", StorageOperation::EXISTS, MDB_PAGE_NOTFOUND); + ASSERT_THROW({ + exists_in_store(*storage, failureSymbol); + }, UnexpectedLMDBErrorException); + + failureSymbol = storage::lmdb::MockLmdbClient::get_failure_trigger("sym1", StorageOperation::DELETE, MDB_PANIC); + ASSERT_THROW({ + remove_in_store(*storage, {failureSymbol}); + }, UnexpectedLMDBErrorException); + + ASSERT_EQ(list_in_store(*storage), symbols); + + failureSymbol = storage::lmdb::MockLmdbClient::get_failure_trigger("sym3", StorageOperation::LIST, MDB_CURSOR_FULL); + write_in_store(*storage, failureSymbol); + + ASSERT_THROW({ + list_in_store(*storage); + }, UnexpectedLMDBErrorException); + remove_in_store(*storage, {failureSymbol}); + ASSERT_EQ(list_in_store(*storage), symbols); } // S3 error handling with mock client diff --git a/cpp/arcticdb/util/error_code.hpp b/cpp/arcticdb/util/error_code.hpp index c8c0bb30e0..921f649ce6 100644 --- a/cpp/arcticdb/util/error_code.hpp +++ b/cpp/arcticdb/util/error_code.hpp @@ -78,14 +78,15 @@ inline std::unordered_map get_error_category_names() ERROR_CODE(5000, E_KEY_NOT_FOUND) \ ERROR_CODE(5001, E_DUPLICATE_KEY) \ ERROR_CODE(5002, E_SYMBOL_NOT_FOUND) \ - ERROR_CODE(5003, E_LMDB_MAP_FULL) \ - ERROR_CODE(5004, E_PERMISSION) \ - ERROR_CODE(5005, E_UNEXPECTED_S3_ERROR) \ - ERROR_CODE(5006, E_S3_RETRYABLE) \ - ERROR_CODE(5007, E_UNEXPECTED_AZURE_ERROR) \ - ERROR_CODE(5008, E_UNEXPECTED_ROCKSDB_ERROR) \ - ERROR_CODE(5009, E_MONGO_BULK_OP_NO_REPLY) \ - ERROR_CODE(5010, E_UNEXPECTED_MONGO_ERROR) \ + ERROR_CODE(5003, E_PERMISSION) \ + ERROR_CODE(5010, E_LMDB_MAP_FULL) \ + ERROR_CODE(5011, E_UNEXPECTED_LMDB_ERROR) \ + ERROR_CODE(5020, E_UNEXPECTED_S3_ERROR) \ + ERROR_CODE(5021, E_S3_RETRYABLE) \ + ERROR_CODE(5030, E_UNEXPECTED_AZURE_ERROR) \ + ERROR_CODE(5040, E_UNEXPECTED_ROCKSDB_ERROR) \ + ERROR_CODE(5050, E_MONGO_BULK_OP_NO_REPLY) \ + ERROR_CODE(5051, E_UNEXPECTED_MONGO_ERROR) \ ERROR_CODE(6000, E_UNSORTED_DATA) \ ERROR_CODE(7000, E_INVALID_USER_ARGUMENT) \ ERROR_CODE(7001, E_INVALID_DECIMAL_STRING) \ @@ -160,6 +161,8 @@ using NoSuchVersionException = ArcticSpecificException; using MissingDataException = ArcticCategorizedException; using PermissionException = ArcticSpecificException; +using LMDBMapFullException = ArcticSpecificException; +using UnexpectedLMDBErrorException = ArcticSpecificException; using UnexpectedS3ErrorException = ArcticSpecificException; using S3RetryableException = ArcticSpecificException; using UnexpectedAzureException = ArcticSpecificException; @@ -182,6 +185,16 @@ template<> throw ArcticSpecificException(msg); } +template<> +[[noreturn]] inline void throw_error(const std::string& msg) { + throw ArcticSpecificException(msg); +} + +template<> +[[noreturn]] inline void throw_error(const std::string& msg) { + throw ArcticSpecificException(msg); +} + template<> [[noreturn]] inline void throw_error(const std::string& msg) { throw ArcticSpecificException(msg); diff --git a/cpp/proto/arcticc/pb2/lmdb_storage.proto b/cpp/proto/arcticc/pb2/lmdb_storage.proto index 2c27c7ead7..3b2a7bd538 100644 --- a/cpp/proto/arcticc/pb2/lmdb_storage.proto +++ b/cpp/proto/arcticc/pb2/lmdb_storage.proto @@ -19,6 +19,7 @@ message Config { uint64 map_size = 3; uint32 max_dbs = 4; uint32 max_readers = 5; + bool use_mock_storage_for_testing = 6; bool recreate_if_exists = 100; // defaults to false, useful for unit test or dev mode } diff --git a/docs/mkdocs/docs/error_messages.md b/docs/mkdocs/docs/error_messages.md index 65c0c82010..4ba610cbb4 100644 --- a/docs/mkdocs/docs/error_messages.md +++ b/docs/mkdocs/docs/error_messages.md @@ -56,14 +56,15 @@ For legacy reasons, the terms `symbol`, `stream`, and `stream ID` are used inter | 5000 | A missing key has been requested. | ArcticDB has requested a key that does not exist in storage. Ensure that you have requested a `symbol`, `snapshot`, `version`, or column statistic that exists. | | 5001 | ArcticDB is attempting to write to an already-existing key in storage. | This error is unexpected - please ensure that no other tools are writing data the same storage location that may conflict with ArcticDB. | | 5002 | The symbol being worked on does not exist. | ArcticDB has requested a key that does not exist in storage. Ensure that the symbol exists. | -| 5003 | The LMDB map is full. | Close and reopen your LMDB backed Arctic instance with a larger map size. For example to open `/tmp/a/b/` with a map size of 5GB, use `adb.Arctic("lmdb:///tmp/a/b?map_size=5GB")`. Also see the [LMDB documentation](http://www.lmdb.tech/doc/group__mdb.html#gaa2506ec8dab3d969b0e609cd82e619e5). | -| 5004 | Don't have permissions to carry out the operation. | Ensure that you have the permissions to perform the requested operation on the given key. | -| 5005 | An unexpected S3 error occurred. e.g. Network error, Service not available, Throttling failure etc. | Varies depending on the type of failure. | -| 5006 | An unexpected S3 error occurred which is retryable. | Varies depending on the type of failure. | -| 5007 | An unexpected Azure error occurred with a given status code and error code. | Varies depending on the type of failure. Read more on [Azure Blob Storage error code docs](https://learn.microsoft.com/en-us/rest/api/storageservices/blob-service-error-codes). | -| 5008 | An unexpected RocksDB error occurred. | Varies depending on the type of failure. Check [RocksDB statuses](https://github.com/facebook/rocksdb/blob/main/util/status.cc). | -| 5009 | Mongo didn't acknowledge the operation. This means that the mongo apis didn't confirm whether the operation was successful. | Retry running the previous operation. | -| 5010 | An unexpected Mongo error occurred with a given error code. | Varies depending on the type of failure. Check [MongoDB error codes](https://www.mongodb.com/docs/manual/reference/error-codes/). | +| 5003 | Don't have permissions to carry out the operation. | Ensure that you have the permissions to perform the requested operation on the given key. | +| 5010 | The LMDB map is full. | Close and reopen your LMDB backed Arctic instance with a larger map size. For example to open `/tmp/a/b/` with a map size of 5GB, use `adb.Arctic("lmdb:///tmp/a/b?map_size=5GB")`. Also see the [LMDB documentation](http://www.lmdb.tech/doc/group__mdb.html#gaa2506ec8dab3d969b0e609cd82e619e5). | +| 5011 | An unexpected LMDB error occurred. e.g. File corruption, Environment version mismatch, Page type mismatch etc. | Varies depending on the type of failure. Read more on [LMDB: return codes](http://www.lmdb.tech/doc/group__errors.html) | +| 5020 | An unexpected S3 error occurred. e.g. Network error, Service not available, Throttling failure etc. | Varies depending on the type of failure. | +| 5021 | An unexpected S3 error occurred which is retryable. | Varies depending on the type of failure. | +| 5030 | An unexpected Azure error occurred with a given status code and error code. | Varies depending on the type of failure. Read more on [Azure Blob Storage error code docs](https://learn.microsoft.com/en-us/rest/api/storageservices/blob-service-error-codes). | +| 5040 | An unexpected RocksDB error occurred. | Varies depending on the type of failure. Check [RocksDB statuses](https://github.com/facebook/rocksdb/blob/main/util/status.cc). | +| 5050 | Mongo didn't acknowledge the operation. This means that the mongo apis didn't confirm whether the operation was successful. | Retry running the previous operation. | +| 5051 | An unexpected Mongo error occurred with a given error code. | Varies depending on the type of failure. Check [MongoDB error codes](https://www.mongodb.com/docs/manual/reference/error-codes/). | ### Sorting Errors