From 1572029e76102c5621ffdfe68059d1906c7670ee Mon Sep 17 00:00:00 2001 From: Muhammad Hamza Sajjad <45758804+muhammadhamzasajjad@users.noreply.github.com> Date: Wed, 17 Apr 2024 10:17:31 +0100 Subject: [PATCH] bugfix/1247: Don't create an lmdb library if the name is invalid. (#1481) #### Reference Issues/PRs Fixes #1247 #### What does this implement or fix? #### Any other comments? #### Checklist
Checklist for code changes... - [ ] Have you updated the relevant docstrings, documentation and copyright notice? - [ ] Is this contribution tested against [all ArcticDB's features](../docs/mkdocs/docs/technical/contributing.md)? - [ ] Do all exceptions introduced raise appropriate [error messages](https://docs.arcticdb.io/error_messages/)? - [ ] Are API changes highlighted in the PR description? - [ ] Is the PR labelled as enhancement or bug so it appears in autogenerated release notes?
--- cpp/arcticdb/async/async_store.hpp | 4 +++ cpp/arcticdb/storage/library.hpp | 4 +++ cpp/arcticdb/storage/library_manager.cpp | 2 +- cpp/arcticdb/storage/lmdb/lmdb_storage.cpp | 17 ++++++++++++ cpp/arcticdb/storage/lmdb/lmdb_storage.hpp | 2 ++ cpp/arcticdb/storage/storage.hpp | 6 +++++ cpp/arcticdb/storage/storages.hpp | 4 +++ cpp/arcticdb/storage/test/in_memory_store.hpp | 4 +++ cpp/arcticdb/stream/stream_sink.hpp | 2 ++ cpp/arcticdb/util/name_validation.cpp | 9 ++++++- cpp/arcticdb/util/name_validation.hpp | 3 ++- .../tests/integration/arcticdb/test_lmdb.py | 26 ++++++++++++++++++- 12 files changed, 79 insertions(+), 4 deletions(-) diff --git a/cpp/arcticdb/async/async_store.hpp b/cpp/arcticdb/async/async_store.hpp index 3c105e1adc..6f2d6c9413 100644 --- a/cpp/arcticdb/async/async_store.hpp +++ b/cpp/arcticdb/async/async_store.hpp @@ -147,6 +147,10 @@ entity::VariantKey write_sync( return WriteSegmentTask{library_}(std::move(encoded)); } +bool is_path_valid(const std::string_view path) const override { + return library_->is_path_valid(path); +} + folly::Future write_compressed(storage::KeySegmentPair &&ks) override { return async::submit_io_task(WriteCompressedTask{std::move(ks), library_}); } diff --git a/cpp/arcticdb/storage/library.hpp b/cpp/arcticdb/storage/library.hpp index bc02a2f653..3e5122b636 100644 --- a/cpp/arcticdb/storage/library.hpp +++ b/cpp/arcticdb/storage/library.hpp @@ -130,6 +130,10 @@ class Library { return storages_->key_exists(key); } + bool is_path_valid(const std::string_view path) const { + return storages_->is_path_valid(path); + } + KeySegmentPair read(VariantKey key, ReadKeyOpts opts = ReadKeyOpts{}) { KeySegmentPair res{VariantKey{key}}; util::check(!std::holds_alternative(variant_key_id(key)) || !std::get(variant_key_id(key)).empty(), "Unexpected empty id"); diff --git a/cpp/arcticdb/storage/library_manager.cpp b/cpp/arcticdb/storage/library_manager.cpp index 8072f413bb..098f48c300 100644 --- a/cpp/arcticdb/storage/library_manager.cpp +++ b/cpp/arcticdb/storage/library_manager.cpp @@ -103,7 +103,7 @@ void LibraryManager::write_library_config(const py::object& lib_cfg, const Libra segment.set_metadata(std::move(output)); auto library_name = path.to_delim_path(); - verify_library_path_on_write(library_name); + verify_library_path_on_write(store_.get(), library_name); store_->write_sync( entity::KeyType::LIBRARY_CONFIG, diff --git a/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp b/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp index 0559d1f60d..db1879cd2a 100644 --- a/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp +++ b/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp @@ -259,6 +259,23 @@ void LmdbStorage::do_iterate_type(KeyType key_type, const IterateTypeVisitor& vi } } +bool LmdbStorage::do_is_path_valid(const std::string_view pathString) const { +#ifdef _WIN32 + std::string_view invalid_win32_chars = "<>:\"|?*"; + auto found = pathString.find_first_of(invalid_win32_chars); + if (found != std::string::npos) { + return false; + } + + if (!pathString.empty() && (pathString.back() == '.' || std::isspace(pathString.back()))) { + return false; + } +#else + (void) pathString; // suppress -Werror=unused-parameter +#endif + return true; +} + void remove_db_files(const fs::path& lib_path) { std::vector files = {"lock.mdb", "data.mdb"}; diff --git a/cpp/arcticdb/storage/lmdb/lmdb_storage.hpp b/cpp/arcticdb/storage/lmdb/lmdb_storage.hpp index 704472db71..5629738084 100644 --- a/cpp/arcticdb/storage/lmdb/lmdb_storage.hpp +++ b/cpp/arcticdb/storage/lmdb/lmdb_storage.hpp @@ -52,6 +52,8 @@ class LmdbStorage final : public Storage { bool do_key_exists(const VariantKey & key) final; + bool do_is_path_valid(const std::string_view path) const final; + ::lmdb::env& env() { if (!env_) { raise("Unexpected LMDB Error: Invalid operation: LMDB environment has been removed. " diff --git a/cpp/arcticdb/storage/storage.hpp b/cpp/arcticdb/storage/storage.hpp index c0da8795c0..9825d4415c 100644 --- a/cpp/arcticdb/storage/storage.hpp +++ b/cpp/arcticdb/storage/storage.hpp @@ -166,6 +166,10 @@ class Storage { return do_key_path(key); } + bool is_path_valid(const std::string_view path) const { + return do_is_path_valid(path); + } + [[nodiscard]] const LibraryPath &library_path() const { return lib_path_; } [[nodiscard]] OpenMode open_mode() const { return mode_; } @@ -188,6 +192,8 @@ class Storage { virtual std::string do_key_path(const VariantKey& key) const = 0; + virtual bool do_is_path_valid(const std::string_view) const { return true; } + LibraryPath lib_path_; OpenMode mode_; }; diff --git a/cpp/arcticdb/storage/storages.hpp b/cpp/arcticdb/storage/storages.hpp index 8199a3746e..20e447f2ba 100644 --- a/cpp/arcticdb/storage/storages.hpp +++ b/cpp/arcticdb/storage/storages.hpp @@ -67,6 +67,10 @@ class Storages { return primary().key_exists(key); } + bool is_path_valid(const std::string_view path) const { + return primary().is_path_valid(path); + } + auto read(Composite&& ks, const ReadVisitor& visitor, ReadKeyOpts opts, bool primary_only=true) { ARCTICDB_RUNTIME_SAMPLE(StoragesRead, 0) if(primary_only) diff --git a/cpp/arcticdb/storage/test/in_memory_store.hpp b/cpp/arcticdb/storage/test/in_memory_store.hpp index 66c045c91e..695c2fced6 100644 --- a/cpp/arcticdb/storage/test/in_memory_store.hpp +++ b/cpp/arcticdb/storage/test/in_memory_store.hpp @@ -144,6 +144,10 @@ namespace arcticdb { return write(key_type, stream_id, std::move(segment)).get(); } + bool is_path_valid(const std::string_view) const override { + return true; + } + folly::Future write(stream::KeyType key_type, const StreamId& stream_id, SegmentInMemory &&segment) override { util::check(is_ref_key_class(key_type), "Cannot write ref key with atom key type {}", key_type); diff --git a/cpp/arcticdb/stream/stream_sink.hpp b/cpp/arcticdb/stream/stream_sink.hpp index abbf5363b4..5b252e85a9 100644 --- a/cpp/arcticdb/stream/stream_sink.hpp +++ b/cpp/arcticdb/stream/stream_sink.hpp @@ -112,6 +112,8 @@ struct StreamSink { folly::Future> &&input_fut, const std::shared_ptr &de_dup_map) = 0; + virtual bool is_path_valid(const std::string_view path) const = 0; + [[nodiscard]] virtual folly::Future batch_write_compressed( std::vector kvs) = 0; diff --git a/cpp/arcticdb/util/name_validation.cpp b/cpp/arcticdb/util/name_validation.cpp index fbf0ac3849..b066b97595 100644 --- a/cpp/arcticdb/util/name_validation.cpp +++ b/cpp/arcticdb/util/name_validation.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace arcticdb { @@ -118,8 +119,14 @@ void verify_library_path_part(const std::string& library_part, char delim) { } } -void verify_library_path_on_write(const StringId& library_path) { +void verify_library_path_on_write(const Store* store, const StringId& library_path) { verify_name("library name", library_path, true, UNSUPPORTED_S3_CHARS); + if(!store->is_path_valid(library_path)) { + user_input::raise( + "The library name contains unsupported chars. Library Name: {}", + library_path + ); + } } } \ No newline at end of file diff --git a/cpp/arcticdb/util/name_validation.hpp b/cpp/arcticdb/util/name_validation.hpp index 95673e24ef..ad0587e433 100644 --- a/cpp/arcticdb/util/name_validation.hpp +++ b/cpp/arcticdb/util/name_validation.hpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace arcticdb { @@ -21,7 +22,7 @@ void verify_symbol_key(const StreamId &symbol_key); // Does strict checks on library names and raises UserInputException if it encounters an error. // Should be checked only when writing new libraries to allow for backwards compatibility // with old invalid libraries. -void verify_library_path_on_write(const StringId& library_path); +void verify_library_path_on_write(const Store* store, const StringId& library_path); // These two do relaxed checks which should always be run on each library operation (including // already existing libraries). These raise friendly error messages instead of segfaulting or diff --git a/python/tests/integration/arcticdb/test_lmdb.py b/python/tests/integration/arcticdb/test_lmdb.py index 75f52ff3b8..aebf42ad97 100644 --- a/python/tests/integration/arcticdb/test_lmdb.py +++ b/python/tests/integration/arcticdb/test_lmdb.py @@ -17,7 +17,7 @@ from arcticdb import Arctic from arcticdb.util.test import assert_frame_equal -from arcticdb.exceptions import LmdbMapFullError, StorageException +from arcticdb.exceptions import LmdbMapFullError, StorageException, UserInputException from arcticdb.util.test import get_wide_dataframe import arcticdb.adapters.lmdb_library_adapter as la from arcticdb.exceptions import LmdbOptionsError @@ -102,6 +102,30 @@ def test_lmdb_malloc_trim(lmdb_storage): lib = ac.create_library("test_lmdb_malloc_trim") lib._nvs.trim() +@pytest.mark.skipif(sys.platform != "win32", reason="Non Windows platforms have different file path name restrictions") +@pytest.mark.parametrize("invalid_lib_name", ["lib?1", "lib:1", "lib|1", "lib.", "lib "]) +def test_invalid_lmdb_lib_name_windows(lmdb_storage, invalid_lib_name): + ac = lmdb_storage.create_arctic() + with pytest.raises(UserInputException) as e: + ac.create_library(invalid_lib_name) + + assert ac.list_libraries() == [] + +# Valid names on all platforms +@pytest.mark.parametrize("valid_lib_name", ["lib#~@,1", "lib{)[.1", "!lib$%^"]) +def test_valid_lib_name(lmdb_storage, valid_lib_name): + ac = lmdb_storage.create_arctic() + ac.create_library(valid_lib_name) + + assert ac.list_libraries() == [valid_lib_name] + +@pytest.mark.skipif(sys.platform == "win32", reason="Windows has different file path name restrictions") +@pytest.mark.parametrize("valid_lib_name", ["lib?1", "lib:1", "lib|1", "lib "]) +def test_valid_lib_name_linux(lmdb_storage, valid_lib_name): + ac = lmdb_storage.create_arctic() + ac.create_library(valid_lib_name) + + assert ac.list_libraries() == [valid_lib_name] def test_lmdb_mapsize(tmp_path): # Given - tiny map size