From 1572029e76102c5621ffdfe68059d1906c7670ee Mon Sep 17 00:00:00 2001
From: Muhammad Hamza Sajjad
<45758804+muhammadhamzasajjad@users.noreply.github.com>
Date: Wed, 17 Apr 2024 10:17:31 +0100
Subject: [PATCH] bugfix/1247: Don't create an lmdb library if the name is
invalid. (#1481)
#### Reference Issues/PRs
Fixes #1247
#### What does this implement or fix?
#### Any other comments?
#### Checklist
Checklist for code changes...
- [ ] Have you updated the relevant docstrings, documentation and
copyright notice?
- [ ] Is this contribution tested against [all ArcticDB's
features](../docs/mkdocs/docs/technical/contributing.md)?
- [ ] Do all exceptions introduced raise appropriate [error
messages](https://docs.arcticdb.io/error_messages/)?
- [ ] Are API changes highlighted in the PR description?
- [ ] Is the PR labelled as enhancement or bug so it appears in
autogenerated release notes?
---
cpp/arcticdb/async/async_store.hpp | 4 +++
cpp/arcticdb/storage/library.hpp | 4 +++
cpp/arcticdb/storage/library_manager.cpp | 2 +-
cpp/arcticdb/storage/lmdb/lmdb_storage.cpp | 17 ++++++++++++
cpp/arcticdb/storage/lmdb/lmdb_storage.hpp | 2 ++
cpp/arcticdb/storage/storage.hpp | 6 +++++
cpp/arcticdb/storage/storages.hpp | 4 +++
cpp/arcticdb/storage/test/in_memory_store.hpp | 4 +++
cpp/arcticdb/stream/stream_sink.hpp | 2 ++
cpp/arcticdb/util/name_validation.cpp | 9 ++++++-
cpp/arcticdb/util/name_validation.hpp | 3 ++-
.../tests/integration/arcticdb/test_lmdb.py | 26 ++++++++++++++++++-
12 files changed, 79 insertions(+), 4 deletions(-)
diff --git a/cpp/arcticdb/async/async_store.hpp b/cpp/arcticdb/async/async_store.hpp
index 3c105e1adc..6f2d6c9413 100644
--- a/cpp/arcticdb/async/async_store.hpp
+++ b/cpp/arcticdb/async/async_store.hpp
@@ -147,6 +147,10 @@ entity::VariantKey write_sync(
return WriteSegmentTask{library_}(std::move(encoded));
}
+bool is_path_valid(const std::string_view path) const override {
+ return library_->is_path_valid(path);
+}
+
folly::Future write_compressed(storage::KeySegmentPair &&ks) override {
return async::submit_io_task(WriteCompressedTask{std::move(ks), library_});
}
diff --git a/cpp/arcticdb/storage/library.hpp b/cpp/arcticdb/storage/library.hpp
index bc02a2f653..3e5122b636 100644
--- a/cpp/arcticdb/storage/library.hpp
+++ b/cpp/arcticdb/storage/library.hpp
@@ -130,6 +130,10 @@ class Library {
return storages_->key_exists(key);
}
+ bool is_path_valid(const std::string_view path) const {
+ return storages_->is_path_valid(path);
+ }
+
KeySegmentPair read(VariantKey key, ReadKeyOpts opts = ReadKeyOpts{}) {
KeySegmentPair res{VariantKey{key}};
util::check(!std::holds_alternative(variant_key_id(key)) || !std::get(variant_key_id(key)).empty(), "Unexpected empty id");
diff --git a/cpp/arcticdb/storage/library_manager.cpp b/cpp/arcticdb/storage/library_manager.cpp
index 8072f413bb..098f48c300 100644
--- a/cpp/arcticdb/storage/library_manager.cpp
+++ b/cpp/arcticdb/storage/library_manager.cpp
@@ -103,7 +103,7 @@ void LibraryManager::write_library_config(const py::object& lib_cfg, const Libra
segment.set_metadata(std::move(output));
auto library_name = path.to_delim_path();
- verify_library_path_on_write(library_name);
+ verify_library_path_on_write(store_.get(), library_name);
store_->write_sync(
entity::KeyType::LIBRARY_CONFIG,
diff --git a/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp b/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp
index 0559d1f60d..db1879cd2a 100644
--- a/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp
+++ b/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp
@@ -259,6 +259,23 @@ void LmdbStorage::do_iterate_type(KeyType key_type, const IterateTypeVisitor& vi
}
}
+bool LmdbStorage::do_is_path_valid(const std::string_view pathString) const {
+#ifdef _WIN32
+ std::string_view invalid_win32_chars = "<>:\"|?*";
+ auto found = pathString.find_first_of(invalid_win32_chars);
+ if (found != std::string::npos) {
+ return false;
+ }
+
+ if (!pathString.empty() && (pathString.back() == '.' || std::isspace(pathString.back()))) {
+ return false;
+ }
+#else
+ (void) pathString; // suppress -Werror=unused-parameter
+#endif
+ return true;
+}
+
void remove_db_files(const fs::path& lib_path) {
std::vector files = {"lock.mdb", "data.mdb"};
diff --git a/cpp/arcticdb/storage/lmdb/lmdb_storage.hpp b/cpp/arcticdb/storage/lmdb/lmdb_storage.hpp
index 704472db71..5629738084 100644
--- a/cpp/arcticdb/storage/lmdb/lmdb_storage.hpp
+++ b/cpp/arcticdb/storage/lmdb/lmdb_storage.hpp
@@ -52,6 +52,8 @@ class LmdbStorage final : public Storage {
bool do_key_exists(const VariantKey & key) final;
+ bool do_is_path_valid(const std::string_view path) const final;
+
::lmdb::env& env() {
if (!env_) {
raise("Unexpected LMDB Error: Invalid operation: LMDB environment has been removed. "
diff --git a/cpp/arcticdb/storage/storage.hpp b/cpp/arcticdb/storage/storage.hpp
index c0da8795c0..9825d4415c 100644
--- a/cpp/arcticdb/storage/storage.hpp
+++ b/cpp/arcticdb/storage/storage.hpp
@@ -166,6 +166,10 @@ class Storage {
return do_key_path(key);
}
+ bool is_path_valid(const std::string_view path) const {
+ return do_is_path_valid(path);
+ }
+
[[nodiscard]] const LibraryPath &library_path() const { return lib_path_; }
[[nodiscard]] OpenMode open_mode() const { return mode_; }
@@ -188,6 +192,8 @@ class Storage {
virtual std::string do_key_path(const VariantKey& key) const = 0;
+ virtual bool do_is_path_valid(const std::string_view) const { return true; }
+
LibraryPath lib_path_;
OpenMode mode_;
};
diff --git a/cpp/arcticdb/storage/storages.hpp b/cpp/arcticdb/storage/storages.hpp
index 8199a3746e..20e447f2ba 100644
--- a/cpp/arcticdb/storage/storages.hpp
+++ b/cpp/arcticdb/storage/storages.hpp
@@ -67,6 +67,10 @@ class Storages {
return primary().key_exists(key);
}
+ bool is_path_valid(const std::string_view path) const {
+ return primary().is_path_valid(path);
+ }
+
auto read(Composite&& ks, const ReadVisitor& visitor, ReadKeyOpts opts, bool primary_only=true) {
ARCTICDB_RUNTIME_SAMPLE(StoragesRead, 0)
if(primary_only)
diff --git a/cpp/arcticdb/storage/test/in_memory_store.hpp b/cpp/arcticdb/storage/test/in_memory_store.hpp
index 66c045c91e..695c2fced6 100644
--- a/cpp/arcticdb/storage/test/in_memory_store.hpp
+++ b/cpp/arcticdb/storage/test/in_memory_store.hpp
@@ -144,6 +144,10 @@ namespace arcticdb {
return write(key_type, stream_id, std::move(segment)).get();
}
+ bool is_path_valid(const std::string_view) const override {
+ return true;
+ }
+
folly::Future
write(stream::KeyType key_type, const StreamId& stream_id, SegmentInMemory &&segment) override {
util::check(is_ref_key_class(key_type), "Cannot write ref key with atom key type {}", key_type);
diff --git a/cpp/arcticdb/stream/stream_sink.hpp b/cpp/arcticdb/stream/stream_sink.hpp
index abbf5363b4..5b252e85a9 100644
--- a/cpp/arcticdb/stream/stream_sink.hpp
+++ b/cpp/arcticdb/stream/stream_sink.hpp
@@ -112,6 +112,8 @@ struct StreamSink {
folly::Future> &&input_fut,
const std::shared_ptr &de_dup_map) = 0;
+ virtual bool is_path_valid(const std::string_view path) const = 0;
+
[[nodiscard]] virtual folly::Future batch_write_compressed(
std::vector kvs) = 0;
diff --git a/cpp/arcticdb/util/name_validation.cpp b/cpp/arcticdb/util/name_validation.cpp
index fbf0ac3849..b066b97595 100644
--- a/cpp/arcticdb/util/name_validation.cpp
+++ b/cpp/arcticdb/util/name_validation.cpp
@@ -11,6 +11,7 @@
#include
#include
#include
+#include
namespace arcticdb {
@@ -118,8 +119,14 @@ void verify_library_path_part(const std::string& library_part, char delim) {
}
}
-void verify_library_path_on_write(const StringId& library_path) {
+void verify_library_path_on_write(const Store* store, const StringId& library_path) {
verify_name("library name", library_path, true, UNSUPPORTED_S3_CHARS);
+ if(!store->is_path_valid(library_path)) {
+ user_input::raise(
+ "The library name contains unsupported chars. Library Name: {}",
+ library_path
+ );
+ }
}
}
\ No newline at end of file
diff --git a/cpp/arcticdb/util/name_validation.hpp b/cpp/arcticdb/util/name_validation.hpp
index 95673e24ef..ad0587e433 100644
--- a/cpp/arcticdb/util/name_validation.hpp
+++ b/cpp/arcticdb/util/name_validation.hpp
@@ -11,6 +11,7 @@
#include
#include
#include
+#include
namespace arcticdb {
@@ -21,7 +22,7 @@ void verify_symbol_key(const StreamId &symbol_key);
// Does strict checks on library names and raises UserInputException if it encounters an error.
// Should be checked only when writing new libraries to allow for backwards compatibility
// with old invalid libraries.
-void verify_library_path_on_write(const StringId& library_path);
+void verify_library_path_on_write(const Store* store, const StringId& library_path);
// These two do relaxed checks which should always be run on each library operation (including
// already existing libraries). These raise friendly error messages instead of segfaulting or
diff --git a/python/tests/integration/arcticdb/test_lmdb.py b/python/tests/integration/arcticdb/test_lmdb.py
index 75f52ff3b8..aebf42ad97 100644
--- a/python/tests/integration/arcticdb/test_lmdb.py
+++ b/python/tests/integration/arcticdb/test_lmdb.py
@@ -17,7 +17,7 @@
from arcticdb import Arctic
from arcticdb.util.test import assert_frame_equal
-from arcticdb.exceptions import LmdbMapFullError, StorageException
+from arcticdb.exceptions import LmdbMapFullError, StorageException, UserInputException
from arcticdb.util.test import get_wide_dataframe
import arcticdb.adapters.lmdb_library_adapter as la
from arcticdb.exceptions import LmdbOptionsError
@@ -102,6 +102,30 @@ def test_lmdb_malloc_trim(lmdb_storage):
lib = ac.create_library("test_lmdb_malloc_trim")
lib._nvs.trim()
+@pytest.mark.skipif(sys.platform != "win32", reason="Non Windows platforms have different file path name restrictions")
+@pytest.mark.parametrize("invalid_lib_name", ["lib?1", "lib:1", "lib|1", "lib.", "lib "])
+def test_invalid_lmdb_lib_name_windows(lmdb_storage, invalid_lib_name):
+ ac = lmdb_storage.create_arctic()
+ with pytest.raises(UserInputException) as e:
+ ac.create_library(invalid_lib_name)
+
+ assert ac.list_libraries() == []
+
+# Valid names on all platforms
+@pytest.mark.parametrize("valid_lib_name", ["lib#~@,1", "lib{)[.1", "!lib$%^"])
+def test_valid_lib_name(lmdb_storage, valid_lib_name):
+ ac = lmdb_storage.create_arctic()
+ ac.create_library(valid_lib_name)
+
+ assert ac.list_libraries() == [valid_lib_name]
+
+@pytest.mark.skipif(sys.platform == "win32", reason="Windows has different file path name restrictions")
+@pytest.mark.parametrize("valid_lib_name", ["lib?1", "lib:1", "lib|1", "lib "])
+def test_valid_lib_name_linux(lmdb_storage, valid_lib_name):
+ ac = lmdb_storage.create_arctic()
+ ac.create_library(valid_lib_name)
+
+ assert ac.list_libraries() == [valid_lib_name]
def test_lmdb_mapsize(tmp_path):
# Given - tiny map size