Skip to content

Commit

Permalink
bugfix/1247: Don't create an lmdb library if the name is invalid. (#1481
Browse files Browse the repository at this point in the history
)

#### Reference Issues/PRs
Fixes #1247 

#### What does this implement or fix?

#### Any other comments?

#### Checklist

<details>
  <summary>
   Checklist for code changes...
  </summary>
 
- [ ] Have you updated the relevant docstrings, documentation and
copyright notice?
- [ ] Is this contribution tested against [all ArcticDB's
features](../docs/mkdocs/docs/technical/contributing.md)?
- [ ] Do all exceptions introduced raise appropriate [error
messages](https://docs.arcticdb.io/error_messages/)?
 - [ ] Are API changes highlighted in the PR description?
- [ ] Is the PR labelled as enhancement or bug so it appears in
autogenerated release notes?
</details>
  • Loading branch information
muhammadhamzasajjad authored Apr 17, 2024
1 parent a2aa686 commit 1572029
Show file tree
Hide file tree
Showing 12 changed files with 79 additions and 4 deletions.
4 changes: 4 additions & 0 deletions cpp/arcticdb/async/async_store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,10 @@ entity::VariantKey write_sync(
return WriteSegmentTask{library_}(std::move(encoded));
}

bool is_path_valid(const std::string_view path) const override {
return library_->is_path_valid(path);
}

folly::Future<folly::Unit> write_compressed(storage::KeySegmentPair &&ks) override {
return async::submit_io_task(WriteCompressedTask{std::move(ks), library_});
}
Expand Down
4 changes: 4 additions & 0 deletions cpp/arcticdb/storage/library.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ class Library {
return storages_->key_exists(key);
}

bool is_path_valid(const std::string_view path) const {
return storages_->is_path_valid(path);
}

KeySegmentPair read(VariantKey key, ReadKeyOpts opts = ReadKeyOpts{}) {
KeySegmentPair res{VariantKey{key}};
util::check(!std::holds_alternative<StringId>(variant_key_id(key)) || !std::get<StringId>(variant_key_id(key)).empty(), "Unexpected empty id");
Expand Down
2 changes: 1 addition & 1 deletion cpp/arcticdb/storage/library_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ void LibraryManager::write_library_config(const py::object& lib_cfg, const Libra
segment.set_metadata(std::move(output));

auto library_name = path.to_delim_path();
verify_library_path_on_write(library_name);
verify_library_path_on_write(store_.get(), library_name);

store_->write_sync(
entity::KeyType::LIBRARY_CONFIG,
Expand Down
17 changes: 17 additions & 0 deletions cpp/arcticdb/storage/lmdb/lmdb_storage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,23 @@ void LmdbStorage::do_iterate_type(KeyType key_type, const IterateTypeVisitor& vi
}
}

bool LmdbStorage::do_is_path_valid(const std::string_view pathString) const {
#ifdef _WIN32
std::string_view invalid_win32_chars = "<>:\"|?*";
auto found = pathString.find_first_of(invalid_win32_chars);
if (found != std::string::npos) {
return false;
}

if (!pathString.empty() && (pathString.back() == '.' || std::isspace(pathString.back()))) {
return false;
}
#else
(void) pathString; // suppress -Werror=unused-parameter
#endif
return true;
}

void remove_db_files(const fs::path& lib_path) {
std::vector<std::string> files = {"lock.mdb", "data.mdb"};

Expand Down
2 changes: 2 additions & 0 deletions cpp/arcticdb/storage/lmdb/lmdb_storage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ class LmdbStorage final : public Storage {

bool do_key_exists(const VariantKey & key) final;

bool do_is_path_valid(const std::string_view path) const final;

::lmdb::env& env() {
if (!env_) {
raise<ErrorCode::E_UNEXPECTED_LMDB_ERROR>("Unexpected LMDB Error: Invalid operation: LMDB environment has been removed. "
Expand Down
6 changes: 6 additions & 0 deletions cpp/arcticdb/storage/storage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,10 @@ class Storage {
return do_key_path(key);
}

bool is_path_valid(const std::string_view path) const {
return do_is_path_valid(path);
}

[[nodiscard]] const LibraryPath &library_path() const { return lib_path_; }
[[nodiscard]] OpenMode open_mode() const { return mode_; }

Expand All @@ -188,6 +192,8 @@ class Storage {

virtual std::string do_key_path(const VariantKey& key) const = 0;

virtual bool do_is_path_valid(const std::string_view) const { return true; }

LibraryPath lib_path_;
OpenMode mode_;
};
Expand Down
4 changes: 4 additions & 0 deletions cpp/arcticdb/storage/storages.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ class Storages {
return primary().key_exists(key);
}

bool is_path_valid(const std::string_view path) const {
return primary().is_path_valid(path);
}

auto read(Composite<VariantKey>&& ks, const ReadVisitor& visitor, ReadKeyOpts opts, bool primary_only=true) {
ARCTICDB_RUNTIME_SAMPLE(StoragesRead, 0)
if(primary_only)
Expand Down
4 changes: 4 additions & 0 deletions cpp/arcticdb/storage/test/in_memory_store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,10 @@ namespace arcticdb {
return write(key_type, stream_id, std::move(segment)).get();
}

bool is_path_valid(const std::string_view) const override {
return true;
}

folly::Future<entity::VariantKey>
write(stream::KeyType key_type, const StreamId& stream_id, SegmentInMemory &&segment) override {
util::check(is_ref_key_class(key_type), "Cannot write ref key with atom key type {}", key_type);
Expand Down
2 changes: 2 additions & 0 deletions cpp/arcticdb/stream/stream_sink.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ struct StreamSink {
folly::Future<std::tuple<PartialKey, SegmentInMemory, pipelines::FrameSlice>> &&input_fut,
const std::shared_ptr<DeDupMap> &de_dup_map) = 0;

virtual bool is_path_valid(const std::string_view path) const = 0;

[[nodiscard]] virtual folly::Future<folly::Unit> batch_write_compressed(
std::vector<storage::KeySegmentPair> kvs) = 0;

Expand Down
9 changes: 8 additions & 1 deletion cpp/arcticdb/util/name_validation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <arcticdb/entity/types.hpp>
#include <arcticdb/util/configs_map.hpp>
#include <arcticdb/stream/index.hpp>
#include <arcticdb/storage/store.hpp>

namespace arcticdb {

Expand Down Expand Up @@ -118,8 +119,14 @@ void verify_library_path_part(const std::string& library_part, char delim) {
}
}

void verify_library_path_on_write(const StringId& library_path) {
void verify_library_path_on_write(const Store* store, const StringId& library_path) {
verify_name("library name", library_path, true, UNSUPPORTED_S3_CHARS);
if(!store->is_path_valid(library_path)) {
user_input::raise<ErrorCode::E_INVALID_CHAR_IN_NAME>(
"The library name contains unsupported chars. Library Name: {}",
library_path
);
}
}

}
3 changes: 2 additions & 1 deletion cpp/arcticdb/util/name_validation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <arcticdb/entity/types.hpp>
#include <arcticdb/util/configs_map.hpp>
#include <arcticdb/stream/index.hpp>
#include <arcticdb/pipeline/index_segment_reader.hpp>

namespace arcticdb {

Expand All @@ -21,7 +22,7 @@ void verify_symbol_key(const StreamId &symbol_key);
// Does strict checks on library names and raises UserInputException if it encounters an error.
// Should be checked only when writing new libraries to allow for backwards compatibility
// with old invalid libraries.
void verify_library_path_on_write(const StringId& library_path);
void verify_library_path_on_write(const Store* store, const StringId& library_path);

// These two do relaxed checks which should always be run on each library operation (including
// already existing libraries). These raise friendly error messages instead of segfaulting or
Expand Down
26 changes: 25 additions & 1 deletion python/tests/integration/arcticdb/test_lmdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from arcticdb import Arctic
from arcticdb.util.test import assert_frame_equal
from arcticdb.exceptions import LmdbMapFullError, StorageException
from arcticdb.exceptions import LmdbMapFullError, StorageException, UserInputException
from arcticdb.util.test import get_wide_dataframe
import arcticdb.adapters.lmdb_library_adapter as la
from arcticdb.exceptions import LmdbOptionsError
Expand Down Expand Up @@ -102,6 +102,30 @@ def test_lmdb_malloc_trim(lmdb_storage):
lib = ac.create_library("test_lmdb_malloc_trim")
lib._nvs.trim()

@pytest.mark.skipif(sys.platform != "win32", reason="Non Windows platforms have different file path name restrictions")
@pytest.mark.parametrize("invalid_lib_name", ["lib?1", "lib:1", "lib|1", "lib.", "lib "])
def test_invalid_lmdb_lib_name_windows(lmdb_storage, invalid_lib_name):
ac = lmdb_storage.create_arctic()
with pytest.raises(UserInputException) as e:
ac.create_library(invalid_lib_name)

assert ac.list_libraries() == []

# Valid names on all platforms
@pytest.mark.parametrize("valid_lib_name", ["lib#~@,1", "lib{)[.1", "!lib$%^"])
def test_valid_lib_name(lmdb_storage, valid_lib_name):
ac = lmdb_storage.create_arctic()
ac.create_library(valid_lib_name)

assert ac.list_libraries() == [valid_lib_name]

@pytest.mark.skipif(sys.platform == "win32", reason="Windows has different file path name restrictions")
@pytest.mark.parametrize("valid_lib_name", ["lib?1", "lib:1", "lib|1", "lib "])
def test_valid_lib_name_linux(lmdb_storage, valid_lib_name):
ac = lmdb_storage.create_arctic()
ac.create_library(valid_lib_name)

assert ac.list_libraries() == [valid_lib_name]

def test_lmdb_mapsize(tmp_path):
# Given - tiny map size
Expand Down

0 comments on commit 1572029

Please sign in to comment.