Skip to content
This repository has been archived by the owner on Jan 3, 2024. It is now read-only.

rgw/sfs: sqlite_modern_cpp blobs, users and objects changes #246

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/rgw/driver/sfs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ set(sfs_srcs
sqlite/sqlite_versioned_objects.cc
sqlite/sqlite_lifecycle.cc
sqlite/sqlite_multipart.cc
sqlite/users/users_conversions.cc
sqlite/buckets/bucket_conversions.cc
sqlite/dbconn.cc
sqlite/errors.cc
Expand Down
2 changes: 1 addition & 1 deletion src/rgw/driver/sfs/object_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#if FMT_VERSION >= 90000
#include <fmt/ostream.h>
#endif
#include "sqlite/dbapi.h"
#include "sqlite/dbapi_type_wrapper.h"

namespace rgw::sal::sfs {

Expand Down
86 changes: 74 additions & 12 deletions src/rgw/driver/sfs/sqlite/bindings/blob.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,28 +13,33 @@
*/
#pragma once

#include <tuple>
#include <type_traits>

#include "rgw/driver/sfs/sqlite/conversion_utils.h"
// we need to include dbapi_type_wrapper.h only because including dbapi.h
// creates circular dependencies
#include "rgw/driver/sfs/sqlite/dbapi_type_wrapper.h"
#include "rgw/driver/sfs/sqlite/sqlite_orm.h"
#include "rgw_common.h"

namespace sqlite_orm {

template <typename T>
struct __is_sqlite_blob : std::false_type {};
// Add to this tuple every type that needs to be stored in sqlite as a blob.
// Each listed type must provide encode/decode methods based on ceph's
// bufferlist.
// If the encode/decode methods of your type live outside the ceph namespace,
// also add the type to the TypesDecodeIsNOTInCephNamespace tuple in
// conversion_utils.h.
using BlobTypes = std::tuple<
rgw::sal::Attrs, ACLOwner, rgw_placement_rule,
std::map<std::string, RGWAccessKey>, std::map<std::string, RGWSubUser>,
RGWUserCaps, std::list<std::string>, std::map<int, std::string>,
RGWQuotaInfo, std::set<std::string>, RGWBucketWebsiteConf,
std::map<std::string, uint32_t>, RGWObjectLock, rgw_sync_policy_info>;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we also add RGWBucketInfo?
I'm asking because I'm using the encode/decode functions on this type, but maybe it's not needed.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we store the whole bucket info as a blob we can't then query over specific bucket columns. That's why we didn't store buckets that way.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right!


template <typename T>
inline constexpr bool is_sqlite_blob = __is_sqlite_blob<T>::value;

template <>
struct __is_sqlite_blob<rgw::sal::Attrs> : std::true_type {};

template <>
struct __is_sqlite_blob<ACLOwner> : std::true_type {};

template <>
struct __is_sqlite_blob<rgw_placement_rule> : std::true_type {};
inline constexpr bool is_sqlite_blob =
blob_utils::has_type<T, BlobTypes>::value;

template <class T>
struct type_printer<T, typename std::enable_if<is_sqlite_blob<T>, void>::type>
Expand Down Expand Up @@ -77,3 +82,60 @@ struct row_extractor<
}
};
} // namespace sqlite_orm

namespace rgw::sal::sfs::dbapi::sqlite {
/// Marks a type as having a SQLITE_BLOB representation: the trait's value is
/// true exactly when T is one of the types listed in sqlite_orm::BlobTypes.
template <typename T>
struct has_sqlite_type<T, SQLITE_BLOB, void>
: blob_utils::has_type<T, sqlite_orm::BlobTypes> {};

/// Encodes `val` into a byte buffer and binds that buffer to parameter `inx`
/// of `stmt`. Only enabled for types listed in sqlite_orm::BlobTypes.
/// Returns whatever the byte-vector overload of bind_col_in_db returns.
template <class T>
inline std::enable_if<sqlite_orm::is_sqlite_blob<T>, int>::type bind_col_in_db(
    sqlite3_stmt* stmt, int inx, const T& val
) {
  std::vector<char> encoded;
  rgw::sal::sfs::sqlite::encode_blob(val, encoded);
  const int rc = dbapi::sqlite::bind_col_in_db(stmt, inx, encoded);
  return rc;
}
/// Encodes `val` into a byte buffer and forwards that buffer to the
/// byte-vector overload of store_result_in_db for the context `db`.
/// Only enabled for types listed in sqlite_orm::BlobTypes.
template <class T>
inline std::enable_if<sqlite_orm::is_sqlite_blob<T>, void>::type
store_result_in_db(sqlite3_context* db, const T& val) {
  std::vector<char> encoded;
  rgw::sal::sfs::sqlite::encode_blob(val, encoded);
  dbapi::sqlite::store_result_in_db(db, encoded);
}
/// Reads column `inx` of the current row of `stmt` and decodes it into a T.
/// Aborts when the column is NULL, or when sqlite returns a null blob pointer
/// or a negative size for it.
template <class T>
inline std::enable_if<sqlite_orm::is_sqlite_blob<T>, T>::type
get_col_from_db(sqlite3_stmt* stmt, int inx, result_type<T>) {
  const bool column_is_null = sqlite3_column_type(stmt, inx) == SQLITE_NULL;
  if (column_is_null) {
    ceph_abort_msg("cannot make blob value from NULL");
  }
  // Keep the call order: blob pointer first, byte count second.
  const void* const raw = sqlite3_column_blob(stmt, inx);
  const int nbytes = sqlite3_column_bytes(stmt, inx);
  const bool usable = (raw != nullptr) && (nbytes >= 0);
  if (!usable) {
    ceph_abort_msg("Invalid blob at column : (" + std::to_string(inx) + ")");
  }
  T decoded;
  rgw::sal::sfs::sqlite::decode_blob(
      static_cast<const char*>(raw), static_cast<size_t>(nbytes), decoded
  );
  return decoded;
}

/// Decodes a T from a sqlite3_value that holds an encoded blob.
///
/// The value is first read as raw bytes through the std::vector<char>
/// overload of get_val_from_db and then handed to decode_blob.
/// Aborts when the value is SQL NULL.
template <class T>
inline std::enable_if<sqlite_orm::is_sqlite_blob<T>, T>::type
get_val_from_db(sqlite3_value* value, result_type<T>) {
  if (sqlite3_value_type(value) == SQLITE_NULL) {
    ceph_abort_msg("cannot make blob value from NULL");
  }
  const std::vector<char> bytes =
      get_val_from_db(value, result_type<std::vector<char>>());
  T ret;
  // Pass the vector's storage: a std::vector object itself cannot be
  // reinterpret_cast to a pointer, so the previous cast was ill-formed.
  rgw::sal::sfs::sqlite::decode_blob(
      bytes.data(), static_cast<size_t>(bytes.size()), ret
  );
  return ret;
}

} // namespace rgw::sal::sfs::dbapi::sqlite
2 changes: 1 addition & 1 deletion src/rgw/driver/sfs/sqlite/bindings/real_time.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

#include "common/ceph_time.h"
#include "include/ceph_assert.h"
#include "rgw/driver/sfs/sqlite/dbapi.h"
#include "rgw/driver/sfs/sqlite/dbapi_type_wrapper.h"
#include "rgw/driver/sfs/sqlite/sqlite_orm.h"

/// ceph::real_time is represented as a uint64 (unsigned).
Expand Down
40 changes: 40 additions & 0 deletions src/rgw/driver/sfs/sqlite/bindings/uuid_d.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@

#include "rgw/driver/sfs/sqlite/sqlite_orm.h"
#include "rgw_common.h"
// we need to include dbapi_type_wrapper.h only because including dbapi.h
// creates circular dependencies
#include "rgw/driver/sfs/sqlite/dbapi_type_wrapper.h"

namespace sqlite_orm {
template <>
Expand Down Expand Up @@ -69,3 +72,40 @@ struct row_extractor<uuid_d> {
}
};
} // namespace sqlite_orm

namespace rgw::sal::sfs::dbapi::sqlite {

/// Declares that uuid_d has a SQLITE_TEXT representation; the overloads in
/// this namespace convert it through its string form.
template <>
struct has_sqlite_type<uuid_d, SQLITE_TEXT, void> : ::std::true_type {};

/// Binds `val` to parameter `inx` of `stmt` using its string form.
inline int bind_col_in_db(sqlite3_stmt* stmt, int inx, const uuid_d& val) {
  const auto text = val.to_string();
  return bind_col_in_db(stmt, inx, text);
}
/// Stores the string form of `val` as the result for the context `db`.
inline void store_result_in_db(sqlite3_context* db, const uuid_d& val) {
  const auto text = val.to_string();
  store_result_in_db(db, text);
}
/// Converts the uuid text `text` read from the database into a uuid_d.
/// Shared by get_col_from_db and get_val_from_db so both report invalid
/// input identically.
/// @throws std::system_error (ERANGE) when `text` cannot be parsed.
inline uuid_d parse_uuid_or_throw(const std::string& text) {
  uuid_d parsed;
  if (!parsed.parse(text.c_str())) {
    throw std::system_error(
        ERANGE, std::system_category(), "incorrect uuid string (" + text + ")"
    );
  }
  return parsed;
}

/// Reads column `inx` of `stmt` as a string and converts it to a uuid_d.
/// @throws std::system_error (ERANGE) when the stored text cannot be parsed.
inline uuid_d
get_col_from_db(sqlite3_stmt* stmt, int inx, result_type<uuid_d>) {
  return parse_uuid_or_throw(
      get_col_from_db(stmt, inx, result_type<std::string>())
  );
}

/// Reads `value` as a string and converts it to a uuid_d.
/// @throws std::system_error (ERANGE) when the text cannot be parsed.
inline uuid_d get_val_from_db(sqlite3_value* value, result_type<uuid_d>) {
  return parse_uuid_or_throw(
      get_val_from_db(value, result_type<std::string>())
  );
}
} // namespace rgw::sal::sfs::dbapi::sqlite
45 changes: 27 additions & 18 deletions src/rgw/driver/sfs/sqlite/conversion_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,37 @@
#include "rgw_acl.h"
#include "rgw_common.h"

namespace rgw::sal::sfs::sqlite {
namespace blob_utils {

/// Type trait: has_type<T, std::tuple<Us...>>::value is true when T is one of
/// the tuple's element types and false otherwise. Only the std::tuple
/// specializations are defined; the primary template is declaration-only.
template <typename T, typename Tuple>
struct has_type;

/// by default type's decode function is under the ceph namespace
template <typename T>
struct __ceph_ns_decode : std::true_type {};
struct has_type<T, std::tuple<>> : std::false_type {};

// Recursive case: the first tuple element U is not T, so continue the search
// in the remaining elements Ts...
template <typename T, typename U, typename... Ts>
struct has_type<T, std::tuple<U, Ts...>> : has_type<T, std::tuple<Ts...>> {};

// Match: the first tuple element is T itself, so the search ends with true.
template <typename T, typename... Ts>
struct has_type<T, std::tuple<T, Ts...>> : std::true_type {};
} // namespace blob_utils

namespace rgw::sal::sfs::sqlite {

// Normally the encode/decode methods for rgw types are found in the ceph
// namespace, but for a few types that is not true.
// This tuple lists all the types whose encode/decode methods are NOT in the
// ceph namespace.
// The list is needed to pick the right encode/decode call for a type when it
// is converted to or from a bufferlist.
using TypesDecodeIsNOTInCephNamespace = std::tuple<
RGWAccessControlPolicy, RGWQuotaInfo, RGWObjectLock, RGWUserCaps, ACLOwner,
rgw_placement_rule>;

/// Returns if a type has its encode/decode methods in the ceph namespace.
template <typename T>
inline constexpr bool ceph_ns_decode = __ceph_ns_decode<T>::value;

// specialize the ones that are not under the ceph namespace
template <>
struct __ceph_ns_decode<RGWAccessControlPolicy> : std::false_type {};
template <>
struct __ceph_ns_decode<RGWQuotaInfo> : std::false_type {};
template <>
struct __ceph_ns_decode<RGWObjectLock> : std::false_type {};
template <>
struct __ceph_ns_decode<RGWUserCaps> : std::false_type {};
template <>
struct __ceph_ns_decode<ACLOwner> : std::false_type {};
template <>
struct __ceph_ns_decode<rgw_placement_rule> : std::false_type {};
inline constexpr bool ceph_ns_decode =
!blob_utils::has_type<T, TypesDecodeIsNOTInCephNamespace>::value;

template <typename BLOB_HOLDER, typename DEST>
void decode_blob(const BLOB_HOLDER& blob_holder, DEST& dest) {
Expand Down
9 changes: 9 additions & 0 deletions src/rgw/driver/sfs/sqlite/dbapi_type_wrapper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#pragma once

#include <sqlite3.h>

// Including type_wrapper.h inside this namespace nests everything it declares
// under rgw::sal::sfs::dbapi.
// NOTE(review): presumably any header that type_wrapper.h includes itself must
// already have been included at global scope (like <sqlite3.h> above),
// otherwise its contents would be pulled into this namespace too - confirm.
namespace rgw::sal::sfs::dbapi {

#include "sqlite_modern_cpp/hdr/sqlite_modern_cpp/type_wrapper.h"

} // namespace rgw::sal::sfs::dbapi
34 changes: 34 additions & 0 deletions src/rgw/driver/sfs/sqlite/dbconn.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <sqlite3.h>

#include <filesystem>
#include <memory>
#include <system_error>

#include "common/dout.h"
Expand Down Expand Up @@ -131,6 +132,14 @@ DBConn::DBConn(CephContext* _cct)
// storage->on_open() called from get_storage(), which has the exclusive
// mutex.
sqlite_conns.emplace_back(db);
std::shared_ptr<sqlite3> db_connection =
std::shared_ptr<sqlite3>(db, [=](sqlite3*) {
// doing nothing for now...
// this is just a workaround to reuse the connection opened from
// sqlite_orm, the real owner of the connection is still sqlite_orm.
// This won't be needed after code is fully ported to new sqlite lib
});
storage_pool_new.emplace(std::this_thread::get_id(), db_connection);

sqlite3_extended_result_codes(db, 1);
sqlite3_busy_timeout(db, 10000);
Expand Down Expand Up @@ -187,6 +196,31 @@ StorageRef DBConn::get_storage() {
}
}

dbapi::sqlite::database DBConn::get() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If a thread already called get_storage(), this returns that connection (set in on_open) with the sqlite init we expect (busy timeout, error log, profiling settings).

If that thread did not, this is a new connection with sqlite modern cpp default sqlite_open settings.

We should avoid mixing sqlite connections with different initializations.

Would it suffice to just call get_storage() in the catch, ignore the result and take storage_pool_new.at() which was set in on_open?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point (I was wondering about that too). I suspect the workaround you suggested should work.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, that's something I was afraid of. I will try that workaround.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed this to use get_connection() to open the db the same way in all cases while we finish porting the rest of the code.

std::thread::id this_thread = std::this_thread::get_id();
try {
// using the same mutex as meanwhile code is being ported connections might
// be created for sqlite_orm code or sqlite_modern_cpp
std::shared_lock lock(storage_pool_mutex);
auto connection = storage_pool_new.at(this_thread);
return dbapi::sqlite::database(connection);
} catch (const std::out_of_range& ex) {
// call get_storage to open the connection the same way it was opened in
// the main thread.
get_storage();
std::shared_lock lock(storage_pool_mutex);
if (storage_pool_new.find(this_thread) == storage_pool_new.end()) {
// something went really really wrong.
throw std::system_error(
ENOENT, std::system_category(),
"Could not find a valid SQLITE connection"
);
}
auto connection = storage_pool_new.at(this_thread);
return dbapi::sqlite::database(connection);
}
}

void DBConn::check_metadata_is_compatible() const {
bool sync_error = false;
std::string result_message;
Expand Down
8 changes: 5 additions & 3 deletions src/rgw/driver/sfs/sqlite/dbconn.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,11 +256,15 @@ inline auto _make_storage(const std::string& path) {
using Storage = decltype(_make_storage(""));
using StorageRef = Storage*;

// TODO revisit this when code is fully ported to sqlite modern cpp
using ConnectionNewLib = std::shared_ptr<sqlite3>;

// TODO(https://github.com/aquarist-labs/s3gw/issues/788): Make
// dbapi::sqlite::database the primary interface for sqlite3.
class DBConn {
private:
std::unordered_map<std::thread::id, Storage> storage_pool;
std::unordered_map<std::thread::id, ConnectionNewLib> storage_pool_new;
Copy link
Member

@irq0 irq0 Nov 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a general discussion note:

It's a bit suboptimal that sqlite_modern_cpp wants a shared ptr. I'd rather have DBConn own all the sqlite connections with a unique_ptr. Its lifetime is already equal to sfs / RGW.

The nice thing is that with sqlite_modern_cpp's std::shared_ptr constructor we can take full charge of the sqlite database initialization.

std::vector<sqlite3*> sqlite_conns;
const std::thread::id main_thread;
mutable std::shared_mutex storage_pool_mutex;
Expand All @@ -285,9 +289,7 @@ class DBConn {
return sqlite_conns;
}

dbapi::sqlite::database get() {
return dbapi::sqlite::database(get_storage()->filename());
}
dbapi::sqlite::database get();

static std::string getDBPath(CephContext* cct) {
auto rgw_sfs_path = cct->_conf.get_val<std::string>("rgw_sfs_data_path");
Expand Down
11 changes: 11 additions & 0 deletions src/rgw/driver/sfs/sqlite/objects/object_definitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,17 @@ struct DBObject {
uuid_d uuid;
std::string bucket_id;
std::string name;

// Tuple of (uuid, bucket_id, name) values, in that order; each element type
// is taken from the corresponding member declaration.
// NOTE(review): presumably the row type returned by object queries - confirm.
using DBObjectQueryResult = std::tuple<
decltype(DBObject::uuid), decltype(DBObject::bucket_id),
decltype(DBObject::name)>;

// Default construction leaves every member default-initialized.
DBObject() = default;

// Builds an object by moving each element of `values` into the member at the
// same position (0: uuid, 1: bucket_id, 2: name).
explicit DBObject(DBObjectQueryResult&& values)
    : uuid(std::get<0>(std::move(values))),
      bucket_id(std::get<1>(std::move(values))),
      name(std::get<2>(std::move(values))) {}
};

} // namespace rgw::sal::sfs::sqlite
Expand Down
Loading
Loading