From 48a99215d4520a383cff14ab77b11c0e7316c23c Mon Sep 17 00:00:00 2001 From: Marcel Lauhoff Date: Fri, 6 Oct 2023 11:36:21 +0200 Subject: [PATCH] rgw/sfs: Change retry utility into busy retry util Rename to RetrySQLiteBusy. Change behavior to: - rethrow non-busy errors immediately. We no longer retry errors that we should not like constraint violations - abort on critical errors (e.g db corruption) Signed-off-by: Marcel Lauhoff --- src/rgw/driver/sfs/sqlite/retry.h | 20 ++++++++++++------- src/rgw/driver/sfs/sqlite/sqlite_buckets.cc | 2 +- src/rgw/driver/sfs/sqlite/sqlite_multipart.cc | 6 +++--- .../sfs/sqlite/sqlite_versioned_objects.cc | 6 +++--- src/test/rgw/sfs/test_rgw_sfs_retry.cc | 8 ++++---- 5 files changed, 24 insertions(+), 18 deletions(-) diff --git a/src/rgw/driver/sfs/sqlite/retry.h b/src/rgw/driver/sfs/sqlite/retry.h index 126d3bf5e41d01..ee48282e29bb66 100644 --- a/src/rgw/driver/sfs/sqlite/retry.h +++ b/src/rgw/driver/sfs/sqlite/retry.h @@ -30,7 +30,7 @@ namespace rgw::sal::sfs::sqlite { // after retrying. Catches all non-critical exceptions and makes them // available via failed_error(). Critical exceptions are passed on. template -class RetrySQLite { +class RetrySQLiteBusy { public: using Func = std::function; @@ -42,11 +42,11 @@ class RetrySQLite { std::error_code m_failed_error{}; public: - RetrySQLite(Func&& fn) : m_fn(std::forward(fn)) {} - RetrySQLite(RetrySQLite&&) = delete; - RetrySQLite(const RetrySQLite&) = delete; - RetrySQLite& operator=(const RetrySQLite&) = delete; - RetrySQLite& operator=(RetrySQLite&&) = delete; + RetrySQLiteBusy(Func&& fn) : m_fn(std::forward(fn)) {} + RetrySQLiteBusy(RetrySQLiteBusy&&) = delete; + RetrySQLiteBusy(const RetrySQLiteBusy&) = delete; + RetrySQLiteBusy& operator=(const RetrySQLiteBusy&) = delete; + RetrySQLiteBusy& operator=(RetrySQLiteBusy&&) = delete; /// run runs fn with up to m_max_retries retries. It may throw /// critical-exceptions. Non-critical errors are made available via @@ -66,7 +66,13 @@ class RetrySQLite { } catch (const std::system_error& ex) { m_failed_error = ex.code(); if (critical_error(ex.code().value())) { - // Rethrow, expect a higher layer to shut us down + ceph_abort(fmt::print( + "Critical SQLite error {}. Shutting down.", ex.code().value() + )); + } + if (!busy_error(ex.code().value())) { + // Rethrow, expect a higher layer to handle (e.g constraint + // violations), reply internal server error or shut us down throw ex; } std::this_thread::sleep_for(10ms * retry); diff --git a/src/rgw/driver/sfs/sqlite/sqlite_buckets.cc b/src/rgw/driver/sfs/sqlite/sqlite_buckets.cc index 42877f50b288cb..a3a509e2a64d7b 100644 --- a/src/rgw/driver/sfs/sqlite/sqlite_buckets.cc +++ b/src/rgw/driver/sfs/sqlite/sqlite_buckets.cc @@ -139,7 +139,7 @@ std::optional SQLiteBuckets::delete_bucket_transact( const std::string& bucket_id, uint max_objects, bool& bucket_deleted ) const { auto storage = conn->get_storage(); - RetrySQLite retry([&]() { + RetrySQLiteBusy retry([&]() { bucket_deleted = false; DBDeletedObjectItems ret_values; storage.begin_transaction(); diff --git a/src/rgw/driver/sfs/sqlite/sqlite_multipart.cc b/src/rgw/driver/sfs/sqlite/sqlite_multipart.cc index c0341b45e37553..1adb3bdda84ef1 100644 --- a/src/rgw/driver/sfs/sqlite/sqlite_multipart.cc +++ b/src/rgw/driver/sfs/sqlite/sqlite_multipart.cc @@ -222,7 +222,7 @@ std::optional SQLiteMultipart::create_or_reset_part( ) const { auto storage = conn->get_storage(); - RetrySQLite> retry([&]() { + RetrySQLiteBusy> retry([&]() { auto transaction = storage.transaction_guard(); std::optional entry = std::nullopt; auto cnt = storage.count(where( @@ -466,7 +466,7 @@ SQLiteMultipart::remove_multiparts_by_bucket_id_transact( ) const { DBDeletedMultipartItems ret_parts; auto storage = conn->get_storage(); - RetrySQLite retry([&]() { + RetrySQLiteBusy retry([&]() { auto transaction = storage.transaction_guard(); // get first the list of parts to be deleted up to max_items ret_parts = storage.select( @@ -532,7 +532,7 @@ SQLiteMultipart::remove_done_or_aborted_multiparts_transact(uint max_items ) const { DBDeletedMultipartItems ret_parts; auto storage = conn->get_storage(); - RetrySQLite retry([&]() { + RetrySQLiteBusy retry([&]() { auto transaction = storage.transaction_guard(); // get first the list of parts to be deleted up to max_items ret_parts = storage.select( diff --git a/src/rgw/driver/sfs/sqlite/sqlite_versioned_objects.cc b/src/rgw/driver/sfs/sqlite/sqlite_versioned_objects.cc index 719d7741ccacc7..724cef63c26691 100644 --- a/src/rgw/driver/sfs/sqlite/sqlite_versioned_objects.cc +++ b/src/rgw/driver/sfs/sqlite/sqlite_versioned_objects.cc @@ -147,7 +147,7 @@ bool SQLiteVersionedObjects:: const DBVersionedObject& object, std::vector allowed_states ) const { auto storage = conn->get_storage(); - RetrySQLite retry([&]() { + RetrySQLiteBusy retry([&]() { auto transaction = storage.transaction_guard(); storage.update_all( set(c(&DBVersionedObject::object_id) = object.object_id, @@ -449,7 +449,7 @@ SQLiteVersionedObjects::create_new_versioned_object_transact( const std::string& version_id ) const { auto storage = conn->get_storage(); - RetrySQLite retry([&]() { + RetrySQLiteBusy retry([&]() { auto transaction = storage.transaction_guard(); auto objs = storage.select( columns(&DBObject::uuid), @@ -491,7 +491,7 @@ SQLiteVersionedObjects::remove_deleted_versions_transact(uint max_objects ) const { DBDeletedObjectItems ret_objs; auto storage = conn->get_storage(); - RetrySQLite retry([&]() { + RetrySQLiteBusy retry([&]() { auto transaction = storage.transaction_guard(); // get first the list of objects to be deleted up to max_objects // order by size so when we delete the versions data we are more efficient diff --git a/src/test/rgw/sfs/test_rgw_sfs_retry.cc b/src/test/rgw/sfs/test_rgw_sfs_retry.cc index a0d74fd8640911..0e0d8833981fba 100644 --- a/src/test/rgw/sfs/test_rgw_sfs_retry.cc +++ b/src/test/rgw/sfs/test_rgw_sfs_retry.cc @@ -28,7 +28,7 @@ class TestSFSRetrySQLite : public ::testing::Test { TEST_F(TestSFSRetrySQLite, retry_non_crit_till_failure) { auto exception = std::system_error{SQLITE_BUSY, sqlite_orm::get_sqlite_error_category()}; - RetrySQLite uut([&]() { + RetrySQLiteBusy uut([&]() { throw exception; return 0; }); @@ -41,7 +41,7 @@ TEST_F(TestSFSRetrySQLite, retry_non_crit_till_failure) { TEST_F(TestSFSRetrySQLite, crit_throws_without_retry) { auto exception = std::system_error{ SQLITE_CORRUPT, sqlite_orm::get_sqlite_error_category()}; - RetrySQLite uut([&]() { + RetrySQLiteBusy uut([&]() { throw exception; return 0; }); @@ -52,7 +52,7 @@ TEST_F(TestSFSRetrySQLite, crit_throws_without_retry) { } TEST_F(TestSFSRetrySQLite, simple_return_succeeds_immediately) { - RetrySQLite uut([&]() { return 42; }); + RetrySQLiteBusy uut([&]() { return 42; }); EXPECT_EQ(uut.run(), 42); EXPECT_TRUE(uut.successful()); EXPECT_EQ(uut.retries(), 0); @@ -62,7 +62,7 @@ TEST_F(TestSFSRetrySQLite, retry_second_time_success) { auto exception = std::system_error{SQLITE_BUSY, sqlite_orm::get_sqlite_error_category()}; bool first = true; - RetrySQLite uut([&]() { + RetrySQLiteBusy uut([&]() { if (first) { first = false; throw exception;