diff --git a/src/rgw/driver/sfs/sqlite/retry.h b/src/rgw/driver/sfs/sqlite/retry.h index 126d3bf5e41d0..0bd6996bb9df4 100644 --- a/src/rgw/driver/sfs/sqlite/retry.h +++ b/src/rgw/driver/sfs/sqlite/retry.h @@ -30,7 +30,7 @@ namespace rgw::sal::sfs::sqlite { // after retrying. Catches all non-critical exceptions and makes them // available via failed_error(). Critical exceptions are passed on. template -class RetrySQLite { +class RetrySQLiteBusy { public: using Func = std::function; @@ -42,11 +42,11 @@ class RetrySQLite { std::error_code m_failed_error{}; public: - RetrySQLite(Func&& fn) : m_fn(std::forward(fn)) {} - RetrySQLite(RetrySQLite&&) = delete; - RetrySQLite(const RetrySQLite&) = delete; - RetrySQLite& operator=(const RetrySQLite&) = delete; - RetrySQLite& operator=(RetrySQLite&&) = delete; + RetrySQLiteBusy(Func&& fn) : m_fn(std::forward(fn)) {} + RetrySQLiteBusy(RetrySQLiteBusy&&) = delete; + RetrySQLiteBusy(const RetrySQLiteBusy&) = delete; + RetrySQLiteBusy& operator=(const RetrySQLiteBusy&) = delete; + RetrySQLiteBusy& operator=(RetrySQLiteBusy&&) = delete; /// run runs fn with up to m_max_retries retries. It may throw /// critical-exceptions. Non-critical errors are made available via @@ -66,7 +66,13 @@ class RetrySQLite { } catch (const std::system_error& ex) { m_failed_error = ex.code(); if (critical_error(ex.code().value())) { - // Rethrow, expect a higher layer to shut us down + ceph_abort_msgf( + "Critical SQLite error %d. Shutting down.", ex.code().value() + ); + } + if (!busy_error(ex.code().value())) { + // Rethrow, expect a higher layer to handle (e.g constraint + // violations), reply internal server error or shut us down throw ex; } std::this_thread::sleep_for(10ms * retry); diff --git a/src/rgw/driver/sfs/sqlite/sqlite_buckets.cc b/src/rgw/driver/sfs/sqlite/sqlite_buckets.cc index 42877f50b288c..a3a509e2a64d7 100644 --- a/src/rgw/driver/sfs/sqlite/sqlite_buckets.cc +++ b/src/rgw/driver/sfs/sqlite/sqlite_buckets.cc @@ -139,7 +139,7 @@ std::optional SQLiteBuckets::delete_bucket_transact( const std::string& bucket_id, uint max_objects, bool& bucket_deleted ) const { auto storage = conn->get_storage(); - RetrySQLite retry([&]() { + RetrySQLiteBusy retry([&]() { bucket_deleted = false; DBDeletedObjectItems ret_values; storage.begin_transaction(); diff --git a/src/rgw/driver/sfs/sqlite/sqlite_multipart.cc b/src/rgw/driver/sfs/sqlite/sqlite_multipart.cc index c0341b45e3755..1adb3bdda84ef 100644 --- a/src/rgw/driver/sfs/sqlite/sqlite_multipart.cc +++ b/src/rgw/driver/sfs/sqlite/sqlite_multipart.cc @@ -222,7 +222,7 @@ std::optional SQLiteMultipart::create_or_reset_part( ) const { auto storage = conn->get_storage(); - RetrySQLite> retry([&]() { + RetrySQLiteBusy> retry([&]() { auto transaction = storage.transaction_guard(); std::optional entry = std::nullopt; auto cnt = storage.count(where( @@ -466,7 +466,7 @@ SQLiteMultipart::remove_multiparts_by_bucket_id_transact( ) const { DBDeletedMultipartItems ret_parts; auto storage = conn->get_storage(); - RetrySQLite retry([&]() { + RetrySQLiteBusy retry([&]() { auto transaction = storage.transaction_guard(); // get first the list of parts to be deleted up to max_items ret_parts = storage.select( @@ -532,7 +532,7 @@ SQLiteMultipart::remove_done_or_aborted_multiparts_transact(uint max_items ) const { DBDeletedMultipartItems ret_parts; auto storage = conn->get_storage(); - RetrySQLite retry([&]() { + RetrySQLiteBusy retry([&]() { auto transaction = storage.transaction_guard(); // get first the list of parts to be deleted up to max_items ret_parts = storage.select( diff --git a/src/rgw/driver/sfs/sqlite/sqlite_versioned_objects.cc b/src/rgw/driver/sfs/sqlite/sqlite_versioned_objects.cc index 719d7741ccacc..724cef63c2669 100644 --- a/src/rgw/driver/sfs/sqlite/sqlite_versioned_objects.cc +++ b/src/rgw/driver/sfs/sqlite/sqlite_versioned_objects.cc @@ -147,7 +147,7 @@ bool SQLiteVersionedObjects:: const DBVersionedObject& object, std::vector allowed_states ) const { auto storage = conn->get_storage(); - RetrySQLite retry([&]() { + RetrySQLiteBusy retry([&]() { auto transaction = storage.transaction_guard(); storage.update_all( set(c(&DBVersionedObject::object_id) = object.object_id, @@ -449,7 +449,7 @@ SQLiteVersionedObjects::create_new_versioned_object_transact( const std::string& version_id ) const { auto storage = conn->get_storage(); - RetrySQLite retry([&]() { + RetrySQLiteBusy retry([&]() { auto transaction = storage.transaction_guard(); auto objs = storage.select( columns(&DBObject::uuid), @@ -491,7 +491,7 @@ SQLiteVersionedObjects::remove_deleted_versions_transact(uint max_objects ) const { DBDeletedObjectItems ret_objs; auto storage = conn->get_storage(); - RetrySQLite retry([&]() { + RetrySQLiteBusy retry([&]() { auto transaction = storage.transaction_guard(); // get first the list of objects to be deleted up to max_objects // order by size so when we delete the versions data we are more efficient diff --git a/src/test/rgw/sfs/test_rgw_sfs_retry.cc b/src/test/rgw/sfs/test_rgw_sfs_retry.cc index a0d74fd864091..05a9ef7239ec4 100644 --- a/src/test/rgw/sfs/test_rgw_sfs_retry.cc +++ b/src/test/rgw/sfs/test_rgw_sfs_retry.cc @@ -7,6 +7,7 @@ #include "common/ceph_context.h" #include "driver/sfs/sqlite/retry.h" #include "driver/sfs/sqlite/sqlite_orm.h" +#include "gtest/gtest.h" #include "rgw/rgw_sal_sfs.h" #include "rgw_common.h" @@ -25,10 +26,12 @@ class TestSFSRetrySQLite : public ::testing::Test { void TearDown() override {} }; +class TestSFSRetrySQLiteDeathTest : public TestSFSRetrySQLite {}; + TEST_F(TestSFSRetrySQLite, retry_non_crit_till_failure) { auto exception = std::system_error{SQLITE_BUSY, sqlite_orm::get_sqlite_error_category()}; - RetrySQLite uut([&]() { + RetrySQLiteBusy uut([&]() { throw exception; return 0; }); @@ -38,21 +41,19 @@ TEST_F(TestSFSRetrySQLite, retry_non_crit_till_failure) { EXPECT_GT(uut.retries(), 0); } -TEST_F(TestSFSRetrySQLite, crit_throws_without_retry) { +TEST_F(TestSFSRetrySQLiteDeathTest, crit_aborts) { + GTEST_FLAG_SET(death_test_style, "threadsafe"); auto exception = std::system_error{ SQLITE_CORRUPT, sqlite_orm::get_sqlite_error_category()}; - RetrySQLite uut([&]() { + RetrySQLiteBusy uut([&]() { throw exception; return 0; }); - EXPECT_THROW(uut.run(), decltype(exception)); - EXPECT_FALSE(uut.successful()); - EXPECT_EQ(uut.failed_error(), exception.code()); - EXPECT_EQ(uut.retries(), 0); + ASSERT_DEATH(uut.run(), "Critical SQLite error"); } TEST_F(TestSFSRetrySQLite, simple_return_succeeds_immediately) { - RetrySQLite uut([&]() { return 42; }); + RetrySQLiteBusy uut([&]() { return 42; }); EXPECT_EQ(uut.run(), 42); EXPECT_TRUE(uut.successful()); EXPECT_EQ(uut.retries(), 0); @@ -62,7 +63,7 @@ TEST_F(TestSFSRetrySQLite, retry_second_time_success) { auto exception = std::system_error{SQLITE_BUSY, sqlite_orm::get_sqlite_error_category()}; bool first = true; - RetrySQLite uut([&]() { + RetrySQLiteBusy uut([&]() { if (first) { first = false; throw exception;