From 5a9e2d1c1567d7f3a0fff897e5de1d05f2ff2ec3 Mon Sep 17 00:00:00 2001 From: Binbin Date: Wed, 24 Jul 2024 11:07:12 +0800 Subject: [PATCH 1/5] Trigger a save of the cluster configuration file before shutting down The cluster configuration file is the metadata "database" for the cluster. It is best to trigger a save when shutdown the server, to avoid inconsistent content that is not refreshed. Signed-off-by: Binbin --- src/cluster.h | 1 + src/server.c | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/cluster.h b/src/cluster.h index aefb3a7b82..d88c65d87a 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -38,6 +38,7 @@ void clusterInit(void); void clusterInitLast(void); void clusterCron(void); void clusterBeforeSleep(void); +int clusterSaveConfig(int do_fsync); int verifyClusterConfigWithData(void); int clusterSendModuleMessageToTarget(const char *target, diff --git a/src/server.c b/src/server.c index af172fcd2f..3549d22d82 100644 --- a/src/server.c +++ b/src/server.c @@ -4336,6 +4336,12 @@ int finishShutdown(void) { /* Close the listening sockets. Apparently this allows faster restarts. */ closeListeningSockets(1); + if (server.cluster_enabled) { + /* The error logs have been logged in the save function if the save fails. */ + serverLog(LL_NOTICE, "Saving the cluster configuration file before exiting."); + clusterSaveConfig(1); + } + #if !defined(__sun) /* Unlock the cluster config file before shutdown */ if (server.cluster_enabled && server.cluster_config_file_lock_fd != -1) { @@ -4343,7 +4349,6 @@ int finishShutdown(void) { } #endif /* __sun */ - serverLog(LL_WARNING, "%s is now ready to exit, bye bye...", server.sentinel_mode ? "Sentinel" : "Valkey"); return C_OK; From 7691559fec2cf5839675264cfa02d1faff13ea52 Mon Sep 17 00:00:00 2001 From: Binbin Date: Thu, 25 Jul 2024 17:06:21 +0800 Subject: [PATCH 2/5] adding a new clusterHandleServerShutdown Signed-off-by: Binbin --- src/cluster.h | 3 +-- src/cluster_legacy.c | 14 ++++++++++++++ src/server.c | 16 +++------------- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/cluster.h b/src/cluster.h index d88c65d87a..9961710f9d 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -38,8 +38,8 @@ void clusterInit(void); void clusterInitLast(void); void clusterCron(void); void clusterBeforeSleep(void); -int clusterSaveConfig(int do_fsync); int verifyClusterConfigWithData(void); +void clusterHandleServerShutdown(void); int clusterSendModuleMessageToTarget(const char *target, uint64_t module_id, @@ -84,7 +84,6 @@ int getNodeDefaultClientPort(clusterNode *n); clusterNode *getMyClusterNode(void); int getClusterSize(void); int getMyShardSlotCount(void); -int handleDebugClusterCommand(client *c); int clusterNodePending(clusterNode *node); int clusterNodeIsPrimary(clusterNode *n); char **getClusterNodesList(size_t *numnodes); diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 73d484c61c..79fb612559 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -1136,6 +1136,20 @@ void clusterInitLast(void) { } } +/* Called when a cluster node calls SHUTDOWN. */ +void clusterHandleServerShutdown(void) { + /* The error logs have been logged in the save function if the save fails. */ + serverLog(LL_NOTICE, "Saving the cluster configuration file before exiting."); + clusterSaveConfig(1); + +#if !defined(__sun) + /* Unlock the cluster config file before shutdown, see clusterLockConfig. */ + if (server.cluster_config_file_lock_fd != -1) { + flock(server.cluster_config_file_lock_fd, LOCK_UN | LOCK_NB); + } +#endif /* __sun */ +} + /* Reset a node performing a soft or hard reset: * * 1) All other nodes are forgotten. diff --git a/src/server.c b/src/server.c index 3549d22d82..2acfab4554 100644 --- a/src/server.c +++ b/src/server.c @@ -4333,22 +4333,12 @@ int finishShutdown(void) { * send them pending writes. */ flushReplicasOutputBuffers(); + /* Handle cluster-related matters when shutdown. */ + if (server.cluster_enabled) clusterHandleServerShutdown(); + /* Close the listening sockets. Apparently this allows faster restarts. */ closeListeningSockets(1); - if (server.cluster_enabled) { - /* The error logs have been logged in the save function if the save fails. */ - serverLog(LL_NOTICE, "Saving the cluster configuration file before exiting."); - clusterSaveConfig(1); - } - -#if !defined(__sun) - /* Unlock the cluster config file before shutdown */ - if (server.cluster_enabled && server.cluster_config_file_lock_fd != -1) { - flock(server.cluster_config_file_lock_fd, LOCK_UN | LOCK_NB); - } -#endif /* __sun */ - serverLog(LL_WARNING, "%s is now ready to exit, bye bye...", server.sentinel_mode ? "Sentinel" : "Valkey"); return C_OK; From dcac0d2d601ed1d6754daa1ce0776ee7e774b0d3 Mon Sep 17 00:00:00 2001 From: Binbin Date: Fri, 26 Jul 2024 11:40:10 +0800 Subject: [PATCH 3/5] Update src/cluster_legacy.c Co-authored-by: Harkrishn Patro Signed-off-by: Binbin --- src/cluster_legacy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 79fb612559..daa4d2f64b 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -1136,7 +1136,7 @@ void clusterInitLast(void) { } } -/* Called when a cluster node calls SHUTDOWN. */ +/* Called when a cluster node receives SHUTDOWN. */ void clusterHandleServerShutdown(void) { /* The error logs have been logged in the save function if the save fails. */ serverLog(LL_NOTICE, "Saving the cluster configuration file before exiting."); From 9849a775bb45f815ff33a45fa5230cf866896ee5 Mon Sep 17 00:00:00 2001 From: Binbin Date: Tue, 27 Aug 2024 11:55:09 +0800 Subject: [PATCH 4/5] code review: holding the lock until the very end Signed-off-by: Binbin --- src/server.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/server.c b/src/server.c index 07f799320a..c6e485a01c 100644 --- a/src/server.c +++ b/src/server.c @@ -4377,12 +4377,12 @@ int finishShutdown(void) { * send them pending writes. */ flushReplicasOutputBuffers(); - /* Handle cluster-related matters when shutdown. */ - if (server.cluster_enabled) clusterHandleServerShutdown(); - /* Close the listening sockets. Apparently this allows faster restarts. */ closeListeningSockets(1); + /* Handle cluster-related matters when shutdown. */ + if (server.cluster_enabled) clusterHandleServerShutdown(); + serverLog(LL_WARNING, "%s is now ready to exit, bye bye...", server.sentinel_mode ? "Sentinel" : "Valkey"); return C_OK; From f69f80351e8afc5bfc9cfcf6571ba9e5c1d71823 Mon Sep 17 00:00:00 2001 From: Binbin Date: Tue, 27 Aug 2024 14:41:29 +0800 Subject: [PATCH 5/5] Add comment about the unlock Signed-off-by: Binbin --- src/cluster_legacy.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index b3f0ea93e7..d234cf7711 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -1198,7 +1198,15 @@ void clusterHandleServerShutdown(void) { clusterSaveConfig(1); #if !defined(__sun) - /* Unlock the cluster config file before shutdown, see clusterLockConfig. */ + /* Unlock the cluster config file before shutdown, see clusterLockConfig. + * + * This is needed if you shutdown a very large server process, it will take + * a while for the OS to release resources and unlock the cluster configuration + * file. Therefore, if we immediately try to restart the server process, it + * may not be able to acquire the lock on the cluster configuration file and + * fail to start. We explicitly releases the lock on the cluster configuration + * file on shutdown, rather than relying on the OS to release the lock, which + * is a cleaner and safer way to release acquired resources. */ if (server.cluster_config_file_lock_fd != -1) { flock(server.cluster_config_file_lock_fd, LOCK_UN | LOCK_NB); }