From 41e547620730028a3927a6d151b8251a13eb02e7 Mon Sep 17 00:00:00 2001 From: Binbin Date: Thu, 12 Sep 2024 15:43:12 +0800 Subject: [PATCH] Trigger a save of the cluster configuration file before shutting down (#822) The cluster configuration file is the metadata "database" for the cluster. It is best to trigger a save when shutdown the server, to avoid inconsistent content that is not refreshed. We save the nodes.conf whenever something that affects the nodes.conf has changed. But we are saving nodes.conf in clusterBeforeSleep, and some events may save it without a fsync, there is a time gap. And shutdown has its own save seems good to me, it doesn't need to care about the others. At the same time, a comment is added in unlock nodes.conf to explain why we actively unlock when shutdown. Signed-off-by: Binbin Signed-off-by: Ping Xie --- src/cluster.h | 2 +- src/cluster_legacy.c | 22 ++++++++++++++++++++++ src/server.c | 9 ++------- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/cluster.h b/src/cluster.h index aefb3a7b82..9961710f9d 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -39,6 +39,7 @@ void clusterInitLast(void); void clusterCron(void); void clusterBeforeSleep(void); int verifyClusterConfigWithData(void); +void clusterHandleServerShutdown(void); int clusterSendModuleMessageToTarget(const char *target, uint64_t module_id, @@ -83,7 +84,6 @@ int getNodeDefaultClientPort(clusterNode *n); clusterNode *getMyClusterNode(void); int getClusterSize(void); int getMyShardSlotCount(void); -int handleDebugClusterCommand(client *c); int clusterNodePending(clusterNode *node); int clusterNodeIsPrimary(clusterNode *n); char **getClusterNodesList(size_t *numnodes); diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 21bdd09919..f33c5f8068 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -1191,6 +1191,28 @@ void clusterInitLast(void) { } } +/* Called when a cluster node receives SHUTDOWN. */ +void clusterHandleServerShutdown(void) { + /* The error logs have been logged in the save function if the save fails. */ + serverLog(LL_NOTICE, "Saving the cluster configuration file before exiting."); + clusterSaveConfig(1); + +#if !defined(__sun) + /* Unlock the cluster config file before shutdown, see clusterLockConfig. + * + * This is needed if you shutdown a very large server process, it will take + * a while for the OS to release resources and unlock the cluster configuration + * file. Therefore, if we immediately try to restart the server process, it + * may not be able to acquire the lock on the cluster configuration file and + * fail to start. We explicitly releases the lock on the cluster configuration + * file on shutdown, rather than relying on the OS to release the lock, which + * is a cleaner and safer way to release acquired resources. */ + if (server.cluster_config_file_lock_fd != -1) { + flock(server.cluster_config_file_lock_fd, LOCK_UN | LOCK_NB); + } +#endif /* __sun */ +} + /* Reset a node performing a soft or hard reset: * * 1) All other nodes are forgotten. diff --git a/src/server.c b/src/server.c index 0a148ae334..56c096faed 100644 --- a/src/server.c +++ b/src/server.c @@ -4411,13 +4411,8 @@ int finishShutdown(void) { /* Close the listening sockets. Apparently this allows faster restarts. */ closeListeningSockets(1); -#if !defined(__sun) - /* Unlock the cluster config file before shutdown */ - if (server.cluster_enabled && server.cluster_config_file_lock_fd != -1) { - flock(server.cluster_config_file_lock_fd, LOCK_UN | LOCK_NB); - } -#endif /* __sun */ - + /* Handle cluster-related matters when shutdown. */ + if (server.cluster_enabled) clusterHandleServerShutdown(); serverLog(LL_WARNING, "%s is now ready to exit, bye bye...", server.sentinel_mode ? "Sentinel" : "Valkey"); return C_OK;