From cecec4ebfa406798f8d58e22a59f041dc6a9d581 Mon Sep 17 00:00:00 2001 From: Binbin Date: Thu, 12 Sep 2024 15:43:12 +0800 Subject: [PATCH] Trigger a save of the cluster configuration file before shutting down (#822) The cluster configuration file is the metadata "database" for the cluster. It is best to trigger a save when shutdown the server, to avoid inconsistent content that is not refreshed. We save the nodes.conf whenever something that affects the nodes.conf has changed. But we are saving nodes.conf in clusterBeforeSleep, and some events may save it without a fsync, there is a time gap. And shutdown has its own save seems good to me, it doesn't need to care about the others. At the same time, a comment is added in unlock nodes.conf to explain why we actively unlock when shutdown. Signed-off-by: Binbin Signed-off-by: Ping Xie --- src/cluster.h | 2 +- src/cluster_legacy.c | 22 ++++++++++++++++++++++ src/server.c | 9 ++------- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/cluster.h b/src/cluster.h index aefb3a7b82..9961710f9d 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -39,6 +39,7 @@ void clusterInitLast(void); void clusterCron(void); void clusterBeforeSleep(void); int verifyClusterConfigWithData(void); +void clusterHandleServerShutdown(void); int clusterSendModuleMessageToTarget(const char *target, uint64_t module_id, @@ -83,7 +84,6 @@ int getNodeDefaultClientPort(clusterNode *n); clusterNode *getMyClusterNode(void); int getClusterSize(void); int getMyShardSlotCount(void); -int handleDebugClusterCommand(client *c); int clusterNodePending(clusterNode *node); int clusterNodeIsPrimary(clusterNode *n); char **getClusterNodesList(size_t *numnodes); diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 12cd03e21c..65d8081978 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -1191,6 +1191,28 @@ void clusterInitLast(void) { } } +/* Called when a cluster node receives SHUTDOWN. */ +void clusterHandleServerShutdown(void) { + /* The error logs have been logged in the save function if the save fails. */ + serverLog(LL_NOTICE, "Saving the cluster configuration file before exiting."); + clusterSaveConfig(1); + +#if !defined(__sun) + /* Unlock the cluster config file before shutdown, see clusterLockConfig. + * + * This is needed if you shutdown a very large server process, it will take + * a while for the OS to release resources and unlock the cluster configuration + * file. Therefore, if we immediately try to restart the server process, it + * may not be able to acquire the lock on the cluster configuration file and + * fail to start. We explicitly releases the lock on the cluster configuration + * file on shutdown, rather than relying on the OS to release the lock, which + * is a cleaner and safer way to release acquired resources. */ + if (server.cluster_config_file_lock_fd != -1) { + flock(server.cluster_config_file_lock_fd, LOCK_UN | LOCK_NB); + } +#endif /* __sun */ +} + /* Reset a node performing a soft or hard reset: * * 1) All other nodes are forgotten. diff --git a/src/server.c b/src/server.c index 253e585e44..8970c43724 100644 --- a/src/server.c +++ b/src/server.c @@ -4422,13 +4422,8 @@ int finishShutdown(void) { /* Close the listening sockets. Apparently this allows faster restarts. */ closeListeningSockets(1); -#if !defined(__sun) - /* Unlock the cluster config file before shutdown */ - if (server.cluster_enabled && server.cluster_config_file_lock_fd != -1) { - flock(server.cluster_config_file_lock_fd, LOCK_UN | LOCK_NB); - } -#endif /* __sun */ - + /* Handle cluster-related matters when shutdown. */ + if (server.cluster_enabled) clusterHandleServerShutdown(); serverLog(LL_WARNING, "%s is now ready to exit, bye bye...", server.sentinel_mode ? "Sentinel" : "Valkey"); return C_OK;