Skip to content

Commit

Permalink
Cleanup aof temp files in sigShutdownHandler before exiting
Browse files Browse the repository at this point in the history
I think this code previously only meant to handle save in shutdown,
which means we may a do foreground save in shutdown. It also implicitly
handles the backgroud save, for example the child process will also
get the chance to call getpid() and clean up the temp RDB file.

However, we did not handle AOFRW, so we also leave temp files in here.
Noted that we can not use the same aofRemoveTempFile(getpid()) trick
for AOF. Since we rename the temp file (temp-rewriteaof-bg-pid.aof) to
the base aof file only in backgroundRewriteDoneHandler, and in this time,
there is no child process, so the cleanup job must be done by the parent
child.

The following sequences have been tested (not just signal handlers):
1. Long foreground save + shutdown
2. Long backgroud save + shutdown
3. Long backgroud save + long lua script + shutdown
4. Long bgrewriteaof + shutdown
5. Long bgrewriteaof + long lua script + shutdown
6. Short bgrewriteaof + long done handler + shutdown

All temporary files will be removed in the local tests.

Signed-off-by: Binbin <[email protected]>
  • Loading branch information
enjoy-binbin committed Dec 24, 2024
1 parent d00c856 commit ab77b25
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 12 deletions.
41 changes: 32 additions & 9 deletions src/aof.c
Original file line number Diff line number Diff line change
Expand Up @@ -922,7 +922,7 @@ void killAppendOnlyChild(void) {
if (kill(server.child_pid, SIGUSR1) != -1) {
while (waitpid(-1, &statloc, 0) != server.child_pid);
}
aofRemoveTempFile(server.child_pid);
aofRemoveTempFile(server.child_pid, 0);
resetChildState();
server.aof_rewrite_time_start = -1;
}
Expand Down Expand Up @@ -2470,14 +2470,37 @@ void bgrewriteaofCommand(client *c) {
}
}

void aofRemoveTempFile(pid_t childpid) {
/* Note that we may call this function in signal handle 'sigShutdownHandler',
* so we need guarantee all functions we call are async-signal-safe.
* If we call this function from signal handle, we won't call bg_unlink that
* is not async-signal-safe. */
void aofRemoveTempFile(pid_t childpid, int from_signal) {
char tmpfile[256];

snprintf(tmpfile, 256, "temp-rewriteaof-bg-%d.aof", (int)childpid);
bg_unlink(tmpfile);

snprintf(tmpfile, 256, "temp-rewriteaof-%d.aof", (int)childpid);
bg_unlink(tmpfile);
char tmpfile2[256];
char pid[32];

/* Generate temp aof file name using aync-signal safe functions. */
ll2string(pid, sizeof(pid), childpid);
valkey_strlcpy(tmpfile, "temp-rewriteaof-bg-", sizeof(tmpfile));
valkey_strlcat(tmpfile, pid, sizeof(tmpfile));
valkey_strlcat(tmpfile, ".aof", sizeof(tmpfile));
valkey_strlcpy(tmpfile2, "temp-rewriteaof-", sizeof(tmpfile2));
valkey_strlcat(tmpfile2, pid, sizeof(tmpfile2));
valkey_strlcat(tmpfile2, ".aof", sizeof(tmpfile2));

if (from_signal) {
/* bg_unlink is not async-signal-safe, but in this case we don't really
* need to close the fd, it'll be released when the process exists. */
int fd = open(tmpfile, O_RDONLY | O_NONBLOCK);
int fd2 = open(tmpfile2, O_RDONLY | O_NONBLOCK);
UNUSED(fd);
UNUSED(fd2);
unlink(tmpfile);
unlink(tmpfile2);
} else {
bg_unlink(tmpfile);
bg_unlink(tmpfile2);
}
}

/* Get size of an AOF file.
Expand Down Expand Up @@ -2675,7 +2698,7 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
}

cleanup:
aofRemoveTempFile(server.child_pid);
aofRemoveTempFile(server.child_pid, 0);
/* Clear AOF buffer and delete temp incr aof for next rewrite. */
if (server.aof_state == AOF_WAIT_REWRITE) {
sdsfree(server.aof_buf);
Expand Down
10 changes: 9 additions & 1 deletion src/server.c
Original file line number Diff line number Diff line change
Expand Up @@ -6387,7 +6387,15 @@ static void sigShutdownHandler(int sig) {
* on disk and without waiting for lagging replicas. */
if (server.shutdown_asap && sig == SIGINT) {
serverLogRawFromHandler(LL_WARNING, "You insist... exiting now.");
rdbRemoveTempFile(getpid(), 1);
/* Make sure the process cleans up the temp files before exiting.
* We let the parent process handle this. For RDB, we have foreground save
* and background save, so we need to handle both pid and child_pid. For AOF,
* we only have background aof rewrite, so we only need to handle child_pid. */
if (!server.in_fork_child) {
rdbRemoveTempFile(server.pid, 1);
rdbRemoveTempFile(server.child_pid, 1);
aofRemoveTempFile(server.child_pid, 1);
}
exit(1); /* Exit with an error since this was not a clean shutdown. */
} else if (server.loading) {
msg = "Received shutdown signal during loading, scheduling shutdown.";
Expand Down
2 changes: 1 addition & 1 deletion src/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -3148,7 +3148,7 @@ int bg_unlink(const char *filename);
/* AOF persistence */
void flushAppendOnlyFile(int force);
void feedAppendOnlyFile(int dictid, robj **argv, int argc);
void aofRemoveTempFile(pid_t childpid);
void aofRemoveTempFile(pid_t childpid, int from_signal);
int rewriteAppendOnlyFileBackground(void);
int loadAppendOnlyFiles(aofManifest *am);
void stopAppendOnly(void);
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/shutdown.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ start_server {tags {"shutdown external:skip"} overrides {save {900 1}}} {
wait_for_condition 50 10 {
![file exists $temp_rdb]
} else {
fail "Can't trigger rdb save on shutdown"
fail "Can't delete rdb temp file on shutdown"
}
}
}
Expand Down

0 comments on commit ab77b25

Please sign in to comment.