From 91cf42ddf9c05c1b92a590e8a0d0214969df6347 Mon Sep 17 00:00:00 2001 From: kronwerk Date: Tue, 10 Dec 2024 10:57:35 +0300 Subject: [PATCH 1/2] added aof-max-size parameter with tests; fixes #540 Signed-off-by: kronwerk improved aof-max-size tests Signed-off-by: kronwerk --- src/aof.c | 16 +++++++--- src/config.c | 1 + src/server.c | 15 +++++++++ src/server.h | 1 + tests/unit/aof-max-size.tcl | 61 +++++++++++++++++++++++++++++++++++++ valkey.conf | 3 ++ 6 files changed, 92 insertions(+), 5 deletions(-) create mode 100644 tests/unit/aof-max-size.tcl diff --git a/src/aof.c b/src/aof.c index 0fd3cf5c26..8af3a9928f 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1010,16 +1010,22 @@ int startAppendOnly(void) { * the first call is short, there is a end-of-space condition, so the next * is likely to fail. However apparently in modern systems this is no longer * true, and in general it looks just more resilient to retry the write. If - * there is an actual error condition we'll get it at the next try. */ -ssize_t aofWrite(int fd, const char *buf, size_t len) { - ssize_t nwritten = 0, totwritten = 0; + * there is an actual error condition we'll get it at the next try. + * We also check for aof-max-size limit here returning "no space" on exceed. */ +ssize_t aofWrite(int fd, const char *buf, size_t len, off_t aof_current_size, unsigned long long aof_max_size) { + ssize_t nwritten = 0, totwritten = 0, nonewritten = -1; + + if (aof_max_size && (unsigned long long)aof_current_size >= aof_max_size) { + errno = ENOSPC; + return nonewritten; + } while (len) { nwritten = write(fd, buf, len); if (nwritten < 0) { if (errno == EINTR) continue; - return totwritten ? totwritten : -1; + return totwritten ? totwritten : nonewritten; } len -= nwritten; @@ -1119,7 +1125,7 @@ void flushAppendOnlyFile(int force) { } latencyStartMonitor(latency); - nwritten = aofWrite(server.aof_fd, server.aof_buf, sdslen(server.aof_buf)); + nwritten = aofWrite(server.aof_fd, server.aof_buf, sdslen(server.aof_buf), server.aof_current_size, server.aof_max_size); latencyEndMonitor(latency); /* We want to capture different events for delayed writes: * when the delay happens with a pending fsync, or with a saving child diff --git a/src/config.c b/src/config.c index cc0f8d2dd8..bcfa465e1f 100644 --- a/src/config.c +++ b/src/config.c @@ -3337,6 +3337,7 @@ standardConfig static_configs[] = { /* Unsigned Long Long configs */ createULongLongConfig("maxmemory", NULL, MODIFIABLE_CONFIG, 0, ULLONG_MAX, server.maxmemory, 0, MEMORY_CONFIG, NULL, updateMaxmemory), createULongLongConfig("cluster-link-sendbuf-limit", NULL, MODIFIABLE_CONFIG, 0, ULLONG_MAX, server.cluster_link_msg_queue_limit_bytes, 0, MEMORY_CONFIG, NULL, NULL), + createULongLongConfig("aof-max-size", NULL, MODIFIABLE_CONFIG, 0, ULLONG_MAX, server.aof_max_size, 0, INTEGER_CONFIG, NULL, NULL), /* Size_t configs */ createSizeTConfig("hash-max-listpack-entries", "hash-max-ziplist-entries", MODIFIABLE_CONFIG, 0, LONG_MAX, server.hash_max_listpack_entries, 512, INTEGER_CONFIG, NULL, NULL), diff --git a/src/server.c b/src/server.c index 1e38b5ac69..518ecad603 100644 --- a/src/server.c +++ b/src/server.c @@ -5800,10 +5800,17 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) { "module_fork_last_cow_size:%zu\r\n", server.stat_module_cow_bytes)); if (server.aof_enabled) { + char aof_current_size_hdsk[64]; + char aof_max_size_hdsk[64]; + bytesToHuman(aof_current_size_hdsk, sizeof(aof_current_size_hdsk), (unsigned long long)server.aof_current_size); + bytesToHuman(aof_max_size_hdsk, sizeof(aof_max_size_hdsk), server.aof_max_size); info = sdscatprintf( info, FMTARGS( "aof_current_size:%lld\r\n", (long long)server.aof_current_size, + "aof_current_size_human:%s\r\n", aof_current_size_hdsk, + "aof_max_size:%lld\r\n", server.aof_max_size, + "aof_max_size_human:%s\r\n", aof_max_size_hdsk, "aof_base_size:%lld\r\n", (long long)server.aof_rewrite_base_size, "aof_pending_rewrite:%d\r\n", server.aof_rewrite_scheduled, "aof_buffer_length:%zu\r\n", sdslen(server.aof_buf), @@ -7130,6 +7137,14 @@ __attribute__((weak)) int main(int argc, char **argv) { server.maxmemory); } + /* Warning the user about suspicious aof-max-size setting. */ + if (server.aof_max_size > 0 && server.aof_max_size < 1024 * 1024) { + serverLog(LL_WARNING, + "WARNING: You specified a aof-max-size value that is less than 1MB (current value is %llu bytes). Are " + "you sure this is what you really want?", + server.aof_max_size); + } + serverSetCpuAffinity(server.server_cpulist); setOOMScoreAdj(-1); diff --git a/src/server.h b/src/server.h index 14a16593b0..3ba7a61b7d 100644 --- a/src/server.h +++ b/src/server.h @@ -1939,6 +1939,7 @@ struct valkeyServer { off_t aof_rewrite_min_size; /* the AOF file is at least N bytes. */ off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */ off_t aof_current_size; /* AOF current size (Including BASE + INCRs). */ + unsigned long long aof_max_size; /* Max number of disk bytes to use for AOF */ off_t aof_last_incr_size; /* The size of the latest incr AOF. */ off_t aof_last_incr_fsync_offset; /* AOF offset which is already requested to be synced to disk. * Compare with the aof_last_incr_size. */ diff --git a/tests/unit/aof-max-size.tcl b/tests/unit/aof-max-size.tcl new file mode 100644 index 0000000000..4c35220a77 --- /dev/null +++ b/tests/unit/aof-max-size.tcl @@ -0,0 +1,61 @@ +start_server {tags {"aof-max-size" "external:skip"}} { + r config set auto-aof-rewrite-percentage 0 ; # disable auto-rewrite + r config set appendonly yes ; # enable AOF + + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + test "Low aof-max-size stops writing AOF with ENOSPC" { + r set k v + r config set aof-max-size 1 + + r set k2 v2 + wait_for_log_messages 0 {"*Error writing to the AOF file: No space left on device*"} 0 100 10 + } + + test "New write attempts fail and doesn't insrease AOF buffer anymore" { + set info1 [r info] + set buf1 [getInfoProperty $info1 mem_aof_buffer] + set len1 [getInfoProperty $info1 aof_buffer_length] + + catch {r set somelongerkey somelongervalue} err + assert {$err eq "MISCONF Errors writing to the AOF file: No space left on device"} + assert_equal [r get somelongerkey] "" + + set info2 [r info] + set buf2 [getInfoProperty $info2 mem_aof_buffer] + set len2 [getInfoProperty $info2 aof_buffer_length] + assert_equal $buf1 $buf2 + assert_equal $len1 $len2 + } + + test "Increasing aof-max-size fixes AOF write error" { + r config set aof-max-size 1000 + wait_for_log_messages 0 {"*AOF write error looks solved. The server can write again.*"} 0 100 10 + + assert_equal [r set k3 v3] "OK" + assert_equal [r get k3] "v3" + } + + test "Meeting aof-max-size does not prevent AOF rewrite" { + set loglines [count_log_lines 0] ; # want to check new line, not from previous test + + # start write load + set load_handle0 [start_write_load $master_host $master_port 10] + wait_for_condition 50 100 { + [r dbsize] > 0 + } else { + fail "No write load detected." + } + + waitForBgrewriteaof r + r bgrewriteaof + wait_for_log_messages 0 {"*Background AOF rewrite finished successfully*"} $loglines 100 10 + wait_for_log_messages 0 {"*AOF write error looks solved. The server can write again.*"} $loglines 100 10 + + # stop write load + stop_write_load $load_handle0 + wait_load_handlers_disconnected + } +} \ No newline at end of file diff --git a/valkey.conf b/valkey.conf index e23aea39de..8ea5273045 100644 --- a/valkey.conf +++ b/valkey.conf @@ -1653,6 +1653,9 @@ aof-use-rdb-preamble yes # the AOF format in a way that may not be compatible with existing AOF parsers. aof-timestamp-enabled no +# Maximum size for AOF files on disk in bytes. Ignored, if set to 0. +aof-max-size 0 + ################################ SHUTDOWN ##################################### # Maximum time to wait for replicas when shutting down, in seconds. From b57409c7b44f31ce230af4f1020aaa46f84f4909 Mon Sep 17 00:00:00 2001 From: kronwerk Date: Wed, 11 Dec 2024 17:46:46 +0300 Subject: [PATCH 2/2] tuned tests Signed-off-by: kronwerk --- tests/unit/aof-max-size.tcl | 45 ++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/tests/unit/aof-max-size.tcl b/tests/unit/aof-max-size.tcl index 4c35220a77..e5526d819f 100644 --- a/tests/unit/aof-max-size.tcl +++ b/tests/unit/aof-max-size.tcl @@ -1,25 +1,34 @@ -start_server {tags {"aof-max-size" "external:skip"}} { +proc setup {{size 1}} { + r set k v + r config set aof-max-size $size + r set k2 v2 +} + +proc cleanup {} { + r config set aof-max-size 0 + r flushall +} + +start_server {tags {"external:skip"}} { r config set auto-aof-rewrite-percentage 0 ; # disable auto-rewrite r config set appendonly yes ; # enable AOF - set master [srv 0 client] set master_host [srv 0 host] set master_port [srv 0 port] test "Low aof-max-size stops writing AOF with ENOSPC" { - r set k v - r config set aof-max-size 1 - - r set k2 v2 + setup wait_for_log_messages 0 {"*Error writing to the AOF file: No space left on device*"} 0 100 10 + cleanup } - test "New write attempts fail and doesn't insrease AOF buffer anymore" { + test "New write attempts when limited with aof-max-size fail and doesn't insrease AOF buffer anymore" { + setup set info1 [r info] set buf1 [getInfoProperty $info1 mem_aof_buffer] set len1 [getInfoProperty $info1 aof_buffer_length] - catch {r set somelongerkey somelongervalue} err + catch {r set somelongerkey somelongrvalue} err assert {$err eq "MISCONF Errors writing to the AOF file: No space left on device"} assert_equal [r get somelongerkey] "" @@ -28,34 +37,28 @@ start_server {tags {"aof-max-size" "external:skip"}} { set len2 [getInfoProperty $info2 aof_buffer_length] assert_equal $buf1 $buf2 assert_equal $len1 $len2 + cleanup } test "Increasing aof-max-size fixes AOF write error" { + setup + set loglines [count_log_lines 0] ; # want to check new line, not from previous test r config set aof-max-size 1000 - wait_for_log_messages 0 {"*AOF write error looks solved. The server can write again.*"} 0 100 10 + wait_for_log_messages 0 {"*AOF write error looks solved. The server can write again.*"} $loglines 100 10 assert_equal [r set k3 v3] "OK" assert_equal [r get k3] "v3" + cleanup } test "Meeting aof-max-size does not prevent AOF rewrite" { + setup 200 set loglines [count_log_lines 0] ; # want to check new line, not from previous test - - # start write load - set load_handle0 [start_write_load $master_host $master_port 10] - wait_for_condition 50 100 { - [r dbsize] > 0 - } else { - fail "No write load detected." - } waitForBgrewriteaof r r bgrewriteaof wait_for_log_messages 0 {"*Background AOF rewrite finished successfully*"} $loglines 100 10 wait_for_log_messages 0 {"*AOF write error looks solved. The server can write again.*"} $loglines 100 10 - - # stop write load - stop_write_load $load_handle0 - wait_load_handlers_disconnected + cleanup } } \ No newline at end of file