diff --git a/src/aof.c b/src/aof.c index 0fd3cf5c26..8af3a9928f 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1010,16 +1010,22 @@ int startAppendOnly(void) { * the first call is short, there is a end-of-space condition, so the next * is likely to fail. However apparently in modern systems this is no longer * true, and in general it looks just more resilient to retry the write. If - * there is an actual error condition we'll get it at the next try. */ -ssize_t aofWrite(int fd, const char *buf, size_t len) { - ssize_t nwritten = 0, totwritten = 0; + * there is an actual error condition we'll get it at the next try. + * We also check for aof-max-size limit here returning "no space" on exceed. */ +ssize_t aofWrite(int fd, const char *buf, size_t len, off_t aof_current_size, unsigned long long aof_max_size) { + ssize_t nwritten = 0, totwritten = 0, nonewritten = -1; + + if (aof_max_size && (unsigned long long)aof_current_size >= aof_max_size) { + errno = ENOSPC; + return nonewritten; + } while (len) { nwritten = write(fd, buf, len); if (nwritten < 0) { if (errno == EINTR) continue; - return totwritten ? totwritten : -1; + return totwritten ? totwritten : nonewritten; } len -= nwritten; @@ -1119,7 +1125,7 @@ void flushAppendOnlyFile(int force) { } latencyStartMonitor(latency); - nwritten = aofWrite(server.aof_fd, server.aof_buf, sdslen(server.aof_buf)); + nwritten = aofWrite(server.aof_fd, server.aof_buf, sdslen(server.aof_buf), server.aof_current_size, server.aof_max_size); latencyEndMonitor(latency); /* We want to capture different events for delayed writes: * when the delay happens with a pending fsync, or with a saving child diff --git a/src/config.c b/src/config.c index cc0f8d2dd8..bcfa465e1f 100644 --- a/src/config.c +++ b/src/config.c @@ -3337,6 +3337,7 @@ standardConfig static_configs[] = { /* Unsigned Long Long configs */ createULongLongConfig("maxmemory", NULL, MODIFIABLE_CONFIG, 0, ULLONG_MAX, server.maxmemory, 0, MEMORY_CONFIG, NULL, updateMaxmemory), createULongLongConfig("cluster-link-sendbuf-limit", NULL, MODIFIABLE_CONFIG, 0, ULLONG_MAX, server.cluster_link_msg_queue_limit_bytes, 0, MEMORY_CONFIG, NULL, NULL), + createULongLongConfig("aof-max-size", NULL, MODIFIABLE_CONFIG, 0, ULLONG_MAX, server.aof_max_size, 0, INTEGER_CONFIG, NULL, NULL), /* Size_t configs */ createSizeTConfig("hash-max-listpack-entries", "hash-max-ziplist-entries", MODIFIABLE_CONFIG, 0, LONG_MAX, server.hash_max_listpack_entries, 512, INTEGER_CONFIG, NULL, NULL), diff --git a/src/server.c b/src/server.c index 1e38b5ac69..518ecad603 100644 --- a/src/server.c +++ b/src/server.c @@ -5800,10 +5800,17 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) { "module_fork_last_cow_size:%zu\r\n", server.stat_module_cow_bytes)); if (server.aof_enabled) { + char aof_current_size_hdsk[64]; + char aof_max_size_hdsk[64]; + bytesToHuman(aof_current_size_hdsk, sizeof(aof_current_size_hdsk), (unsigned long long)server.aof_current_size); + bytesToHuman(aof_max_size_hdsk, sizeof(aof_max_size_hdsk), server.aof_max_size); info = sdscatprintf( info, FMTARGS( "aof_current_size:%lld\r\n", (long long)server.aof_current_size, + "aof_current_size_human:%s\r\n", aof_current_size_hdsk, + "aof_max_size:%lld\r\n", server.aof_max_size, + "aof_max_size_human:%s\r\n", aof_max_size_hdsk, "aof_base_size:%lld\r\n", (long long)server.aof_rewrite_base_size, "aof_pending_rewrite:%d\r\n", server.aof_rewrite_scheduled, "aof_buffer_length:%zu\r\n", sdslen(server.aof_buf), @@ -7130,6 +7137,14 @@ __attribute__((weak)) int main(int argc, char **argv) { server.maxmemory); } + /* Warning the user about suspicious aof-max-size setting. */ + if (server.aof_max_size > 0 && server.aof_max_size < 1024 * 1024) { + serverLog(LL_WARNING, + "WARNING: You specified a aof-max-size value that is less than 1MB (current value is %llu bytes). Are " + "you sure this is what you really want?", + server.aof_max_size); + } + serverSetCpuAffinity(server.server_cpulist); setOOMScoreAdj(-1); diff --git a/src/server.h b/src/server.h index 14a16593b0..3ba7a61b7d 100644 --- a/src/server.h +++ b/src/server.h @@ -1939,6 +1939,7 @@ struct valkeyServer { off_t aof_rewrite_min_size; /* the AOF file is at least N bytes. */ off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */ off_t aof_current_size; /* AOF current size (Including BASE + INCRs). */ + unsigned long long aof_max_size; /* Max number of disk bytes to use for AOF */ off_t aof_last_incr_size; /* The size of the latest incr AOF. */ off_t aof_last_incr_fsync_offset; /* AOF offset which is already requested to be synced to disk. * Compare with the aof_last_incr_size. */ diff --git a/tests/unit/aof-max-size.tcl b/tests/unit/aof-max-size.tcl new file mode 100644 index 0000000000..e5526d819f --- /dev/null +++ b/tests/unit/aof-max-size.tcl @@ -0,0 +1,64 @@ +proc setup {{size 1}} { + r set k v + r config set aof-max-size $size + r set k2 v2 +} + +proc cleanup {} { + r config set aof-max-size 0 + r flushall +} + +start_server {tags {"external:skip"}} { + r config set auto-aof-rewrite-percentage 0 ; # disable auto-rewrite + r config set appendonly yes ; # enable AOF + + set master_host [srv 0 host] + set master_port [srv 0 port] + + test "Low aof-max-size stops writing AOF with ENOSPC" { + setup + wait_for_log_messages 0 {"*Error writing to the AOF file: No space left on device*"} 0 100 10 + cleanup + } + + test "New write attempts when limited with aof-max-size fail and doesn't insrease AOF buffer anymore" { + setup + set info1 [r info] + set buf1 [getInfoProperty $info1 mem_aof_buffer] + set len1 [getInfoProperty $info1 aof_buffer_length] + + catch {r set somelongerkey somelongrvalue} err + assert {$err eq "MISCONF Errors writing to the AOF file: No space left on device"} + assert_equal [r get somelongerkey] "" + + set info2 [r info] + set buf2 [getInfoProperty $info2 mem_aof_buffer] + set len2 [getInfoProperty $info2 aof_buffer_length] + assert_equal $buf1 $buf2 + assert_equal $len1 $len2 + cleanup + } + + test "Increasing aof-max-size fixes AOF write error" { + setup + set loglines [count_log_lines 0] ; # want to check new line, not from previous test + r config set aof-max-size 1000 + wait_for_log_messages 0 {"*AOF write error looks solved. The server can write again.*"} $loglines 100 10 + + assert_equal [r set k3 v3] "OK" + assert_equal [r get k3] "v3" + cleanup + } + + test "Meeting aof-max-size does not prevent AOF rewrite" { + setup 200 + set loglines [count_log_lines 0] ; # want to check new line, not from previous test + + waitForBgrewriteaof r + r bgrewriteaof + wait_for_log_messages 0 {"*Background AOF rewrite finished successfully*"} $loglines 100 10 + wait_for_log_messages 0 {"*AOF write error looks solved. The server can write again.*"} $loglines 100 10 + cleanup + } +} \ No newline at end of file diff --git a/valkey.conf b/valkey.conf index e23aea39de..8ea5273045 100644 --- a/valkey.conf +++ b/valkey.conf @@ -1653,6 +1653,9 @@ aof-use-rdb-preamble yes # the AOF format in a way that may not be compatible with existing AOF parsers. aof-timestamp-enabled no +# Maximum size for AOF files on disk in bytes. Ignored, if set to 0. +aof-max-size 0 + ################################ SHUTDOWN ##################################### # Maximum time to wait for replicas when shutting down, in seconds.