Skip to content

Commit

Permalink
added aof-max-size parameter with tests; fixes valkey-io#540
Browse files Browse the repository at this point in the history
Signed-off-by: kronwerk <[email protected]>

improved aof-max-size tests

Signed-off-by: kronwerk <[email protected]>
  • Loading branch information
kronwerk committed Dec 11, 2024
1 parent 1acf7f7 commit 91cf42d
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 5 deletions.
16 changes: 11 additions & 5 deletions src/aof.c
Original file line number Diff line number Diff line change
Expand Up @@ -1010,16 +1010,22 @@ int startAppendOnly(void) {
* the first call is short, there is a end-of-space condition, so the next
* is likely to fail. However apparently in modern systems this is no longer
* true, and in general it looks just more resilient to retry the write. If
* there is an actual error condition we'll get it at the next try. */
ssize_t aofWrite(int fd, const char *buf, size_t len) {
ssize_t nwritten = 0, totwritten = 0;
* there is an actual error condition we'll get it at the next try.
* We also check for aof-max-size limit here returning "no space" on exceed. */
ssize_t aofWrite(int fd, const char *buf, size_t len, off_t aof_current_size, unsigned long long aof_max_size) {
ssize_t nwritten = 0, totwritten = 0, nonewritten = -1;

if (aof_max_size && (unsigned long long)aof_current_size >= aof_max_size) {
errno = ENOSPC;
return nonewritten;
}

while (len) {
nwritten = write(fd, buf, len);

if (nwritten < 0) {
if (errno == EINTR) continue;
return totwritten ? totwritten : -1;
return totwritten ? totwritten : nonewritten;
}

len -= nwritten;
Expand Down Expand Up @@ -1119,7 +1125,7 @@ void flushAppendOnlyFile(int force) {
}

latencyStartMonitor(latency);
nwritten = aofWrite(server.aof_fd, server.aof_buf, sdslen(server.aof_buf));
nwritten = aofWrite(server.aof_fd, server.aof_buf, sdslen(server.aof_buf), server.aof_current_size, server.aof_max_size);
latencyEndMonitor(latency);
/* We want to capture different events for delayed writes:
* when the delay happens with a pending fsync, or with a saving child
Expand Down
1 change: 1 addition & 0 deletions src/config.c
Original file line number Diff line number Diff line change
Expand Up @@ -3337,6 +3337,7 @@ standardConfig static_configs[] = {
/* Unsigned Long Long configs */
createULongLongConfig("maxmemory", NULL, MODIFIABLE_CONFIG, 0, ULLONG_MAX, server.maxmemory, 0, MEMORY_CONFIG, NULL, updateMaxmemory),
createULongLongConfig("cluster-link-sendbuf-limit", NULL, MODIFIABLE_CONFIG, 0, ULLONG_MAX, server.cluster_link_msg_queue_limit_bytes, 0, MEMORY_CONFIG, NULL, NULL),
createULongLongConfig("aof-max-size", NULL, MODIFIABLE_CONFIG, 0, ULLONG_MAX, server.aof_max_size, 0, INTEGER_CONFIG, NULL, NULL),

/* Size_t configs */
createSizeTConfig("hash-max-listpack-entries", "hash-max-ziplist-entries", MODIFIABLE_CONFIG, 0, LONG_MAX, server.hash_max_listpack_entries, 512, INTEGER_CONFIG, NULL, NULL),
Expand Down
15 changes: 15 additions & 0 deletions src/server.c
Original file line number Diff line number Diff line change
Expand Up @@ -5800,10 +5800,17 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) {
"module_fork_last_cow_size:%zu\r\n", server.stat_module_cow_bytes));

if (server.aof_enabled) {
char aof_current_size_hdsk[64];
char aof_max_size_hdsk[64];
bytesToHuman(aof_current_size_hdsk, sizeof(aof_current_size_hdsk), (unsigned long long)server.aof_current_size);
bytesToHuman(aof_max_size_hdsk, sizeof(aof_max_size_hdsk), server.aof_max_size);
info = sdscatprintf(
info,
FMTARGS(
"aof_current_size:%lld\r\n", (long long)server.aof_current_size,
"aof_current_size_human:%s\r\n", aof_current_size_hdsk,
"aof_max_size:%lld\r\n", server.aof_max_size,
"aof_max_size_human:%s\r\n", aof_max_size_hdsk,
"aof_base_size:%lld\r\n", (long long)server.aof_rewrite_base_size,
"aof_pending_rewrite:%d\r\n", server.aof_rewrite_scheduled,
"aof_buffer_length:%zu\r\n", sdslen(server.aof_buf),
Expand Down Expand Up @@ -7130,6 +7137,14 @@ __attribute__((weak)) int main(int argc, char **argv) {
server.maxmemory);
}

/* Warning the user about suspicious aof-max-size setting. */
if (server.aof_max_size > 0 && server.aof_max_size < 1024 * 1024) {
serverLog(LL_WARNING,
"WARNING: You specified a aof-max-size value that is less than 1MB (current value is %llu bytes). Are "
"you sure this is what you really want?",
server.aof_max_size);
}

serverSetCpuAffinity(server.server_cpulist);
setOOMScoreAdj(-1);

Expand Down
1 change: 1 addition & 0 deletions src/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -1939,6 +1939,7 @@ struct valkeyServer {
off_t aof_rewrite_min_size; /* the AOF file is at least N bytes. */
off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */
off_t aof_current_size; /* AOF current size (Including BASE + INCRs). */
unsigned long long aof_max_size; /* Max number of disk bytes to use for AOF */
off_t aof_last_incr_size; /* The size of the latest incr AOF. */
off_t aof_last_incr_fsync_offset; /* AOF offset which is already requested to be synced to disk.
* Compare with the aof_last_incr_size. */
Expand Down
61 changes: 61 additions & 0 deletions tests/unit/aof-max-size.tcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
start_server {tags {"aof-max-size" "external:skip"}} {
r config set auto-aof-rewrite-percentage 0 ; # disable auto-rewrite
r config set appendonly yes ; # enable AOF

set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]

test "Low aof-max-size stops writing AOF with ENOSPC" {
r set k v
r config set aof-max-size 1

r set k2 v2
wait_for_log_messages 0 {"*Error writing to the AOF file: No space left on device*"} 0 100 10
}

test "New write attempts fail and doesn't insrease AOF buffer anymore" {
set info1 [r info]
set buf1 [getInfoProperty $info1 mem_aof_buffer]
set len1 [getInfoProperty $info1 aof_buffer_length]

catch {r set somelongerkey somelongervalue} err
assert {$err eq "MISCONF Errors writing to the AOF file: No space left on device"}
assert_equal [r get somelongerkey] ""

set info2 [r info]
set buf2 [getInfoProperty $info2 mem_aof_buffer]
set len2 [getInfoProperty $info2 aof_buffer_length]
assert_equal $buf1 $buf2
assert_equal $len1 $len2
}

test "Increasing aof-max-size fixes AOF write error" {
r config set aof-max-size 1000
wait_for_log_messages 0 {"*AOF write error looks solved. The server can write again.*"} 0 100 10

assert_equal [r set k3 v3] "OK"
assert_equal [r get k3] "v3"
}

test "Meeting aof-max-size does not prevent AOF rewrite" {
set loglines [count_log_lines 0] ; # want to check new line, not from previous test

# start write load
set load_handle0 [start_write_load $master_host $master_port 10]
wait_for_condition 50 100 {
[r dbsize] > 0
} else {
fail "No write load detected."
}

waitForBgrewriteaof r
r bgrewriteaof
wait_for_log_messages 0 {"*Background AOF rewrite finished successfully*"} $loglines 100 10
wait_for_log_messages 0 {"*AOF write error looks solved. The server can write again.*"} $loglines 100 10

# stop write load
stop_write_load $load_handle0
wait_load_handlers_disconnected
}
}
3 changes: 3 additions & 0 deletions valkey.conf
Original file line number Diff line number Diff line change
Expand Up @@ -1653,6 +1653,9 @@ aof-use-rdb-preamble yes
# the AOF format in a way that may not be compatible with existing AOF parsers.
aof-timestamp-enabled no

# Maximum size for AOF files on disk in bytes. Ignored, if set to 0.
aof-max-size 0

################################ SHUTDOWN #####################################

# Maximum time to wait for replicas when shutting down, in seconds.
Expand Down

0 comments on commit 91cf42d

Please sign in to comment.