From 8c78afc12b565e4cb5fa4edb9a209ffce6ea8b51 Mon Sep 17 00:00:00 2001 From: InterLinked1 <24227567+InterLinked1@users.noreply.github.com> Date: Fri, 27 Dec 2024 14:25:33 -0500 Subject: [PATCH] logger.c: Improve robustness of remote console logging. * Eliminate the fixed-size array for keeping track of whether logging is enabled for remote consoles and store this in a bit directly on the struct that already exists for each console. It made sense to do this, given the struct already exist, and this eliminates the 1024 fd constraint. * Add mutex for remote consoles so they can be locked when writing log messages, for atomicity. This also reduces (eliminates?) resource contention; previously, if a lot of logging was in progress, certain writes would fail due to the console's fd being busy (with nonblocking writes). This virtually doesn't happen anymore. * If remote console writes fail completely or partially, retry them (to an extent), since waiting a fraction of a ms and retrying can make it work. In practice, because each console now has a mutex, these cases shouldn't really happen anymore either (I tested both with and without mutexes to compare). * Make localdebug command work for the foreground console. Previously, it only worked for remote consoles. * mod_sysop: Disable logging immediately when a remote console disconnects, to avoid log failure messages on foreground console. --- bbs/logger.c | 133 +++++++++++++++++++++++++++++++++++--------- bbs/socket.c | 3 +- modules/mod_sysop.c | 1 + 3 files changed, 110 insertions(+), 27 deletions(-) diff --git a/bbs/logger.c b/bbs/logger.c index d0e2b0a..28a3fe7 100644 --- a/bbs/logger.c +++ b/bbs/logger.c @@ -37,6 +37,9 @@ #include "include/linkedlists.h" #include "include/cli.h" +/* Log messages to STDERR when we fail to write log messages to remote consoles */ +#define DEBUG_LOGGING_FAILURES + /* bbs.c */ extern int option_debug; extern int option_verbose; @@ -173,25 +176,31 @@ int bbs_set_stdout_logging(int enabled) struct remote_log_fd { int fd; /* File descriptor for this console */ int maxdebug; /* Max debug level for this console */ + unsigned int skipped:1; + unsigned int enabled:1; + bbs_mutex_t lock; RWLIST_ENTRY(remote_log_fd) entry; /* Next entry */ }; static RWLIST_HEAD_STATIC(remote_log_fds, remote_log_fd); -/*! \note Assumes all remote consoles are going to use a fd within the first 1024 */ -static int fd_logging[1024]; /* Array for constant time access instead of a linked list. Even though we traverse the list for writing, to set logging on/off, we don't need to. */ +/* Local debug setting for foreground console, since it doesn't have a struct remote_log_fd */ +static int fg_maxdebug = MAX_DEBUG; + static int active_remote_consoles = 0; int bbs_set_fd_logging(int fd, int enabled) { - if (!ARRAY_IN_BOUNDS(fd, fd_logging)) { - bbs_error("Cannot set logging for fd %d: out of bounds\n", fd); - return -1; - } + struct remote_log_fd *rfd; RWLIST_RDLOCK(&remote_log_fds); /* We're not modifying the list itself. */ - fd_logging[fd] = enabled; + RWLIST_TRAVERSE(&remote_log_fds, rfd, entry) { + if (rfd->fd == fd) { + SET_BITFIELD(rfd->enabled, enabled); + break; + } + } RWLIST_UNLOCK(&remote_log_fds); - return 0; + return rfd ? 0 : -1; } static int set_fd_logging_max_debug(int fd, int maxdebug) @@ -218,9 +227,15 @@ static int cli_localdebug(struct bbs_cli_args *a) if (newmax < 0 || newmax > MAX_DEBUG) { return -1; } - oldmax = set_fd_logging_max_debug(a->fdout, newmax); - if (oldmax == -1) { - return -1; + + if (a->fdout == STDOUT_FILENO) { + oldmax = fg_maxdebug; + fg_maxdebug = newmax; + } else { + oldmax = set_fd_logging_max_debug(a->fdout, newmax); + if (oldmax == -1) { + return -1; + } } bbs_debug(1, "Max local debug level for fd %d changed from %d to %d\n", a->fdout, oldmax, newmax); return 0; @@ -230,11 +245,6 @@ int bbs_add_logging_fd(int fd) { struct remote_log_fd *rfd; - if (fd >= (int) ARRAY_LEN(fd_logging)) { - bbs_error("Cannot register file descriptors greater than %lu\n", ARRAY_LEN(fd_logging)); - return -1; - } - RWLIST_WRLOCK(&remote_log_fds); RWLIST_TRAVERSE(&remote_log_fds, rfd, entry) { if (rfd->fd == fd) { @@ -252,9 +262,10 @@ int bbs_add_logging_fd(int fd) return -1; } rfd->fd = fd; + rfd->enabled = 1; /* Initialize to enabled */ rfd->maxdebug = MAX_DEBUG; /* By default, there is no local restriction on the max debug level */ + bbs_mutex_init(&rfd->lock, NULL); RWLIST_INSERT_HEAD(&remote_log_fds, rfd, entry); - fd_logging[fd] = 1; /* Initialize to enabled */ active_remote_consoles++; RWLIST_UNLOCK(&remote_log_fds); bbs_debug(5, "Registered file descriptor %d for logging\n", fd); @@ -272,6 +283,7 @@ int bbs_remove_logging_fd(int fd) if (unlikely(!rfd)) { bbs_error("File descriptor %d did not have logging\n", fd); } else { + bbs_mutex_destroy(&rfd->lock); free(rfd); bbs_debug(5, "Unregistered file descriptor %d from logging\n", fd); } @@ -379,6 +391,8 @@ void __attribute__ ((format (gnu_printf, 2, 3))) bbs_dprintf(int fd, const char /* from bbs.c */ extern int shutdown_finished; +static unsigned short int logid_counter; /* Can roll over, only used for local diagnostics */ + void __attribute__ ((format (gnu_printf, 6, 7))) __bbs_log(enum bbs_log_level loglevel, int level, const char *file, int lineno, const char *func, const char *fmt, ...) { int len; @@ -397,6 +411,7 @@ void __attribute__ ((format (gnu_printf, 6, 7))) __bbs_log(enum bbs_log_level lo int log_stdout, log_remote; int need_reset = 0; int skip_logfile = 0; + short unsigned int logid; switch (loglevel) { case LOG_DEBUG: @@ -455,6 +470,7 @@ void __attribute__ ((format (gnu_printf, 6, 7))) __bbs_log(enum bbs_log_level lo strftime(datestr, sizeof(datestr), "%Y-%m-%d %T", &logdate); thread_id = bbs_gettid(); + logid = bbs_atomic_fetchadd_int(&logid_counter, +1); va_start(ap, fmt); len = vsnprintf(logminibuf, sizeof(logminibuf), fmt, ap); @@ -493,6 +509,7 @@ void __attribute__ ((format (gnu_printf, 6, 7))) __bbs_log(enum bbs_log_level lo /* If the foreground or any remote consoles are active, prepare a log message for any connected consoles. */ if (log_stdout || log_remote) { + int delays = 0; struct remote_log_fd *rfd; if (loglevel == LOG_VERBOSE && verbose_special_formatting) { const char *verbprefix = verbose_prefix(level); @@ -525,28 +542,92 @@ void __attribute__ ((format (gnu_printf, 6, 7))) __bbs_log(enum bbs_log_level lo } /* Log to foreground console, if there is one */ - if (log_stdout) { + if (log_stdout && (loglevel != LOG_DEBUG || level <= fg_maxdebug)) { term_puts(fullbuf); } /* Log to remote consoles */ - RWLIST_RDLOCK(&remote_log_fds); + RWLIST_RDLOCK(&remote_log_fds); /* Each console has its own lock, so a read lock is perfectly fine and allows more logging throughput */ RWLIST_TRAVERSE(&remote_log_fds, rfd, entry) { + char *bufstart = fullbuf; + size_t bytesleft = (size_t) bytes; + ssize_t wres; + if (loglevel == LOG_DEBUG && level > rfd->maxdebug) { /* Skip consoles that don't want debug messages of this debug level */ continue; + } else if (!rfd->enabled) { + continue; /* Logging disabled for this console */ } + + /* Although acquiring a lock for every log message to every remote console is not ideal, + * it is necessary because otherwise, concurrent writes could result + * in messages being split up when logged, and we need to ensure messages are logged atomically. */ + bbs_mutex_lock(&rfd->lock); + /* Prevent libc_write from blocking if there's a ton of logging going on. */ bbs_unblock_fd(rfd->fd); - if (fd_logging[rfd->fd]) { - ssize_t wres = write(rfd->fd, fullbuf, (size_t) bytes); - if (wres != (ssize_t) bytes) { - /* Well, we can't log a message if we failed to log a message. - * That would probably not work out well. */ - fprintf(stderr, "Failed to log %d-byte message (wrote %ld): %s\n", bytes, wres, strerror(errno)); + + /* Can't use bbs_timed_write, since that can log additional messages. */ + wres = write(rfd->fd, bufstart, bytesleft); + /* Failed or partial write. Retry if we have time. + * Introduce no more than a 10 ms delay max, even that might be too much. The 10 ms is for all consoles, not just this one. + * In practice, since we have a mutex for each remote console, this is rarely needed. */ + while (((wres == -1 && errno == EAGAIN) || (wres > 0 && (size_t) wres < bytesleft)) && delays < 10) { + /* Since we don't have a log queue, it is possible that multiple threads + * could try logging to the same terminals concurrently, + * after all, this traversal only uses a RDLOCK. + * If this happens, try again after a small instant. That will usually succeed. */ + struct pollfd pfd; + memset(&pfd, 0, sizeof(pfd)); + pfd.fd = rfd->fd; + pfd.events = POLLOUT; + /* Do not wait more than 1 millisecond. Even that is already a lot, + * since this could add a delay of up to 1 ms per remote console, + * and this log operation is blocking a thread that is doing actual work! + * select() would give us more granularity, but it's unsafe. */ + delays++; + if (wres > 0) { + bufstart += wres; + bytesleft -= (size_t) wres; +#ifdef DEBUG_LOGGING_FAILURES + fprintf(stderr, "%5u [%d <%d>] Retrying partial write\n", logid, thread_id, rfd->fd); + } else { + fprintf(stderr, "%5u [%d <%d>] Retrying failed write\n", logid, thread_id, rfd->fd); +#endif + } + /* If we can make progress now, try again. Otherwise, give up immediately. + * (And if poll returns 1, we probably can make progress, since we are still holding the console lock, + * nobody else can write to it in the meantime.) */ + if (poll(&pfd, 1, 1) > 0) { + wres = write(rfd->fd, bufstart, bytesleft); + } else { + /* Release console lock ASAP */ + bbs_block_fd(rfd->fd); + bbs_mutex_unlock(&rfd->lock); + fprintf(stderr, "%5u [%d <%d>] Failed to log %lu bytes: Resource temporarily unavailable\n", logid, thread_id, rfd->fd, bytesleft); + goto nextiteration; /* Can't continue since we're in a double loop. We want to continue the outer loop. */ } +#ifdef DEBUG_LOGGING_FAILURES + if (wres == (ssize_t) bytesleft) { + fprintf(stderr, "%5u [%d <%d>] Succeeded after retry\n", logid, thread_id, rfd->fd); + } +#endif + } + if (wres != (ssize_t) bytesleft) { + int saved_errno = errno; + /* Release console lock ASAP */ + bbs_block_fd(rfd->fd); + bbs_mutex_unlock(&rfd->lock); + /* Well, we can't log a message if we failed to log a message. + * That would probably not work out well. */ + fprintf(stderr, "%5u [%d <%d>] Failed to log %lu bytes (wrote %ld): %s\n", logid, thread_id, rfd->fd, bytesleft, wres, strerror(saved_errno)); + } else { + bbs_block_fd(rfd->fd); + bbs_mutex_unlock(&rfd->lock); } - bbs_block_fd(rfd->fd); +nextiteration: + ; } RWLIST_UNLOCK(&remote_log_fds); if (fulldynamic) { diff --git a/bbs/socket.c b/bbs/socket.c index f170d05..8d859d5 100644 --- a/bbs/socket.c +++ b/bbs/socket.c @@ -327,7 +327,8 @@ int bbs_node_cork(struct bbs_node *node, int enabled) bbs_error("setsockopt failed: %s\n", strerror(errno)); return -1; } else { - bbs_debug(5, "%s corking on node %d\n", enabled ? "Enabled" : "Disabled", node->id); + /* net_imap fiddles corking around a lot, so high debug level to reduce noisiness */ + bbs_debug(10, "%s corking on node %d\n", enabled ? "Enabled" : "Disabled", node->id); } return 0; } diff --git a/modules/mod_sysop.c b/modules/mod_sysop.c index 2b72ac8..8f64a29 100644 --- a/modules/mod_sysop.c +++ b/modules/mod_sysop.c @@ -329,6 +329,7 @@ static void *sysop_handler(void *varg) } else if (pfds[0].revents & POLLIN) { ssize_t bytes_read = read(sysopfdin, buf, sizeof(buf)); if (bytes_read <= 0) { + my_set_stdout_logging(sysopfdout, 0); /* The console has disconnected, so don't attempt to log to it anymore, it will fail. */ bbs_debug(5, "read returned %ld\n", bytes_read); break; }