From 38ccba9952e6d11167ca3f5b0b233aecb49bcc54 Mon Sep 17 00:00:00 2001 From: Anway De Date: Mon, 29 Apr 2024 22:28:43 +0000 Subject: [PATCH] xdp: fix issues for drv mode --- src/app/fdctl/configure/xdp.c | 5 +- .../fdctl/run/tiles/generated/net_seccomp.h | 50 ++++++++----------- src/app/fdctl/run/tiles/net.seccomppolicy | 4 +- src/waltz/xdp/fd_xsk.c | 6 ++- 4 files changed, 29 insertions(+), 36 deletions(-) diff --git a/src/app/fdctl/configure/xdp.c b/src/app/fdctl/configure/xdp.c index a96ee2d85e..59e86c447f 100644 --- a/src/app/fdctl/configure/xdp.c +++ b/src/app/fdctl/configure/xdp.c @@ -69,11 +69,12 @@ init( config_t * const config ) { So for now we need to also bind to loopback. This is a small performance hit for other traffic, but we only redirect packets destined for our target IP and port so - it will not otherwise interfere. */ + it will not otherwise interfere. Loopback only supports + XDP in SKB mode. */ if( FD_LIKELY( strcmp( config->tiles.net.interface, "lo" ) ) ) { if( FD_UNLIKELY( fd_xdp_hook_iface( config->name, "lo", - mode, + XDP_FLAGS_SKB_MODE, fd_xdp_redirect_prog, fd_xdp_redirect_prog_sz ) ) ) FD_LOG_ERR(( "fd_xdp_hook_iface failed" )); diff --git a/src/app/fdctl/run/tiles/generated/net_seccomp.h b/src/app/fdctl/run/tiles/generated/net_seccomp.h index eaf63e1316..51d87134aa 100644 --- a/src/app/fdctl/run/tiles/generated/net_seccomp.h +++ b/src/app/fdctl/run/tiles/generated/net_seccomp.h @@ -21,14 +21,14 @@ #else # error "Target architecture is unsupported by seccomp." #endif -static const unsigned int sock_filter_policy_net_instr_cnt = 57; +static const unsigned int sock_filter_policy_net_instr_cnt = 53; static void populate_sock_filter_policy_net( ulong out_cnt, struct sock_filter * out, unsigned int logfile_fd, unsigned int xsk_fd, unsigned int lo_xsk_fd, unsigned int netlink_fd) { - FD_TEST( out_cnt >= 57 ); - struct sock_filter filter[57] = { + FD_TEST( out_cnt >= 53 ); + struct sock_filter filter[53] = { /* Check: Jump to RET_KILL_PROCESS if the script's arch != the runtime arch */ BPF_STMT( BPF_LD | BPF_W | BPF_ABS, ( offsetof( struct seccomp_data, arch ) ) ), - BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, ARCH_NR, 0, /* RET_KILL_PROCESS */ 53 ), + BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, ARCH_NR, 0, /* RET_KILL_PROCESS */ 49 ), /* loading syscall number in accumulator */ BPF_STMT( BPF_LD | BPF_W | BPF_ABS, ( offsetof( struct seccomp_data, nr ) ) ), /* allow write based on expression */ @@ -40,21 +40,21 @@ static void populate_sock_filter_policy_net( ulong out_cnt, struct sock_filter * /* allow recvmsg based on expression */ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, SYS_recvmsg, /* check_recvmsg */ 30, 0 ), /* allow recvfrom based on expression */ - BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, SYS_recvfrom, /* check_recvfrom */ 39, 0 ), + BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, SYS_recvfrom, /* check_recvfrom */ 35, 0 ), /* none of the syscalls matched */ - { BPF_JMP | BPF_JA, 0, 0, /* RET_KILL_PROCESS */ 46 }, + { BPF_JMP | BPF_JA, 0, 0, /* RET_KILL_PROCESS */ 42 }, // check_write: /* load syscall argument 0 in accumulator */ BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])), - BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 2, /* RET_ALLOW */ 45, /* lbl_1 */ 0 ), + BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 2, /* RET_ALLOW */ 41, /* lbl_1 */ 0 ), // lbl_1: /* load syscall argument 0 in accumulator */ BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])), - BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, logfile_fd, /* RET_ALLOW */ 43, /* RET_KILL_PROCESS */ 42 ), + BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, logfile_fd, /* RET_ALLOW */ 39, /* RET_KILL_PROCESS */ 38 ), // check_fsync: /* load syscall argument 0 in accumulator */ BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])), - BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, logfile_fd, /* RET_ALLOW */ 41, /* RET_KILL_PROCESS */ 40 ), + BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, logfile_fd, /* RET_ALLOW */ 37, /* RET_KILL_PROCESS */ 36 ), // check_sendto: /* load syscall argument 0 in accumulator */ BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])), @@ -82,23 +82,23 @@ static void populate_sock_filter_policy_net( ulong out_cnt, struct sock_filter * // lbl_8: /* load syscall argument 5 in accumulator */ BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[5])), - BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* RET_ALLOW */ 27, /* lbl_2 */ 0 ), + BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* RET_ALLOW */ 23, /* lbl_2 */ 0 ), // lbl_2: /* load syscall argument 0 in accumulator */ BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])), - BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, netlink_fd, /* lbl_9 */ 0, /* RET_KILL_PROCESS */ 24 ), + BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, netlink_fd, /* lbl_9 */ 0, /* RET_KILL_PROCESS */ 20 ), // lbl_9: /* load syscall argument 3 in accumulator */ BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[3])), - BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_10 */ 0, /* RET_KILL_PROCESS */ 22 ), + BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_10 */ 0, /* RET_KILL_PROCESS */ 18 ), // lbl_10: /* load syscall argument 4 in accumulator */ BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[4])), - BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_11 */ 0, /* RET_KILL_PROCESS */ 20 ), + BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_11 */ 0, /* RET_KILL_PROCESS */ 16 ), // lbl_11: /* load syscall argument 5 in accumulator */ BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[5])), - BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* RET_ALLOW */ 19, /* RET_KILL_PROCESS */ 18 ), + BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* RET_ALLOW */ 15, /* RET_KILL_PROCESS */ 14 ), // check_recvmsg: /* load syscall argument 0 in accumulator */ BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])), @@ -106,32 +106,24 @@ static void populate_sock_filter_policy_net( ulong out_cnt, struct sock_filter * // lbl_13: /* load syscall argument 0 in accumulator */ BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])), - BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, lo_xsk_fd, /* lbl_12 */ 0, /* RET_KILL_PROCESS */ 14 ), + BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, lo_xsk_fd, /* lbl_12 */ 0, /* RET_KILL_PROCESS */ 10 ), // lbl_12: - /* load syscall argument 1 in accumulator */ - BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[1])), - BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_14 */ 0, /* RET_KILL_PROCESS */ 12 ), -// lbl_14: /* load syscall argument 2 in accumulator */ BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[2])), - BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_15 */ 0, /* RET_KILL_PROCESS */ 10 ), -// lbl_15: - /* load syscall argument 3 in accumulator */ - BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[3])), BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, MSG_DONTWAIT, /* RET_ALLOW */ 9, /* RET_KILL_PROCESS */ 8 ), // check_recvfrom: /* load syscall argument 0 in accumulator */ BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])), - BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, netlink_fd, /* lbl_16 */ 0, /* RET_KILL_PROCESS */ 6 ), -// lbl_16: + BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, netlink_fd, /* lbl_14 */ 0, /* RET_KILL_PROCESS */ 6 ), +// lbl_14: /* load syscall argument 3 in accumulator */ BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[3])), - BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_17 */ 0, /* RET_KILL_PROCESS */ 4 ), -// lbl_17: + BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_15 */ 0, /* RET_KILL_PROCESS */ 4 ), +// lbl_15: /* load syscall argument 4 in accumulator */ BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[4])), - BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_18 */ 0, /* RET_KILL_PROCESS */ 2 ), -// lbl_18: + BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_16 */ 0, /* RET_KILL_PROCESS */ 2 ), +// lbl_16: /* load syscall argument 5 in accumulator */ BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[5])), BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* RET_ALLOW */ 1, /* RET_KILL_PROCESS */ 0 ), diff --git a/src/app/fdctl/run/tiles/net.seccomppolicy b/src/app/fdctl/run/tiles/net.seccomppolicy index b2970dd5a9..cfd5f0c8ad 100644 --- a/src/app/fdctl/run/tiles/net.seccomppolicy +++ b/src/app/fdctl/run/tiles/net.seccomppolicy @@ -77,9 +77,7 @@ sendto: (or (and (or (eq (arg 0) xsk_fd) # packets on a network device or the loopback device. recvmsg: (and (or (eq (arg 0) xsk_fd) (eq (arg 0) lo_xsk_fd)) - (eq (arg 1) 0) - (eq (arg 2) 0) - (eq (arg 3) MSG_DONTWAIT)) + (eq (arg 2) MSG_DONTWAIT)) # netlink: receive netlink messages from kernel for ARP tables # diff --git a/src/waltz/xdp/fd_xsk.c b/src/waltz/xdp/fd_xsk.c index 77c9d2f601..39e1779253 100644 --- a/src/waltz/xdp/fd_xsk.c +++ b/src/waltz/xdp/fd_xsk.c @@ -698,7 +698,8 @@ fd_xsk_rx_enqueue( fd_xsk_t * xsk, syscalls, and the performance would be even better. Sadly, this is not possible. */ if( FD_UNLIKELY( fd_xsk_rx_need_wakeup( xsk ) ) ) { - if( FD_UNLIKELY( -1==recvmsg( xsk->xsk_fd, NULL, MSG_DONTWAIT ) ) ) { + struct msghdr _ignored[ 1 ] = { 0 }; + if( FD_UNLIKELY( -1==recvmsg( xsk->xsk_fd, _ignored, MSG_DONTWAIT ) ) ) { if( FD_UNLIKELY( errno!=EAGAIN ) ) { FD_LOG_WARNING(( "xsk recvmsg failed xsk_fd=%d (%i-%s)", xsk->xsk_fd, errno, fd_io_strerror( errno ) )); } @@ -755,7 +756,8 @@ fd_xsk_rx_enqueue2( fd_xsk_t * xsk, /* See the corresponding comments in fd_xsk_rx_enqueue */ if( FD_UNLIKELY( fd_xsk_rx_need_wakeup( xsk ) ) ) { - if( FD_UNLIKELY( -1==recvmsg( xsk->xsk_fd, NULL, MSG_DONTWAIT ) ) ) { + struct msghdr _ignored[ 1 ] = { 0 }; + if( FD_UNLIKELY( -1==recvmsg( xsk->xsk_fd, _ignored, MSG_DONTWAIT ) ) ) { if( FD_UNLIKELY( errno!=EAGAIN ) ) { FD_LOG_WARNING(( "xsk recvmsg failed xsk_fd=%d (%i-%s)", xsk->xsk_fd, errno, fd_io_strerror( errno ) )); }