Hello, my use case for F-Stack is optimizing TCP packet read speed.
I've set up a simple test with a Python server that sends out timestamp packets:
#!/usr/bin/python
# A simple TCP server that streams nanosecond timestamps to each client.
import socket
import time

sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# Socket options must be set before bind() to take effect.
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
sock.setsockopt(socket.SOL_SOCKET, socket.SO_BINDTODEVICE, b"ens7")
sock.bind(('0.0.0.0', 12373))
sock.listen(5)

while True:
    connection, address = sock.accept()
    buf = connection.recv(1024)
    print(buf)
    start = time.time()
    # Each send() carries the current time_ns() (19 digits) repeated 200 times.
    for i in range(int(1e6)):
        connection.send((str(time.time_ns()) * 200).encode('utf-8'))
    connection.close()
Then I set up a simple receiver using native Linux sockets and F-Stack, to compare how long it takes to process each of the 5M messages.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <assert.h>
#include <signal.h>
#include <pthread.h>
#include <time.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <arpa/inet.h>
#include <netdb.h>
#define SOCK_FSTACK 0x01000000
#define SOCK_KERNEL 0x02000000
#define MAX_EVENTS 10
#define BUFFER_SIZE 4096
// Parse the trailing 19-digit time_ns() timestamp from the payload and return
// its offset from the local CLOCK_REALTIME clock, in nanoseconds.
int64_t get_time_difference(const char *buffer, size_t bytes_read) {
if (bytes_read < 20) {
// Too small to contain a full timestamp.
return -1;
}
int64_t d = strtoll(buffer + (bytes_read - 20), NULL, 10);
struct timespec current_time;
clock_gettime(CLOCK_REALTIME, &current_time);
int64_t delta = current_time.tv_sec * 1000000000LL + current_time.tv_nsec - d;
return delta;
}
int set_nonblocking(int sockfd) {
int opt = 1;
if (ioctl(sockfd, FIONBIO, &opt)) {
perror("ioctl FIONBIO");
return -1;
}
return 0;
}
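// Connect to the Python server, send one request, then drain the socket with
// edge-triggered epoll until the server closes the connection, tracking total
// bytes, number of reads, and the first/last observed timestamp latency.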
int do_req() {
const char *hostname = "10.0.3.122";
const char *port = "12373";
const char *path = "/";
// printf("socket\n");
// Create a socket
int sockfd = socket(AF_INET, SOCK_STREAM, 0);
if (sockfd < 0) {
perror("socket");
exit(EXIT_FAILURE);
}
// Resolve hostname
struct addrinfo hints, *res;
memset(&hints, 0, sizeof(hints));
hints.ai_family = AF_INET;
hints.ai_socktype = SOCK_STREAM;
if (getaddrinfo(hostname, port, &hints, &res) != 0) {
perror("getaddrinfo");
close(sockfd);
exit(EXIT_FAILURE);
}
// Set the socket to non-blocking mode
if (set_nonblocking(sockfd) == -1) {
close(sockfd);
freeaddrinfo(res);
exit(EXIT_FAILURE);
}
// Start connecting
int connect_status = connect(sockfd, res->ai_addr, res->ai_addrlen);
if (connect_status == -1 && errno != EINPROGRESS) {
perror("connect");
close(sockfd);
freeaddrinfo(res);
exit(EXIT_FAILURE);
}
// Set up epoll
int epoll_fd = epoll_create1(0);
if (epoll_fd == -1) {
perror("epoll_create1");
close(sockfd);
freeaddrinfo(res);
exit(EXIT_FAILURE);
}
struct epoll_event ev, events[MAX_EVENTS];
ev.events = EPOLLOUT | EPOLLIN | EPOLLET; // Wait for the socket to be writable (connect completion) and readable
ev.data.fd = sockfd;
if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, sockfd, &ev) == -1) {
perror("epoll_ctl");
close(sockfd);
close(epoll_fd);
freeaddrinfo(res);
exit(EXIT_FAILURE);
}
// HTTP GET request to send
char request[512];
snprintf(request, sizeof(request), "GET %s HTTP/1.1\r\nHost: %s\r\nConnection: keep-alive\r\n\r\n", path, hostname);
int64_t br = 0;
int64_t buffer_reads = 0;
int last_latency = 0;
int start_lat = 0;
int done = 0;
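// Event loop: the first EPOLLOUT sends the request; after that we only wait
// for EPOLLIN and read until recv() returns 0 (server closed) or a real error.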
while (!done) {
int n = epoll_wait(epoll_fd, events, MAX_EVENTS, -1);
for (int i = 0; i < n; i++) {
if (events[i].events & EPOLLOUT) {
// Ready to write (send request)
int sent = send(sockfd, request, strlen(request), 0);
if (sent == -1) {
perror("send");
done = 1;
break;
}
// printf("Sent HTTP request:\n%s", request);
ev.events = EPOLLIN | EPOLLET; // Now, wait for the response
epoll_ctl(epoll_fd, EPOLL_CTL_MOD, sockfd, &ev);
} else if (events[i].events & EPOLLIN) {
// Ready to read (response received)
char buffer[BUFFER_SIZE];
int bytes_read;
while ((bytes_read = recv(sockfd, buffer, sizeof(buffer) - 1, 0)) > 0) {
br += bytes_read;
++buffer_reads;
bytes_read += 1;
buffer[bytes_read - 1] = '\0';
last_latency = get_time_difference(buffer, bytes_read);
if (start_lat < 10 || start_lat > 10000000)
start_lat = last_latency;
}
if (bytes_read == 0) {
// Connection closed by the server
done = 1;
} else if (bytes_read == -1 && errno != EAGAIN) {
perror("recv");
done = 1;
}
}
}
}
float avg_bytes = (float)br / (float)buffer_reads;
printf("avg bytes: %.6f\n", avg_bytes);
printf("buffer_reads: %ld\n", buffer_reads);
printf("bytes_read: %ld\n", br);
printf("last_latency: %d\n", last_latency);
printf("first_latency: %d\n", start_lat);
printf("lat_diff: %d\n", last_latency - start_lat);
// Clean up
close(sockfd);
close(epoll_fd);
freeaddrinfo(res);
return 0;
}
int main() {
for (int i = 0; i < 1; ++i)
do_req();
return 0;
}
Then I wrote something similar using F-Stack:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <assert.h>
#include <signal.h>
#include <pthread.h>
#include <time.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <arpa/inet.h>
#include <netdb.h>
#include "ff_config.h"
#include "ff_api.h"
#include "ff_epoll.h"
#define SOCK_FSTACK 0x01000000
#define SOCK_KERNEL 0x02000000
#define MAX_EVENTS 10
#define BUFFER_SIZE 4096
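// State shared with the loop() callback below: ff_run() drives loop() once per
// F-Stack iteration, so the socket, epoll fd, counters and request buffer must
// live at file scope.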
struct epoll_event ev;
struct epoll_event events[MAX_EVENTS];
int epfd;
int sockfd;
// HTTP GET request to send
char request[512];
int64_t br = 0;
int64_t buffer_reads = 0;
int last_latency = 0;
int start_lat = 0;
// Same timestamp-parsing helper as in the kernel-socket version above.
int64_t get_time_difference(const char *buffer, size_t bytes_read) {
if (bytes_read < 20) {
// Too small to contain a full timestamp.
return -1;
}
int64_t d = strtoll(buffer + (bytes_read - 20), NULL, 10);
struct timespec current_time;
clock_gettime(CLOCK_REALTIME, &current_time);
int64_t delta = current_time.tv_sec * 1000000000LL + current_time.tv_nsec - d;
return delta;
}
int set_nonblocking(int sockfd) {
int opt = 1;
if (ff_ioctl(sockfd, FIONBIO, &opt)) {
perror("ioctl FIONBIO");
return -1;
}
return 0;
}
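// This callback is registered via ff_run() and is invoked on every iteration of
// the F-Stack/DPDK main loop; it mirrors the epoll loop of the kernel version.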
int loop(void* arg) {
int n = ff_epoll_wait(epfd, events, MAX_EVENTS, -1);
for (int i = 0; i < n; i++) {
if (events[i].events & EPOLLOUT) {
// Ready to write (send request)
int sent = ff_send(sockfd, request, strlen(request), 0);
if (sent == -1) {
perror("send");
break;
}
// printf("Sent HTTP request:\n%s", request);
ev.events = EPOLLIN | EPOLLET; // Now, wait for the response
ff_epoll_ctl(epfd, EPOLL_CTL_MOD, sockfd, &ev);
} else if (events[i].events & EPOLLIN) {
// Ready to read (response received)
char buffer[BUFFER_SIZE];
int bytes_read;
while ((bytes_read = ff_recv(sockfd, buffer, sizeof(buffer) - 1, 0)) > 0) {
br += bytes_read;
++buffer_reads;
bytes_read += 1;
buffer[bytes_read - 1] = '\0';
last_latency = get_time_difference(buffer, bytes_read);
if (start_lat < 10 || start_lat > 10000000)
start_lat = last_latency;
}
if (bytes_read == 0) {
// Connection closed by the server
float avg_bytes = (float)br / (float)buffer_reads;
printf("avg bytes: %.6f\n", avg_bytes);
printf("buffer_reads: %ld\n", buffer_reads);
printf("bytes_read: %ld\n", br);
printf("last_latency: %d\n", last_latency);
printf("first_latency: %d\n", start_lat);
printf("lat_diff: %d\n", last_latency - start_lat);
exit(0);
} else if (bytes_read == -1 && errno != EAGAIN) {
perror("recv");
exit(0);
}
}
}
return 0;
}
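// One-time setup: create the F-Stack socket, start the non-blocking connect and
// register it with ff_epoll, then hand control to ff_run(), which does not
// return in normal operation (the program exits from loop()).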
int do_req() {
const char *hostname = "10.0.3.122";
const char *port = "12373";
const char *path = "/";
// Create a socket
sockfd = ff_socket(AF_INET, SOCK_STREAM, 0);
if (sockfd < 0) {
perror("socket");
exit(EXIT_FAILURE);
}
// Resolve hostname
struct addrinfo hints, *res;
memset(&hints, 0, sizeof(hints));
hints.ai_family = AF_INET;
hints.ai_socktype = SOCK_STREAM;
if (getaddrinfo(hostname, port, &hints, &res) != 0) {
perror("getaddrinfo");
ff_close(sockfd);
exit(EXIT_FAILURE);
}
// Set the socket to non-blocking mode
if (set_nonblocking(sockfd) == -1) {
ff_close(sockfd);
freeaddrinfo(res);
exit(EXIT_FAILURE);
}
// Start connecting
int connect_status = ff_connect(sockfd, (struct linux_sockaddr *)res->ai_addr, res->ai_addrlen);
if (connect_status == -1 && errno != EINPROGRESS) {
perror("connect");
ff_close(sockfd);
freeaddrinfo(res);
exit(EXIT_FAILURE);
}
// Set up epoll
epfd = ff_epoll_create(0);
if (epfd == -1) {
perror("epoll_create1");
ff_close(sockfd);
freeaddrinfo(res);
exit(EXIT_FAILURE);
}
// Use the file-scope ev/events so that loop() operates on the same epoll state
// (a local declaration here would shadow the globals used by loop()).
ev.events = EPOLLOUT | EPOLLIN | EPOLLET; // Wait for the socket to be writable (connect completion) and readable
ev.data.fd = sockfd;
if (ff_epoll_ctl(epfd, EPOLL_CTL_ADD, sockfd, &ev) == -1) {
perror("epoll_ctl");
ff_close(sockfd);
ff_close(epfd);
freeaddrinfo(res);
exit(EXIT_FAILURE);
}
snprintf(request, sizeof(request), "GET %s HTTP/1.1\r\nHost: %s\r\nConnection: keep-alive\r\n\r\n", path, hostname);
ff_run(loop, NULL);
// Clean up
ff_close(sockfd);
ff_close(epfd);
freeaddrinfo(res);
return 0;
}
int main(int argc, char * argv[])
{
// printf("hello1\n");
ff_init(argc, argv);
// printf("hello\n");
do_req();
return 0;
}
My config is as follows:
[dpdk]
# Hexadecimal bitmask of cores to run on.
lcore_mask=2
# Number of memory channels.
channel=10
# Specify base virtual address to map.
#base_virtaddr=0x7f0000000000
# Promiscuous mode of NIC, default: enabled.
promiscuous=1
numa_on=1
# TX checksum offload skip, default: disabled.
# We need this switch enabled in the following cases:
# -> The application wants to enforce a wrong checksum for testing purposes.
# -> Some cards advertise the offload capability but do not actually calculate the checksum.
tx_csum_offoad_skip=0
# TCP segment offload, default: disabled.
tso=0
# HW vlan strip, default: enabled.
vlan_strip=1
# sleep when no pkts are incoming
# unit: microseconds
idle_sleep=0
# packet send delay time (0-100) when sending fewer than 32 pkts.
# default 100 us.
# if set to 0, pkts are sent immediately.
# if set >100, the delay is capped at 100 us.
# unit: microseconds
pkt_tx_delay=0
# use symmetric Receive-side Scaling(RSS) key, default: disabled.
symmetric_rss=0
# PCI device enable list.
# And driver options
#allow=02:00.0
# for multiple PCI devices
#allow=02:00.0,03:00.0
# enabled port list
#
# EBNF grammar:
#
# exp ::= num_list {"," num_list}
# num_list ::= <num> | <range>
# range ::= <num>"-"<num>
# num ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
#
# examples
# 0-3 ports 0, 1,2,3 are enabled
# 1-3,4,7 ports 1,2,3,4,7 are enabled
#
# If using bonding, configure the bonding port id in port_list
# and do not configure the slave port ids in port_list.
# For example, if port 0 and port 1 are trunked into bonding port 2,
# set `port_list=2` and configure a `[port2]` section.
port_list=0
# Number of vdev.
nb_vdev=0
# Number of bond.
nb_bond=0
# log level for dpdk, optional
# log_level=0
# Each core writes into its own pcap file, which is opened once and closed once full.
# Supports dumping the first snaplen bytes of each packet.
# If a pcap file grows larger than savelen bytes, it is closed and dumping continues into the next file.
[pcap]
enable=0
snaplen=96
savelen=16777216
savepath=.
# Port config section
# Correspond to dpdk.port_list's index: port0, port1...
[port0]
addr=10.0.3.234
netmask=255.255.255.0
broadcast=10.0.3.255
gateway=10.0.3.1
# set interface name, Optional parameter.
if_name=ens6
# IPv6 net addr, Optional parameters.
#addr6=ff::02
#prefix_len=64
#gateway6=ff::01
# Multi virtual IPv4/IPv6 net addr, Optional parameters.
# `vip_ifname`: default `f-stack-x`
# `vip_addr`: Separated by semicolons, MAX number 64;
# Only support netmask 255.255.255.255, broadcast x.x.x.255 now, hard code in `ff_veth_setvaddr`.
# `vip_addr6`: Separated by semicolons, MAX number 64.
# `vip_prefix_len`: All addr6 use the same prefix now, default 64.
#vip_ifname=lo0
#vip_addr=192.168.1.3;192.168.1.4;192.168.1.5;192.168.1.6
#vip_addr6=ff::03;ff::04;ff::05;ff::06;ff::07
#vip_prefix_len=64
# lcore list used to handle this port
# the format is same as port_list
#lcore_list=0
# bonding slave port list used to handle this port
# needs to be configured when this port is a bonding port
# the format is same as port_list
#slave_port_list=0,1
# Vdev config section
# Correspond to dpdk.nb_vdev's index: vdev0, vdev1...
# iface : Usually should not be set.
# path : The vuser device path in the container. Required.
# queues : The max queues of vuser. Optional, default 1; must be greater than or equal to the number of processes.
# queue_size : Queue size. Optional, default 256.
# mac : The MAC address of vuser. Optional, default random; if vhost uses a physical NIC, it should be set to that NIC's MAC.
# cq : Optional, if queues = 1, default 0; if queues > 1 default 1.
#[vdev0]
##iface=/usr/local/var/run/openvswitch/vhost-user0
#path=/var/run/openvswitch/vhost-user0
#queues=1
#queue_size=256
#mac=00:00:00:00:00:01
#cq=0
# bond config section
# See http://doc.dpdk.org/guides/prog_guide/link_bonding_poll_mode_drv_lib.html
#[bond0]
#mode=4
#slave=0000:0a:00.0,slave=0000:0a:00.1
#primary=0000:0a:00.0
#mac=f0:98:38:xx:xx:xx
## opt argument
#socket_id=0
#xmit_policy=l23
#lsc_poll_period_ms=0
#up_delay=0
#down_delay=0
# KNI config: if enabled and method=reject,
# all packets that do not match the following tcp_port and udp_port lists
# will be forwarded to the kernel; if method=accept, all packets that do
# match the following tcp_port and udp_port lists will be forwarded to the kernel.
#[kni]
#enable=1
#method=reject
## The format is same as port_list
#tcp_port=80,443
#udp_port=53
# FreeBSD network performance tuning configurations.
# Most native FreeBSD configurations are supported.
[freebsd.boot]
# If using rack/bbr, which depend on HPTS, set a greater value of hz; for example, 1000000 means a tick is 1 us.
hz=100
# Block out a range of descriptors to avoid overlap
# with the kernel's descriptor space.
# You can increase this value according to your app.
fd_reserve=1024
kern.ipc.maxsockets=262144
net.inet.tcp.syncache.hashsize=4096
net.inet.tcp.syncache.bucketlimit=100
net.inet.tcp.tcbhashsize=65536
kern.ncallout=262144
kern.features.inet6=1
[freebsd.sysctl]
kern.ipc.somaxconn=32768
kern.ipc.maxsockbuf=16777216
net.link.ether.inet.maxhold=5
net.inet.tcp.fast_finwait2_recycle=1
net.inet.tcp.sendspace=1677721
net.inet.tcp.recvspace=1677721
net.inet.tcp.nolocaltimewait=1
net.inet.tcp.cc.algorithm=bbr
net.inet.tcp.sendbuf_max=16777216
net.inet.tcp.recvbuf_max=16777216
net.inet.tcp.sendbuf_auto=1
net.inet.tcp.recvbuf_auto=1
net.inet.tcp.sendbuf_inc=16384
net.inet.tcp.recvbuf_inc=524288
net.inet.tcp.sack.enable=1
net.inet.tcp.blackhole=1
net.inet.tcp.msl=2000
net.inet.tcp.delayed_ack=0
net.inet.tcp.rfc1323=1
net.inet.udp.blackhole=1
net.inet.ip.redirect=0
net.inet.ip.forwarding=0
net.inet6.ip6.auto_linklocal=1
net.inet6.ip6.accept_rtadv=2
net.inet6.icmp6.rediraccept=1
net.inet6.ip6.forwarding=0
# Set the default stack: freebsd, rack or bbr. You may need to increase 'freebsd.boot.hz' when using rack or bbr.
net.inet.tcp.functions_default=freebsd
# Needed by bbr; should be enabled.
net.inet.tcp.hpts.skip_swi=1
# Interval between calls to hpts_timeout_dir: min 250 us, max 256-512 ms, default 512 ms.
net.inet.tcp.hpts.minsleep=0
# [25600-51200]
net.inet.tcp.hpts.maxsleep=51200
The native Linux approach takes about 2 s; the F-Stack approach, however, consistently takes around 7.5 s.
Can anyone provide some advice on why F-Stack is so much slower here?