From e1420625725acda6507a667758bbcbfd0b2b295b Mon Sep 17 00:00:00 2001 From: "Bruce A. Mah" Date: Mon, 5 Jan 2015 15:19:57 -0800 Subject: [PATCH] Add -X to restrict SCTP binding to a subset of interfaces. Contains an alternate implementation of previously-submitted patches to set the maximum segment size and no-delay options. As a result of this change, SCTP functionality on Linux will generally require the libsctp library (on CentOS and similar distributions this is provided by the lksctp-tools RPM). Part of #131. Submitted by: Bruce Simpson --- configure.ac | 5 +- docs/dev.rst | 4 +- src/iperf.h | 10 ++- src/iperf3.1 | 22 ++++- src/iperf_api.c | 52 ++++++++++- src/iperf_api.h | 7 +- src/iperf_error.c | 8 +- src/iperf_locale.c | 6 +- src/iperf_sctp.c | 213 ++++++++++++++++++++++++++++++++++++++++++++- src/iperf_sctp.h | 5 ++ 10 files changed, 317 insertions(+), 15 deletions(-) diff --git a/configure.ac b/configure.ac index 50b4819fe..6f1f110b2 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -# iperf, Copyright (c) 2014, The Regents of the University of +# iperf, Copyright (c) 2014, 2015, The Regents of the University of # California, through Lawrence Berkeley National Laboratory (subject # to receipt of any required approvals from the U.S. Dept. of # Energy). All rights reserved. @@ -86,7 +86,8 @@ AC_C_CONST # Check for SCTP support AC_CHECK_HEADERS([sys/socket.h]) AC_CHECK_HEADERS([netinet/sctp.h], - AC_DEFINE([HAVE_SCTP], [1], [Have SCTP support.]), + AC_DEFINE([HAVE_SCTP], [1], [Have SCTP support.]) + AC_SEARCH_LIBS(sctp_bindx, [sctp]), [], [#ifdef HAVE_SYS_SOCKET_H #include diff --git a/docs/dev.rst b/docs/dev.rst index edf9ea69d..3a8702027 100644 --- a/docs/dev.rst +++ b/docs/dev.rst @@ -228,8 +228,8 @@ Code Authors The main authors of iperf3 are (in alphabetical order): Jon Dugan, Seth Elliott, Bruce A. Mah, Jeff Poskanzer, Kaustubh Prabhu. 
Additional code contributions have come from (also in alphabetical -order): Mark Ashley, Aaron Brown, Aeneas Jaißle, Susant Sahani, Brian -Tierney. +order): Mark Ashley, Aaron Brown, Aeneas Jaißle, Susant Sahani, +Bruce Simpson, Brian Tierney. iperf3 contains some original code from iperf2. The authors of iperf2 are (in alphabetical order): Jon Dugan, John Estabrook, Jim Ferbuson, diff --git a/src/iperf.h b/src/iperf.h index 8a9e9419f..ede93b584 100644 --- a/src/iperf.h +++ b/src/iperf.h @@ -116,6 +116,7 @@ struct iperf_settings iperf_size_t bytes; /* number of bytes to send */ iperf_size_t blocks; /* number of blocks (packets) to send */ char unit_format; /* -f */ + int num_ostreams; /* SCTP initmsg settings */ }; struct iperf_test; @@ -186,6 +187,12 @@ struct iperf_textline { TAILQ_ENTRY(iperf_textline) textlineentries; }; +struct xbind_entry { + char *name; + struct addrinfo *ai; + TAILQ_ENTRY(xbind_entry) link; +}; + struct iperf_test { char role; /* 'c' lient or 's' erver */ @@ -194,7 +201,8 @@ struct iperf_test struct protocol *protocol; signed char state; char *server_hostname; /* -c option */ - char *bind_address; /* -B option */ + char *bind_address; /* first -B option */ + TAILQ_HEAD(xbind_addrhead, xbind_entry) xbind_addrs; /* all -X opts */ int bind_port; /* --cport option */ int server_port; int omit; /* duration of omit period (-O flag) */ diff --git a/src/iperf3.1 b/src/iperf3.1 index 5d14345a9..da49278fe 100644 --- a/src/iperf3.1 +++ b/src/iperf3.1 @@ -130,10 +130,10 @@ run in reverse mode (server sends, client receives) window size / socket buffer size (this gets sent to the server and used on that side too) .TP .BR -M ", " --set-mss " \fIn\fR" -set TCP maximum segment size (MTU - 40 bytes) +set TCP/SCTP maximum segment size (MTU - 40 bytes) .TP .BR -N ", " --no-delay " " -set TCP no delay, disabling Nagle's Algorithm +set TCP/SCTP no delay, disabling Nagle's Algorithm .TP .BR -4 ", " --version4 " " only use IPv4 @@ -147,6 +147,24 @@ set the IP 
'type of service' .BR -L ", " --flowlabel " \fIn\fR" set the IPv6 flow label (currently only supported on Linux) .TP +.BR -X ", " --xbind " \fIname\fR" +Bind SCTP associations to a specific subset of links using sctp_bindx(3). +The \fB-B\fR flag will be ignored if this flag is specified. +Normally SCTP will include the protocol addresses of all active links +on the local host when setting up an association. Specifying at least +one \fB-X\fR name will disable this behaviour. +This flag must be specified for each link to be included in the +association, and is supported for both iperf servers and clients +(the latter are supported by passing the first \fB-X\fR argument to bind(2)). +Hostnames are accepted as arguments and are resolved using +getaddrinfo(3). +If the \fB-4\fR or \fB-6\fR flags are specified, names +which do not resolve to addresses within the +specified protocol family will be ignored. +.TP +.BR --nstreams " \fIn\fR" +Set number of SCTP streams. +.TP .BR -Z ", " --zerocopy " " Use a "zero copy" method of sending data, such as sendfile(2), instead of the usual write(2). 
diff --git a/src/iperf_api.c b/src/iperf_api.c index 30b7693e8..069ea9d8d 100644 --- a/src/iperf_api.c +++ b/src/iperf_api.c @@ -636,6 +636,8 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv) #endif /* HAVE_TCP_CONGESTION */ #if defined(HAVE_SCTP) {"sctp", no_argument, NULL, OPT_SCTP}, + {"nstreams", required_argument, NULL, OPT_NUMSTREAMS}, + {"xbind", required_argument, NULL, 'X'}, #endif {"pidfile", required_argument, NULL, 'I'}, {"logfile", required_argument, NULL, OPT_LOGFILE}, @@ -652,10 +654,11 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv) char* comma; #endif /* HAVE_CPU_AFFINITY */ char* slash; + struct xbind_entry *xbe; blksize = 0; server_flag = client_flag = rate_flag = duration_flag = 0; - while ((flag = getopt_long(argc, argv, "p:f:i:D1VJvsc:ub:t:n:k:l:P:Rw:B:M:N46S:L:ZO:F:A:T:C:dI:h", longopts, NULL)) != -1) { + while ((flag = getopt_long(argc, argv, "p:f:i:D1VJvsc:ub:t:n:k:l:P:Rw:B:M:N46S:L:ZO:F:A:T:C:dI:hX:", longopts, NULL)) != -1) { switch (flag) { case 'p': test->server_port = atoi(optarg); @@ -719,6 +722,14 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv) #endif /* HAVE_SCTP */ break; + case OPT_NUMSTREAMS: +#if defined(linux) || defined(__FreeBSD__) + test->settings->num_ostreams = unit_atoi(optarg); + client_flag = 1; +#else /* linux */ + i_errno = IEUNIMP; + return -1; +#endif /* linux */ case 'b': slash = strchr(optarg, '/'); if (slash) { @@ -818,6 +829,20 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv) return -1; #endif /* HAVE_FLOWLABEL */ break; + case 'X': + xbe = (struct xbind_entry *)malloc(sizeof(struct xbind_entry)); + if (!xbe) { + i_errno = IESETSCTPBINDX; + return -1; + } + memset(xbe, 0, sizeof(*xbe)); + xbe->name = strdup(optarg); + if (!xbe->name) { + i_errno = IESETSCTPBINDX; + return -1; + } + TAILQ_INSERT_TAIL(&test->xbind_addrs, xbe, link); + break; case 'Z': if (!has_sendfile()) { i_errno = IENOSENDFILE; @@ -1732,6 +1757,7 @@ 
iperf_defaults(struct iperf_test *testp) testp->diskfile_name = (char*) 0; testp->affinity = -1; testp->server_affinity = -1; + TAILQ_INIT(&testp->xbind_addrs); #if defined(HAVE_CPUSET_SETAFFINITY) CPU_ZERO(&testp->cpumask); #endif /* HAVE_CPUSET_SETAFFINITY */ @@ -1845,6 +1871,18 @@ iperf_free_test(struct iperf_test *test) free(test->server_hostname); if (test->bind_address) free(test->bind_address); + if (!TAILQ_EMPTY(&test->xbind_addrs)) { + struct xbind_entry *xbe; + + while (!TAILQ_EMPTY(&test->xbind_addrs)) { + xbe = TAILQ_FIRST(&test->xbind_addrs); + TAILQ_REMOVE(&test->xbind_addrs, xbe, link); + if (xbe->ai) + freeaddrinfo(xbe->ai); + free(xbe->name); + free(xbe); + } + } free(test->settings); if (test->title) free(test->title); @@ -1885,6 +1923,18 @@ iperf_free_test(struct iperf_test *test) free(t); } + /* sctp_bindx: do not free the arguments, only the resolver results */ + if (!TAILQ_EMPTY(&test->xbind_addrs)) { + struct xbind_entry *xbe; + + TAILQ_FOREACH(xbe, &test->xbind_addrs, link) { + if (xbe->ai) { + freeaddrinfo(xbe->ai); + xbe->ai = NULL; + } + } + } + /* XXX: Why are we setting these values to NULL? */ // test->streams = NULL; test->stats_callback = NULL; diff --git a/src/iperf_api.h b/src/iperf_api.h index 75162b785..23642e301 100644 --- a/src/iperf_api.h +++ b/src/iperf_api.h @@ -48,6 +48,7 @@ struct iperf_stream; #define OPT_GET_SERVER_OUTPUT 3 #define OPT_UDP_COUNTERS_64BIT 4 #define OPT_CLIENT_PORT 5 +#define OPT_NUMSTREAMS 6 /* states */ #define TEST_START 1 @@ -308,8 +309,8 @@ enum { IECLIENTTERM = 119, // The client has terminated IESERVERTERM = 120, // The server has terminated IEACCESSDENIED = 121, // The server is busy running a test. Try again later. 
- IESETNODELAY = 122, // Unable to set TCP NODELAY (check perror) - IESETMSS = 123, // Unable to set TCP MSS (check perror) + IESETNODELAY = 122, // Unable to set TCP/SCTP NODELAY (check perror) + IESETMSS = 123, // Unable to set TCP/SCTP MSS (check perror) IESETBUF = 124, // Unable to set socket buffer size (check perror) IESETTOS = 125, // Unable to set IP TOS (check perror) IESETCOS = 126, // Unable to set IPv6 traffic class (check perror) @@ -324,6 +325,8 @@ enum { IEPIDFILE = 135, // Unable to write PID file IEV6ONLY = 136, // Unable to set/unset IPV6_V6ONLY (check perror) IESETSCTPDISABLEFRAG = 137, // Unable to set SCTP Fragmentation (check perror) + IESETSCTPNSTREAM= 138, // Unable to set SCTP number of streams (check perror) + IESETSCTPBINDX= 139, // Unable to process sctp_bindx() parameters /* Stream errors */ IECREATESTREAM = 200, // Unable to create a new stream (check herror/perror) IEINITSTREAM = 201, // Unable to initialize stream (check herror/perror) diff --git a/src/iperf_error.c b/src/iperf_error.c index 6587022f3..367ff3ed8 100644 --- a/src/iperf_error.c +++ b/src/iperf_error.c @@ -241,11 +241,11 @@ iperf_strerror(int i_errno) snprintf(errstr, len, "the server is busy running a test. 
try again later"); break; case IESETNODELAY: - snprintf(errstr, len, "unable to set TCP NODELAY"); + snprintf(errstr, len, "unable to set TCP/SCTP NODELAY"); perr = 1; break; case IESETMSS: - snprintf(errstr, len, "unable to set TCP MSS"); + snprintf(errstr, len, "unable to set TCP/SCTP MSS"); perr = 1; break; case IESETBUF: @@ -347,6 +347,10 @@ iperf_strerror(int i_errno) snprintf(errstr, len, "unable to set SCTP_DISABLE_FRAGMENTS"); perr = 1; break; + case IESETSCTPNSTREAM: + snprintf(errstr, len, "unable to set SCTP_INIT num of SCTP streams\n"); + perr = 1; + break; } if (herr || perr) diff --git a/src/iperf_locale.c b/src/iperf_locale.c index df63922c9..b1278588f 100644 --- a/src/iperf_locale.c +++ b/src/iperf_locale.c @@ -120,6 +120,8 @@ const char usage_longstr[] = "Usage: iperf [-s|-c host] [options]\n" " -c, --client run in client mode, connecting to \n" #if defined(HAVE_SCTP) " --sctp use SCTP rather than TCP\n" + " -X, --xbind bind SCTP association to links\n" + " --nstreams # number of SCTP streams\n" #endif /* HAVE_SCTP */ " -u, --udp use UDP rather than TCP\n" " -b, --bandwidth #[KMG][/#] target bandwidth in bits/sec (0 for unlimited)\n" @@ -137,8 +139,8 @@ const char usage_longstr[] = "Usage: iperf [-s|-c host] [options]\n" #if defined(HAVE_TCP_CONGESTION) " -C, --congestion set TCP congestion control algorithm (Linux and FreeBSD only)\n" #endif /* HAVE_TCP_CONGESTION */ - " -M, --set-mss # set TCP maximum segment size (MTU - 40 bytes)\n" - " -N, --nodelay set TCP no delay, disabling Nagle's Algorithm\n" + " -M, --set-mss # set TCP/SCTP maximum segment size (MTU - 40 bytes)\n" + " -N, --no-delay set TCP/SCTP no delay, disabling Nagle's Algorithm\n" " -4, --version4 only use IPv4\n" " -6, --version6 only use IPv6\n" " -S, --tos N set the IP 'type of service'\n" diff --git a/src/iperf_sctp.c b/src/iperf_sctp.c index eb30f5ce9..4ce21bf4c 100644 --- a/src/iperf_sctp.c +++ b/src/iperf_sctp.c @@ -197,6 +197,12 @@ iperf_sctp_listen(struct iperf_test *test) 
return -1; } + /* servers must call sctp_bindx() _instead_ of bind() */ + if (!TAILQ_EMPTY(&test->xbind_addrs)) { + freeaddrinfo(res); + if (iperf_sctp_bindx(test, s, IPERF_SCTP_SERVER)) + return -1; + } else if (bind(s, (struct sockaddr *) res->ai_addr, res->ai_addrlen) < 0) { close(s); freeaddrinfo(res); @@ -263,7 +269,55 @@ iperf_sctp_connect(struct iperf_test *test) return -1; } - + if (test->no_delay != 0) { + opt = 1; + if (setsockopt(s, IPPROTO_SCTP, SCTP_NODELAY, &opt, sizeof(opt)) < 0) { + close(s); + freeaddrinfo(server_res); + i_errno = IESETNODELAY; + return -1; + } + } + + if ((test->settings->mss >= 512 && test->settings->mss <= 131072)) { + struct sctp_assoc_value av; + +#ifdef SCTP_FUTURE_ASSOC + av.assoc_id = SCTP_FUTURE_ASSOC; +#else + av.assoc_id = 0; +#endif + av.assoc_value = test->settings->mss; + + if (setsockopt(s, IPPROTO_SCTP, SCTP_MAXSEG, &av, sizeof(av)) < 0) { + close(s); + freeaddrinfo(server_res); + i_errno = IESETMSS; + return -1; + } + } + + if (test->settings->num_ostreams > 0) { + struct sctp_initmsg initmsg; + + memset(&initmsg, 0, sizeof(struct sctp_initmsg)); + initmsg.sinit_num_ostreams = test->settings->num_ostreams; + + if (setsockopt(s, IPPROTO_SCTP, SCTP_INITMSG, &initmsg, sizeof(struct sctp_initmsg)) < 0) { + close(s); + freeaddrinfo(server_res); + i_errno = IESETSCTPNSTREAM; + return -1; + } + } + + /* clients must call bind() followed by sctp_bindx() before connect() */ + if (!TAILQ_EMPTY(&test->xbind_addrs)) { + if (iperf_sctp_bindx(test, s, IPERF_SCTP_CLIENT)) + return -1; + } + + /* TODO support sctp_connectx() to avoid heartbeating. 
*/ if (connect(s, (struct sockaddr *) server_res->ai_addr, server_res->ai_addrlen) < 0 && errno != EINPROGRESS) { close(s); freeaddrinfo(server_res); @@ -307,3 +361,160 @@ iperf_sctp_init(struct iperf_test *test) #endif /* HAVE_SCTP */ } + + +/* iperf_sctp_bindx + * + * handle binding to multiple endpoints (-X parameters) + */ +int +iperf_sctp_bindx(struct iperf_test *test, int s, int is_server) +{ +#if defined(HAVE_SCTP) + struct addrinfo hints; + char portstr[6]; + char *servname; + struct addrinfo *ai, *ai0; + struct sockaddr *xaddrs; + struct xbind_entry *xbe, *xbe0; + char *bp; + size_t xaddrlen; + int nxaddrs; + int retval; + int domain; + + domain = test->settings->domain; + xbe0 = NULL; + retval = 0; + + if (TAILQ_EMPTY(&test->xbind_addrs)) + return retval; /* nothing to do */ + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = (domain == AF_UNSPEC ? AF_INET6 : domain); + hints.ai_socktype = SOCK_STREAM; + servname = NULL; + if (is_server) { + hints.ai_flags |= AI_PASSIVE; + snprintf(portstr, 6, "%d", test->server_port); + servname = portstr; + } + + /* client: must pop first -X address and call bind(). + * sctp_bindx() must see the ephemeral port chosen by bind(). + * we deliberately ignore the -B argument in this case. 
+ */ + if (!is_server) { + struct sockaddr *sa; + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; + int eport; + + xbe0 = TAILQ_FIRST(&test->xbind_addrs); + TAILQ_REMOVE(&test->xbind_addrs, xbe0, link); + + if (getaddrinfo(xbe0->name, servname, &hints, &xbe0->ai) != 0) { + i_errno = IESETSCTPBINDX; + retval = -1; + goto out; + } + + ai = xbe0->ai; + if (domain != AF_UNSPEC && domain != ai->ai_family) { + i_errno = IESETSCTPBINDX; + retval = -1; + goto out; + } + if (bind(s, (struct sockaddr *)ai->ai_addr, ai->ai_addrlen) < 0) { + i_errno = IESETSCTPBINDX; + retval = -1; + goto out; + } + + /* if only one -X argument, nothing more to do */ + if (TAILQ_EMPTY(&test->xbind_addrs)) + goto out; + + sa = (struct sockaddr *)ai->ai_addr; + if (sa->sa_family == AF_INET) { + sin = (struct sockaddr_in *)ai->ai_addr; + eport = sin->sin_port; + } else if (sa->sa_family == AF_INET6) { + sin6 = (struct sockaddr_in6 *)ai->ai_addr; + eport = sin6->sin6_port; + } else { + i_errno = IESETSCTPBINDX; + retval = -1; + goto out; + } + snprintf(portstr, 6, "%d", ntohs(eport)); + servname = portstr; + } + + /* pass 1: resolve and compute lengths. */ + nxaddrs = 0; + xaddrlen = 0; + TAILQ_FOREACH(xbe, &test->xbind_addrs, link) { + if (xbe->ai != NULL) + freeaddrinfo(xbe->ai); + if (getaddrinfo(xbe->name, servname, &hints, &xbe->ai) != 0) { + i_errno = IESETSCTPBINDX; + retval = -1; + goto out; + } + ai0 = xbe->ai; + for (ai = ai0; ai; ai = ai->ai_next) { + if (domain != AF_UNSPEC && domain != ai->ai_family) + continue; + xaddrlen += ai->ai_addrlen; + ++nxaddrs; + } + } + + /* pass 2: copy into flat buffer. 
*/ + xaddrs = (struct sockaddr *)malloc(xaddrlen); + if (!xaddrs) { + i_errno = IESETSCTPBINDX; + retval = -1; + goto out; + } + bp = (char *)xaddrs; + TAILQ_FOREACH(xbe, &test->xbind_addrs, link) { + ai0 = xbe->ai; + for (ai = ai0; ai; ai = ai->ai_next) { + if (domain != AF_UNSPEC && domain != ai->ai_family) + continue; + memcpy(bp, ai->ai_addr, ai->ai_addrlen); + bp += ai->ai_addrlen; + } + } + + if (sctp_bindx(s, xaddrs, nxaddrs, SCTP_BINDX_ADD_ADDR) == -1) { + close(s); + free(xaddrs); + i_errno = IESETSCTPBINDX; + retval = -1; + goto out; + } + + free(xaddrs); + retval = 0; + +out: + /* client: put head node back. */ + if (!is_server && xbe0) + TAILQ_INSERT_HEAD(&test->xbind_addrs, xbe0, link); + + TAILQ_FOREACH(xbe, &test->xbind_addrs, link) { + if (xbe->ai) { + freeaddrinfo(xbe->ai); + xbe->ai = NULL; + } + } + + return retval; +#else + i_errno = IENOSCTP; + return -1; +#endif /* HAVE_SCTP */ +} diff --git a/src/iperf_sctp.h b/src/iperf_sctp.h index b3f4fc4a1..764c410df 100644 --- a/src/iperf_sctp.h +++ b/src/iperf_sctp.h @@ -60,4 +60,9 @@ int iperf_sctp_connect(struct iperf_test *); int iperf_sctp_init(struct iperf_test *test); +#define IPERF_SCTP_CLIENT 0 +#define IPERF_SCTP_SERVER 1 + +int iperf_sctp_bindx(struct iperf_test *test, int s, int is_server); + #endif