Changes in 4.9.331 uas: add no-uas quirk for Hiksemi usb_disk usb-storage: Add Hiksemi USB3-FW to IGNORE_UAS uas: ignore UAS for Thinkplus chips net: usb: qmi_wwan: Add new usb-id for Dell branded EM7455 ntfs: fix BUG_ON in ntfs_lookup_inode_by_name() mmc: moxart: fix 4-bit bus width and remove 8-bit bus width mm: prevent page_frag_alloc() from corrupting the memory Revert "drm: bridge: analogix/dp: add panel prepare/unprepare in suspend/resume time" Input: melfas_mip4 - fix return value check in mip4_probe() usbnet: Fix memory leak in usbnet_disconnect() nvme: add new line after variable declatation nvme: Fix IOC_PR_CLEAR and IOC_PR_RELEASE ioctls for nvme devices selftests: Fix the if conditions of in test_extra_filter() clk: iproc: Minor tidy up of iproc pll data structures clk: iproc: Do not rely on node name for correct PLL setup Makefile.extrawarn: Move -Wcast-function-type-strict to W=1 ARM: fix function graph tracer and unwinder dependencies fs: fix UAF/GPF bug in nilfs_mdt_destroy dmaengine: xilinx_dma: cleanup for fetching xlnx,num-fstores property dmaengine: xilinx_dma: Report error in case of dma_set_mask_and_coherent API failure ARM: dts: fix Moxa SDIO 'compatible', remove 'sdhci' misnomer net/ieee802154: fix uninit value bug in dgram_sendmsg um: Cleanup syscall_handler_t cast in syscalls_32.h um: Cleanup compiler warning in arch/x86/um/tls_32.c usb: mon: make mmapped memory read only USB: serial: ftdi_sio: fix 300 bps rate for SIO nilfs2: fix NULL pointer dereference at nilfs_bmap_lookup_at_level() nilfs2: fix leak of nilfs_root in case of writer thread creation failure nilfs2: replace WARN_ONs by nilfs_error for checkpoint acquisition failure ceph: don't truncate file in atomic_open random: clamp credited irq bits to maximum mixed ALSA: hda: Fix position reporting on Poulsbo scsi: stex: Properly zero out the passthrough command structure USB: serial: qcserial: add new usb-id for Dell branded EM7455 random: avoid reading two cache lines on irq 
randomness wifi: mac80211_hwsim: avoid mac80211 warning on bad rate random: restore O_NONBLOCK support Input: xpad - add supported devices as contributed on github Input: xpad - fix wireless 360 controller breaking after suspend random: use expired timer rather than wq for mixing fast pool ALSA: oss: Fix potential deadlock at unregistration ALSA: rawmidi: Drop register_mutex in snd_rawmidi_free() ALSA: usb-audio: Fix potential memory leaks ALSA: usb-audio: Fix NULL dererence at error path iio: dac: ad5593r: Fix i2c read protocol requirements fs: dlm: fix race between test_bit() and queue_work() fs: dlm: handle -EBUSY first in lock arg validation quota: Check next/prev free block number after reading from quota file regulator: qcom_rpm: Fix circular deferral regression parisc: fbdev/stifb: Align graphics memory size to 4MB UM: cpuinfo: Fix a warning for CONFIG_CPUMASK_OFFSTACK PCI: Sanitise firmware BAR assignments behind a PCI-PCI bridge fbdev: smscufx: Fix use-after-free in ufx_ops_open() nilfs2: fix use-after-free bug of struct nilfs_root ext4: avoid crash when inline data creation follows DIO write ext4: fix null-ptr-deref in ext4_write_info ext4: make ext4_lazyinit_thread freezable ext4: place buffer head allocation before handle start ring-buffer: Allow splice to read previous partially read pages ring-buffer: Check pending waiters when doing wake ups as well ring-buffer: Fix race between reset page and reading page KVM: x86/emulator: Fix handing of POP SS to correctly set interruptibility selinux: use "grep -E" instead of "egrep" sh: machvec: Use char[] for section boundaries wifi: ath10k: add peer map clean up for peer delete in ath10k_sta_state() wifi: mac80211: allow bw change during channel switch in mesh wifi: rtl8xxxu: tighten bounds checking in rtl8xxxu_read_efuse() spi: qup: add missing clk_disable_unprepare on error in spi_qup_resume() spi: qup: add missing clk_disable_unprepare on error in spi_qup_pm_resume_runtime() wifi: rtl8xxxu: gen2: Fix 
mistake in path B IQ calibration net: fs_enet: Fix wrong check in do_pd_setup spi/omap100k:Fix PM disable depth imbalance in omap1_spi100k_probe mISDN: fix use-after-free bugs in l1oip timer handlers tcp: fix tcp_cwnd_validate() to not forget is_cwnd_limited net: rds: don't hold sock lock when cancelling work from rds_tcp_reset_callbacks() bnx2x: fix potential memory leak in bnx2x_tpa_stop() drm/mipi-dsi: Detach devices when removing the host platform/x86: msi-laptop: Fix old-ec check for backlight registering mmc: au1xmmc: Fix an error handling path in au1xmmc_probe() ASoC: eureka-tlv320: Hold reference returned from of_find_xxx API ALSA: dmaengine: increment buffer pointer atomically memory: of: Fix refcount leak bug in of_get_ddr_timings() soc: qcom: smsm: Fix refcount leak bugs in qcom_smsm_probe() soc: qcom: smem_state: Add refcounting for the 'state->of_node' ARM: dts: kirkwood: lsxl: fix serial line ARM: dts: kirkwood: lsxl: remove first ethernet port ARM: Drop CMDLINE_* dependency on ATAGS ARM: dts: exynos: fix polarity of VBUS GPIO of Origen iio: adc: at91-sama5d2_adc: fix AT91_SAMA5D2_MR_TRACKTIM_MAX iio: inkern: only release the device node when done with it iio: ABI: Fix wrong format of differential capacitance channel ABI. 
clk: tegra: Fix refcount leak in tegra210_clock_init clk: tegra: Fix refcount leak in tegra114_clock_init clk: tegra20: Fix refcount leak in tegra20_clock_init HSI: omap_ssi: Fix refcount leak in ssi_probe HSI: omap_ssi_port: Fix dma_map_sg error check media: exynos4-is: fimc-is: Add of_node_put() when breaking out of loop tty: xilinx_uartps: Fix the ignore_status media: xilinx: vipp: Fix refcount leak in xvip_graph_dma_init RDMA/rxe: Fix "kernel NULL pointer dereference" error RDMA/rxe: Fix the error caused by qp->sk dyndbg: fix module.dyndbg handling dyndbg: let query-modname override actual module name ata: fix ata_id_sense_reporting_enabled() and ata_id_has_sense_reporting() ata: fix ata_id_has_devslp() ata: fix ata_id_has_ncq_autosense() ata: fix ata_id_has_dipm() drivers: serial: jsm: fix some leaks in probe firmware: google: Test spinlock on panic path to avoid lockups serial: 8250: Fix restoring termios speed after suspend mfd: intel_soc_pmic: Fix an error handling path in intel_soc_pmic_i2c_probe() mfd: lp8788: Fix an error handling path in lp8788_probe() mfd: lp8788: Fix an error handling path in lp8788_irq_init() and lp8788_irq_init() mfd: sm501: Add check for platform_driver_register() dmaengine: ioat: stop mod_timer from resurrecting deleted timer in __cleanup() clk: bcm2835: fix bcm2835_clock_rate_from_divisor declaration clk: ti: dra7-atl: Fix reference leak in of_dra7_atl_clk_probe powerpc/math_emu/efp: Include module.h powerpc/pci_dn: Add missing of_node_put() powerpc: Fix SPE Power ISA properties for e500v1 platforms iommu/omap: Fix buffer overflow in debugfs f2fs: fix race condition on setting FI_NO_EXTENT flag ACPI: video: Add Toshiba Satellite/Portege Z830 quirk MIPS: BCM47XX: Cast memcmp() of function to (void *) powercap: intel_rapl: fix UBSAN shift-out-of-bounds issue thermal: intel_powerclamp: Use get_cpu() instead of smp_processor_id() to avoid crash openvswitch: Fix double reporting of drops in dropwatch openvswitch: Fix overreporting of 
drops in dropwatch tcp: annotate data-race around tcp_md5sig_pool_populated xfrm: Update ipcomp_scratches with NULL when freed Bluetooth: L2CAP: initialize delayed works at l2cap_chan_create() Bluetooth: hci_sysfs: Fix attempting to call device_add multiple times can: bcm: check the result of can_send() in bcm_can_tx() wifi: rt2x00: don't run Rt5592 IQ calibration on MT7620 Bluetooth: L2CAP: Fix user-after-free r8152: Rate limit overflow messages drm: Use size_t type for len variable in drm_copy_field() drm: Prevent drm_copy_field() to attempt copying a NULL pointer platform/x86: msi-laptop: Change DMI match / alias strings to fix module autoloading drm/amdgpu: fix initial connector audio value ARM: dts: imx7d-sdb: config the max pressure for tsc2046 ARM: dts: imx6q: add missing properties for sram ARM: dts: imx6dl: add missing properties for sram ARM: dts: imx6qp: add missing properties for sram ARM: dts: imx6sl: add missing properties for sram media: cx88: Fix a null-ptr-deref bug in buffer_prepare() scsi: 3w-9xxx: Avoid disabling device if failing to enable it HID: roccat: Fix use-after-free in roccat_read() usb: host: xhci: Fix potential memory leak in xhci_alloc_stream_info() usb: musb: Fix musb_gadget.c rxstate overflow bug Revert "usb: storage: Add quirk for Samsung Fit flash" usb: idmouse: fix an uninit-value in idmouse_open perf intel-pt: Fix segfault in intel_pt_print_info() with uClibc net: ieee802154: return -EINVAL for unknown addr type net/ieee802154: don't warn zero-sized raw_sendmsg() ext4: continue to expand file system when the target size doesn't reach inet: fully convert sk->sk_rx_dst to RCU rules thermal: intel_powerclamp: Use first online CPU as control_cpu gcov: support GCC 12.1 and newer compilers Linux 4.9.331 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: I105d6215a29d200abe3330f328ce3c2009ba0df9
642 lines
17 KiB
C
642 lines
17 KiB
C
/*
 * Test functionality of BPF filters for SO_REUSEPORT. The tests below use
 * a BPF program (both classic and extended) to read the first word from an
 * incoming packet (expected to be in network byte-order), calculate a modulus
 * of that number, and then dispatch the packet to the Nth socket using the
 * result. These tests are run for each supported address family and protocol.
 * Additionally, a few edge cases in the implementation are tested.
 */
|
|
|
|
#include <errno.h>
|
|
#include <error.h>
|
|
#include <fcntl.h>
|
|
#include <linux/bpf.h>
|
|
#include <linux/filter.h>
|
|
#include <linux/unistd.h>
|
|
#include <netinet/in.h>
|
|
#include <netinet/tcp.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <sys/epoll.h>
|
|
#include <sys/types.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/resource.h>
|
|
#include <unistd.h>
|
|
|
|
/* Element count of a real array (not valid for pointers or array parameters). */
#if !defined(ARRAY_SIZE)
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
|
|
|
|
/* Parameters for one test run; passed by value to every test function. */
struct test_params {
	int recv_family;	/* address family of the receiving sockets */
	int send_family;	/* address family of the sending socket */
	int protocol;		/* socket type given to socket(), e.g. SOCK_DGRAM */
	size_t recv_socks;	/* number of receiving sockets in the group */
	uint16_t recv_port;	/* port the receiving sockets bind to */
	uint16_t send_port_min;	/* first source port used by the sender */
};
|
|
|
|
/*
 * Address length used for every bind()/sendto() in this file: the size of a
 * full sockaddr_storage, independent of the address family.
 */
static size_t sockaddr_size(void)
{
	const size_t storage_len = sizeof(struct sockaddr_storage);

	return storage_len;
}
|
|
|
|
/*
 * Allocate a zeroed sockaddr_storage and fill it in as the wildcard ("any")
 * address of the given family with the given port (host byte order).
 *
 * Supports AF_INET and AF_INET6; any other family, or an allocation failure,
 * terminates the program through error(). The caller owns the returned
 * buffer and must free() it.
 */
static struct sockaddr *new_any_sockaddr(int family, uint16_t port)
{
	struct sockaddr_storage *addr;
	struct sockaddr_in *addr4;
	struct sockaddr_in6 *addr6;

	/* calloc() zeroes the storage; check it before touching the memory */
	addr = calloc(1, sizeof(*addr));
	if (!addr)
		error(1, errno, "failed to allocate sockaddr");

	switch (family) {
	case AF_INET:
		addr4 = (struct sockaddr_in *)addr;
		addr4->sin_family = AF_INET;
		addr4->sin_addr.s_addr = htonl(INADDR_ANY);
		addr4->sin_port = htons(port);
		break;
	case AF_INET6:
		addr6 = (struct sockaddr_in6 *)addr;
		addr6->sin6_family = AF_INET6;
		addr6->sin6_addr = in6addr_any;
		addr6->sin6_port = htons(port);
		break;
	default:
		error(1, 0, "Unsupported family %d", family);
	}
	return (struct sockaddr *)addr;
}
|
|
|
|
/*
 * Same as new_any_sockaddr(), but with the address replaced by the loopback
 * address of the family. Unsupported families terminate through error().
 * The caller frees the result.
 */
static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port)
{
	struct sockaddr *loop = new_any_sockaddr(family, port);

	if (family == AF_INET) {
		struct sockaddr_in *v4 = (struct sockaddr_in *)loop;

		v4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	} else if (family == AF_INET6) {
		struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)loop;

		v6->sin6_addr = in6addr_loopback;
	} else {
		error(1, 0, "Unsupported family %d", family);
	}
	return loop;
}
|
|
|
|
/*
 * Load an eBPF socket-filter program that loads the first 32-bit word of the
 * packet and reduces it modulo 'mod', then attach it to 'fd' with
 * SO_ATTACH_REUSEPORT_EBPF. Any failure terminates through error().
 */
static void attach_ebpf(int fd, uint16_t mod)
{
	static char bpf_log_buf[65536];
	static const char bpf_license[] = "GPL";

	const struct bpf_insn prog[] = {
		/* BPF_MOV64_REG(BPF_REG_6, BPF_REG_1) */
		{ .code = BPF_ALU64 | BPF_MOV | BPF_X,
		  .dst_reg = BPF_REG_6, .src_reg = BPF_REG_1 },
		/* BPF_LD_ABS(BPF_W, 0) R0 = (uint32_t)skb[0] */
		{ .code = BPF_LD | BPF_ABS | BPF_W },
		/* BPF_ALU64_IMM(BPF_MOD, BPF_REG_0, mod) */
		{ .code = BPF_ALU64 | BPF_MOD | BPF_K,
		  .dst_reg = BPF_REG_0, .imm = mod },
		/* BPF_EXIT_INSN() */
		{ .code = BPF_JMP | BPF_EXIT },
	};
	union bpf_attr load;
	int prog_fd;

	/* zero the whole union, not only the members assigned below */
	memset(&load, 0, sizeof(load));
	load.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
	load.insns = (unsigned long) &prog;
	load.insn_cnt = ARRAY_SIZE(prog);
	load.license = (unsigned long) &bpf_license;
	load.log_level = 1;
	load.log_size = sizeof(bpf_log_buf);
	load.log_buf = (unsigned long) &bpf_log_buf;
	load.kern_version = 0;

	prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &load, sizeof(load));
	if (prog_fd < 0)
		error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf);

	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &prog_fd,
		       sizeof(prog_fd)) != 0)
		error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");

	/* the program fd is not needed once the attach call has returned */
	close(prog_fd);
}
|
|
|
|
/*
 * Attach a classic BPF program to 'fd' with SO_ATTACH_REUSEPORT_CBPF. The
 * program returns (first 32-bit word of the packet) % mod. A setsockopt()
 * failure terminates through error().
 */
static void attach_cbpf(int fd, uint16_t mod)
{
	struct sock_filter insns[] = {
		/* A = (uint32_t)skb[0] */
		{ .code = BPF_LD | BPF_W | BPF_ABS, .jt = 0, .jf = 0, .k = 0 },
		/* A = A % mod */
		{ .code = BPF_ALU | BPF_MOD, .jt = 0, .jf = 0, .k = mod },
		/* return A */
		{ .code = BPF_RET | BPF_A, .jt = 0, .jf = 0, .k = 0 },
	};
	struct sock_fprog fprog;

	memset(&fprog, 0, sizeof(fprog));
	fprog.len = ARRAY_SIZE(insns);
	fprog.filter = insns;

	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &fprog,
		       sizeof(fprog)) != 0)
		error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
}
|
|
|
|
static void build_recv_group(const struct test_params p, int fd[], uint16_t mod,
|
|
void (*attach_bpf)(int, uint16_t))
|
|
{
|
|
struct sockaddr * const addr =
|
|
new_any_sockaddr(p.recv_family, p.recv_port);
|
|
int i, opt;
|
|
|
|
for (i = 0; i < p.recv_socks; ++i) {
|
|
fd[i] = socket(p.recv_family, p.protocol, 0);
|
|
if (fd[i] < 0)
|
|
error(1, errno, "failed to create recv %d", i);
|
|
|
|
opt = 1;
|
|
if (setsockopt(fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
|
|
sizeof(opt)))
|
|
error(1, errno, "failed to set SO_REUSEPORT on %d", i);
|
|
|
|
if (i == 0)
|
|
attach_bpf(fd[i], mod);
|
|
|
|
if (bind(fd[i], addr, sockaddr_size()))
|
|
error(1, errno, "failed to bind recv socket %d", i);
|
|
|
|
if (p.protocol == SOCK_STREAM) {
|
|
opt = 4;
|
|
if (setsockopt(fd[i], SOL_TCP, TCP_FASTOPEN, &opt,
|
|
sizeof(opt)))
|
|
error(1, errno,
|
|
"failed to set TCP_FASTOPEN on %d", i);
|
|
if (listen(fd[i], p.recv_socks * 10))
|
|
error(1, errno, "failed to listen on socket");
|
|
}
|
|
}
|
|
free(addr);
|
|
}
|
|
|
|
static void send_from(struct test_params p, uint16_t sport, char *buf,
|
|
size_t len)
|
|
{
|
|
struct sockaddr * const saddr = new_any_sockaddr(p.send_family, sport);
|
|
struct sockaddr * const daddr =
|
|
new_loopback_sockaddr(p.send_family, p.recv_port);
|
|
const int fd = socket(p.send_family, p.protocol, 0), one = 1;
|
|
|
|
if (fd < 0)
|
|
error(1, errno, "failed to create send socket");
|
|
|
|
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)))
|
|
error(1, errno, "failed to set reuseaddr");
|
|
|
|
if (bind(fd, saddr, sockaddr_size()))
|
|
error(1, errno, "failed to bind send socket");
|
|
|
|
if (sendto(fd, buf, len, MSG_FASTOPEN, daddr, sockaddr_size()) < 0)
|
|
error(1, errno, "failed to send message");
|
|
|
|
close(fd);
|
|
free(saddr);
|
|
free(daddr);
|
|
}
|
|
|
|
/*
 * Send p.recv_socks * 2 packets, each carrying its sequence number as a
 * network-order 32-bit word and each from source port
 * p.send_port_min + sequence number. After every send, wait on an epoll set
 * holding all receivers, read the packet (accepting the connection first for
 * SOCK_STREAM) and verify that it arrived on socket index (sport % mod).
 *
 * fd[] must hold p.recv_socks receiving sockets. Any failure or mismatch
 * terminates through error().
 */
static void test_recv_order(const struct test_params p, int fd[], int mod)
{
	char recv_buf[8], send_buf[8];
	struct msghdr msg;
	struct iovec recv_io = { recv_buf, 8 };
	struct epoll_event ev;
	int epfd, conn, i, sport, expected;
	uint32_t data, ndata;

	epfd = epoll_create(1);
	if (epfd < 0)
		error(1, errno, "failed to create epoll");
	for (i = 0; i < p.recv_socks; ++i) {
		ev.events = EPOLLIN;
		ev.data.fd = fd[i];
		if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd[i], &ev))
			error(1, errno, "failed to register sock %d epoll", i);
	}

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &recv_io;
	msg.msg_iovlen = 1;

	for (data = 0; data < p.recv_socks * 2; ++data) {
		sport = p.send_port_min + data;
		ndata = htonl(data);
		memcpy(send_buf, &ndata, sizeof(ndata));
		send_from(p, sport, send_buf, sizeof(ndata));

		i = epoll_wait(epfd, &ev, 1, -1);
		if (i < 0)
			error(1, errno, "epoll wait failed");

		if (p.protocol == SOCK_STREAM) {
			conn = accept(ev.data.fd, NULL, NULL);
			if (conn < 0)
				error(1, errno, "error accepting");
			i = recvmsg(conn, &msg, 0);
			close(conn);
		} else {
			i = recvmsg(ev.data.fd, &msg, 0);
		}
		if (i < 0)
			error(1, errno, "recvmsg error");
		if (i != sizeof(ndata))
			/* sizeof yields size_t, so %zu is the matching specifier */
			error(1, 0, "expected size %zu got %d",
			      sizeof(ndata), i);

		/* map the ready descriptor back to its index in fd[] */
		for (i = 0; i < p.recv_socks; ++i)
			if (ev.data.fd == fd[i])
				break;
		memcpy(&ndata, recv_buf, sizeof(ndata));
		/* ntohl() returns an unsigned 32-bit value */
		fprintf(stderr, "Socket %d: %u\n", i, ntohl(ndata));

		expected = (sport % mod);
		if (i != expected)
			error(1, 0, "expected socket %d", expected);
	}

	/* release the epoll instance; otherwise every call leaks one fd */
	close(epfd);
}
|
|
|
|
static void test_reuseport_ebpf(struct test_params p)
|
|
{
|
|
int i, fd[p.recv_socks];
|
|
|
|
fprintf(stderr, "Testing EBPF mod %zd...\n", p.recv_socks);
|
|
build_recv_group(p, fd, p.recv_socks, attach_ebpf);
|
|
test_recv_order(p, fd, p.recv_socks);
|
|
|
|
p.send_port_min += p.recv_socks * 2;
|
|
fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
|
|
attach_ebpf(fd[0], p.recv_socks / 2);
|
|
test_recv_order(p, fd, p.recv_socks / 2);
|
|
|
|
for (i = 0; i < p.recv_socks; ++i)
|
|
close(fd[i]);
|
|
}
|
|
|
|
static void test_reuseport_cbpf(struct test_params p)
|
|
{
|
|
int i, fd[p.recv_socks];
|
|
|
|
fprintf(stderr, "Testing CBPF mod %zd...\n", p.recv_socks);
|
|
build_recv_group(p, fd, p.recv_socks, attach_cbpf);
|
|
test_recv_order(p, fd, p.recv_socks);
|
|
|
|
p.send_port_min += p.recv_socks * 2;
|
|
fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
|
|
attach_cbpf(fd[0], p.recv_socks / 2);
|
|
test_recv_order(p, fd, p.recv_socks / 2);
|
|
|
|
for (i = 0; i < p.recv_socks; ++i)
|
|
close(fd[i]);
|
|
}
|
|
|
|
static void test_extra_filter(const struct test_params p)
|
|
{
|
|
struct sockaddr * const addr =
|
|
new_any_sockaddr(p.recv_family, p.recv_port);
|
|
int fd1, fd2, opt;
|
|
|
|
fprintf(stderr, "Testing too many filters...\n");
|
|
fd1 = socket(p.recv_family, p.protocol, 0);
|
|
if (fd1 < 0)
|
|
error(1, errno, "failed to create socket 1");
|
|
fd2 = socket(p.recv_family, p.protocol, 0);
|
|
if (fd2 < 0)
|
|
error(1, errno, "failed to create socket 2");
|
|
|
|
opt = 1;
|
|
if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
|
|
error(1, errno, "failed to set SO_REUSEPORT on socket 1");
|
|
if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
|
|
error(1, errno, "failed to set SO_REUSEPORT on socket 2");
|
|
|
|
attach_ebpf(fd1, 10);
|
|
attach_ebpf(fd2, 10);
|
|
|
|
if (bind(fd1, addr, sockaddr_size()))
|
|
error(1, errno, "failed to bind recv socket 1");
|
|
|
|
if (!bind(fd2, addr, sockaddr_size()) || errno != EADDRINUSE)
|
|
error(1, errno, "bind socket 2 should fail with EADDRINUSE");
|
|
|
|
free(addr);
|
|
}
|
|
|
|
static void test_filter_no_reuseport(const struct test_params p)
|
|
{
|
|
struct sockaddr * const addr =
|
|
new_any_sockaddr(p.recv_family, p.recv_port);
|
|
const char bpf_license[] = "GPL";
|
|
struct bpf_insn ecode[] = {
|
|
{ BPF_ALU64 | BPF_MOV | BPF_K, BPF_REG_0, 0, 0, 10 },
|
|
{ BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
|
|
};
|
|
struct sock_filter ccode[] = {{ BPF_RET | BPF_A, 0, 0, 0 }};
|
|
union bpf_attr eprog;
|
|
struct sock_fprog cprog;
|
|
int fd, bpf_fd;
|
|
|
|
fprintf(stderr, "Testing filters on non-SO_REUSEPORT socket...\n");
|
|
|
|
memset(&eprog, 0, sizeof(eprog));
|
|
eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
|
|
eprog.insn_cnt = ARRAY_SIZE(ecode);
|
|
eprog.insns = (unsigned long) &ecode;
|
|
eprog.license = (unsigned long) &bpf_license;
|
|
eprog.kern_version = 0;
|
|
|
|
memset(&cprog, 0, sizeof(cprog));
|
|
cprog.len = ARRAY_SIZE(ccode);
|
|
cprog.filter = ccode;
|
|
|
|
|
|
bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &eprog, sizeof(eprog));
|
|
if (bpf_fd < 0)
|
|
error(1, errno, "ebpf error");
|
|
fd = socket(p.recv_family, p.protocol, 0);
|
|
if (fd < 0)
|
|
error(1, errno, "failed to create socket 1");
|
|
|
|
if (bind(fd, addr, sockaddr_size()))
|
|
error(1, errno, "failed to bind recv socket 1");
|
|
|
|
errno = 0;
|
|
if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
|
|
sizeof(bpf_fd)) || errno != EINVAL)
|
|
error(1, errno, "setsockopt should have returned EINVAL");
|
|
|
|
errno = 0;
|
|
if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &cprog,
|
|
sizeof(cprog)) || errno != EINVAL)
|
|
error(1, errno, "setsockopt should have returned EINVAL");
|
|
|
|
free(addr);
|
|
}
|
|
|
|
static void test_filter_without_bind(void)
|
|
{
|
|
int fd1, fd2, opt = 1;
|
|
|
|
fprintf(stderr, "Testing filter add without bind...\n");
|
|
fd1 = socket(AF_INET, SOCK_DGRAM, 0);
|
|
if (fd1 < 0)
|
|
error(1, errno, "failed to create socket 1");
|
|
fd2 = socket(AF_INET, SOCK_DGRAM, 0);
|
|
if (fd2 < 0)
|
|
error(1, errno, "failed to create socket 2");
|
|
if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
|
|
error(1, errno, "failed to set SO_REUSEPORT on socket 1");
|
|
if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
|
|
error(1, errno, "failed to set SO_REUSEPORT on socket 2");
|
|
|
|
attach_ebpf(fd1, 10);
|
|
attach_cbpf(fd2, 10);
|
|
|
|
close(fd1);
|
|
close(fd2);
|
|
}
|
|
|
|
/*
 * Make sure both bits of rw_mask are set in the
 * /proc/sys/net/ipv4/tcp_fastopen sysctl. The current value is read first
 * and the file is rewritten only when one of the bits is missing.
 * Any I/O failure terminates through error().
 */
void enable_fastopen(void)
{
	int fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDONLY);
	int rw_mask = 3; /* 0x1: client side; 0x2: server side */
	int val, size;
	ssize_t nread;
	char buf[16];

	if (fd < 0)
		error(1, errno, "Unable to open tcp_fastopen sysctl");
	/* leave room for the terminator that atoi() needs */
	nread = read(fd, buf, sizeof(buf) - 1);
	if (nread <= 0)
		error(1, errno, "Unable to read tcp_fastopen sysctl");
	buf[nread] = '\0';
	val = atoi(buf);
	close(fd);

	if ((val & rw_mask) != rw_mask) {
		fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
		if (fd < 0)
			error(1, errno,
			      "Unable to open tcp_fastopen sysctl for writing");
		val |= rw_mask;
		size = snprintf(buf, sizeof(buf), "%d", val);
		if (write(fd, buf, size) <= 0)
			error(1, errno, "Unable to write tcp_fastopen sysctl");
		close(fd);
	}
}
|
|
|
|
/* RLIMIT_MEMLOCK as it was at program start; read by the destructor. */
static struct rlimit rlim_old;

/*
 * Runs before main(): remember the current RLIMIT_MEMLOCK and, unless the
 * soft limit is already unlimited, try to raise both limits by 1 MiB.
 * The setrlimit() result is deliberately ignored (best effort).
 */
static __attribute__((constructor)) void main_ctor(void)
{
	getrlimit(RLIMIT_MEMLOCK, &rlim_old);

	if (rlim_old.rlim_cur != RLIM_INFINITY) {
		struct rlimit rlim_new;

		rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
		/*
		 * An unlimited hard limit must stay unlimited: adding to
		 * RLIM_INFINITY wraps to a small value below the new soft
		 * limit, which makes setrlimit() reject the request.
		 */
		if (rlim_old.rlim_max == RLIM_INFINITY)
			rlim_new.rlim_max = RLIM_INFINITY;
		else
			rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
		setrlimit(RLIMIT_MEMLOCK, &rlim_new);
	}
}
|
|
|
|
static __attribute__((destructor)) void main_dtor(void)
|
|
{
|
|
setrlimit(RLIMIT_MEMLOCK, &rlim_old);
|
|
}
|
|
|
|
int main(void)
|
|
{
|
|
fprintf(stderr, "---- IPv4 UDP ----\n");
|
|
/* NOTE: UDP socket lookups traverse a different code path when there
|
|
* are > 10 sockets in a group. Run the bpf test through both paths.
|
|
*/
|
|
test_reuseport_ebpf((struct test_params) {
|
|
.recv_family = AF_INET,
|
|
.send_family = AF_INET,
|
|
.protocol = SOCK_DGRAM,
|
|
.recv_socks = 10,
|
|
.recv_port = 8000,
|
|
.send_port_min = 9000});
|
|
test_reuseport_ebpf((struct test_params) {
|
|
.recv_family = AF_INET,
|
|
.send_family = AF_INET,
|
|
.protocol = SOCK_DGRAM,
|
|
.recv_socks = 20,
|
|
.recv_port = 8000,
|
|
.send_port_min = 9000});
|
|
test_reuseport_cbpf((struct test_params) {
|
|
.recv_family = AF_INET,
|
|
.send_family = AF_INET,
|
|
.protocol = SOCK_DGRAM,
|
|
.recv_socks = 10,
|
|
.recv_port = 8001,
|
|
.send_port_min = 9020});
|
|
test_reuseport_cbpf((struct test_params) {
|
|
.recv_family = AF_INET,
|
|
.send_family = AF_INET,
|
|
.protocol = SOCK_DGRAM,
|
|
.recv_socks = 20,
|
|
.recv_port = 8001,
|
|
.send_port_min = 9020});
|
|
test_extra_filter((struct test_params) {
|
|
.recv_family = AF_INET,
|
|
.protocol = SOCK_DGRAM,
|
|
.recv_port = 8002});
|
|
test_filter_no_reuseport((struct test_params) {
|
|
.recv_family = AF_INET,
|
|
.protocol = SOCK_DGRAM,
|
|
.recv_port = 8008});
|
|
|
|
fprintf(stderr, "---- IPv6 UDP ----\n");
|
|
test_reuseport_ebpf((struct test_params) {
|
|
.recv_family = AF_INET6,
|
|
.send_family = AF_INET6,
|
|
.protocol = SOCK_DGRAM,
|
|
.recv_socks = 10,
|
|
.recv_port = 8003,
|
|
.send_port_min = 9040});
|
|
test_reuseport_ebpf((struct test_params) {
|
|
.recv_family = AF_INET6,
|
|
.send_family = AF_INET6,
|
|
.protocol = SOCK_DGRAM,
|
|
.recv_socks = 20,
|
|
.recv_port = 8003,
|
|
.send_port_min = 9040});
|
|
test_reuseport_cbpf((struct test_params) {
|
|
.recv_family = AF_INET6,
|
|
.send_family = AF_INET6,
|
|
.protocol = SOCK_DGRAM,
|
|
.recv_socks = 10,
|
|
.recv_port = 8004,
|
|
.send_port_min = 9060});
|
|
test_reuseport_cbpf((struct test_params) {
|
|
.recv_family = AF_INET6,
|
|
.send_family = AF_INET6,
|
|
.protocol = SOCK_DGRAM,
|
|
.recv_socks = 20,
|
|
.recv_port = 8004,
|
|
.send_port_min = 9060});
|
|
test_extra_filter((struct test_params) {
|
|
.recv_family = AF_INET6,
|
|
.protocol = SOCK_DGRAM,
|
|
.recv_port = 8005});
|
|
test_filter_no_reuseport((struct test_params) {
|
|
.recv_family = AF_INET6,
|
|
.protocol = SOCK_DGRAM,
|
|
.recv_port = 8009});
|
|
|
|
fprintf(stderr, "---- IPv6 UDP w/ mapped IPv4 ----\n");
|
|
test_reuseport_ebpf((struct test_params) {
|
|
.recv_family = AF_INET6,
|
|
.send_family = AF_INET,
|
|
.protocol = SOCK_DGRAM,
|
|
.recv_socks = 20,
|
|
.recv_port = 8006,
|
|
.send_port_min = 9080});
|
|
test_reuseport_ebpf((struct test_params) {
|
|
.recv_family = AF_INET6,
|
|
.send_family = AF_INET,
|
|
.protocol = SOCK_DGRAM,
|
|
.recv_socks = 10,
|
|
.recv_port = 8006,
|
|
.send_port_min = 9080});
|
|
test_reuseport_cbpf((struct test_params) {
|
|
.recv_family = AF_INET6,
|
|
.send_family = AF_INET,
|
|
.protocol = SOCK_DGRAM,
|
|
.recv_socks = 10,
|
|
.recv_port = 8007,
|
|
.send_port_min = 9100});
|
|
test_reuseport_cbpf((struct test_params) {
|
|
.recv_family = AF_INET6,
|
|
.send_family = AF_INET,
|
|
.protocol = SOCK_DGRAM,
|
|
.recv_socks = 20,
|
|
.recv_port = 8007,
|
|
.send_port_min = 9100});
|
|
|
|
/* TCP fastopen is required for the TCP tests */
|
|
enable_fastopen();
|
|
fprintf(stderr, "---- IPv4 TCP ----\n");
|
|
test_reuseport_ebpf((struct test_params) {
|
|
.recv_family = AF_INET,
|
|
.send_family = AF_INET,
|
|
.protocol = SOCK_STREAM,
|
|
.recv_socks = 10,
|
|
.recv_port = 8008,
|
|
.send_port_min = 9120});
|
|
test_reuseport_cbpf((struct test_params) {
|
|
.recv_family = AF_INET,
|
|
.send_family = AF_INET,
|
|
.protocol = SOCK_STREAM,
|
|
.recv_socks = 10,
|
|
.recv_port = 8009,
|
|
.send_port_min = 9160});
|
|
test_extra_filter((struct test_params) {
|
|
.recv_family = AF_INET,
|
|
.protocol = SOCK_STREAM,
|
|
.recv_port = 8010});
|
|
test_filter_no_reuseport((struct test_params) {
|
|
.recv_family = AF_INET,
|
|
.protocol = SOCK_STREAM,
|
|
.recv_port = 8011});
|
|
|
|
fprintf(stderr, "---- IPv6 TCP ----\n");
|
|
test_reuseport_ebpf((struct test_params) {
|
|
.recv_family = AF_INET6,
|
|
.send_family = AF_INET6,
|
|
.protocol = SOCK_STREAM,
|
|
.recv_socks = 10,
|
|
.recv_port = 8012,
|
|
.send_port_min = 9200});
|
|
test_reuseport_cbpf((struct test_params) {
|
|
.recv_family = AF_INET6,
|
|
.send_family = AF_INET6,
|
|
.protocol = SOCK_STREAM,
|
|
.recv_socks = 10,
|
|
.recv_port = 8013,
|
|
.send_port_min = 9240});
|
|
test_extra_filter((struct test_params) {
|
|
.recv_family = AF_INET6,
|
|
.protocol = SOCK_STREAM,
|
|
.recv_port = 8014});
|
|
test_filter_no_reuseport((struct test_params) {
|
|
.recv_family = AF_INET6,
|
|
.protocol = SOCK_STREAM,
|
|
.recv_port = 8015});
|
|
|
|
fprintf(stderr, "---- IPv6 TCP w/ mapped IPv4 ----\n");
|
|
test_reuseport_ebpf((struct test_params) {
|
|
.recv_family = AF_INET6,
|
|
.send_family = AF_INET,
|
|
.protocol = SOCK_STREAM,
|
|
.recv_socks = 10,
|
|
.recv_port = 8016,
|
|
.send_port_min = 9320});
|
|
test_reuseport_cbpf((struct test_params) {
|
|
.recv_family = AF_INET6,
|
|
.send_family = AF_INET,
|
|
.protocol = SOCK_STREAM,
|
|
.recv_socks = 10,
|
|
.recv_port = 8017,
|
|
.send_port_min = 9360});
|
|
|
|
test_filter_without_bind();
|
|
|
|
fprintf(stderr, "SUCCESS\n");
|
|
return 0;
|
|
}
|