Skip to content

Commit

Permalink
tools/ebpf: add xdp xsk_map_fd server (#557)
Browse files Browse the repository at this point in the history
The AF_XDP backend can run without root privileges by following these
steps:

1. add capabilities for app: sudo setcap 'cap_net_admin+ep
cap_net_raw+ep' ./build/app/RxTxApp
2. start ebpf tool for xdp loading: sudo ./et --prog xdp --ifname
ens785f0,ens785f1
3. start the app without root user or sudo
4. you can exit the ebpf tool once you no longer need to create af_xdp sockets

---------

Signed-off-by: Ric Li <[email protected]>
  • Loading branch information
ricmli authored Nov 6, 2023
1 parent a131aae commit 9da4df0
Show file tree
Hide file tree
Showing 7 changed files with 264 additions and 34 deletions.
80 changes: 79 additions & 1 deletion lib/src/dev/mt_af_xdp.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <linux/if_link.h>
#include <linux/if_xdp.h>
#include <linux/sockios.h>
#include <sys/un.h>
#include <xdp/xsk.h>

#include "../mt_flow.h"
Expand Down Expand Up @@ -339,6 +340,76 @@ static int xdp_rx_prod_init(struct mt_xdp_queue* xq) {
return 0;
}

/**
 * Ask the helper listening on /var/run/et_xdp.sock for the xsks_map fd of
 * `ifname` and insert this queue's AF_XDP socket into that map.
 *
 * Wire protocol: the client sends IFNAMSIZ bytes holding the NUL-padded
 * interface name; the server answers with one message whose SCM_RIGHTS
 * control data carries the map fd.
 *
 * @param xq      queue whose xsk socket is inserted into the map.
 * @param ifname  NUL-terminated kernel interface name.
 * @return 0 on success, a negative errno-style value on failure.
 */
static int xdp_socket_update_xskmap(struct mt_xdp_queue* xq, const char* ifname) {
  enum mtl_port port = xq->port;
  uint16_t q = xq->q;
  int ret;

  int sock = socket(AF_UNIX, SOCK_STREAM, 0);
  if (sock < 0) {
    ret = -errno; /* capture before any later call can overwrite errno */
    err("%s(%d,%u), unix socket create fail, %s\n", __func__, port, q, strerror(-ret));
    return ret;
  }

  struct sockaddr_un server;
  memset(&server, 0, sizeof(server));
  server.sun_family = AF_UNIX;
  snprintf(server.sun_path, sizeof(server.sun_path), "/var/run/et_xdp.sock");

  if (connect(sock, (struct sockaddr*)&server, sizeof(server)) < 0) {
    ret = -errno; /* close() below may clobber errno */
    close(sock);
    err("%s(%d,%u), connect socket fail, %s\n", __func__, port, q, strerror(-ret));
    return ret;
  }

  /* Copy the name into a fixed, zero-padded buffer: the request is always
   * IFNAMSIZ bytes, but the caller's string may be shorter than that. */
  char req[IFNAMSIZ];
  memset(req, 0, sizeof(req));
  snprintf(req, sizeof(req), "%s", ifname);
  ssize_t sent = send(sock, req, sizeof(req), MSG_NOSIGNAL);
  if (sent != (ssize_t)sizeof(req)) {
    ret = (sent < 0) ? -errno : -EIO;
    close(sock);
    err("%s(%d,%u), send ifname fail, %d\n", __func__, port, q, ret);
    return ret;
  }

  /* The union guarantees the control buffer is aligned for struct cmsghdr. */
  union {
    char buf[CMSG_SPACE(sizeof(int))];
    struct cmsghdr align;
  } ctrl;
  int value = 0; /* payload placeholder; only the ancillary data matters */
  struct iovec iov;
  struct msghdr msg;

  iov.iov_base = &value;
  iov.iov_len = sizeof(value);

  memset(&ctrl, 0, sizeof(ctrl));
  memset(&msg, 0, sizeof(msg));
  msg.msg_iov = &iov;
  msg.msg_iovlen = 1;
  msg.msg_control = ctrl.buf;
  msg.msg_controllen = sizeof(ctrl.buf);

  ssize_t len = recvmsg(sock, &msg, 0);

  close(sock);

  if (len <= 0) {
    err("%s(%d,%u), recvmsg wrong length %d\n", __func__, port, q, (int)len);
    return -EINVAL;
  }

  /* CMSG_FIRSTHDR yields NULL when no (complete) control message arrived. */
  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
  if (!cmsg || (msg.msg_flags & MSG_CTRUNC) || cmsg->cmsg_level != SOL_SOCKET ||
      cmsg->cmsg_type != SCM_RIGHTS || cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
    err("%s(%d,%u), invalid cmsg for map fd\n", __func__, port, q);
    return -EINVAL;
  }

  int xsks_map_fd = -1;
  memcpy(&xsks_map_fd, CMSG_DATA(cmsg), sizeof(xsks_map_fd));
  if (xsks_map_fd < 0) {
    /* errno is meaningless here; report the bad value itself */
    err("%s(%d,%u), get xsks_map_fd fail, invalid fd %d\n", __func__, port, q,
        xsks_map_fd);
    return -EINVAL;
  }

  /* NOTE(review): the received fd is intentionally left open here; confirm
   * whether libxdp keeps using it after xsk_socket__update_xskmap before
   * adding a close(). */
  ret = xsk_socket__update_xskmap(xq->socket, xsks_map_fd);
  if (ret) {
    err("%s(%d,%u), update xskmap fail, %d\n", __func__, port, q, ret);
    return ret;
  }

  return 0;
}

/*
 * Create the AF_XDP socket for queue xq and record its fd in xq->socket_fd.
 * When the process is not privileged, XDP program loading is inhibited and
 * the socket is registered through xdp_socket_update_xskmap() instead.
 * Returns 0 on success, otherwise the error from xsk_socket__create() or
 * xdp_socket_update_xskmap().
 * NOTE(review): part of this body is collapsed in the diff view (hunk marker
 * below), so declarations such as cfg/impl/ret are not visible here.
 */
static int xdp_socket_init(struct mt_xdp_priv* xdp, struct mt_xdp_queue* xq) {
enum mtl_port port = xq->port;
uint16_t q = xq->q;
Expand All @@ -350,17 +421,24 @@ static int xdp_socket_init(struct mt_xdp_priv* xdp, struct mt_xdp_queue* xq) {
/* ring sizes follow the interface's configured rx/tx descriptor counts */
cfg.rx_size = mt_if_nb_rx_desc(impl, port);
cfg.tx_size = mt_if_nb_tx_desc(impl, port);
cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
if (!mt_is_privileged(impl)) /* this will skip load xdp prog */
cfg.libxdp_flags = XSK_LIBXDP_FLAGS__INHIBIT_PROG_LOAD;
// cfg.bind_flags = XDP_USE_NEED_WAKEUP;

const char* if_name = mt_kernel_if_name(impl, port);
ret = xsk_socket__create(&xq->socket, if_name, q, xq->umem, &xq->rx_cons, &xq->tx_prod,
&cfg);
if (ret < 0) {
/* -EPERM gets an extra hint before the generic failure message */
if (ret == -EPERM) {
err("%s(%d,%u), please run as inhibit mode or root user\n", __func__, port, q);
}
err("%s(%d,%u), xsk create fail %d\n", __func__, port, q, ret);
return ret;
}

xq->socket_fd = xsk_socket__fd(xq->socket);

/* unprivileged path: program load was inhibited above, so the socket is
 * added to the xsks map via the helper function instead */
if (!mt_is_privileged(impl)) return xdp_socket_update_xskmap(xq, if_name);

return 0;
}

Expand Down
9 changes: 9 additions & 0 deletions lib/src/mt_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,15 @@ mtl_handle mtl_init(struct mtl_init_params* p) {
impl = mt_rte_zmalloc_socket(sizeof(*impl), socket[MTL_PORT_P]);
if (!impl) goto err_exit;

#ifndef WINDOWSENV
if (geteuid() == 0)
impl->privileged = true;
else
impl->privileged = false;
#else
impl->privileged = true;
#endif

rte_memcpy(&impl->user_para, p, sizeof(*p));
impl->var_para.sch_default_sleep_us = 1 * US_PER_MS; /* default 1ms */
/* use sleep zero if sleep us is smaller than this thresh */
Expand Down
5 changes: 5 additions & 0 deletions lib/src/mt_main.h
Original file line number Diff line number Diff line change
Expand Up @@ -1139,12 +1139,17 @@ struct mtl_main_impl {
int mempool_idx;

int arp_timeout_ms;
bool privileged; /* if app running with root privilege */
};

/* Return a pointer to the init parameters stored inside impl (impl->user_para). */
static inline struct mtl_init_params* mt_get_user_params(struct mtl_main_impl* impl) {
  struct mtl_init_params* params = &impl->user_para;
  return params;
}

/* Report the value of impl->privileged (true when running with root privilege). */
static inline bool mt_is_privileged(struct mtl_main_impl* impl) {
  bool is_privileged = impl->privileged;
  return is_privileged;
}

/* Return a pointer to the interface slot of impl that belongs to `port`. */
static inline struct mt_interface* mt_if(struct mtl_main_impl* impl, enum mtl_port port) {
  struct mt_interface* inf = &impl->inf[port];
  return inf;
}
Expand Down
2 changes: 1 addition & 1 deletion tools/ebpf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ vmlinux.h:
SKEL_FILES := $(patsubst %.bpf.c,%.skel.h,$(wildcard *.bpf.c))

et: et.c $(SKEL_FILES)
gcc -Wall -o $@ $(filter %.c,$^) -include $(SKEL_FILES) -l:libbpf.a -lelf -lz
gcc -Wall -o $@ $(filter %.c,$^) -include $(SKEL_FILES) -lxdp -l:libbpf.a -lelf -lz
8 changes: 7 additions & 1 deletion tools/ebpf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ This directory contains tools for eBPF.

## Build

Dependencies: libbpf, bpftool, clang, llvm, gcc, libelf, zlib.
Dependencies: libbpf, libxdp, bpftool, clang, llvm, gcc, libelf, zlib.

```bash
make
Expand All @@ -17,3 +17,9 @@ fentry: a simple program to trace udp_send_skb calls, requires kernel > 5.5.
```bash
sudo ./et --prog fentry [--print]
```

xdp: a privileged program that loads the AF_XDP BPF program and sends the xsk_map_fd to other processes over a Unix domain socket.

```bash
sudo ./et --prog xdp --ifname ens785f0
```
163 changes: 132 additions & 31 deletions tools/ebpf/et.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,37 +6,18 @@

#include <bpf/libbpf.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <net/if.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/un.h>
#include <unistd.h>

/* Identifiers for the long command-line options; used as the `val` field of
 * the option table entries. */
enum et_args_cmd {
ET_ARG_UNKNOWN = 0,
ET_ARG_PRINT_LIBBPF = 0x100, /* start from end of ascii */
ET_ARG_PROG, /* --prog <type> */
ET_ARG_HELP, /* --help */
};

/* Kinds of eBPF program the tool distinguishes; ET_PROG_UNKNOWN (0) is the
 * value of a zero-initialized selection. */
enum et_prog_type {
ET_PROG_UNKNOWN = 0,
ET_PROG_FENTRY,
ET_PROG_KPROBE,
ET_PROG_TRACEPOINT,
ET_PROG_XDP,
};

/* Name of each program type, indexed by enum et_prog_type. There is no
 * initializer for ET_PROG_UNKNOWN, so that slot is NULL. */
static const char* prog_type_str[] = {
[ET_PROG_FENTRY] = "fentry",
[ET_PROG_KPROBE] = "kprobe",
[ET_PROG_TRACEPOINT] = "tracepoint",
[ET_PROG_XDP] = "xdp",
};

/* Tool context filled in by argument parsing.
 * NOTE(review): et_xdp_loop and the --ifname parsing read ctx->xdp_if_cnt and
 * ctx->xdp_ifindex, which are not declared in this definition; confirm which
 * definition of struct et_ctx is actually in effect. */
struct et_ctx {
enum et_prog_type prog_type; /* program type selected with --prog */
};
#include <xdp/xsk.h>

static volatile bool stop = false;

Expand Down Expand Up @@ -65,7 +46,7 @@ static int udp_send_handler(void* ctx, void* data, size_t data_sz) {
return 0;
}

static inline int et_fentry_loop() {
static int et_fentry_loop() {
struct ring_buffer* rb = NULL;
struct fentry_bpf* skel;
int ret = 0;
Expand Down Expand Up @@ -109,18 +90,125 @@ static inline int et_fentry_loop() {
return ret;
}

/**
 * Pass the file descriptor `fd` to the peer of the Unix domain socket `sock`
 * as SCM_RIGHTS ancillary data, accompanied by a one-byte (' ') payload.
 *
 * @param sock  connected AF_UNIX socket.
 * @param fd    descriptor to hand over.
 * @return the sendmsg() result: number of payload bytes sent (1) on success,
 *         -1 with errno set on failure.
 */
static int send_fd(int sock, int fd) {
  /* The union forces the control buffer to be aligned for struct cmsghdr;
   * a plain char array carries no such guarantee. */
  union {
    char buf[CMSG_SPACE(sizeof(int))];
    struct cmsghdr align;
  } ctrl;
  char data[1] = {' '};
  struct iovec iov[1];
  struct msghdr msg;

  memset(&msg, 0, sizeof(msg));
  memset(&ctrl, 0, sizeof(ctrl));

  iov[0].iov_base = data;
  iov[0].iov_len = sizeof(data);

  msg.msg_name = NULL;
  msg.msg_namelen = 0;
  msg.msg_iov = iov;
  msg.msg_iovlen = 1;
  msg.msg_control = ctrl.buf;
  msg.msg_controllen = sizeof(ctrl.buf);

  struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
  cmsg->cmsg_level = SOL_SOCKET;
  cmsg->cmsg_type = SCM_RIGHTS;
  cmsg->cmsg_len = CMSG_LEN(sizeof(int));

  /* memcpy avoids storing through a possibly misaligned int pointer */
  memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));

  /* MSG_NOSIGNAL: a peer that already disconnected yields EPIPE instead of
   * a SIGPIPE that would terminate the process. */
  return (int)sendmsg(sock, &msg, MSG_NOSIGNAL);
}

/**
 * Load the XDP program on every interface listed in ctx, then serve the
 * resulting xsks_map fds over the Unix domain socket at
 * ET_XDP_UNIX_SOCKET_PATH until `stop` is set.
 *
 * Per connection: read an interface name (up to IFNAMSIZ bytes), look up the
 * matching map fd and send it back with send_fd(). A bad or unknown request
 * only drops that connection; the server keeps running.
 *
 * @param ctx  tool context; reads ctx->xdp_if_cnt and ctx->xdp_ifindex.
 * @return 0 on a normal stop, a negative value on setup or accept failure.
 */
static int et_xdp_loop(struct et_ctx* ctx) {
  int ret = 0;
  int sock = -1;
  bool bound = false;

  /* Validate the count before it is used to size the array below. */
  if (ctx->xdp_if_cnt <= 0) {
    printf("please specify interfaces with --ifname <a,b,...>\n");
    return -EIO;
  }

  int xsks_map_fd[ctx->xdp_if_cnt];
  for (int i = 0; i < ctx->xdp_if_cnt; i++) xsks_map_fd[i] = -1;

  /* load xdp program for each interface */
  for (int i = 0; i < ctx->xdp_if_cnt; i++) {
    ret = xsk_setup_xdp_prog(ctx->xdp_ifindex[i], &xsks_map_fd[i]);
    if (ret || xsks_map_fd[i] < 0) {
      printf("xsk_setup_xdp_prog failed for ifindex %d, ret %d\n",
             (int)ctx->xdp_ifindex[i], ret);
      if (!ret) ret = -EIO; /* never report success without a valid map fd */
      goto cleanup;
    }
  }

  sock = socket(AF_UNIX, SOCK_STREAM, 0);
  if (sock < 0) {
    ret = -errno;
    perror("socket create error");
    goto cleanup;
  }

  /* remove a stale socket file left by a previous run; failure is harmless */
  unlink(ET_XDP_UNIX_SOCKET_PATH);

  /* non-blocking listener so the loop can poll the stop flag */
  int flags = fcntl(sock, F_GETFL, 0);
  if (flags < 0 || fcntl(sock, F_SETFL, flags | O_NONBLOCK) < 0) {
    ret = -errno;
    perror("fcntl error");
    goto cleanup;
  }

  struct sockaddr_un addr;
  memset(&addr, 0, sizeof(addr));
  addr.sun_family = AF_UNIX;
  snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", ET_XDP_UNIX_SOCKET_PATH);
  if (bind(sock, (struct sockaddr*)&addr, sizeof(addr)) < 0) {
    ret = -errno;
    perror("bind error");
    goto cleanup;
  }
  bound = true;

  /* allow non-root user to connect */
  if (chmod(ET_XDP_UNIX_SOCKET_PATH, 0666) < 0) {
    ret = -errno;
    perror("chmod error");
    goto cleanup;
  }

  if (listen(sock, 1) < 0) {
    ret = -errno;
    perror("listen error");
    goto cleanup;
  }

  while (!stop) {
    printf("waiting socket connection...\n");
    int conn = accept(sock, NULL, NULL);
    if (conn < 0) {
      if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR) {
        ret = -errno;
        perror("accept error");
        goto cleanup;
      }
      sleep(1);
      continue;
    }

    /* one spare byte so the name is always NUL-terminated before printing */
    char ifname[IFNAMSIZ + 1];
    ssize_t n = recv(conn, ifname, IFNAMSIZ, 0);
    if (n <= 0) {
      printf("recv ifname failed\n");
      close(conn);
      continue;
    }
    ifname[n] = '\0';
    printf("request xsk_map_fd for ifname %s\n", ifname);

    int map_fd = -1;
    int ifindex = if_nametoindex(ifname);
    if (ifindex > 0) { /* 0 means the name did not resolve */
      for (int i = 0; i < ctx->xdp_if_cnt; i++) {
        if (ctx->xdp_ifindex[i] == ifindex) {
          map_fd = xsks_map_fd[i];
          break;
        }
      }
    }
    if (map_fd < 0) {
      /* the socket is world-connectable, so a bad request must not take the
       * whole server down; just drop this client */
      printf("xsk_map_fd not found for %s\n", ifname);
      close(conn);
      continue;
    }

    if (send_fd(conn, map_fd) < 0)
      perror("send_fd error");
    else
      printf("map_fd %d sent, close conn\n", map_fd);
    close(conn);
  }

cleanup:
  if (sock >= 0) close(sock);
  if (bound) unlink(ET_XDP_UNIX_SOCKET_PATH);
  for (int i = 0; i < ctx->xdp_if_cnt; i++) {
    if (xsks_map_fd[i] >= 0) close(xsks_map_fd[i]);
  }
  return ret;
}

/* Long-option table: each entry maps an option name to its ET_ARG_* value;
 * the all-zero entry terminates the list. */
static struct option et_args_options[] = {
    {"print", no_argument, NULL, ET_ARG_PRINT_LIBBPF},
    {"prog", required_argument, NULL, ET_ARG_PROG},
    {"ifname", required_argument, NULL, ET_ARG_IFNAME},
    {"help", no_argument, NULL, ET_ARG_HELP},
    {NULL, 0, NULL, 0},
};

/* Print the command-line usage summary to stdout, one line per option. */
static void et_print_help(void) {
  printf("\n");
  printf("##### Usage: #####\n\n");
  printf(" Params:\n");
  printf(" --help : print this help\n");
  printf(" --print : print libbpf output\n");
  printf(" --prog <type> : attach to prog <type>\n");
  printf(
      " --ifname <name1,name2> : interface names which XDP program will be attached "
      "to\n");
  printf("\n");
}

Expand All @@ -135,11 +223,22 @@ static int et_parse_args(struct et_ctx* ctx, int argc, char** argv) {
case ET_ARG_PROG:
if (strcmp(optarg, "fentry") == 0) {
ctx->prog_type = ET_PROG_FENTRY;
} else if (strcmp(optarg, "xdp") == 0) {
ctx->prog_type = ET_PROG_XDP;
}
break;
case ET_ARG_PRINT_LIBBPF:
libbpf_set_print(libbpf_print_fn);
break;
case ET_ARG_IFNAME:
char* ifname;
ctx->xdp_if_cnt = 0;
ifname = strtok(optarg, ",");
while (ifname) {
ctx->xdp_ifindex[ctx->xdp_if_cnt++] = if_nametoindex(ifname);
ifname = strtok(NULL, ",");
}
break;
case ET_ARG_HELP:
default:
et_print_help();
Expand All @@ -160,7 +259,9 @@ int main(int argc, char** argv) {
case ET_PROG_FENTRY:
et_fentry_loop();
break;

case ET_PROG_XDP:
et_xdp_loop(&ctx);
break;
default:
break;
}
Expand Down
Loading

0 comments on commit 9da4df0

Please sign in to comment.