Skip to content

Commit

Permalink
af_xdp: add run as inhibit mode
Browse files Browse the repository at this point in the history
Signed-off-by: Ric Li <[email protected]>
  • Loading branch information
ricmli committed Nov 3, 2023
1 parent 8b35cc9 commit 783d90b
Show file tree
Hide file tree
Showing 2 changed files with 175 additions and 35 deletions.
71 changes: 70 additions & 1 deletion lib/src/dev/mt_af_xdp.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <linux/if_link.h>
#include <linux/if_xdp.h>
#include <linux/sockios.h>
#include <sys/un.h>
#include <xdp/xsk.h>

#include "../mt_flow.h"
Expand Down Expand Up @@ -329,6 +330,67 @@ static int xdp_rx_prod_init(struct mt_xdp_queue* xq) {
return 0;
}

/**
 * Obtain the xsks_map file descriptor over the unix domain socket
 * "/var/run/et_xdp.sock" and register this queue's AF_XDP socket in that map
 * via xsk_socket__update_xskmap().
 *
 * The peer is expected to send one message whose ancillary data carries the
 * map fd as SCM_RIGHTS; the int payload of the message itself is ignored.
 *
 * @param xq
 *   The xdp queue; xq->socket must already be created.
 * @return
 *   0 on success, negative errno-style value on failure.
 */
static int xdp_socket_update_xskmap(struct mt_xdp_queue* xq) {
  enum mtl_port port = xq->port;
  uint16_t q = xq->q;
  struct sockaddr_un server;
  /* union guarantees the control buffer is aligned for struct cmsghdr */
  union {
    char buf[CMSG_SPACE(sizeof(int))];
    struct cmsghdr align;
  } cms;
  struct cmsghdr* cmsg;
  struct msghdr msg;
  struct iovec iov;
  int value = 0;
  int len;
  int ret;
  int xsks_map_fd = -1;

  int sock = socket(AF_UNIX, SOCK_STREAM, 0);
  if (sock < 0) {
    ret = -errno; /* save before any other call can overwrite it */
    err("%s(%d,%u), unix socket create fail, %s\n", __func__, port, q, strerror(-ret));
    return ret;
  }

  memset(&server, 0, sizeof(server));
  server.sun_family = AF_UNIX;
  snprintf(server.sun_path, sizeof(server.sun_path), "/var/run/et_xdp.sock");

  if (connect(sock, (struct sockaddr*)&server, sizeof(server)) < 0) {
    ret = -errno; /* capture errno before close() may clobber it */
    err("%s(%d,%u), connect socket fail, %s\n", __func__, port, q, strerror(-ret));
    goto out;
  }

  iov.iov_base = &value;
  iov.iov_len = sizeof(value);

  memset(&cms, 0, sizeof(cms));
  memset(&msg, 0, sizeof(msg));
  msg.msg_iov = &iov;
  msg.msg_iovlen = 1;
  msg.msg_control = cms.buf;
  msg.msg_controllen = sizeof(cms.buf);

  len = recvmsg(sock, &msg, 0);
  if (len <= 0) {
    err("%s(%d,%u), recvmsg wrong length %d\n", __func__, port, q, len);
    ret = -EINVAL;
    goto out;
  }

  /* the peer may have sent no (or unexpected) ancillary data */
  cmsg = CMSG_FIRSTHDR(&msg);
  if (!cmsg || cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS ||
      cmsg->cmsg_len < CMSG_LEN(sizeof(int))) {
    err("%s(%d,%u), no fd in control message\n", __func__, port, q);
    ret = -EINVAL;
    goto out;
  }

  /* CMSG_DATA is not guaranteed to be int-aligned, copy instead of casting */
  memcpy(&xsks_map_fd, CMSG_DATA(cmsg), sizeof(xsks_map_fd));
  if (xsks_map_fd < 0) {
    err("%s(%d,%u), invalid xsks_map_fd %d\n", __func__, port, q, xsks_map_fd);
    ret = -EINVAL;
    goto out;
  }

  /*
   * NOTE(review): xsks_map_fd is intentionally left open here, as before;
   * libxdp may keep referring to it after the update - confirm before closing.
   */
  ret = xsk_socket__update_xskmap(xq->socket, xsks_map_fd);
  if (ret) {
    err("%s(%d,%u), update xskmap fail, %d\n", __func__, port, q, ret);
    goto out;
  }

out:
  /* the unix socket is only needed for the fd hand-over */
  close(sock);
  return ret;
}

static int xdp_socket_init(struct mt_xdp_priv* xdp, struct mt_xdp_queue* xq) {
enum mtl_port port = xq->port;
uint16_t q = xq->q;
Expand All @@ -340,17 +402,24 @@ static int xdp_socket_init(struct mt_xdp_priv* xdp, struct mt_xdp_queue* xq) {
cfg.rx_size = mt_if_nb_rx_desc(impl, port);
cfg.tx_size = mt_if_nb_tx_desc(impl, port);
cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
if (true /* no root */) /* this will skip load xdp prog */
cfg.libxdp_flags = XSK_LIBXDP_FLAGS__INHIBIT_PROG_LOAD;
// cfg.bind_flags = XDP_USE_NEED_WAKEUP;

const char* if_name = mt_kernel_if_name(impl, port);
ret = xsk_socket__create(&xq->socket, if_name, q, xq->umem, &xq->rx_cons, &xq->tx_prod,
&cfg);
if (ret < 0) {
if (ret == -EPERM) {
err("%s(%d,%u), please run as inhibit mode or root user\n", __func__, port, q);
}
err("%s(%d,%u), xsk create fail %d\n", __func__, port, q, ret);
return ret;
}

xq->socket_fd = xsk_socket__fd(xq->socket);

if (true /* no root */) return xdp_socket_update_xskmap(xq);

return 0;
}

Expand Down
139 changes: 105 additions & 34 deletions lib/src/mt_socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,27 +83,36 @@ int mt_socket_set_if_ip(const char* if_name, uint8_t ip[MTL_IP_ADDR_LEN],
}

int mt_socket_get_if_gateway(const char* if_name, uint8_t gateway[MTL_IP_ADDR_LEN]) {
char cmd[256];
char out[256];
FILE* fp = fopen("/proc/net/route", "r");
char line[100], iface[IF_NAMESIZE], dest[9], gway[9];

snprintf(cmd, sizeof(cmd), "route -n | grep 'UG' | grep '%s' | awk '{print $2}'",
if_name);
int ret = mt_run_cmd(cmd, out, sizeof(out));
if (ret < 0) return ret;
if (fp == NULL) {
err("%s, open /proc/net/route fail\n", __func__);
return -EIO;
}

uint8_t a, b, c, d;
ret = sscanf(out, "%" SCNu8 ".%" SCNu8 ".%" SCNu8 ".%" SCNu8 "", &a, &b, &c, &d);
if (ret < 0) {
info("%s, cmd: %s fail\n", __func__, cmd);
return ret;
/* skip header line */
if (!fgets(line, sizeof(line), fp)) {
err("%s, empty file\n", __func__);
fclose(fp);
return -EIO;
}

dbg("%s, cmd %s out %s\n", __func__, cmd, out);
gateway[0] = a;
gateway[1] = b;
gateway[2] = c;
gateway[3] = d;
return 0;
while (fgets(line, sizeof(line), fp)) {
sscanf(line, "%s %s %s", iface, dest, gway);
if (strcmp(iface, if_name) == 0 && strcmp(dest, "00000000") == 0) {
for (int i = 0; i < MTL_IP_ADDR_LEN; ++i) {
int byte;
sscanf(gway + (MTL_IP_ADDR_LEN - 1 - i) * 2, "%2x", &byte);
gateway[i] = byte;
}
fclose(fp);
return 0;
}
}

fclose(fp);
return -EIO;
}

int mt_socket_get_if_mac(const char* if_name, struct rte_ether_addr* ea) {
Expand Down Expand Up @@ -385,10 +394,8 @@ int mt_socket_get_mac(struct mtl_main_impl* impl, const char* if_name,
int mt_socket_add_flow(struct mtl_main_impl* impl, enum mtl_port port, uint16_t queue_id,
struct mt_rxq_flow* flow) {
struct ifreq ifr;
struct ethtool_rxnfc cmd;
struct ethtool_rx_flow_spec* fs = (void*)&cmd.data;
int ret, fd;
int flow_id = -1;
int free_loc = -1, flow_id = -1;
uint8_t start_queue = mt_afxdp_start_queue(impl, port);
const char* if_name = mt_kernel_if_name(impl, port);
bool has_ip_flow = true;
Expand All @@ -412,37 +419,100 @@ int mt_socket_add_flow(struct mtl_main_impl* impl, enum mtl_port port, uint16_t
}
}

/* open control socket */
fd = socket(AF_INET, SOCK_DGRAM, 0);
if (fd < 0) {
err("%s(%d), cannot get control socket: %d\n", __func__, port, fd);
return fd;
}

memset(&ifr, 0, sizeof(ifr));
memset(&cmd, 0, sizeof(cmd));
strncpy(ifr.ifr_name, if_name, sizeof(ifr.ifr_name) - 1);
struct ethtool_rxnfc cmd;
memset(&cmd, 0, sizeof(cmd));

/* get the free location */
cmd.cmd = ETHTOOL_GRXCLSRLCNT;
ifr.ifr_data = (void*)&cmd;
ret = ioctl(fd, SIOCETHTOOL, &ifr);
if (ret < 0) {
err("%s(%d), cannot get free location: %d\n", __func__, port, ret);
close(fd);
return ret;
}

struct ethtool_rxnfc* cmd_w_rules;
cmd_w_rules = calloc(1, sizeof(*cmd_w_rules) + cmd.rule_cnt * sizeof(uint32_t));
cmd_w_rules->cmd = ETHTOOL_GRXCLSRLALL;
cmd_w_rules->rule_cnt = cmd.rule_cnt;
ifr.ifr_data = (void*)cmd_w_rules;
ret = ioctl(fd, SIOCETHTOOL, &ifr);
if (ret < 0) {
err("%s(%d), cannot get rules: %d\n", __func__, port, ret);
close(fd);
free(cmd_w_rules);
return ret;
}

uint32_t rule_size = cmd_w_rules->data;
free_loc = rule_size - 1;
while (free_loc > 0) {
bool used = false;
for (int i = 0; i < cmd.rule_cnt; i++) {
if (cmd_w_rules->rule_locs[i] == free_loc) {
used = true;
break;
}
}
if (used)
free_loc--;
else {
info("%s(%d), found free location: %d\n", __func__, port, free_loc);
break;
}
}
if (free_loc == 0) {
err("%s(%d), cannot find free location\n", __func__, port);
close(fd);
free(cmd_w_rules);
return -EIO;
}

free(cmd_w_rules);

/* set the flow rule */
memset(&cmd, 0, sizeof(cmd));
cmd.cmd = ETHTOOL_SRXCLSRLINS;
struct ethtool_rx_flow_spec* fs = &cmd.fs;
fs->flow_type = UDP_V4_FLOW;
fs->m_u.udp_ip4_spec.pdst = 0xFFFF;
fs->h_u.udp_ip4_spec.pdst = htons(flow->dst_port);
if (has_ip_flow) {
fs->h_u.udp_ip4_spec.ip4dst = *(uint32_t*)flow->dip_addr;
if (!mt_is_multicast_ip(flow->dip_addr))
fs->h_u.udp_ip4_spec.ip4src = *(uint32_t*)flow->sip_addr;
fs->m_u.udp_ip4_spec.ip4dst = 0xFFFFFFFF;
if (mt_is_multicast_ip(flow->dip_addr)) {
rte_memcpy(&fs->h_u.udp_ip4_spec.ip4dst, flow->dip_addr, MTL_IP_ADDR_LEN);
} else {
fs->m_u.udp_ip4_spec.ip4src = 0xFFFFFFFF;
rte_memcpy(&fs->h_u.udp_ip4_spec.ip4src, flow->dip_addr, MTL_IP_ADDR_LEN);
rte_memcpy(&fs->h_u.udp_ip4_spec.ip4dst, flow->sip_addr, MTL_IP_ADDR_LEN);
}
}
fs->ring_cookie = queue_id + start_queue;
fs->location = 0xFFFFFFFF;
cmd.cmd = ETHTOOL_SRXCLSRLINS;
fd = socket(AF_INET, SOCK_DGRAM, 0);
if (fd < 0) {
err("%s(%d), cannot get control socket: %d\n", __func__, port, fd);
return fd;
}
fs->location = free_loc; /* for some NICs the location must be set */

ifr.ifr_data = (caddr_t)&cmd;
ifr.ifr_data = (void*)&cmd;
ret = ioctl(fd, SIOCETHTOOL, &ifr);
if (ret < 0) {
err("%s(%d), cannot insert classifier: %s, start_queue %u, if %s\n", __func__, port,
strerror(errno), start_queue, if_name);
if (ret == -EPERM)
err("%s(%d), please add capability for the app: sudo setcap 'cap_net_admin+ep' "
"<app>\n",
__func__, port);
close(fd);
return ret;
}
flow_id = cmd.fs.location;
flow_id = fs->location;

close(fd);

Expand All @@ -456,7 +526,7 @@ int mt_socket_remove_flow(struct mtl_main_impl* impl, enum mtl_port port, int fl
const char* if_name = mt_kernel_if_name(impl, port);

if (flow_id <= 0) {
warn("%s(%d), flow_id %d is invalid\n", __func__, port, flow_id);
dbg("%s(%d), flow_id %d is invalid\n", __func__, port, flow_id);
return -EINVAL;
}

Expand All @@ -477,6 +547,7 @@ int mt_socket_remove_flow(struct mtl_main_impl* impl, enum mtl_port port, int fl

ret = ioctl(fd, SIOCETHTOOL, &ifr);
if (ret < 0) {
err("%s(%d), cannot remove classifier: %s\n", __func__, port, strerror(errno));
if (ret == -EPERM)
err("%s(%d), please add capability for the app: sudo setcap 'cap_net_admin+ep' "
"<app>\n",
Expand Down

0 comments on commit 783d90b

Please sign in to comment.