diff --git a/lib/src/dev/mt_af_xdp.c b/lib/src/dev/mt_af_xdp.c index 08dcbf2a9..266ead09b 100644 --- a/lib/src/dev/mt_af_xdp.c +++ b/lib/src/dev/mt_af_xdp.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "../mt_flow.h" @@ -329,6 +330,67 @@ static int xdp_rx_prod_init(struct mt_xdp_queue* xq) { return 0; } +static int xdp_socket_update_xskmap(struct mt_xdp_queue* xq) { + enum mtl_port port = xq->port; + uint16_t q = xq->q; + struct sockaddr_un server; + int ret; + int xsks_map_fd = -1; + + int sock = socket(AF_UNIX, SOCK_STREAM, 0); + if (sock < 0) { + err("%s(%d,%u), unix socket create fail, %s\n", __func__, port, q, strerror(errno)); + return errno; + } + + server.sun_family = AF_UNIX; + snprintf(server.sun_path, sizeof(server.sun_path), "/var/run/et_xdp.sock"); + + if (connect(sock, (struct sockaddr*)&server, sizeof(struct sockaddr_un)) < 0) { + close(sock); + err("%s(%d,%u), connect socket fail, %s\n", __func__, port, q, strerror(errno)); + return errno; + } + + char cms[CMSG_SPACE(sizeof(int))]; + struct cmsghdr* cmsg; + struct msghdr msg; + struct iovec iov; + int value; + int len; + + iov.iov_base = &value; + iov.iov_len = sizeof(int); + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = (caddr_t)cms; + msg.msg_controllen = sizeof(cms); + + len = recvmsg(sock, &msg, 0); + + if (len <= 0) { + err("%s(%d,%u), recvmsg wrong length %d\n", __func__, port, q, len); + return -EINVAL; + } + + cmsg = CMSG_FIRSTHDR(&msg); + xsks_map_fd = *(int*)CMSG_DATA(cmsg); + if (xsks_map_fd < 0) { + err("%s(%d,%u), get xsks_map_fd fail, %s\n", __func__, port, q, strerror(errno)); + return errno; + } + + ret = xsk_socket__update_xskmap(xq->socket, xsks_map_fd); + if (ret) { + err("%s(%d,%u), get xsks_map_fd fail, %d\n", __func__, port, q, ret); + return ret; + } + + return 0; +} + static int xdp_socket_init(struct mt_xdp_priv* xdp, struct mt_xdp_queue* xq) { enum mtl_port port = xq->port; uint16_t q = xq->q; @@ -340,17 +402,24 @@ static int xdp_socket_init(struct mt_xdp_priv* xdp, struct mt_xdp_queue* xq) { cfg.rx_size = mt_if_nb_rx_desc(impl, port); cfg.tx_size = mt_if_nb_tx_desc(impl, port); cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; + if (true /* no root */) /* this will skip load xdp prog */ + cfg.libxdp_flags = XSK_LIBXDP_FLAGS__INHIBIT_PROG_LOAD; // cfg.bind_flags = XDP_USE_NEED_WAKEUP; const char* if_name = mt_kernel_if_name(impl, port); ret = xsk_socket__create(&xq->socket, if_name, q, xq->umem, &xq->rx_cons, &xq->tx_prod, &cfg); if (ret < 0) { + if (ret == -EPERM) { + err("%s(%d,%u), please run as inhibit mode or root user\n", __func__, port, q); + } err("%s(%d,%u), xsk create fail %d\n", __func__, port, q, ret); return ret; } - xq->socket_fd = xsk_socket__fd(xq->socket); + + if (true /* no root */) return xdp_socket_update_xskmap(xq); + return 0; } diff --git a/lib/src/mt_socket.c b/lib/src/mt_socket.c index 4d72fba43..4b411c5c1 100644 --- a/lib/src/mt_socket.c +++ b/lib/src/mt_socket.c @@ -83,27 +83,36 @@ int mt_socket_set_if_ip(const char* if_name, uint8_t ip[MTL_IP_ADDR_LEN], } int mt_socket_get_if_gateway(const char* if_name, uint8_t gateway[MTL_IP_ADDR_LEN]) { - char cmd[256]; - char out[256]; + FILE* fp = fopen("/proc/net/route", "r"); + char line[100], iface[IF_NAMESIZE], dest[9], gway[9]; - snprintf(cmd, sizeof(cmd), "route -n | grep 'UG' | grep '%s' | awk '{print $2}'", - if_name); - int ret = mt_run_cmd(cmd, out, sizeof(out)); - if (ret < 0) return ret; + if (fp == NULL) { + err("%s, open /proc/net/route fail\n", __func__); + return -EIO; + } - uint8_t a, b, c, d; - ret = sscanf(out, "%" SCNu8 ".%" SCNu8 ".%" SCNu8 ".%" SCNu8 "", &a, &b, &c, &d); - if (ret < 0) { - info("%s, cmd: %s fail\n", __func__, cmd); - return ret; + /* skip header line */ + if (!fgets(line, sizeof(line), fp)) { + err("%s, empty file\n", __func__); + fclose(fp); + return -EIO; } - dbg("%s, cmd %s out %s\n", __func__, cmd, out); - gateway[0] = a; - gateway[1] = b; - gateway[2] = c; - gateway[3] = d; - return 0; + while (fgets(line, sizeof(line), fp)) { + sscanf(line, "%s %s %s", iface, dest, gway); + if (strcmp(iface, if_name) == 0 && strcmp(dest, "00000000") == 0) { + for (int i = 0; i < MTL_IP_ADDR_LEN; ++i) { + int byte; + sscanf(gway + (MTL_IP_ADDR_LEN - 1 - i) * 2, "%2x", &byte); + gateway[i] = byte; + } + fclose(fp); + return 0; + } + } + + fclose(fp); + return -EIO; } int mt_socket_get_if_mac(const char* if_name, struct rte_ether_addr* ea) { @@ -385,10 +394,8 @@ int mt_socket_get_mac(struct mtl_main_impl* impl, const char* if_name, int mt_socket_add_flow(struct mtl_main_impl* impl, enum mtl_port port, uint16_t queue_id, struct mt_rxq_flow* flow) { struct ifreq ifr; - struct ethtool_rxnfc cmd; - struct ethtool_rx_flow_spec* fs = (void*)&cmd.data; int ret, fd; - int flow_id = -1; + int free_loc = -1, flow_id = -1; uint8_t start_queue = mt_afxdp_start_queue(impl, port); const char* if_name = mt_kernel_if_name(impl, port); bool has_ip_flow = true; @@ -412,29 +419,92 @@ int mt_socket_add_flow(struct mtl_main_impl* impl, enum mtl_port port, uint16_t } } + /* open control socket */ + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) { + err("%s(%d), cannot get control socket: %d\n", __func__, port, fd); + return fd; + } + memset(&ifr, 0, sizeof(ifr)); - memset(&cmd, 0, sizeof(cmd)); strncpy(ifr.ifr_name, if_name, sizeof(ifr.ifr_name) - 1); + struct ethtool_rxnfc cmd; + memset(&cmd, 0, sizeof(cmd)); + + /* get the free location */ + cmd.cmd = ETHTOOL_GRXCLSRLCNT; + ifr.ifr_data = (void*)&cmd; + ret = ioctl(fd, SIOCETHTOOL, &ifr); + if (ret < 0) { + err("%s(%d), cannot get free location: %d\n", __func__, port, ret); + close(fd); + return ret; + } + struct ethtool_rxnfc* cmd_w_rules; + cmd_w_rules = calloc(1, sizeof(*cmd_w_rules) + cmd.rule_cnt * sizeof(uint32_t)); + cmd_w_rules->cmd = ETHTOOL_GRXCLSRLALL; + cmd_w_rules->rule_cnt = cmd.rule_cnt; + ifr.ifr_data = (void*)cmd_w_rules; + ret = ioctl(fd, SIOCETHTOOL, &ifr); + if (ret < 0) { + err("%s(%d), cannot get rules: %d\n", __func__, port, ret); + close(fd); + free(cmd_w_rules); + return ret; + } + + uint32_t rule_size = cmd_w_rules->data; + free_loc = rule_size - 1; + while (free_loc > 0) { + bool used = false; + for (int i = 0; i < cmd.rule_cnt; i++) { + if (cmd_w_rules->rule_locs[i] == free_loc) { + used = true; + break; + } + } + if (used) + free_loc--; + else { + info("%s(%d), found free location: %d\n", __func__, port, free_loc); + break; + } + } + if (free_loc == 0) { + err("%s(%d), cannot find free location\n", __func__, port); + close(fd); + free(cmd_w_rules); + return -EIO; + } + + free(cmd_w_rules); + + /* set the flow rule */ + memset(&cmd, 0, sizeof(cmd)); + cmd.cmd = ETHTOOL_SRXCLSRLINS; + struct ethtool_rx_flow_spec* fs = &cmd.fs; fs->flow_type = UDP_V4_FLOW; + fs->m_u.udp_ip4_spec.pdst = 0xFFFF; fs->h_u.udp_ip4_spec.pdst = htons(flow->dst_port); if (has_ip_flow) { - fs->h_u.udp_ip4_spec.ip4dst = *(uint32_t*)flow->dip_addr; - if (!mt_is_multicast_ip(flow->dip_addr)) - fs->h_u.udp_ip4_spec.ip4src = *(uint32_t*)flow->sip_addr; + fs->m_u.udp_ip4_spec.ip4dst = 0xFFFFFFFF; + if (mt_is_multicast_ip(flow->dip_addr)) { + rte_memcpy(&fs->h_u.udp_ip4_spec.ip4dst, flow->dip_addr, MTL_IP_ADDR_LEN); + } else { + fs->m_u.udp_ip4_spec.ip4src = 0xFFFFFFFF; + rte_memcpy(&fs->h_u.udp_ip4_spec.ip4src, flow->dip_addr, MTL_IP_ADDR_LEN); + rte_memcpy(&fs->h_u.udp_ip4_spec.ip4dst, flow->sip_addr, MTL_IP_ADDR_LEN); + } } fs->ring_cookie = queue_id + start_queue; - fs->location = 0xFFFFFFFF; - cmd.cmd = ETHTOOL_SRXCLSRLINS; - fd = socket(AF_INET, SOCK_DGRAM, 0); - if (fd < 0) { - err("%s(%d), cannot get control socket: %d\n", __func__, port, fd); - return fd; - } + fs->location = free_loc; /* for some NICs the location must be set */ - ifr.ifr_data = (caddr_t)&cmd; + ifr.ifr_data = (void*)&cmd; ret = ioctl(fd, SIOCETHTOOL, &ifr); if (ret < 0) { + err("%s(%d), cannot insert classifier: %s, start_queue %u, if %s\n", __func__, port, + strerror(errno), start_queue, if_name); if (ret == -EPERM) err("%s(%d), please add capability for the app: sudo setcap 'cap_net_admin+ep' " "\n", @@ -442,7 +512,7 @@ int mt_socket_add_flow(struct mtl_main_impl* impl, enum mtl_port port, uint16_t close(fd); return ret; } - flow_id = cmd.fs.location; + flow_id = fs->location; close(fd); @@ -456,7 +526,7 @@ int mt_socket_remove_flow(struct mtl_main_impl* impl, enum mtl_port port, int fl const char* if_name = mt_kernel_if_name(impl, port); if (flow_id <= 0) { - warn("%s(%d), flow_id %d is invalid\n", __func__, port, flow_id); + dbg("%s(%d), flow_id %d is invalid\n", __func__, port, flow_id); return -EINVAL; } @@ -477,6 +547,7 @@ int mt_socket_remove_flow(struct mtl_main_impl* impl, enum mtl_port port, int fl ret = ioctl(fd, SIOCETHTOOL, &ifr); if (ret < 0) { + err("%s(%d), cannot remove classifier: %s\n", __func__, port, strerror(errno)); if (ret == -EPERM) err("%s(%d), please add capability for the app: sudo setcap 'cap_net_admin+ep' " "\n",