From 9c67a44edea74b584e123ef2cd096a9594e291db Mon Sep 17 00:00:00 2001
From: Bill Nguyen
Date: Wed, 30 Oct 2024 15:57:53 +1100
Subject: [PATCH] refactor net uio driver and improve logging

Signed-off-by: Bill Nguyen
---
 tools/linux/include/uio/net.h      |  23 +++
 tools/linux/uio_drivers/net/main.c | 241 ++++++++++++-----------------
 2 files changed, 120 insertions(+), 144 deletions(-)

diff --git a/tools/linux/include/uio/net.h b/tools/linux/include/uio/net.h
index 8616aa64..7aae130b 100644
--- a/tools/linux/include/uio/net.h
+++ b/tools/linux/include/uio/net.h
@@ -12,6 +12,29 @@
 typedef struct {
     /* Any info that the VMM wants to give us go in here */
+
+    /* We need these as the network virtualisers give us physical addresses
+       in the data regions, so we need the bases to compute the offsets to
+       actually read the data. */
     uint64_t rx_paddr;
     uint64_t tx_paddrs[NUM_NETWORK_CLIENTS];
 } vmm_net_info_t;
+
+/* These are where the UIO shared memory regions and IRQs live */
+#define UIO_PATH_SDDF_NET_CONTROL_AND_DATA_QUEUES "/dev/uio0"
+
+/* This is how the VMM notifies us of notifications from the TX and RX virtualisers.
+   Once the IRQ is enabled by writing to one of these UIO FDs, a read on the
+   same FD will block until the VMM does a virq_inject on the associated IRQ
+   number in the device tree. */
+#define UIO_PATH_SDDF_NET_INCOMING_TX_IRQ "/dev/uio1"
+#define UIO_PATH_SDDF_NET_INCOMING_RX_IRQ "/dev/uio2"
+
+/* Data passing between the VMM and the guest. Currently it is used to pass
+   the physical addresses of the data regions. */
+#define UIO_PATH_SDDF_NET_SHARED_DATA "/dev/uio3"
+
+/* This is how the guest signals back to the VMM: the physical addresses of these
+   UIO regions are unmapped, so a write generates a fault for the VMM to handle. */
+#define UIO_PATH_SDDF_NET_TX_FAULT_TO_VMM "/dev/uio4"
+#define UIO_PATH_SDDF_NET_RX_FAULT_TO_VMM "/dev/uio5"
diff --git a/tools/linux/uio_drivers/net/main.c b/tools/linux/uio_drivers/net/main.c
index 94325b74..d80b1797 100644
--- a/tools/linux/uio_drivers/net/main.c
+++ b/tools/linux/uio_drivers/net/main.c
@@ -73,26 +73,26 @@ struct ifreq ifr;
 int create_promiscuous_socket(void) {
     int sockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
     if (sockfd == -1) {
-        perror("socket");
+        perror("create_promiscuous_socket(): socket()");
+        LOG_NET_ERR("could not create socket.\n");
         exit(EXIT_FAILURE);
     }
 
     strncpy(ifr.ifr_name, NET_INTERFACE, IFNAMSIZ);
-
     if (ioctl(sockfd, SIOCGIFINDEX, &ifr) == -1) {
-        perror("ioctl SIOCGIFINDEX");
+        perror("create_promiscuous_socket(): ioctl()");
+        LOG_NET_ERR("could not get network interface index.\n");
         exit(EXIT_FAILURE);
     }
     int ifindex = ifr.ifr_ifindex;
 
-    // Enable promiscuous mode
     struct packet_mreq mr;
     memset(&mr, 0, sizeof(mr));
     mr.mr_ifindex = ifindex;
     mr.mr_type = PACKET_MR_PROMISC;
-
     if (setsockopt(sockfd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mr, sizeof(mr)) == -1) {
-        perror("setsockopt PACKET_ADD_MEMBERSHIP");
+        perror("create_promiscuous_socket(): setsockopt()");
+        LOG_NET_ERR("could not set net device to promiscuous mode.\n");
         exit(EXIT_FAILURE);
     }
 
@@ -101,9 +101,9 @@ int create_promiscuous_socket(void) {
     bind_address.sll_family = AF_PACKET;
     bind_address.sll_protocol = htons(ETH_P_ALL);
     bind_address.sll_ifindex = ifindex;
-
     if (bind(sockfd, (struct sockaddr *)&bind_address, sizeof(bind_address)) == -1) {
-        perror("bind");
+        perror("create_promiscuous_socket(): bind()");
+        LOG_NET_ERR("could not bind socket to the network interface.\n");
         exit(EXIT_FAILURE);
     }
 
@@ -113,10 +113,9 @@ int create_promiscuous_socket(void) {
 int create_epoll(void) {
     epoll_fd = epoll_create1(0);
     if (epoll_fd == -1) {
+        perror("create_epoll(): epoll_create1()");
         LOG_NET_ERR("can't create the epoll fd.\n");
-        exit(1);
-    } else {
-        LOG_NET("created epoll fd %d\n", epoll_fd);
+        exit(EXIT_FAILURE);
     }
     return epoll_fd;
 }
@@ -126,20 +125,18 @@ void bind_fd_to_epoll(int fd, int epollfd) {
     sock_event.events = EPOLLIN;
     sock_event.data.fd = fd;
     if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &sock_event) == -1) {
-        LOG_NET_ERR("can't register fd %d to epoll.\n", fd);
-        exit(1);
-    } else {
-        LOG_NET("registered fd %d to epoll\n", fd);
+        perror("bind_fd_to_epoll(): epoll_ctl()");
+        LOG_NET_ERR("can't register fd %d to epoll fd %d.\n", fd, epollfd);
+        exit(EXIT_FAILURE);
     }
 }
 
 int open_uio(const char *abs_path) {
     int fd = open(abs_path, O_RDWR);
     if (fd == -1) {
+        perror("open_uio(): open()");
         LOG_NET_ERR("can't open uio @ %s.\n", abs_path);
-        exit(1);
-    } else {
-        LOG_NET("opened uio %s with fd %d\n", abs_path, fd);
+        exit(EXIT_FAILURE);
     }
     return fd;
 }
@@ -147,30 +144,28 @@ int open_uio(const char *abs_path) {
 char *map_uio(uint64_t length, int uiofd) {
     void *base = (char *) mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED, uiofd, 0);
     if (base == MAP_FAILED) {
+        perror("map_uio(): mmap()");
         LOG_NET_ERR("can't mmap uio fd %d\n", uiofd);
-        exit(1);
-    } else {
-        LOG_NET("mmap uio success for fd %d\n", uiofd);
+        exit(EXIT_FAILURE);
     }
-
     return (char *) base;
 }
 
 void uio_interrupt_ack(int uiofd) {
     uint32_t enable = 1;
     if (write(uiofd, &enable, sizeof(uint32_t)) != sizeof(uint32_t)) {
-        LOG_NET_ERR("Failed to Enable interrupts on uio fd %d\n", uiofd);
-        exit(1);
+        perror("uio_interrupt_ack(): write()");
+        LOG_NET_ERR("Failed to write enable/ack interrupts on uio fd %d\n", uiofd);
+        exit(EXIT_FAILURE);
     }
-    fsync(uiofd);
 }
 
 void tx_process(void) {
     while (!net_queue_empty_active(&tx_queue)) {
         net_buff_desc_t tx_buffer;
         if (net_dequeue_active(&tx_queue, &tx_buffer) != 0) {
-            LOG_NET_ERR("couldn't dequeue active TX buffer, err is %d, quitting.\n");
-            exit(1);
+            LOG_NET_ERR("couldn't dequeue active TX buffer, quitting.\n");
+            exit(EXIT_FAILURE);
         }
 
         // Workout which client it is from, so we can get the offset in data region
@@ -189,55 +184,71 @@ void tx_process(void) {
         }
 
         if (!tx_data_paddr_found) {
-            LOG_NET_ERR("couldn't find corresponding client for DMA addr 0x%p, quitting.\n", dma_tx_addr);
-            exit(1);
+            LOG_NET_ERR("couldn't find corresponding client for DMA addr %p, quitting.\n", dma_tx_addr);
+            exit(EXIT_FAILURE);
         }
 
         char *tx_data_base = tx_datas_drv[tx_client];
         char *tx_data = (char *) ((uintptr_t) tx_data_base + tx_data_offset);
 
         // Blocking send!
-        // Prepare the sockaddr_ll structure
         struct sockaddr_ll sa;
         memset(&sa, 0, sizeof(sa));
-        sa.sll_family = AF_PACKET;          // Layer 2
-        sa.sll_protocol = htons(ETH_P_ALL); // Set the protocol
-        sa.sll_ifindex = ifr.ifr_ifindex;   // Interface index
+        sa.sll_family = AF_PACKET;
+        sa.sll_protocol = htons(ETH_P_ALL);
+        sa.sll_ifindex = ifr.ifr_ifindex;
        sa.sll_halen = ETH_ALEN;
 
         int sent_bytes = sendto(sock_fd, tx_data, tx_buffer.len, 0, (struct sockaddr*)&sa, sizeof(sa));;
         if (sent_bytes != tx_buffer.len) {
-            perror("sendto");
+            perror("tx_process(): sendto()");
             LOG_NET_ERR("TX sent %d != expected %d. qutting.\n", sent_bytes, tx_buffer.len);
-            exit(1);
+            exit(EXIT_FAILURE);
         }
 
-        struct ethhdr *eth_header = (struct ethhdr *) tx_data;
-        // printf("sent a frame of length %d from client #%d, ", sent_bytes, tx_client);
-        // printf("source MAC: %02x:%02x:%02x:%02x:%02x:%02x\n",
-        //     eth_header->h_source[0],
-        //     eth_header->h_source[1],
-        //     eth_header->h_source[2],
-        //     eth_header->h_source[3],
-        //     eth_header->h_source[4],
-        //     eth_header->h_source[5]
-        // );
-        // printf("destination MAC: %02x:%02x:%02x:%02x:%02x:%02x\n",
-        //     eth_header->h_dest[0],
-        //     eth_header->h_dest[1],
-        //     eth_header->h_dest[2],
-        //     eth_header->h_dest[3],
-        //     eth_header->h_dest[4],
-        //     eth_header->h_dest[5]
-        // );
-
         if (net_enqueue_free(&tx_queue, tx_buffer) != 0) {
-            LOG_NET_ERR("Couldn't return free TX buffer into the free queue. quiting\n");
-            exit(1);
+            LOG_NET_ERR("Couldn't return free TX buffer into the sddf TX free queue. quitting.\n");
+            exit(EXIT_FAILURE);
         }
     }
+
+    net_request_signal_active(&tx_queue);
+    *sddf_net_tx_outgoing_irq_fault_vaddr = 0;
 }
 
+void rx_process(void) {
+    if (net_queue_empty_free(&rx_queue)) {
+        LOG_NET_WARN("Received a frame but RX free queue is empty. Dropping the frame.\n");
+        return;
+    }
+
+    net_buff_desc_t buffer;
+    int err = net_dequeue_free(&rx_queue, &buffer);
+    if (err) {
+        LOG_NET_ERR("couldn't dequeue a free RX buf\n");
+        exit(EXIT_FAILURE);
+    }
+
+    // Convert DMA addr from virtualiser to offset
+    uintptr_t offset = buffer.io_or_offset - vmm_info_passing->rx_paddr;
+    char *buf_in_sddf_rx_data = (char *) ((uintptr_t) rx_data_drv + offset);
+
+    // Write to the data buffer
+    int num_bytes = recv(sock_fd, buf_in_sddf_rx_data, ETH_FRAME_LEN, 0);
+    if (num_bytes < 0) {
+        perror("rx_process(): recv()");
+        LOG_NET_ERR("couldn't recv from raw sock, offset is %p, buf_in_sddf_rx_data is %p\n", (void *) offset, buf_in_sddf_rx_data);
+        exit(EXIT_FAILURE);
+    }
+
+    // Enqueue it to the active queue
+    buffer.len = num_bytes;
+    if (net_enqueue_active(&rx_queue, buffer) != 0) {
+        LOG_NET_ERR("couldn't enqueue active RX buffer, quitting.\n");
+        exit(EXIT_FAILURE);
+    }
+    *sddf_net_rx_outgoing_irq_fault_vaddr = 0;
+}
 
 int main(int argc, char **argv)
 {
@@ -245,15 +256,18 @@
 
     LOG_NET("*** Setting up raw promiscuous socket\n");
     sock_fd = create_promiscuous_socket();
-    // bind_sock_to_net_inf(sock_fd);
 
-    LOG_NET("*** Binding socket to epoll\n");
+    LOG_NET("*** Creating epoll\n");
     epoll_fd = create_epoll();
+
+    LOG_NET("*** Binding socket to epoll\n");
     bind_fd_to_epoll(sock_fd, epoll_fd);
 
     LOG_NET("*** Mapping in sDDF control and data queues\n");
-    uio_sddf_net_queues_fd = open_uio("/dev/uio0");
-    sddf_net_queues_vaddr = map_uio((NET_DATA_REGION_CAPACITY * 4) + (NET_DATA_REGION_CAPACITY * (1 + NUM_NETWORK_CLIENTS)), uio_sddf_net_queues_fd);
+    uio_sddf_net_queues_fd = open_uio(UIO_PATH_SDDF_NET_CONTROL_AND_DATA_QUEUES);
+    // TX active+free and RX active+free queues, plus the common RX data region and per-client TX data regions
+    uint64_t sddf_net_control_and_data_size = (NET_DATA_REGION_CAPACITY * 4) + (NET_DATA_REGION_CAPACITY * (1 + NUM_NETWORK_CLIENTS));
+    sddf_net_queues_vaddr = map_uio(sddf_net_control_and_data_size, uio_sddf_net_queues_fd);
 
     LOG_NET("*** Setting up sDDF control and data queues\n");
     rx_free_drv = sddf_net_queues_vaddr;
@@ -261,8 +275,9 @@ int main(int argc, char **argv)
     tx_free_drv = (char *) ((uint64_t) rx_active_drv + NET_DATA_REGION_CAPACITY);
     tx_active_drv = (char *) ((uint64_t) tx_free_drv + NET_DATA_REGION_CAPACITY);
     rx_data_drv = (char *) ((uint64_t) tx_active_drv + NET_DATA_REGION_CAPACITY);
-    tx_datas_drv[0] = (char *) ((uint64_t) rx_data_drv + (NET_DATA_REGION_CAPACITY));
-    tx_datas_drv[1] = (char *) ((uint64_t) rx_data_drv + (NET_DATA_REGION_CAPACITY * 2));
+    for (int i = 0; i < NUM_NETWORK_CLIENTS; i++) {
+        tx_datas_drv[i] = (char *) ((uint64_t) rx_data_drv + (NET_DATA_REGION_CAPACITY * (i + 1)));
+    }
 
     net_queue_init(&rx_queue, (net_queue_t *)rx_free_drv, (net_queue_t *)rx_active_drv, NET_RX_QUEUE_CAPACITY_DRIV);
     net_queue_init(&tx_queue, (net_queue_t *)tx_free_drv, (net_queue_t *)tx_active_drv, NET_TX_QUEUE_CAPACITY_DRIV);
@@ -272,56 +287,49 @@ int main(int argc, char **argv)
     LOG_NET("tx_free_drv = 0x%p\n", tx_free_drv);
     LOG_NET("tx_active_drv = 0x%p\n", tx_active_drv);
     LOG_NET("rx_data_drv = 0x%p\n", rx_data_drv);
-    LOG_NET("tx_data_drv cli0 = 0x%p\n", tx_datas_drv[0]);
-    LOG_NET("tx_data_drv cli1 = 0x%p\n", tx_datas_drv[1]);
+    for (int i = 0; i < NUM_NETWORK_CLIENTS; i++) {
+        LOG_NET("tx_data_drv cli%d = 0x%p\n", i, tx_datas_drv[i]);
+    }
 
     LOG_NET("*** Setting up UIO TX and RX interrupts from VMM \"incoming\"\n");
-    uio_sddf_net_tx_incoming_fd = open_uio("/dev/uio1");
-    uio_sddf_net_rx_incoming_fd = open_uio("/dev/uio2");
+    uio_sddf_net_tx_incoming_fd = open_uio(UIO_PATH_SDDF_NET_INCOMING_TX_IRQ);
+    uio_sddf_net_rx_incoming_fd = open_uio(UIO_PATH_SDDF_NET_INCOMING_RX_IRQ);
     uio_interrupt_ack(uio_sddf_net_tx_incoming_fd);
     uio_interrupt_ack(uio_sddf_net_rx_incoming_fd);
 
-    LOG_NET("*** Binding UIO TX and RX incoming interrupts to epoll\n");
+    LOG_NET("*** Binding UIO TX and RX incoming interrupts to epoll\n"); // So we can block on them, instead of polling.
     bind_fd_to_epoll(uio_sddf_net_tx_incoming_fd, epoll_fd);
     bind_fd_to_epoll(uio_sddf_net_rx_incoming_fd, epoll_fd);
 
     LOG_NET("*** Setting up UIO data passing between VMM and us\n");
-    uio_sddf_vmm_net_info_passing_fd = open_uio("/dev/uio3");
+    uio_sddf_vmm_net_info_passing_fd = open_uio(UIO_PATH_SDDF_NET_SHARED_DATA);
     vmm_info_passing = (vmm_net_info_t *) map_uio(PAGE_SIZE_4K, uio_sddf_vmm_net_info_passing_fd);
     LOG_NET("RX paddr: 0x%p\n", vmm_info_passing->rx_paddr);
-    LOG_NET("TX cli0 paddr: 0x%p\n", vmm_info_passing->tx_paddrs[0]);
-    LOG_NET("TX cli1 paddr: 0x%p\n", vmm_info_passing->tx_paddrs[1]);
+    for (int i = 0; i < NUM_NETWORK_CLIENTS; i++) {
+        LOG_NET("TX cli%d paddr: 0x%p\n", i, vmm_info_passing->tx_paddrs[i]);
+    }
 
     LOG_NET("*** Setting up UIO TX and RX interrupts to VMM \"outgoing\"\n");
-    uio_sddf_net_tx_outgoing_fd = open_uio("/dev/uio4");
-    uio_sddf_net_rx_outgoing_fd = open_uio("/dev/uio5");
+    uio_sddf_net_tx_outgoing_fd = open_uio(UIO_PATH_SDDF_NET_TX_FAULT_TO_VMM);
+    uio_sddf_net_rx_outgoing_fd = open_uio(UIO_PATH_SDDF_NET_RX_FAULT_TO_VMM);
+    // Writes to these vaddrs fault on the unmapped physical addresses of the UIO regions, signalling the VMM.
     sddf_net_tx_outgoing_irq_fault_vaddr = map_uio(PAGE_SIZE_4K, uio_sddf_net_tx_outgoing_fd);
     sddf_net_rx_outgoing_irq_fault_vaddr = map_uio(PAGE_SIZE_4K, uio_sddf_net_rx_outgoing_fd);
 
     LOG_NET("*** Waiting for RX virt to boot up\n");
-    net_buff_desc_t rx_buffer;
-    while (net_queue_empty_free(&rx_queue)) {
-        LOG_NET("tail of rx free is %d\n", ((net_queue_t *) rx_free_drv)->tail);
-    }
+    while (net_queue_empty_free(&rx_queue));
 
-    LOG_NET("*** Fetching a free buffer from the RX free queue\n");
-    if (net_dequeue_free(&rx_queue, &rx_buffer) != 0) {
-        LOG_NET_ERR("couldn't dequeue first free RX buffer, quitting.\n");
-        exit(1);
-    }
-
-    LOG_NET("*** All initialisation successful, now sending all TX active before we block on events\n");
-    tx_process();
+    LOG_NET("*** All initialisation successful, now sending all pending TX active before we block on events\n");
     net_request_signal_active(&tx_queue);
-    *sddf_net_tx_outgoing_irq_fault_vaddr = 0;
+    // tx_process();
 
     LOG_NET("*** All pending TX active have been sent thru the raw sock, entering event loop now.\n");
-
+    LOG_NET("*** You won't see any more output from UIO Net unless there is a warning or error.\n");
     while (1) {
         int n_events = epoll_wait(epoll_fd, events, MAX_EVENTS, -1);
         if (n_events == -1) {
             LOG_NET_ERR("epoll wait failed\n");
-            exit(1);
+            exit(EXIT_FAILURE);
         }
         if (n_events == MAX_EVENTS) {
             LOG_NET_WARN("epoll_wait() returned MAX_EVENTS, there maybe dropped events!\n");
@@ -335,70 +343,15 @@ int main(int argc, char **argv)
 
             if (events[i].data.fd == sock_fd) {
                 // Oh hey got a frame from the network device!
-                // Convert DMA addr from virtualiser to offset
-                // printf("got stuff from sock\n");
-
-                uintptr_t offset = rx_buffer.io_or_offset - vmm_info_passing->rx_paddr;
-                char *buf_in_sddf_rx_data = (char *) ((uintptr_t) rx_data_drv + offset);
-
-                // Write to the data buffer
-                int num_bytes = recv(sock_fd, buf_in_sddf_rx_data, ETH_FRAME_LEN, 0);
-                if (num_bytes < 0) {
-                    LOG_NET_ERR("couldnt recv from raw sock\n");
-                    exit(1);
-                }
-
-                // printf("recv a frame of size %d\n", num_bytes);
-                // struct ethhdr *eth_header = (struct ethhdr *) buf_in_sddf_rx_data;
-                // printf("source MAC: %02x:%02x:%02x:%02x:%02x:%02x\n",
-                //     eth_header->h_source[0],
-                //     eth_header->h_source[1],
-                //     eth_header->h_source[2],
-                //     eth_header->h_source[3],
-                //     eth_header->h_source[4],
-                //     eth_header->h_source[5]
-                // );
-                // printf("destination MAC: %02x:%02x:%02x:%02x:%02x:%02x\n",
-                //     eth_header->h_dest[0],
-                //     eth_header->h_dest[1],
-                //     eth_header->h_dest[2],
-                //     eth_header->h_dest[3],
-                //     eth_header->h_dest[4],
-                //     eth_header->h_dest[5]
-                // );
-
-                // Enqueue it to the active queue
-                rx_buffer.len = num_bytes;
-                if (net_enqueue_active(&rx_queue, rx_buffer) != 0) {
-                    LOG_NET_ERR("couldn't enqueue active RX buffer, quitting.\n");
-                    exit(1);
-                }
-
-                // Prepare a buffer for the next recv
-                if (net_dequeue_free(&rx_queue, &rx_buffer) != 0) {
-                    LOG_NET_ERR("couldn't dequeue free RX buffer for next recv, quitting.\n");
-                    exit(1);
-                }
-
-                // Signal to virtualiser
-
-                // if (num_bytes != 60) {
-                //     printf("signalling rx virt with buffer size %d\n", num_bytes);
-                // }
-                *sddf_net_rx_outgoing_irq_fault_vaddr = 0;
+                rx_process();
             } else if (events[i].data.fd == uio_sddf_net_tx_incoming_fd) {
                 // Got virt TX ntfn from VMM, send it thru the raw socket
                 tx_process();
-
-                net_request_signal_active(&tx_queue);
                 uio_interrupt_ack(uio_sddf_net_tx_incoming_fd);
-                *sddf_net_tx_outgoing_irq_fault_vaddr = 0;
-
             } else if (events[i].data.fd == uio_sddf_net_rx_incoming_fd) {
                 // Got RX virt ntfn from VMM.
                 // Don't care, we are grabbing the free bufs ourselves.
-                // printf("Got virt RX ntfn from VMM\n");
                 uio_interrupt_ack(uio_sddf_net_rx_incoming_fd);
             } else {
                 LOG_NET_WARN("epoll_wait() returned event on unknown fd %d\n", events[i].data.fd);
@@ -406,6 +359,6 @@ int main(int argc, char **argv)
         }
     }
 
-    LOG_NET_WARN("event loop has break??\n");
+    LOG_NET_WARN("Event loop exited unexpectedly.\n");
     return 0;
 }