Skip to content

Commit

Permalink
ARP - workaround for apparent bug in netlink causing repeated EBUSY. (#…
Browse files Browse the repository at this point in the history
…1612)

* ARP - workaround for apparent bug in netlink causing repeated EBUSY. Removed unnecessary logging. Some bugs fixed

* Removed redundant definition
  • Loading branch information
nbridge-jump authored and mmcgee-jump committed Apr 25, 2024
1 parent fe551c1 commit d53c22e
Show file tree
Hide file tree
Showing 6 changed files with 240 additions and 36 deletions.
1 change: 1 addition & 0 deletions src/waltz/ip/Local.mk
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ $(call add-objs,fd_ip fd_netlink,fd_waltz)
$(call make-unit-test,test_netlink,test_netlink,fd_waltz fd_util)
$(call make-unit-test,test_ip,test_ip,fd_waltz fd_util)
$(call make-unit-test,test_routing,test_routing,fd_waltz fd_util)
$(call make-unit-test,test_routing_load,test_routing_load,fd_waltz fd_util)
$(call make-unit-test,test_arp,test_arp,fd_waltz fd_util)

$(call run-unit-test,test_netlink)
Expand Down
96 changes: 80 additions & 16 deletions src/waltz/ip/fd_ip.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@ fd_ip_footprint( ulong arp_entries,
ulong l;

l = FD_LAYOUT_INIT;
l = FD_LAYOUT_APPEND( l, FD_IP_ALIGN, sizeof(fd_ip_t) );
l = FD_LAYOUT_APPEND( l, FD_IP_ALIGN, sizeof(fd_nl_t) );
l = FD_LAYOUT_APPEND( l, FD_IP_ALIGN, arp_entries * sizeof(fd_nl_arp_entry_t) );
l = FD_LAYOUT_APPEND( l, FD_IP_ALIGN, route_entries * sizeof(fd_nl_route_entry_t) );
l = FD_LAYOUT_APPEND( l, FD_IP_ALIGN, sizeof(fd_ip_t) );
l = FD_LAYOUT_APPEND( l, FD_IP_ALIGN, sizeof(fd_nl_t) );
/* allocate enough space for two of each kind of table */
l = FD_LAYOUT_APPEND( l, FD_IP_ALIGN, 2 * arp_entries * sizeof(fd_nl_arp_entry_t) );
l = FD_LAYOUT_APPEND( l, FD_IP_ALIGN, 2 * route_entries * sizeof(fd_nl_route_entry_t) );

return FD_LAYOUT_FINI( l, FD_IP_ALIGN );
}
Expand Down Expand Up @@ -53,10 +54,10 @@ fd_ip_new( void * shmem,
l = FD_LAYOUT_APPEND( l, FD_IP_ALIGN, sizeof(fd_nl_t) );

ulong ofs_arp_table = FD_ULONG_ALIGN_UP( l, FD_IP_ALIGN );
l = FD_LAYOUT_APPEND( l, FD_IP_ALIGN, arp_entries * sizeof(fd_nl_arp_entry_t) );
l = FD_LAYOUT_APPEND( l, FD_IP_ALIGN, 2 * arp_entries * sizeof(fd_nl_arp_entry_t) );

ulong ofs_route_table = FD_ULONG_ALIGN_UP( l, FD_IP_ALIGN );
l = FD_LAYOUT_APPEND( l, FD_IP_ALIGN, route_entries * sizeof(fd_nl_route_entry_t) );
l = FD_LAYOUT_APPEND( l, FD_IP_ALIGN, 2 * route_entries * sizeof(fd_nl_route_entry_t) );

ulong mem_sz = FD_LAYOUT_FINI( l, FD_IP_ALIGN );

Expand All @@ -69,6 +70,8 @@ fd_ip_new( void * shmem,
ip->ofs_netlink = ofs_netlink;
ip->ofs_arp_table = ofs_arp_table;
ip->ofs_route_table = ofs_route_table;
ip->arp_table_idx = 0;
ip->route_table_idx = 0;

/* set magic last, after a fence */
FD_COMPILER_MFENCE();
Expand Down Expand Up @@ -139,36 +142,88 @@ fd_ip_netlink_get( fd_ip_t * ip ) {
}


/* fd_ip_route_table_get returns a pointer to the first entry of the
   current routing table.

   Storage for two routing tables of ip->num_route_entries entries each
   begins ip->ofs_route_table bytes past the start of ip.
   ip->route_table_idx selects which of the two is current. */
fd_ip_route_entry_t *
fd_ip_route_table_get( fd_ip_t * ip ) {
  ulong mem = (ulong)ip;

  /* find the first table from the offset */
  fd_ip_route_entry_t * first_table = (fd_ip_route_entry_t*)( mem + ip->ofs_route_table );

  /* find the table index */
  uint idx = ip->route_table_idx;

  /* offset to the current table; there must be no earlier return here,
     otherwise the first table would be returned regardless of idx */
  return first_table + ( idx * ip->num_route_entries );
}


/* fd_ip_route_table_get_alt returns a pointer to the first entry of
   the alternate routing table, i.e. the one of the two tables that is
   not selected by ip->route_table_idx. */
fd_ip_route_entry_t *
fd_ip_route_table_get_alt( fd_ip_t * ip ) {
  /* logical not maps a current index of 0 to 1 and a nonzero index to 0 */
  uint alt_idx = !ip->route_table_idx;

  /* table storage begins ofs_route_table bytes past the start of ip */
  fd_ip_route_entry_t * tables = (fd_ip_route_entry_t*)( (ulong)ip + ip->ofs_route_table );

  /* each table holds num_route_entries entries */
  return &tables[ alt_idx * ip->num_route_entries ];
}


/* fd_ip_arp_table_get returns a pointer to the first entry of the
   current ARP table.

   Storage for two ARP tables of ip->num_arp_entries entries each
   begins ip->ofs_arp_table bytes past the start of ip.
   ip->arp_table_idx selects which of the two is current. */
fd_ip_arp_entry_t *
fd_ip_arp_table_get( fd_ip_t * ip ) {
  ulong mem = (ulong)ip;

  /* find the first table from the offset */
  fd_ip_arp_entry_t * first_table = (fd_ip_arp_entry_t*)( mem + ip->ofs_arp_table );

  /* find the table index */
  uint idx = ip->arp_table_idx;

  /* offset to the current table; there must be no earlier return here,
     otherwise the first table would be returned regardless of idx */
  return first_table + ( idx * ip->num_arp_entries );
}


/* fd_ip_arp_table_get_alt returns a pointer to the first entry of the
   alternate ARP table, i.e. the one of the two tables that is not
   selected by ip->arp_table_idx. */
fd_ip_arp_entry_t *
fd_ip_arp_table_get_alt( fd_ip_t * ip ) {
  /* logical not maps a current index of 0 to 1 and a nonzero index to 0 */
  uint alt_idx = !ip->arp_table_idx;

  /* table storage begins ofs_arp_table bytes past the start of ip */
  fd_ip_arp_entry_t * tables = (fd_ip_arp_entry_t*)( (ulong)ip + ip->ofs_arp_table );

  /* each table holds num_arp_entries entries */
  return &tables[ alt_idx * ip->num_arp_entries ];
}


/* fd_ip_arp_fetch reloads the ARP table via netlink.

   The load is done into the alternate table so that the current table
   is never overwritten by a load that does not complete.  Only when
   fd_nl_load_arp_table returns a positive entry count is the alternate
   table made current and ip->cur_num_arp_entries updated.  On a
   non-positive return the current table and its entry count are left
   untouched. */
void
fd_ip_arp_fetch( fd_ip_t * ip ) {
  /* we fetch into a temp space. If it is successful we switch temp and current */

  /* get pointer to alt table */
  fd_ip_arp_entry_t * alt_arp_table = fd_ip_arp_table_get_alt( ip );
  ulong               arp_table_cap = ip->num_arp_entries;
  fd_nl_t *           netlink       = fd_ip_netlink_get( ip );

  long num_entries = fd_nl_load_arp_table( netlink, alt_arp_table, arp_table_cap );

  if( num_entries <= 0L ) {
    /* don't switch */
    return;
  }

  /* success - switch to other table */
  ip->arp_table_idx      ^= 1U;
  ip->cur_num_arp_entries = (ulong)num_entries;
}

Expand Down Expand Up @@ -256,16 +311,25 @@ fd_ip_arp_gen_arp_probe( uchar * buf,

/* fd_ip_route_fetch reloads the routing table via netlink.

   The load is done into the alternate table so that the current table
   is never overwritten by a load that does not complete.  A negative
   return from fd_nl_load_route_table is retried exactly once.  Only
   when the final result is a positive entry count is the alternate
   table made current and ip->cur_num_route_entries updated.  Otherwise
   the current table and its entry count are left untouched. */
void
fd_ip_route_fetch( fd_ip_t * ip ) {
  fd_ip_route_entry_t * alt_route_table = fd_ip_route_table_get_alt( ip );
  ulong                 route_table_cap = ip->num_route_entries;
  fd_nl_t *             netlink         = fd_ip_netlink_get( ip );

  long num_entries = fd_nl_load_route_table( netlink, alt_route_table, route_table_cap );

  if( FD_UNLIKELY( num_entries < 0 ) ) {
    /* as a workaround for some systems that return EBUSY, but succeed
     * immediately after, we will retry once */
    num_entries = fd_nl_load_route_table( netlink, alt_route_table, route_table_cap );
  }

  if( FD_UNLIKELY( num_entries <= 0L ) ) {
    /* don't switch */
    return;
  }

  /* switch to new table */
  ip->route_table_idx      ^= 1U;
  ip->cur_num_route_entries = (ulong)num_entries;
}

Expand Down
24 changes: 22 additions & 2 deletions src/waltz/ip/fd_ip.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ struct fd_ip {
ulong cur_num_arp_entries;
ulong cur_num_route_entries;

/* table index */
/* there are two arp tables and two route tables */
/* these indicate whether the current table is the first or second */
uint arp_table_idx;
uint route_table_idx;

ulong ofs_netlink;
ulong ofs_arp_table;
ulong ofs_route_table;
Expand Down Expand Up @@ -139,20 +145,34 @@ fd_nl_t *
fd_ip_netlink_get( fd_ip_t * ip );


/* get pointer to start of current routing table
   (the one of the two routing tables selected by ip->route_table_idx)
   this is used internally
   probably best not to modify the data */
fd_ip_route_entry_t *
fd_ip_route_table_get( fd_ip_t * ip );


/* get pointer to start of alternate routing table
   (the one of the two routing tables not selected by ip->route_table_idx)
   this is used internally
   probably best not to modify the data */
fd_ip_route_entry_t *
fd_ip_route_table_get_alt( fd_ip_t * ip );


/* get pointer to start of current arp table
   (the one of the two arp tables selected by ip->arp_table_idx)
   this is used internally
   probably best not to modify the data */
fd_ip_arp_entry_t *
fd_ip_arp_table_get( fd_ip_t * ip );


/* get pointer to start of alternate arp table
   (the one of the two arp tables not selected by ip->arp_table_idx)
   this is used internally
   probably best not to modify the data */
fd_ip_arp_entry_t *
fd_ip_arp_table_get_alt( fd_ip_t * ip );


/* fetch the ARP table from the kernel
The table is written into the workspace
Expand Down
32 changes: 15 additions & 17 deletions src/waltz/ip/fd_netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,9 @@ fd_nl_read_socket( int fd, uchar * buf, ulong buf_sz ) {
long len = -1;
do {
len = recv( fd, buf, buf_sz, 0 );
} while( len <= 0 && errno == EINTR );
} while( FD_UNLIKELY( ( len < 0 && errno == EINTR ) || len == 0 ) );

if( len < 0 ) {
if( FD_UNLIKELY( len < 0 ) ) {
if( errno == EAGAIN ) {
/* EAGAIN means no data. We can simply try again later */
return -1;
Expand Down Expand Up @@ -381,14 +381,20 @@ fd_nl_load_route_table( fd_nl_t * nl,
return -1; /* return failure */
}

if( h->nlmsg_type == NLMSG_ERROR ) {
if( FD_UNLIKELY( h->nlmsg_type == NLMSG_ERROR ) ) {
struct nlmsgerr * err = (struct nlmsgerr*)NLMSG_DATA(h);

/* acknowledgements have no error */
if( !err->error ) {
if( FD_LIKELY( !err->error ) ) {
continue;
}

if( FD_LIKELY( err->error == -EBUSY ) ) {
/* a workaround for some systems which return EBUSY, we will
* not log, but instead retry one more time */
return -1;
}

FD_LOG_WARNING(( "netlink returned data with error: %d %s", -err->error, strerror( -err->error) ));

/* error occurred */
Expand Down Expand Up @@ -488,9 +494,8 @@ fd_nl_load_route_table( fd_nl_t * nl,
FD_NL_RT_FLAGS_OIF;
uint rqd1 = FD_NL_RT_FLAGS_NH_IP_ADDR |
FD_NL_RT_FLAGS_OIF;
uint rqd_mask = rqd0 | rqd1;
uint flags = entry->flags & rqd_mask;
if( flags == rqd0 || flags == rqd1 ) {
uint flags = entry->flags;
if( ( flags & rqd0 ) == rqd0 || ( flags & rqd1 ) == rqd1 ) {
entry->flags |= FD_NL_RT_FLAGS_USED;
route_entry_idx++;
}
Expand Down Expand Up @@ -661,18 +666,14 @@ fd_nl_load_arp_table( fd_nl_t * nl,
NUD_PROBE being reprobed, so it's probably valid
NUD_PERMANENT a static entry, so use it */

struct rtattr * saved_rat = rat;
long saved_ratmsglen = ratmsglen;
while( RTA_OK( rat, ratmsglen ) ) {
uchar * rta_data = RTA_DATA( rat );
ulong rta_data_sz = RTA_PAYLOAD( rat );

switch( rat->rta_type ) {
case NDA_DST:
if( rta_data_sz != 4 ) {
FD_LOG_WARNING(( "Neighbor entry has IP address with other than"
" 4 byte address" ));
fd_nl_dump_rat( saved_rat, saved_ratmsglen );
/* we only support IPv4 */
entry->flags |= FD_NL_ARP_FLAGS_UNSUPPORTED;
} else {
uint dst_ip_addr;
Expand All @@ -685,11 +686,8 @@ fd_nl_load_arp_table( fd_nl_t * nl,

case NDA_LLADDR:
if( FD_UNLIKELY( rta_data_sz != 6 ) ) {
if( rta_data_sz != 0 ) {
FD_LOG_WARNING(( "Neighbor entry has LL address with other than"
" 6 byte MAC address" ));
fd_nl_dump_rat( saved_rat, saved_ratmsglen );
}
/* we only support ethernet, but these entries are found */
/* on some machines, so silently skip them */
entry->flags |= FD_NL_ARP_FLAGS_UNSUPPORTED;
} else {
memcpy( &entry->mac_addr[0], rta_data, 6 );
Expand Down
2 changes: 1 addition & 1 deletion src/waltz/ip/fd_netlink.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

/* Defines the size in bytes (64 KiB) of the buffer space used in
   netlink calls
   We are not expecting many routing entries or ARP cache entries */
#define FD_NL_BUF_SZ (1UL<<16UL)

struct fd_nl {
int fd; /* netlink socket */
Expand Down
Loading

0 comments on commit d53c22e

Please sign in to comment.