diff --git a/src/app/fdctl/run/tiles/fd_replay.c b/src/app/fdctl/run/tiles/fd_replay.c index 3db2257385..b21e14d48f 100644 --- a/src/app/fdctl/run/tiles/fd_replay.c +++ b/src/app/fdctl/run/tiles/fd_replay.c @@ -864,7 +864,6 @@ prepare_new_block_execution( fd_replay_tile_ctx_t * ctx, xid.ul[0] = fork->slot_ctx.slot_bank.slot; /* push a new transaction on the stack */ fd_funk_start_write( ctx->funk ); - FD_TEST( !ctx->funk->speed_load ); fork->slot_ctx.funk_txn = fd_funk_txn_prepare(ctx->funk, fork->slot_ctx.funk_txn, &xid, 1); fd_funk_end_write( ctx->funk ); diff --git a/src/flamenco/snapshot/fd_snapshot.c b/src/flamenco/snapshot/fd_snapshot.c index 022a191af3..d1b9527b9d 100644 --- a/src/flamenco/snapshot/fd_snapshot.c +++ b/src/flamenco/snapshot/fd_snapshot.c @@ -143,10 +143,6 @@ fd_snapshot_load( const char * snapshotfile, } fd_funk_start_write( slot_ctx->acc_mgr->funk ); - /* Speed load currently has long term memory usage consequences - which are unacceptable. Consider turning it back on when we have a - better design. 
*/ - fd_funk_speed_load_mode( slot_ctx->acc_mgr->funk, 0 ); fd_funk_txn_t * par_txn = slot_ctx->funk_txn; fd_funk_txn_t * child_txn = slot_ctx->funk_txn; @@ -212,6 +208,5 @@ fd_snapshot_load( const char * snapshotfile, fd_rewards_recalculate_partitioned_rewards( slot_ctx ); - fd_funk_speed_load_mode( slot_ctx->acc_mgr->funk, 0 ); fd_funk_end_write( slot_ctx->acc_mgr->funk ); } diff --git a/src/funk/fd_funk.c b/src/funk/fd_funk.c index 94567d99c2..5e1bc8472a 100644 --- a/src/funk/fd_funk.c +++ b/src/funk/fd_funk.c @@ -151,11 +151,6 @@ fd_funk_new( void * shmem, partvec->num_part = 0U; funk->partvec_gaddr = fd_wksp_gaddr_fast( wksp, partvec ); - /* Speed load mode is off by default */ - fd_funk_speed_load_mode( funk, 0 ); - funk->speed_bump_gaddr = 0UL; - funk->speed_bump_remain = 0UL; - funk->write_lock = 0UL; FD_COMPILER_MFENCE(); @@ -472,8 +467,3 @@ fd_funk_check_write( fd_funk_t * funk ) { ulong val = funk->write_lock; if( FD_UNLIKELY(!(val&1UL)) ) FD_LOG_CRIT(( "missing call to fd_funk_start_write" )); } - -void -fd_funk_speed_load_mode( fd_funk_t * funk, int flag ) { - funk->speed_load = flag; -} diff --git a/src/funk/fd_funk.h b/src/funk/fd_funk.h index 978c4cab60..ae8c127592 100644 --- a/src/funk/fd_funk.h +++ b/src/funk/fd_funk.h @@ -258,11 +258,6 @@ struct __attribute__((aligned(FD_FUNK_ALIGN))) fd_funk_private { ulong alloc_gaddr; /* Non-zero wksp gaddr with tag wksp tag */ - int speed_load; /* Is "speed load mode" active */ - /* Address and size of remaining bump allocation space */ - ulong speed_bump_gaddr; - ulong speed_bump_remain; - /* Padding to FD_FUNK_ALIGN here */ }; @@ -490,14 +485,6 @@ fd_funk_last_publish_descendant( fd_funk_t * funk, /* Misc */ -/* Enable/disable "speed load mode". When in this mode, record values - are bump allocated and never freed. This speeds up the case where - we are initializing the database with a vast number of - mostly read-only records. 
*/ - - void - fd_funk_speed_load_mode( fd_funk_t * funk, int flag ); - /* fd_funk_verify verifies the integrity of funk. Returns FD_FUNK_SUCCESS if funk appears to be intact and FD_FUNK_ERR_INVAL otherwise (logs details). Assumes funk is a current local join (NULL diff --git a/src/funk/fd_funk_base.h b/src/funk/fd_funk_base.h index a65fd6f0d7..ed27288c45 100644 --- a/src/funk/fd_funk_base.h +++ b/src/funk/fd_funk_base.h @@ -230,17 +230,16 @@ fd_funk_txn_xid_set_root( fd_funk_txn_xid_t * x ) { return x; } -/* fd_funk_xid_key_pair_hash provides a family of hashes that hash a - (xid,key) pair to by p to a uniform quasi-random 64-bit integer. - seed selects the particular hash function to use and can be an - arbitrary 64-bit value. Returns the hash. The hash functions are - high quality but not cryptographically secure. Assumes p is in the - caller's address space and valid. */ +/* fd_funk_xid_key_pair_hash produces a 64-bit hash value for a + xid_key_pair. Assumes p is in the caller's address space and valid. */ FD_FN_PURE static inline ulong fd_funk_xid_key_pair_hash( fd_funk_xid_key_pair_t const * p, ulong seed ) { - return fd_funk_txn_xid_hash( p->xid, seed ) ^ fd_funk_rec_key_hash( p->key, seed ); + /* We ignore the xid part of the key because we need all the instances + of a given record key to appear in the same hash + chain. fd_funk_rec_query_global depends on this. 
*/ + return fd_funk_rec_key_hash( p->key, seed ); } /* fd_funk_xid_key_pair_eq returns 1 if (xid,key) pair pointed to by pa diff --git a/src/funk/fd_funk_rec.c b/src/funk/fd_funk_rec.c index 5095a54825..1a02cce62b 100644 --- a/src/funk/fd_funk_rec.c +++ b/src/funk/fd_funk_rec.c @@ -46,43 +46,59 @@ fd_funk_rec_query_global( fd_funk_t * funk, fd_funk_txn_t const * txn, fd_funk_rec_key_t const * key, fd_funk_txn_t const ** txn_out ) { - if( FD_UNLIKELY( (!funk) | (!key) ) ) return NULL; fd_wksp_t * wksp = fd_funk_wksp( funk ); + fd_funk_txn_t * txn_map = fd_funk_txn_map( funk, wksp ); fd_funk_rec_t * rec_map = fd_funk_rec_map( funk, wksp ); - if( txn ) { /* Query txn and its in-prep ancestors */ + /* For record ele in all records in chain that match key. (This + code was adapted from the map_giant template ... ideally would + use a map chain iterator ala map_para template). */ - fd_funk_txn_t * txn_map = fd_funk_txn_map( funk, wksp ); + /* Note: the iteration order will be such that the record + for a key in a descendent of a transaction will be presented + before a record for that key in that transaction. This allows us + to succeed on the first hit (the newest transaction). It is + NECESSARY that fd_funk_rec_map_insert preserve this property. 
*/ - ulong txn_max = funk->txn_max; + fd_funk_rec_map_private_t * priv = fd_funk_rec_map_private( rec_map ); + ulong hash = fd_funk_rec_key_hash( key, priv->seed ); + ulong * head = fd_funk_rec_map_private_list( priv ) + ( hash & (priv->list_cnt-1UL) ); + ulong * cur = head; - ulong txn_idx = (ulong)(txn - txn_map); + for(;;) { + ulong ele_idx = fd_funk_rec_map_private_unbox_idx( *cur ); + if( fd_funk_rec_map_private_is_null( ele_idx ) ) break; + fd_funk_rec_t * ele = rec_map + ele_idx; + if( FD_LIKELY( hash == ele->map_hash ) && FD_LIKELY( fd_funk_rec_key_eq( key, ele->pair.key ) ) ) { - if( FD_UNLIKELY( (txn_idx>=txn_max) /* Out of map (incl NULL) */ | (txn!=(txn_map+txn_idx)) /* Bad alignment */ ) ) - return NULL; + /* For cur_txn in path from [txn] to [root] where root is NULL */ - /* TODO: const correct and/or fortify? */ - do { - fd_funk_xid_key_pair_t pair[1]; fd_funk_xid_key_pair_init( pair, fd_funk_txn_xid( txn ), key ); - fd_funk_rec_t const * rec = fd_funk_rec_map_query_const( rec_map, pair, NULL ); - if( FD_LIKELY( rec ) ) { - if( FD_UNLIKELY(NULL != txn_out ) ) { - *txn_out = txn; + for( fd_funk_txn_t const * cur_txn = txn; ; cur_txn = fd_funk_txn_parent( cur_txn, txn_map ) ) { + /* If record ele is part of transaction cur_txn, we have a + match. According to the property above, this will be the + youngest descendent in the transaction stack. */ + + int match = FD_UNLIKELY( cur_txn ) ? /* opt for root find (FIXME: eliminate branch with cmov into txn_xid_eq?) */ + fd_funk_txn_xid_eq( &cur_txn->xid, ele->pair.xid ) : + fd_funk_txn_xid_eq_root( ele->pair.xid ); + + if( FD_LIKELY( match ) ) { + if( txn_out ) *txn_out = cur_txn; + return ( FD_UNLIKELY( ele->flags & FD_FUNK_REC_FLAG_ERASE ) ? 
NULL : ele ); } - return rec; + + if( cur_txn == NULL ) break; } - txn = fd_funk_txn_parent( (fd_funk_txn_t *)txn, txn_map ); - } while( FD_UNLIKELY( txn ) ); + } + cur = &ele->map_next; } - /* Query the last published transaction */ - - fd_funk_xid_key_pair_t pair[1]; fd_funk_xid_key_pair_init( pair, fd_funk_root( funk ), key ); - return fd_funk_rec_map_query_const( rec_map, pair, NULL ); + if( txn_out ) *txn_out = NULL; + return NULL; } void * @@ -538,10 +554,7 @@ fd_funk_rec_write_prepare( fd_funk_t * funk, /* Grow the record to the right size */ rec->flags &= ~FD_FUNK_REC_FLAG_ERASE; if ( fd_funk_val_sz( rec ) < min_val_size ) { - if( funk->speed_load ) - rec = fd_funk_val_speed_load( funk, rec, min_val_size, wksp, opt_err ); - else - rec = fd_funk_val_truncate( rec, min_val_size, fd_funk_alloc( funk, wksp ), wksp, opt_err ); + rec = fd_funk_val_truncate( rec, min_val_size, fd_funk_alloc( funk, wksp ), wksp, opt_err ); } return rec; @@ -605,6 +618,11 @@ fd_funk_rec_verify( fd_funk_t * funk ) { TEST( (rec_idx=rec_max ) ) FD_LOG_CRIT(( "memory corruption detected (bad idx)" )); if( FD_UNLIKELY( fd_funk_txn_idx( rec_map[ rec_idx ].txn_cidx )!=txn_idx ) ) FD_LOG_CRIT(( "memory corruption detected (cycle or bad idx)" )); - if( FD_UNLIKELY( rec_map[ rec_idx ].val_no_free ) ) - FD_LOG_CRIT(( "new record was speed loaded" )); - rec_map[ rec_idx ].txn_cidx = fd_funk_txn_cidx( FD_FUNK_TXN_IDX_NULL ); - - ulong next_idx = rec_map[ rec_idx ].next_idx; - - /* See if (dst_xid,key) already exists */ - - fd_funk_xid_key_pair_t dst_pair[1]; - fd_funk_xid_key_pair_init( dst_pair, dst_xid, fd_funk_rec_key( &rec_map[ rec_idx ] ) ); - - fd_funk_rec_t * dst_rec = fd_funk_rec_map_query( rec_map, dst_pair, NULL ); - - /* At this point, we are either creating a new record or updating - an existing one. In either case, we are going to be keeping - around the src's value for later use and for speed, we do this - zero-copy / in-place. 
So we stash record value in stack - temporaries and unmap (xid,key). Note this strictly frees 1 - record from the rec_map, guaranteeing at least 1 record free in - the record map below. Note that we can't just reuse rec_idx in - the update case because that could break map queries. */ - - ulong val_sz = (ulong)rec_map[ rec_idx ].val_sz; - ulong val_max = (ulong)rec_map[ rec_idx ].val_max; - ulong val_gaddr = rec_map[ rec_idx ].val_gaddr; - int val_no_free = rec_map[ rec_idx ].val_no_free; - uint part = rec_map[ rec_idx ].part; - ulong flags = rec_map[ rec_idx ].flags; - - fd_funk_part_set_intern( partvec, rec_map, &rec_map[ rec_idx ], FD_FUNK_PART_NULL ); - fd_funk_rec_map_remove( rec_map, fd_funk_rec_pair( &rec_map[ rec_idx ] ) ); - - if( FD_UNLIKELY( !dst_rec ) ) { /* Create a published key */ - - dst_rec = fd_funk_rec_map_insert( rec_map, dst_pair ); /* Guaranteed to succeed at this point due to above remove */ - - ulong dst_rec_idx = (ulong)(dst_rec - rec_map); - ulong dst_prev_idx = *_dst_rec_tail_idx; - - dst_rec->prev_idx = dst_prev_idx; - dst_rec->next_idx = FD_FUNK_REC_IDX_NULL; - dst_rec->txn_cidx = fd_funk_txn_cidx( dst_txn_idx ); - dst_rec->tag = 0U; - - fd_funk_part_init( dst_rec ); - - if( fd_funk_rec_idx_is_null( dst_prev_idx ) ) *_dst_rec_head_idx = dst_rec_idx; - else rec_map[ dst_prev_idx ].next_idx = dst_rec_idx; - - *_dst_rec_tail_idx = dst_rec_idx; - - } else { /* Update a published key */ - - fd_funk_val_flush( dst_rec, alloc, wksp ); /* Free up any preexisting value resources */ - fd_funk_part_set_intern( partvec, rec_map, dst_rec, FD_FUNK_PART_NULL ); + fd_funk_rec_t * rec = &rec_map[ rec_idx ]; + ulong next_rec_idx = rec->next_idx; + + /* See if (dst_xid,key) already exists. Remove it if it does, and then clean up the corpse. + We would like to walk down the hash chain starting at rec, but + fd_funk_txn_merge_all_children temporarily breaks the needed + ordering property. 
*/ + fd_funk_xid_key_pair_t old_pair[1]; + fd_funk_xid_key_pair_init( old_pair, dst_xid, rec->pair.key ); + fd_funk_rec_t * old_rec = (fd_funk_rec_t *) fd_funk_rec_map_remove( rec_map, old_pair ); + if( FD_LIKELY( old_rec ) ) { + /* Remove from the transaction */ + ulong prev_idx = old_rec->prev_idx; + ulong next_idx = old_rec->next_idx; + if( fd_funk_rec_idx_is_null( prev_idx ) ) { + *_dst_rec_head_idx = next_idx; + } else { + rec_map[ prev_idx ].next_idx = next_idx; + } + if( fd_funk_rec_idx_is_null( next_idx ) ) { + *_dst_rec_tail_idx = prev_idx; + } else { + rec_map[ next_idx ].prev_idx = prev_idx; + } + fd_funk_val_flush( old_rec, alloc, wksp ); + old_rec->txn_cidx = fd_funk_txn_cidx( FD_FUNK_TXN_IDX_NULL ); + fd_funk_part_set_intern( partvec, rec_map, old_rec, FD_FUNK_PART_NULL ); } - /* Unstash value metadata from stack temporaries into dst_rec */ - - dst_rec->val_sz = (uint)val_sz; - dst_rec->val_max = (uint)val_max; - dst_rec->val_gaddr = val_gaddr; - dst_rec->val_no_free = val_no_free; - dst_rec->flags = flags; - - /* Use the new partition */ - - fd_funk_part_set_intern( partvec, rec_map, dst_rec, part ); - - /* Advance to the next record */ + /* Add the new record to the transaction. We can update the xid in + place because it is not used for hashing the element. We have + to preserve the original element to preserve the + newest-to-oldest ordering in the hash + chain. fd_funk_rec_query_global relies on this subtle + property. 
*/ + + rec->pair.xid[0] = *dst_xid; + rec->txn_cidx = fd_funk_txn_cidx( dst_txn_idx ); + + if( fd_funk_rec_idx_is_null( *_dst_rec_head_idx ) ) { + *_dst_rec_head_idx = rec_idx; + rec->prev_idx = FD_FUNK_REC_IDX_NULL; + } else { + rec_map[ *_dst_rec_tail_idx ].next_idx = rec_idx; + rec->prev_idx = *_dst_rec_tail_idx; + } + *_dst_rec_tail_idx = rec_idx; + rec->next_idx = FD_FUNK_REC_IDX_NULL; - rec_idx = next_idx; + rec_idx = next_rec_idx; } txn_map[ txn_idx ].rec_head_idx = FD_FUNK_REC_IDX_NULL; @@ -807,15 +782,32 @@ fd_funk_txn_merge_all_children( fd_funk_t * funk, fd_wksp_t * wksp = fd_funk_wksp( funk ); fd_funk_txn_t * map = fd_funk_txn_map( funk, wksp ); + ulong txn_max = funk->txn_max; /* Previously verified */ - ulong txn_max = fd_funk_txn_map_key_max( map ); - - ulong parent_idx = (ulong)(parent_txn - map); - - ASSERT_IN_PREP( parent_idx ); - - ulong child_head_idx = fd_funk_txn_idx( map[ parent_idx ].child_head_cidx ); + ulong parent_idx; + fd_funk_txn_xid_t * parent_xid; + uint * child_head_cidx; + uint * child_tail_cidx; + ulong * rec_head_idx; + ulong * rec_tail_idx; + if( parent_txn == NULL ) { /* Root */ + parent_idx = FD_FUNK_TXN_IDX_NULL; + parent_xid = funk->root; + child_head_cidx = &funk->child_head_cidx; + child_tail_cidx = &funk->child_tail_cidx; + rec_head_idx = &funk->rec_head_idx; + rec_tail_idx = &funk->rec_tail_idx; + } else { + parent_idx = (ulong)(parent_txn - map); + ASSERT_IN_PREP( parent_idx ); + parent_xid = &parent_txn->xid; + child_head_cidx = &parent_txn->child_head_cidx; + child_tail_cidx = &parent_txn->child_tail_cidx; + rec_head_idx = &parent_txn->rec_head_idx; + rec_tail_idx = &parent_txn->rec_tail_idx; + } + ulong child_head_idx = fd_funk_txn_idx( *child_head_cidx ); ulong child_idx = child_head_idx; while( FD_UNLIKELY( !fd_funk_txn_idx_is_null( child_idx ) ) ) { /* opt for incr pub */ /* Merge records from child into parent */ @@ -826,16 +818,22 @@ fd_funk_txn_merge_all_children( fd_funk_t * funk, return FD_FUNK_ERR_TXN; } - 
fd_funk_txn_update( &parent_txn->rec_head_idx, &parent_txn->rec_tail_idx, parent_idx, &parent_txn->xid, + fd_funk_txn_update( rec_head_idx, rec_tail_idx, parent_idx, parent_xid, child_idx, funk->rec_max, map, fd_funk_rec_map( funk, wksp ), fd_funk_get_partvec( funk, wksp ), fd_funk_alloc( funk, wksp ), wksp ); child_idx = fd_funk_txn_idx( txn->sibling_next_cidx ); fd_funk_txn_map_remove( map, fd_funk_txn_xid( txn ) ); + + /* Update the pointers as we go in case we stop in the middle + */ + *child_head_cidx = fd_funk_txn_cidx( child_idx ); + if( FD_UNLIKELY( !fd_funk_txn_idx_is_null( child_idx ) ) ) { + map[ child_idx ].sibling_prev_cidx = fd_funk_txn_cidx( FD_FUNK_TXN_IDX_NULL ); + } } - parent_txn->child_head_cidx = fd_funk_txn_cidx( FD_FUNK_TXN_IDX_NULL ); - parent_txn->child_tail_cidx = fd_funk_txn_cidx( FD_FUNK_TXN_IDX_NULL ); + *child_tail_cidx = fd_funk_txn_cidx( FD_FUNK_TXN_IDX_NULL ); return FD_FUNK_SUCCESS; } diff --git a/src/funk/fd_funk_txn.h b/src/funk/fd_funk_txn.h index 35106fd006..7613aef8f0 100644 --- a/src/funk/fd_funk_txn.h +++ b/src/funk/fd_funk_txn.h @@ -162,7 +162,7 @@ FD_FN_CONST static inline fd_funk_txn_xid_t const * fd_funk_txn_xid( fd_funk_txn #define FD_FUNK_ACCESSOR(field) \ FD_FN_PURE static inline fd_funk_txn_t * \ -fd_funk_txn_##field( fd_funk_txn_t * txn, \ +fd_funk_txn_##field( fd_funk_txn_t const * txn, \ fd_funk_txn_t * map ) { \ ulong idx = fd_funk_txn_idx( txn->field##_cidx ); \ if( idx==FD_FUNK_TXN_IDX_NULL ) return NULL; \ diff --git a/src/funk/fd_funk_val.c b/src/funk/fd_funk_val.c index bbef7c2c20..bb2d345e6d 100644 --- a/src/funk/fd_funk_val.c +++ b/src/funk/fd_funk_val.c @@ -67,9 +67,8 @@ fd_funk_val_copy( fd_funk_rec_t * rec, rec->val_max = (uint)fd_ulong_min( new_val_max, FD_FUNK_REC_VAL_MAX ); rec->val_gaddr = fd_wksp_gaddr_fast( wksp, new_val ); - if( val && !rec->val_no_free ) fd_alloc_free( alloc, val ); + if( val ) fd_alloc_free( alloc, val ); val = new_val; - rec->val_no_free = 0; } @@ -150,11 +149,10 @@ 
fd_funk_val_append( fd_funk_rec_t * rec, if( val_sz ) fd_memcpy( new_val, val, val_sz ); /* Copy the existing val */ fd_memset( new_val + val_sz, 0, new_val_max - val_sz ); /* Clear out trailing padding to be on the safe side */ - if( !rec->val_no_free ) fd_alloc_free( alloc, val ); /* Free the old val */ + fd_alloc_free( alloc, val ); /* Free the old val */ rec->val_max = (uint)fd_ulong_min( new_val_max, FD_FUNK_REC_VAL_MAX ); rec->val_gaddr = fd_wksp_gaddr_fast( wksp, new_val ); - rec->val_no_free = 0; val = new_val; @@ -221,8 +219,7 @@ fd_funk_val_truncate( fd_funk_rec_t * rec, rec->val_sz = (uint)new_val_sz; rec->val_max = (uint)fd_ulong_min( new_val_max, FD_FUNK_REC_VAL_MAX ); - if( val && !rec->val_no_free ) fd_alloc_free( alloc, val ); /* Free the old value (if any) */ - rec->val_no_free = 0; + if( val ) fd_alloc_free( alloc, val ); /* Free the old value (if any) */ } else { @@ -257,8 +254,7 @@ fd_funk_val_truncate( fd_funk_rec_t * rec, rec->val_max = (uint)fd_ulong_min( new_val_max, FD_FUNK_REC_VAL_MAX ); rec->val_gaddr = fd_wksp_gaddr_fast( wksp, new_val ); - if( val && !rec->val_no_free ) fd_alloc_free( alloc, val ); /* Free the old value (if any) */ - rec->val_no_free = 0; + if( val ) fd_alloc_free( alloc, val ); /* Free the old value (if any) */ } @@ -268,44 +264,6 @@ fd_funk_val_truncate( fd_funk_rec_t * rec, return rec; } -fd_funk_rec_t * -fd_funk_val_speed_load( fd_funk_t * funk, - fd_funk_rec_t * rec, /* Assumed in caller's address space to a live funk record (NULL returns NULL) */ - ulong new_val_sz, /* Should be in [0,FD_FUNK_REC_VAL_MAX] (returns NULL otherwise) */ - fd_wksp_t * wksp, /* ==fd_funk_wksp( funk ) where funk is current local join */ - int * opt_err ) { /* If non-NULL, *opt_err returns operation error code */ - /* Check input args */ - - if( FD_UNLIKELY( (!rec) | (new_val_sz>FD_FUNK_REC_VAL_MAX) | (!wksp) ) || /* NULL rec,too big,NULL alloc,NULL wksp */ - FD_UNLIKELY( rec->flags & FD_FUNK_REC_FLAG_ERASE ) ) { /* Marked erase */ - 
fd_int_store_if( !!opt_err, opt_err, FD_FUNK_ERR_INVAL ); - return NULL; - } - - ulong new_max_sz = fd_ulong_align_up( new_val_sz, 8U ); - if( funk->speed_bump_remain < new_max_sz ) { - funk->speed_bump_remain = fd_ulong_max( 64LU<<20LU, new_max_sz ); - funk->speed_bump_gaddr = fd_wksp_alloc( wksp, 8U, funk->speed_bump_remain, funk->wksp_tag ); - if( funk->speed_bump_gaddr == 0UL ) { - funk->speed_bump_remain = 0; - fd_int_store_if( !!opt_err, opt_err, FD_FUNK_ERR_MEM ); - return NULL; - } - fd_memset( fd_wksp_laddr_fast( wksp, funk->speed_bump_gaddr ), 0, funk->speed_bump_remain ); - } - - rec->val_sz = (uint)new_val_sz; - rec->val_max = (uint)new_max_sz; - rec->val_gaddr = funk->speed_bump_gaddr; - rec->val_no_free = 1; - - funk->speed_bump_gaddr += new_max_sz; - funk->speed_bump_remain -= new_max_sz; - - fd_int_store_if( !!opt_err, opt_err, FD_FUNK_SUCCESS ); - return rec; -} - void * fd_funk_val_safe( fd_funk_rec_t const * rec, /* Assumes pointer in caller's address space to a live funk record */ fd_wksp_t const * wksp, diff --git a/src/funk/fd_funk_val.h b/src/funk/fd_funk_val.h index da153c3c6b..0e29fd2668 100644 --- a/src/funk/fd_funk_val.h +++ b/src/funk/fd_funk_val.h @@ -245,26 +245,6 @@ fd_funk_val_truncate( fd_funk_rec_t * rec, /* Assumed in caller's address fd_wksp_t * wksp, /* ==fd_funk_wksp( funk ) where funk is current local join */ int * opt_err ); /* If non-NULL, *opt_err returns operation error code */ -/* fd_funk_val_speed_load sets the record value to space allocated - from the funk speed bump. This space is never freed. This function - is used when loading large snapshots. - - Returns rec on success and NULL on failure. If opt_err is non-NULL, - on return, *opt_err will hold FD_FUNK_SUCCESS if successful or a - FD_FUNK_ERR_* code on failure. Reasons for failure include - FD_FUNK_ERR_INVAL (NULL rec, too large new_val_sz, rec is marked - ERASE) and FD_FUNK_ERR_MEM (allocation failure, need a larger wksp). 
- On failure, the current value is unchanged. - - Assumes no concurrent operations on rec. */ - -fd_funk_rec_t * /* Returns rec on success, NULL on failure */ -fd_funk_val_speed_load( fd_funk_t * funk, - fd_funk_rec_t * rec, /* Assumed in caller's address space to a live funk record (NULL returns NULL) */ - ulong new_val_sz, /* Should be in [0,FD_FUNK_REC_VAL_MAX] (returns NULL otherwise) */ - fd_wksp_t * wksp, /* ==fd_funk_wksp( funk ) where funk is current local join */ - int * opt_err ); /* If non-NULL, *opt_err returns operation error code */ - /* Misc */ /* fd_funk_val_init sets a record with uninitialized value metadata to @@ -275,7 +255,6 @@ fd_funk_val_init( fd_funk_rec_t * rec ) { /* Assumed record in caller's address rec->val_sz = 0U; rec->val_max = 0U; rec->val_gaddr = 0UL; - rec->val_no_free = 0; return rec; } @@ -287,9 +266,8 @@ fd_funk_val_flush( fd_funk_rec_t * rec, /* Assumed live funk record in calle fd_alloc_t * alloc, /* ==fd_funk_alloc( funk, wksp ) */ fd_wksp_t * wksp ) { /* ==fd_funk_wksp( funk ) where funk is a current local join */ ulong val_gaddr = rec->val_gaddr; - int val_no_free = rec->val_no_free; fd_funk_val_init( rec ); - if( val_gaddr && !val_no_free ) fd_alloc_free( alloc, fd_wksp_laddr_fast( wksp, val_gaddr ) ); + if( val_gaddr ) fd_alloc_free( alloc, fd_wksp_laddr_fast( wksp, val_gaddr ) ); return rec; } diff --git a/src/funk/test_funk_common.c b/src/funk/test_funk_common.c index d991f9f95f..4ef8d22329 100644 --- a/src/funk/test_funk_common.c +++ b/src/funk/test_funk_common.c @@ -153,26 +153,20 @@ txn_publish( funk_t * funk, rec_t * root_rec = rec_query( funk, NULL, rec->key ); - if( !root_rec ) { /* key not published and not erasing, create published key */ - - rec_t * prev = funk->rec_tail; - - rec->txn = NULL; - rec->prev = prev; - rec->next = NULL; - - if( prev ) prev->next = rec; - else funk->rec_head = rec; - funk->rec_tail = rec; - - } else { /* update published key */ + if( root_rec ) { + // Remove old version of record + 
rec_unmap( funk, rec_leave( funk, root_rec ) ); + } - root_rec->val = rec->val; - root_rec->erase = rec->erase; + rec_t * prev = funk->rec_tail; - rec_unmap( funk, rec ); /* Unmap the record (don't bother leaving b/c we are unmapping everything) */ + rec->txn = NULL; + rec->prev = prev; + rec->next = NULL; - } + if( prev ) prev->next = rec; + else funk->rec_head = rec; + funk->rec_tail = rec; rec = next; } @@ -204,52 +198,20 @@ txn_merge( funk_t * funk, rec_t * dst_rec = rec_query( funk, dst_txn, rec->key ); - if( rec->erase ) { - - if( !dst_rec ) { /* This erases a version of the record one of dst's ancestors, add the erase to dst */ - - rec_t * prev = dst_txn->rec_tail; - - rec->txn = dst_txn; - rec->prev = prev; - rec->next = NULL; - - if( prev ) prev->next = rec; - else dst_txn->rec_head = rec; - dst_txn->rec_tail = rec; - - } else { /* This erases a dst's version record */ - - rec_unmap( funk, rec ); /* Unmap the record (don't bother leaving b/c we are unmapping everything) */ - - if( dst_txn == NULL ) { - rec_unmap( funk, rec_leave( funk, dst_rec ) ); /* Unmap dst rec */ - } else { - dst_rec->erase = 1; - } - - } - - } else if( !dst_rec ) { /* Record not in dst and not erasing, add record in dst */ - - rec_t * prev = dst_txn->rec_tail; - - rec->txn = dst_txn; - rec->prev = prev; - rec->next = NULL; - - if( prev ) prev->next = rec; - else dst_txn->rec_head = rec; - dst_txn->rec_tail = rec; - - } else { /* Record in dst and not erasing, update record in dst */ + if( dst_rec ) { + // Remove old version of record + rec_unmap( funk, rec_leave( funk, dst_rec ) ); + } - dst_rec->val = rec->val; - dst_rec->erase = 0; + rec_t * prev = dst_txn->rec_tail; - rec_unmap( funk, rec ); /* Unmap the record (don't bother leaving b/c we are unmapping everything) */ + rec->txn = dst_txn; + rec->prev = prev; + rec->next = NULL; - } + if( prev ) prev->next = rec; + else dst_txn->rec_head = rec; + dst_txn->rec_tail = rec; rec = next; } diff --git a/src/funk/test_funk_common.hpp 
b/src/funk/test_funk_common.hpp index 169e7b962c..e77193b810 100644 --- a/src/funk/test_funk_common.hpp +++ b/src/funk/test_funk_common.hpp @@ -316,7 +316,10 @@ struct fake_funk { for (auto i : _txns) if (i.second->_key != ROOT_KEY) list[listlen++] = i.second; - if (!listlen) return; + if (!listlen) { + fd_funk_end_write(_real); + return; + } auto * txn = list[((uint)lrand48())%listlen]; fd_funk_txn_t * txn2 = get_real_txn(txn); @@ -359,7 +362,10 @@ struct fake_funk { list[listlen++] = i.second; no_good: continue; } - if (!listlen) return; + if (!listlen) { + fd_funk_end_write(_real); + return; + } auto * txn = list[((uint)lrand48())%listlen]; fd_funk_txn_t * txn2 = get_real_txn(txn); @@ -428,6 +434,15 @@ struct fake_funk { assert(memcmp(fd_funk_val(rec, _wksp), rec2->data(), rec2->size()) == 0); assert(rec->part == rec2->_part); } + + fd_funk_txn_t * txn_map = fd_funk_txn_map( _real, fd_funk_wksp( _real ) ); + fd_funk_txn_t * txn = fd_funk_txn_query( xid, txn_map ); + auto* rec3 = fd_funk_rec_query_global(_real, txn, rec->pair.key, NULL); + if( ( rec->flags & FD_FUNK_REC_FLAG_ERASE ) ) + assert(rec3 == NULL); + else + assert(rec == rec3); + assert(!rec2->_touched); rec2->_touched = true; } diff --git a/src/funk/test_funk_rec.c b/src/funk/test_funk_rec.c index 1ee752a8c2..30cf0951e4 100644 --- a/src/funk/test_funk_rec.c +++ b/src/funk/test_funk_rec.c @@ -113,8 +113,8 @@ main( int argc, rec_t * rrec = rec_query_global( ref, NULL, rkey ); fd_funk_rec_t const * trec = fd_funk_rec_query_global( tst, NULL, tkey, NULL ); - if( !rrec ) FD_TEST( !trec ); - else FD_TEST( trec && xid_eq( fd_funk_rec_xid( trec ), rrec->txn ? rrec->txn->xid : 0UL ) ); + if( !rrec || rrec->erase ) FD_TEST( !trec ); + else FD_TEST( trec && xid_eq( fd_funk_rec_xid( trec ), rrec->txn ? 
rrec->txn->xid : 0UL ) ); FD_TEST( fd_funk_rec_test( NULL, NULL )==FD_FUNK_ERR_INVAL ); FD_TEST( fd_funk_rec_test( NULL, trec )==FD_FUNK_ERR_INVAL ); @@ -144,7 +144,7 @@ main( int argc, fd_funk_rec_t * mrec = fd_funk_rec_modify( tst, &rec_map[0] ); - if( fd_funk_rec_map_query( rec_map, fd_funk_rec_pair( &rec_map[0] ), NULL )!=&rec_map[0] ) { + if( fd_funk_rec_map_query_const( rec_map, fd_funk_rec_pair( &rec_map[0] ), NULL )!=&rec_map[0] ) { FD_TEST( err==FD_FUNK_ERR_KEY ); FD_TEST( !mrec ); } else { @@ -228,7 +228,7 @@ main( int argc, rec_t * rrec = rec_query_global( ref, rtxn, rkey ); fd_funk_rec_t const * trec = fd_funk_rec_query_global( tst, ttxn, tkey, NULL ); - if( !rrec ) FD_TEST( !trec ); + if( !rrec || rrec->erase ) FD_TEST( !trec ); else { FD_TEST( trec && xid_eq( fd_funk_rec_xid( trec ), rrec->txn ? rrec->txn->xid : 0UL ) ); int is_frozen = (rrec->txn ? txn_is_frozen( rrec->txn ) : funk_is_frozen( ref )); diff --git a/src/funk/test_funk_txn2.cxx b/src/funk/test_funk_txn2.cxx index 2545334f97..4b87f59cb2 100644 --- a/src/funk/test_funk_txn2.cxx +++ b/src/funk/test_funk_txn2.cxx @@ -7,7 +7,7 @@ int main(int argc, char** argv) { srand(1234); fake_funk ff(&argc, &argv); - for (uint loop = 0; loop < 100U; ++loop) { + for (uint loop = 0; loop < 5000U; ++loop) { for (uint i = 0; i < 10; ++i) ff.random_insert(); ff.verify(); @@ -53,8 +53,11 @@ int main(int argc, char** argv) { for (uint i = 0; i < 10; ++i) ff.random_remove(); ff.verify(); + ff.random_publish_into_parent(); + ff.verify(); ff.random_merge(); ff.verify(); + if( loop % 100 == 0 ) FD_LOG_NOTICE(( "iter %u", loop )); } printf("test passed!\n"); diff --git a/src/util/tmpl/fd_map_giant.c b/src/util/tmpl/fd_map_giant.c index c3c50e4805..907eeb6233 100644 --- a/src/util/tmpl/fd_map_giant.c +++ b/src/util/tmpl/fd_map_giant.c @@ -747,7 +747,10 @@ MAP_(insert)( MAP_T * join, map->key_cnt++; /* Consider eliminating this to help make completely concurrent lockfree? */ /* ... 
and map the newly allocated element to key (this is also - guaranteed to not have collisions as per contract). */ + guaranteed to not have collisions as per contract). Note that + elements appear in the chain in order of newest to oldest. This + property is NECESSARY for an important optimization in + fd_funk_rec_query_global. */ ulong hash = MAP_KEY_HASH( (key), (map->seed) ); ulong * head = MAP_(private_list)( map ) + ( hash & (map->list_cnt-1UL) );