Skip to content

Commit

Permalink
funk: sped up query_global, eliminated speed load feature
Browse files Browse the repository at this point in the history
  • Loading branch information
asiegel-jt committed Dec 30, 2024
1 parent 18be65e commit c35f75f
Show file tree
Hide file tree
Showing 16 changed files with 196 additions and 294 deletions.
1 change: 0 additions & 1 deletion src/app/fdctl/run/tiles/fd_replay.c
Original file line number Diff line number Diff line change
Expand Up @@ -864,7 +864,6 @@ prepare_new_block_execution( fd_replay_tile_ctx_t * ctx,
xid.ul[0] = fork->slot_ctx.slot_bank.slot;
/* push a new transaction on the stack */
fd_funk_start_write( ctx->funk );
FD_TEST( !ctx->funk->speed_load );
fork->slot_ctx.funk_txn = fd_funk_txn_prepare(ctx->funk, fork->slot_ctx.funk_txn, &xid, 1);
fd_funk_end_write( ctx->funk );

Expand Down
5 changes: 0 additions & 5 deletions src/flamenco/snapshot/fd_snapshot.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,6 @@ fd_snapshot_load( const char * snapshotfile,
}

fd_funk_start_write( slot_ctx->acc_mgr->funk );
/* Speed load currently has long term memory usage consequences
which are unacceptable. Consider turning it back on when we have a
better design. */
fd_funk_speed_load_mode( slot_ctx->acc_mgr->funk, 0 );

fd_funk_txn_t * par_txn = slot_ctx->funk_txn;
fd_funk_txn_t * child_txn = slot_ctx->funk_txn;
Expand Down Expand Up @@ -212,6 +208,5 @@ fd_snapshot_load( const char * snapshotfile,

fd_rewards_recalculate_partitioned_rewards( slot_ctx );

fd_funk_speed_load_mode( slot_ctx->acc_mgr->funk, 0 );
fd_funk_end_write( slot_ctx->acc_mgr->funk );
}
10 changes: 0 additions & 10 deletions src/funk/fd_funk.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,11 +151,6 @@ fd_funk_new( void * shmem,
partvec->num_part = 0U;
funk->partvec_gaddr = fd_wksp_gaddr_fast( wksp, partvec );

/* Speed load mode is off by default */
fd_funk_speed_load_mode( funk, 0 );
funk->speed_bump_gaddr = 0UL;
funk->speed_bump_remain = 0UL;

funk->write_lock = 0UL;

FD_COMPILER_MFENCE();
Expand Down Expand Up @@ -472,8 +467,3 @@ fd_funk_check_write( fd_funk_t * funk ) {
ulong val = funk->write_lock;
if( FD_UNLIKELY(!(val&1UL)) ) FD_LOG_CRIT(( "missing call to fd_funk_start_write" ));
}

/* fd_funk_speed_load_mode stores flag in funk->speed_load.  funk is
   dereferenced without a NULL check, so the caller must pass a valid
   funk. */
void
fd_funk_speed_load_mode( fd_funk_t * funk, int flag ) {
funk->speed_load = flag;
}
13 changes: 0 additions & 13 deletions src/funk/fd_funk.h
Original file line number Diff line number Diff line change
Expand Up @@ -258,11 +258,6 @@ struct __attribute__((aligned(FD_FUNK_ALIGN))) fd_funk_private {

ulong alloc_gaddr; /* Non-zero wksp gaddr with tag wksp tag */

int speed_load; /* Is "speed load mode" active */
/* Address and size of remaining bump allocation space */
ulong speed_bump_gaddr;
ulong speed_bump_remain;

/* Padding to FD_FUNK_ALIGN here */
};

Expand Down Expand Up @@ -490,14 +485,6 @@ fd_funk_last_publish_descendant( fd_funk_t * funk,

/* Misc */

/* Enable/disable "speed load mode". When in this mode, record values
are bump allocated and never freed. This speeds up the case where
we are initializing the database with a vast number of
mostly read-only records. */

void
fd_funk_speed_load_mode( fd_funk_t * funk, int flag );

/* fd_funk_verify verifies the integrity of funk. Returns
FD_FUNK_SUCCESS if funk appears to be intact and FD_FUNK_ERR_INVAL
otherwise (logs details). Assumes funk is a current local join (NULL
Expand Down
13 changes: 6 additions & 7 deletions src/funk/fd_funk_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -230,17 +230,16 @@ fd_funk_txn_xid_set_root( fd_funk_txn_xid_t * x ) {
return x;
}

/* fd_funk_xid_key_pair_hash provides a family of hashes that hash a
(xid,key) pair pointed to by p to a uniform quasi-random 64-bit integer.
seed selects the particular hash function to use and can be an
arbitrary 64-bit value. Returns the hash. The hash functions are
high quality but not cryptographically secure. Assumes p is in the
caller's address space and valid. */
/* fd_funk_xid_key_pair_hash produces a 64-bit hash value for a
xid_key_pair. Assumes p is in the caller's address space and valid. */

FD_FN_PURE static inline ulong
fd_funk_xid_key_pair_hash( fd_funk_xid_key_pair_t const * p,
ulong seed ) {
return fd_funk_txn_xid_hash( p->xid, seed ) ^ fd_funk_rec_key_hash( p->key, seed );
/* We ignore the xid part of the key because we need all the instances
of a given record key to appear in the same hash
chain. fd_funk_rec_query_global depends on this. */
return fd_funk_rec_key_hash( p->key, seed );
}

/* fd_funk_xid_key_pair_eq returns 1 if (xid,key) pair pointed to by pa
Expand Down
73 changes: 48 additions & 25 deletions src/funk/fd_funk_rec.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,43 +46,59 @@ fd_funk_rec_query_global( fd_funk_t * funk,
fd_funk_txn_t const * txn,
fd_funk_rec_key_t const * key,
fd_funk_txn_t const ** txn_out ) {

if( FD_UNLIKELY( (!funk) | (!key) ) ) return NULL;

fd_wksp_t * wksp = fd_funk_wksp( funk );

fd_funk_txn_t * txn_map = fd_funk_txn_map( funk, wksp );
fd_funk_rec_t * rec_map = fd_funk_rec_map( funk, wksp );

if( txn ) { /* Query txn and its in-prep ancestors */
/* For record ele in all records in chain that match key. (This
code was adapted from the map_giant template ... ideally would
use a map chain iterator ala map_para template). */

fd_funk_txn_t * txn_map = fd_funk_txn_map( funk, wksp );
/* Note: the iteration order will be such that the record
for a key in a descendent of a transaction will be presented
before a record for that key in that transaction. This allows us
to succeed on the first hit (the newest transaction). It is
NECESSARY that fd_funk_rec_map_insert preserve this property. */

ulong txn_max = funk->txn_max;
fd_funk_rec_map_private_t * priv = fd_funk_rec_map_private( rec_map );
ulong hash = fd_funk_rec_key_hash( key, priv->seed );
ulong * head = fd_funk_rec_map_private_list( priv ) + ( hash & (priv->list_cnt-1UL) );
ulong * cur = head;

ulong txn_idx = (ulong)(txn - txn_map);
for(;;) {
ulong ele_idx = fd_funk_rec_map_private_unbox_idx( *cur );
if( fd_funk_rec_map_private_is_null( ele_idx ) ) break;
fd_funk_rec_t * ele = rec_map + ele_idx;
if( FD_LIKELY( hash == ele->map_hash ) && FD_LIKELY( fd_funk_rec_key_eq( key, ele->pair.key ) ) ) {

if( FD_UNLIKELY( (txn_idx>=txn_max) /* Out of map (incl NULL) */ | (txn!=(txn_map+txn_idx)) /* Bad alignment */ ) )
return NULL;
/* For cur_txn in path from [txn] to [root] where root is NULL */

/* TODO: const correct and/or fortify? */
do {
fd_funk_xid_key_pair_t pair[1]; fd_funk_xid_key_pair_init( pair, fd_funk_txn_xid( txn ), key );
fd_funk_rec_t const * rec = fd_funk_rec_map_query_const( rec_map, pair, NULL );
if( FD_LIKELY( rec ) ) {
if( FD_UNLIKELY(NULL != txn_out ) ) {
*txn_out = txn;
for( fd_funk_txn_t const * cur_txn = txn; ; cur_txn = fd_funk_txn_parent( cur_txn, txn_map ) ) {
/* If record ele is part of transaction cur_txn, we have a
match. According to the property above, this will be the
youngest descendent in the transaction stack. */

int match = FD_UNLIKELY( cur_txn ) ? /* opt for root find (FIXME: eliminate branch with cmov into txn_xid_eq?) */
fd_funk_txn_xid_eq( &cur_txn->xid, ele->pair.xid ) :
fd_funk_txn_xid_eq_root( ele->pair.xid );

if( FD_LIKELY( match ) ) {
if( txn_out ) *txn_out = cur_txn;
return ( FD_UNLIKELY( ele->flags & FD_FUNK_REC_FLAG_ERASE ) ? NULL : ele );
}
return rec;

if( cur_txn == NULL ) break;
}
txn = fd_funk_txn_parent( (fd_funk_txn_t *)txn, txn_map );
} while( FD_UNLIKELY( txn ) );

}
cur = &ele->map_next;
}

/* Query the last published transaction */

fd_funk_xid_key_pair_t pair[1]; fd_funk_xid_key_pair_init( pair, fd_funk_root( funk ), key );
return fd_funk_rec_map_query_const( rec_map, pair, NULL );
if( txn_out ) *txn_out = NULL;
return NULL;
}

void *
Expand Down Expand Up @@ -538,10 +554,7 @@ fd_funk_rec_write_prepare( fd_funk_t * funk,
/* Grow the record to the right size */
rec->flags &= ~FD_FUNK_REC_FLAG_ERASE;
if ( fd_funk_val_sz( rec ) < min_val_size ) {
if( funk->speed_load )
rec = fd_funk_val_speed_load( funk, rec, min_val_size, wksp, opt_err );
else
rec = fd_funk_val_truncate( rec, min_val_size, fd_funk_alloc( funk, wksp ), wksp, opt_err );
rec = fd_funk_val_truncate( rec, min_val_size, fd_funk_alloc( funk, wksp ), wksp, opt_err );
}

return rec;
Expand Down Expand Up @@ -605,6 +618,11 @@ fd_funk_rec_verify( fd_funk_t * funk ) {
TEST( (rec_idx<rec_max) && (fd_funk_txn_idx( rec_map[ rec_idx ].txn_cidx )==txn_idx) && rec_map[ rec_idx ].tag==0U );
rec_map[ rec_idx ].tag = 1U;
cnt++;
/* Cross-check the lookup path: querying this key with a NULL txn must
   return exactly this record, or NULL when the record carries the
   ERASE flag.  The comparison must be ==; an assignment here would
   make the test pass for any non-NULL record address. */
fd_funk_rec_t const * rec2 = fd_funk_rec_query_global( funk, NULL, rec_map[ rec_idx ].pair.key, NULL );
if( FD_UNLIKELY( rec_map[ rec_idx ].flags & FD_FUNK_REC_FLAG_ERASE ) ) {
  TEST( rec2 == NULL );
} else {
  TEST( rec2 == rec_map + rec_idx );
}
ulong next_idx = rec_map[ rec_idx ].next_idx;
if( !fd_funk_rec_idx_is_null( next_idx ) ) TEST( rec_map[ next_idx ].prev_idx==rec_idx );
rec_idx = next_idx;
Expand All @@ -620,6 +638,11 @@ fd_funk_rec_verify( fd_funk_t * funk ) {
TEST( (rec_idx<rec_max) && (fd_funk_txn_idx( rec_map[ rec_idx ].txn_cidx )==txn_idx) && rec_map[ rec_idx ].tag==0U );
rec_map[ rec_idx ].tag = 1U;
cnt++;
/* Cross-check the lookup path: querying this key starting from txn
   must return exactly this record, or NULL when the record carries
   the ERASE flag.  The comparison must be ==; an assignment here
   would make the test pass for any non-NULL record address. */
fd_funk_rec_t const * rec2 = fd_funk_rec_query_global( funk, txn, rec_map[ rec_idx ].pair.key, NULL );
if( FD_UNLIKELY( rec_map[ rec_idx ].flags & FD_FUNK_REC_FLAG_ERASE ) ) {
  TEST( rec2 == NULL );
} else {
  TEST( rec2 == rec_map + rec_idx );
}
ulong next_idx = rec_map[ rec_idx ].next_idx;
if( !fd_funk_rec_idx_is_null( next_idx ) ) TEST( rec_map[ next_idx ].prev_idx==rec_idx );
rec_idx = next_idx;
Expand Down
16 changes: 4 additions & 12 deletions src/funk/fd_funk_rec.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,9 @@
rec's flag are reserved to be used in conjunction with the ERASE flag.
- ERASE indicates a record in an in-preparation transaction should be
erased if and when the in-preparation transaction is published. If
set, there will be no value resources used by this record. Will not
be set on a published record. Will not be set if an in-preparation
transaction ancestor has this record with erase set. If set, the
first ancestor transaction encountered (going from youngest to
oldest) will not have erased set.
If the ERASE flag is set, then the five most significant bytes of the
flags field for the record will be used to store user-specified data. */
erased if and when the in-preparation transaction is published. If
set on a published record, it serves as a tombstone.
If set, there will be no value resources used by this record. */

#define FD_FUNK_REC_FLAG_ERASE (1UL<<0)

Expand Down Expand Up @@ -71,8 +65,6 @@ struct __attribute__((aligned(FD_FUNK_REC_ALIGN))) fd_funk_rec {
ulong next_part_idx; /* Record map index of next record in partition chain */
uint part; /* Partition number, FD_FUNK_PART_NULL if none */

int val_no_free; /* If set, do not call alloc_free on the value */

/* Padding to FD_FUNK_REC_ALIGN here (TODO: consider using self index
in the structures to accelerate indexing computations if padding
permits as this structure currently has 8 bytes of padding) */
Expand Down Expand Up @@ -448,7 +440,7 @@ fd_funk_rec_remove( fd_funk_t * funk,
ulong erase_data );


/* When a record is erased there is metadata stored in the five most
/* When a record is erased there is metadata stored in the five most
significant bytes of a record. These are helpers to make setting
and getting these values simple. The caller is responsible for doing
a check on the flag of the record before using the value of the erase
Expand Down
Loading

0 comments on commit c35f75f

Please sign in to comment.