From 5ac9a7207698434906046dfc3a068cca74a27548 Mon Sep 17 00:00:00 2001 From: Yunhao Zhang Date: Mon, 25 Nov 2024 20:20:43 +0000 Subject: [PATCH] Address some FIXMEs for wen-restart --- src/app/fdctl/run/tiles/fd_gossip.c | 75 +++++++++------- src/app/fdctl/run/tiles/fd_replay.c | 63 +++++++------ src/disco/restart/fd_restart.c | 133 ++++++++++++++++++++-------- src/disco/restart/fd_restart.h | 29 ++++-- 4 files changed, 194 insertions(+), 106 deletions(-) diff --git a/src/app/fdctl/run/tiles/fd_gossip.c b/src/app/fdctl/run/tiles/fd_gossip.c index bb07416b53..9cf0fb7409 100644 --- a/src/app/fdctl/run/tiles/fd_gossip.c +++ b/src/app/fdctl/run/tiles/fd_gossip.c @@ -293,40 +293,7 @@ static void gossip_deliver_fun( fd_crds_data_t * data, void * arg ) { fd_gossip_tile_ctx_t * ctx = (fd_gossip_tile_ctx_t *)arg; - if( fd_crds_data_is_restart_last_voted_fork_slots( data ) ) { - ulong struct_len = sizeof( fd_gossip_restart_last_voted_fork_slots_t ); - /* TODO: handle RunLengthEncoding for bitmap */ - ulong bitmap_len = data->inner.restart_last_voted_fork_slots.offsets.inner.raw_offsets.offsets.bits.bits_len; - if( FD_UNLIKELY( bitmap_len>LAST_VOTED_FORK_MAX_BITMAP_BYTES ) ) { - FD_LOG_WARNING(( "Ignore an invalid gossip message with bitmap length %lu, greater than %lu", bitmap_len, LAST_VOTED_FORK_MAX_BITMAP_BYTES )); - return; - } - - uchar * last_vote_msg_ = fd_chunk_to_laddr( ctx->replay_out_mem, ctx->replay_out_chunk ); - FD_STORE( uint, last_vote_msg_, fd_crds_data_enum_restart_last_voted_fork_slots ); - fd_memcpy( last_vote_msg_+sizeof(uint), &data->inner.restart_last_voted_fork_slots, struct_len ); - fd_memcpy( last_vote_msg_+sizeof(uint)+struct_len, - data->inner.restart_last_voted_fork_slots.offsets.inner.raw_offsets.offsets.bits.bits, - bitmap_len ); - - ulong total_len = sizeof(uint) + struct_len + bitmap_len; - fd_mcache_publish( ctx->replay_out_mcache, ctx->replay_out_depth, ctx->replay_out_seq, 1UL, ctx->replay_out_chunk, - total_len, 0UL, 0, 0 ); - ctx->replay_out_seq = fd_seq_inc( ctx->replay_out_seq, 1UL ); - ctx->replay_out_chunk = fd_dcache_compact_next( ctx->replay_out_chunk, total_len, ctx->replay_out_chunk0, ctx->replay_out_wmark ); - } else if( fd_crds_data_is_restart_heaviest_fork( data ) ) { - uchar * heaviest_fork_msg_ = fd_chunk_to_laddr( ctx->replay_out_mem, ctx->replay_out_chunk ); - FD_STORE( uint, heaviest_fork_msg_, fd_crds_data_enum_restart_heaviest_fork ); - fd_memcpy( heaviest_fork_msg_+sizeof(uint), - &data->inner.restart_heaviest_fork, - sizeof(fd_gossip_restart_heaviest_fork_t) ); - - ulong total_len = sizeof(uint) + sizeof(fd_gossip_restart_heaviest_fork_t); - fd_mcache_publish( ctx->replay_out_mcache, ctx->replay_out_depth, ctx->replay_out_seq, 1UL, ctx->replay_out_chunk, - total_len, 0UL, 0, 0 ); - ctx->replay_out_seq = fd_seq_inc( ctx->replay_out_seq, 1UL ); - ctx->replay_out_chunk = fd_dcache_compact_next( ctx->replay_out_chunk, total_len, ctx->replay_out_chunk0, ctx->replay_out_wmark ); - } else if( fd_crds_data_is_vote( data ) ) { + if( fd_crds_data_is_vote( data ) ) { fd_gossip_vote_t const * gossip_vote = &data->inner.vote; if( verify_vote_txn( gossip_vote ) != 0 ) { return; @@ -382,6 +349,46 @@ gossip_deliver_fun( fd_crds_data_t * data, void * arg ) { // duplicate_shred->chunk_len, 0UL, 0, 0 ); // ctx->duplicate_shred_out_seq = fd_seq_inc( ctx->duplicate_shred_out_seq, 1UL ); // ctx->duplicate_shred_out_chunk = fd_dcache_compact_next( ctx->duplicate_shred_out_chunk, duplicate_shred->chunk_len, ctx->duplicate_shred_out_chunk0, ctx->duplicate_shred_out_wmark ); + } else if( fd_crds_data_is_restart_last_voted_fork_slots( data ) ) { + ulong struct_len = sizeof( fd_gossip_restart_last_voted_fork_slots_t ); + uchar * last_vote_msg_ = fd_chunk_to_laddr( ctx->replay_out_mem, ctx->replay_out_chunk ); + FD_STORE( uint, last_vote_msg_, fd_crds_data_enum_restart_last_voted_fork_slots ); + + ulong bitmap_len = 0; + uchar * bitmap_dst = last_vote_msg_+sizeof(uint)+struct_len; + if ( FD_LIKELY( data->inner.restart_last_voted_fork_slots.offsets.discriminant==fd_restart_slots_offsets_enum_raw_offsets ) ) { + uchar * bitmap_src = data->inner.restart_last_voted_fork_slots.offsets.inner.raw_offsets.offsets.bits.bits; + bitmap_len = data->inner.restart_last_voted_fork_slots.offsets.inner.raw_offsets.offsets.bits.bits_len; + memcpy( bitmap_dst, bitmap_src, bitmap_len ); + } else { + uchar bitmap_src [ LAST_VOTED_FORK_MAX_BITMAP_BYTES ]; + fd_restart_convert_runlength_to_raw_bitmap( &data->inner.restart_last_voted_fork_slots, bitmap_src, &bitmap_len ); + if( FD_UNLIKELY( bitmap_len > LAST_VOTED_FORK_MAX_BITMAP_BYTES ) ) { + FD_LOG_WARNING(( "Ignore an invalid gossip message with bitmap length greater than %lu", LAST_VOTED_FORK_MAX_BITMAP_BYTES )); + return; + } + memcpy( bitmap_dst, bitmap_src, bitmap_len ); + } + /* Copy the struct to the buffer now because it may be modified by fd_restart_convert_runlength_to_raw_bitmap */ + fd_memcpy( last_vote_msg_+sizeof(uint), &data->inner.restart_last_voted_fork_slots, struct_len ); + + ulong total_len = sizeof(uint) + struct_len + bitmap_len; + fd_mcache_publish( ctx->replay_out_mcache, ctx->replay_out_depth, ctx->replay_out_seq, 1UL, ctx->replay_out_chunk, + total_len, 0UL, 0, 0 ); + ctx->replay_out_seq = fd_seq_inc( ctx->replay_out_seq, 1UL ); + ctx->replay_out_chunk = fd_dcache_compact_next( ctx->replay_out_chunk, total_len, ctx->replay_out_chunk0, ctx->replay_out_wmark ); + } else if( fd_crds_data_is_restart_heaviest_fork( data ) ) { + uchar * heaviest_fork_msg_ = fd_chunk_to_laddr( ctx->replay_out_mem, ctx->replay_out_chunk ); + FD_STORE( uint, heaviest_fork_msg_, fd_crds_data_enum_restart_heaviest_fork ); + fd_memcpy( heaviest_fork_msg_+sizeof(uint), + &data->inner.restart_heaviest_fork, + sizeof(fd_gossip_restart_heaviest_fork_t) ); + + ulong total_len = sizeof(uint) + sizeof(fd_gossip_restart_heaviest_fork_t); + fd_mcache_publish( ctx->replay_out_mcache, ctx->replay_out_depth, ctx->replay_out_seq, 1UL, ctx->replay_out_chunk, + total_len, 0UL, 0, 0 ); + ctx->replay_out_seq = fd_seq_inc( ctx->replay_out_seq, 1UL ); + ctx->replay_out_chunk = fd_dcache_compact_next( ctx->replay_out_chunk, total_len, ctx->replay_out_chunk0, ctx->replay_out_wmark ); } } diff --git a/src/app/fdctl/run/tiles/fd_replay.c b/src/app/fdctl/run/tiles/fd_replay.c index 98d23ee974..03820d5de2 100644 --- a/src/app/fdctl/run/tiles/fd_replay.c +++ b/src/app/fdctl/run/tiles/fd_replay.c @@ -470,7 +470,11 @@ during_frag( fd_replay_tile_ctx_t * ctx, FD_LOG_ERR(( "chunk %lu %lu corrupt, not in range [%lu,%lu]", chunk, sz, ctx->pack_in_chunk0, ctx->pack_in_wmark )); } - fd_memcpy( ctx->restart_gossip_msg, fd_chunk_to_laddr( ctx->gossip_in_mem, chunk ), sz ); + if( FD_LIKELY( ctx->in_wen_restart ) ) { + fd_memcpy( ctx->restart_gossip_msg, fd_chunk_to_laddr( ctx->gossip_in_mem, chunk ), sz ); + } else { + FD_LOG_WARNING(( "Received a gossip message for wen-restart while FD is not in wen-restart mode" )); + } return; } @@ -893,23 +897,21 @@ after_frag( fd_replay_tile_ctx_t * ctx, (void)tsorig; if( FD_UNLIKELY( in_idx==GOSSIP_IN_IDX ) ) { - if( FD_UNLIKELY( !ctx->in_wen_restart ) ) { - FD_LOG_WARNING(( "Received gossip messages for wen-restart while FD is not in wen-restart mode" )); - } else { - ulong heaviest_fork_found = 0; - fd_restart_recv_gossip_msg( ctx->restart, ctx->restart_gossip_msg, &heaviest_fork_found ); - if( FD_UNLIKELY( heaviest_fork_found ) ) { - ulong need_repair = 0; - fd_restart_find_heaviest_fork_bank_hash( ctx->restart, ctx->funk, ctx->blockstore, &need_repair ); - if( FD_LIKELY( need_repair ) ) { - /* Send the heaviest fork slot to the store tile for repair and replay */ - uchar * buf = fd_chunk_to_laddr( ctx->store_out_mem, ctx->store_out_chunk ); - FD_STORE( ulong, buf, ctx->restart->heaviest_fork_slot ); - fd_mcache_publish( ctx->store_out_mcache, ctx->store_out_depth, ctx->store_out_seq, 1UL, ctx->store_out_chunk, - sizeof(ulong), 0UL, 0, 0 ); - ctx->store_out_seq = fd_seq_inc( ctx->store_out_seq, 1UL ); - ctx->store_out_chunk = fd_dcache_compact_next( ctx->store_out_chunk, sizeof(ulong), ctx->store_out_chunk0, ctx->store_out_wmark ); - } + if( FD_UNLIKELY( !ctx->in_wen_restart ) ) return; + + ulong heaviest_fork_found = 0; + fd_restart_recv_gossip_msg( ctx->restart, ctx->restart_gossip_msg, &heaviest_fork_found ); + if( FD_UNLIKELY( heaviest_fork_found ) ) { + ulong need_repair = 0; + fd_restart_find_heaviest_fork_bank_hash( ctx->restart, ctx->funk, ctx->blockstore, &need_repair ); + if( FD_LIKELY( need_repair ) ) { + /* Send the heaviest fork slot to the store tile for repair and replay */ + uchar * buf = fd_chunk_to_laddr( ctx->store_out_mem, ctx->store_out_chunk ); + FD_STORE( ulong, buf, ctx->restart->heaviest_fork_slot ); + fd_mcache_publish( ctx->store_out_mcache, ctx->store_out_depth, ctx->store_out_seq, 1UL, ctx->store_out_chunk, + sizeof(ulong), 0UL, 0, 0 ); + ctx->store_out_seq = fd_seq_inc( ctx->store_out_seq, 1UL ); + ctx->store_out_chunk = fd_dcache_compact_next( ctx->store_out_chunk, sizeof(ulong), ctx->store_out_chunk0, ctx->store_out_wmark ); } } return; @@ -1467,8 +1469,21 @@ after_credit( fd_replay_tile_ctx_t * ctx, ulong buf_len = 0; uchar * buf = fd_chunk_to_laddr( ctx->gossip_out_mem, ctx->gossip_out_chunk ); fd_sysvar_slot_history_read( ctx->slot_ctx, fd_scratch_virtual(), ctx->slot_ctx->slot_history ); + + fd_epoch_bank_t * epoch_bank = fd_exec_epoch_ctx_epoch_bank( ctx->slot_ctx->epoch_ctx ); + fd_stakes_t * stakes = &epoch_bank->stakes; + /* FIXME: Restoring funk checkpoint does not give the correct epoch number, + * i.e., the epoch number when the funk checkpoint file is produced. + * For now, we need to redo stakes->epoch in order to avoid a BHM in + * a local setup when repairing blocks. */ + stakes->epoch = ctx->blockstore->smr / epoch_bank->epoch_schedule.slots_per_epoch; + FD_LOG_NOTICE(( "slots_per_epoch=%lu, blockstore smr=%lu", epoch_bank->epoch_schedule.slots_per_epoch, ctx->blockstore->smr )); + FD_LOG_WARNING(( "Reset stakes->epoch=%lu (blockstore root / slots per epoch)", stakes->epoch )); + fd_restart_init( ctx->restart, + stakes->epoch, &ctx->slot_ctx->slot_bank.epoch_stakes, + &epoch_bank->next_epoch_stakes, ctx->tower, ctx->slot_ctx->slot_history, ctx->funk, @@ -1485,17 +1500,6 @@ after_credit( fd_replay_tile_ctx_t * ctx, buf_len, 0UL, 0, 0 ); ctx->gossip_out_seq = fd_seq_inc( ctx->gossip_out_seq, 1UL ); ctx->gossip_out_chunk = fd_dcache_compact_next( ctx->gossip_out_chunk, buf_len, ctx->gossip_out_chunk0, ctx->gossip_out_wmark ); - - /* FIXME: Restoring funk checkpoint does not give the correct epoch number, - * i.e., the epoch number when the funk checkpoint file is produced. - * For now, we need to redo stakes->epoch in order to avoid a BHM in - * a local setup when repairing blocks. */ - fd_epoch_bank_t * epoch_bank = fd_exec_epoch_ctx_epoch_bank( ctx->slot_ctx->epoch_ctx ); - fd_stakes_t * stakes = &epoch_bank->stakes; - stakes->epoch = ctx->blockstore->smr / epoch_bank->epoch_schedule.slots_per_epoch; - FD_LOG_NOTICE(( "slots_per_epoch=%lu, blockstore root=%lu", epoch_bank->epoch_schedule.slots_per_epoch, ctx->blockstore->smr )); - FD_LOG_WARNING(( "Reset stakes->epoch=%lu (blockstore root / slots per epoch)", stakes->epoch )); - } FD_SCRATCH_SCOPE_END; } } @@ -1549,6 +1553,7 @@ during_housekeeping( void * _ctx ) { fd_ghost_publish( ctx->ghost, smr ); } + /* TODO: generate snapshot file instead of checkpointing funk; dump the latest tower sent */ /* FIXME: Decide how to tell FD to checkpoint funk and then halt before restarting FD in wen-restart mode */ if( ctx->in_wen_restart && ctx->curr_slot>ctx->snapshot_slot+250 ) { checkpt( ctx ); diff --git a/src/disco/restart/fd_restart.c b/src/disco/restart/fd_restart.c index 4b19195885..d2fe343ae4 100644 --- a/src/disco/restart/fd_restart.c +++ b/src/disco/restart/fd_restart.c @@ -39,7 +39,9 @@ fd_restart_join( void * restart ) { void fd_restart_init( fd_restart_t * restart, - fd_vote_accounts_t const * accs, + ulong root_epoch, + fd_vote_accounts_t const * vote_acct_current_epoch, + fd_vote_accounts_t const * vote_acct_next_epoch, fd_tower_t const * tower, fd_slot_history_t const * slot_history, fd_funk_t * funk, @@ -48,32 +50,50 @@ fd_restart_init( fd_restart_t * restart, fd_pubkey_t * coordinator_pubkey, uchar * out_buf, ulong * out_buf_len ) { - restart->num_vote_accts = fd_stake_weights_by_node( accs, restart->stake_weights ); - restart->total_stake = 0; - restart->total_active_stake = 0; - restart->tower_root = tower->root; - restart->funk_root = fd_funk_last_publish( funk )->ul[0]; - FD_TEST( restart->num_vote_accts <= MAX_RESTART_PEERS ); - FD_LOG_WARNING(( "fd_restart_init: funk root=%lu, tower root=%lu", restart->funk_root, restart->tower_root )); + /* Save the vote account information of the **current** epoch */ + if( vote_acct_current_epoch->vote_accounts_root==NULL ) FD_LOG_ERR(( "vote account information for epoch#%lu is missing", root_epoch )); + restart->num_vote_accts[0] = fd_stake_weights_by_node( vote_acct_current_epoch, restart->stake_weights[0] ); + restart->total_stake[0] = 0; + restart->total_stake_received[0] = 0; + restart->total_stake_received_and_voted[0] = 0; + + FD_LOG_NOTICE(( "%lu staked voters in the current epoch", restart->num_vote_accts[0] )); + for( ulong i=0; inum_vote_accts[0]; i++ ) { + FD_LOG_NOTICE(( "fd_restart_init: %s holds stake amount=%lu in epoch#%lu", + FD_BASE58_ENC_32_ALLOCA( &restart->stake_weights[0][i].key ), + restart->stake_weights[0][i].stake, root_epoch )); + restart->total_stake[0] += restart->stake_weights[0][i].stake; + } - FD_LOG_NOTICE(( "%lu staked voters", restart->num_vote_accts )); - for( ulong i=0; inum_vote_accts; i++ ) { - FD_LOG_NOTICE(( "fd_restart_init: %s holds stake amount=%lu", - FD_BASE58_ENC_32_ALLOCA( &restart->stake_weights[i].key ), - restart->stake_weights[i].stake )); - restart->total_stake += restart->stake_weights[i].stake; + /* Save the vote account information of the **next** epoch */ + if( vote_acct_next_epoch->vote_accounts_root==NULL ) FD_LOG_ERR(( "vote account information for epoch#%lu is missing", root_epoch+1 )); + restart->num_vote_accts[1] = fd_stake_weights_by_node( vote_acct_next_epoch, restart->stake_weights[1] ); + restart->total_stake[1] = 0; + restart->total_stake_received[1] = 0; + restart->total_stake_received_and_voted[1] = 0; + + FD_LOG_NOTICE(( "%lu staked voters in the next epoch", restart->num_vote_accts[1] )); + for( ulong i=0; inum_vote_accts[1]; i++ ) { + FD_LOG_NOTICE(( "fd_restart_init: %s holds stake amount=%lu in epoch#%lu", + FD_BASE58_ENC_32_ALLOCA( &restart->stake_weights[1][i].key ), + restart->stake_weights[1][i].stake, root_epoch+1 )); + restart->total_stake[1] += restart->stake_weights[1][i].stake; } - fd_gossip_restart_last_voted_fork_slots_t * msg = (fd_gossip_restart_last_voted_fork_slots_t *) fd_type_pun( out_buf ); + /* Decide the last voted slot for the last_voted_fork_slots gossip message being sent out */ + /* TODO: dump the right tower to use when terminating FD and reload the right tower */ /* FIXME: Need to check whether this tower loaded from the funk checkpoint is the right one to use; It seems stale. */ if( fd_tower_votes_cnt( tower->votes ) == 0 ) { FD_LOG_ERR(( "The tower loaded has 0 votes and wen-restart cannot proceed without an appropriate tower" )); } + fd_gossip_restart_last_voted_fork_slots_t * msg = (fd_gossip_restart_last_voted_fork_slots_t *) fd_type_pun( out_buf ); msg->last_voted_slot = fd_tower_votes_peek_tail_const( tower->votes )->slot; if( FD_UNLIKELY( msg->last_voted_slot>=slot_history->next_slot ) ) { FD_LOG_ERR(( "Voted slot should not exceed the end of slot history" )); } + /* Given last_voted_slot, get the block hash for the last_voted_fork_slots gossip message */ + /* TODO: should this be block hash or bank hash? */ fd_blockstore_start_read( blockstore ); fd_hash_t const * vote_block_hash = fd_blockstore_block_hash_query( blockstore, msg->last_voted_slot ); fd_blockstore_end_read( blockstore ); @@ -84,14 +104,15 @@ fd_restart_init( fd_restart_t * restart, fd_memcpy( msg->last_voted_hash.hash, vote_block_hash->hash, sizeof(fd_hash_t) ); } + /* Given last_voted_slot, get the slot history bitmap for the last_voted_fork_slots gossip message */ ulong end_slot = msg->last_voted_slot; ulong start_slot = ( end_slot>LAST_VOTED_FORK_MAX_SLOTS? end_slot-LAST_VOTED_FORK_MAX_SLOTS : 0 ); ulong num_slots = end_slot-start_slot+1; msg->offsets.discriminant = fd_restart_slots_offsets_enum_raw_offsets; msg->offsets.inner.raw_offsets.offsets.has_bits = 1; msg->offsets.inner.raw_offsets.offsets.len = num_slots; - msg->offsets.inner.raw_offsets.offsets.bits.bits_len = ( num_slots+bits_per_uchar )/bits_per_uchar; - *out_buf_len = sizeof(fd_gossip_restart_last_voted_fork_slots_t) + ( num_slots+bits_per_uchar )/bits_per_uchar; + msg->offsets.inner.raw_offsets.offsets.bits.bits_len = ( num_slots-1 )/bits_per_uchar+1; + *out_buf_len = sizeof(fd_gossip_restart_last_voted_fork_slots_t) + ( num_slots-1 )/bits_per_uchar+1; FD_LOG_NOTICE(( "fd_restart_init: encoding %lu bits in bitmap", num_slots )); uchar * bitmap = out_buf + sizeof(fd_gossip_restart_last_voted_fork_slots_t); @@ -112,6 +133,13 @@ fd_restart_init( fd_restart_t * restart, } } + /* Initialize the other fields of fd_restart_t */ + restart->root_epoch = root_epoch; + restart->tower_root = tower->root; + restart->funk_root = fd_funk_last_publish( funk )->ul[0]; + FD_TEST( restart->num_vote_accts[0] <= MAX_RESTART_PEERS ); + FD_LOG_WARNING(( "fd_restart_init: funk root=%lu, tower root=%lu", restart->funk_root, restart->tower_root )); + restart->stage = WR_STAGE_FIND_HEAVIEST_FORK_SLOT_NUM; restart->heaviest_fork_slot = 0; restart->heaviest_fork_ready = 0; @@ -135,16 +163,17 @@ fd_restart_recv_last_voted_fork_slots( fd_restart_t * restart, return; } + /* TODO: do this for both epochs */ ulong stake = ULONG_MAX; fd_pubkey_t * pubkey = &msg->from; - for( ulong i=0; inum_vote_accts; i++ ) { - if( FD_UNLIKELY( memcmp( pubkey->key, restart->stake_weights[i].key.key, sizeof(fd_pubkey_t) )==0 ) ) { - if( FD_UNLIKELY( restart->last_voted_fork_slots_received[i] ) ) { + for( ulong i=0; inum_vote_accts[0]; i++ ) { + if( FD_UNLIKELY( memcmp( pubkey->key, restart->stake_weights[0][i].key.key, sizeof(fd_pubkey_t) )==0 ) ) { + if( FD_UNLIKELY( restart->last_voted_fork_slots_received[0][i] ) ) { FD_LOG_NOTICE(( "Duplicate last_voted_fork_slots message from %s", FD_BASE58_ENC_32_ALLOCA( pubkey ) )); return; } - stake = restart->stake_weights[i].stake; - restart->last_voted_fork_slots_received[i] = 1; + stake = restart->stake_weights[0][i].stake; + restart->last_voted_fork_slots_received[0][i] = 1; break; } } @@ -153,17 +182,15 @@ fd_restart_recv_last_voted_fork_slots( fd_restart_t * restart, return; } - restart->total_active_stake += stake; - ulong percentile = restart->total_active_stake * 100 / restart->total_stake; + restart->total_stake_received[0] += stake; + ulong percentile = restart->total_stake_received[0] * 100 / restart->total_stake[0]; FD_LOG_NOTICE(( "Total active stake: %lu/%lu = %lu%\n", - restart->total_active_stake, - restart->total_stake, + restart->total_stake_received[0], + restart->total_stake[0], percentile)); - if( FD_UNLIKELY( msg->offsets.discriminant==fd_restart_slots_offsets_enum_run_length_encoding ) ) { - FD_LOG_ERR(( "Decoding RunLengthEncoding offsets is not implemented yet" )); - } - + FD_LOG_WARNING(( "last_voted_slot=%lu, offsets_len=%lu", msg->last_voted_slot, msg->offsets.inner.raw_offsets.offsets.len )); + /* Decode the slot history bitmap in the gossip message, and aggregate stake into slot_to_stake accordingly */ for( ulong i=0, last_voted_slot = msg->last_voted_slot; \ ioffsets.inner.raw_offsets.offsets.len; i++ ) { if( FD_UNLIKELY( last_voted_slottower_root+i ) ) break; @@ -175,12 +202,14 @@ fd_restart_recv_last_voted_fork_slots( fd_restart_t * restart, if( FD_LIKELY( bit ) ) { ulong offset = slot-restart->tower_root; restart->slot_to_stake[ offset ] += stake; + /* TODO: repair slots >=stake_threshold along the way instead of waiting till the end? */ } } + /* TODO: take care of both epochs, which requires the 33% threshold */ if( FD_UNLIKELY( percentile>=WAIT_FOR_SUPERMAJORITY_THRESHOLD_PERCENT ) ) { - ulong stake_threshold = restart->total_active_stake - - restart->total_stake * HEAVIEST_FORK_THRESHOLD_DELTA_PERCENT / 100UL; + ulong stake_threshold = restart->total_stake_received[0] + - restart->total_stake[0] * HEAVIEST_FORK_THRESHOLD_DELTA_PERCENT / 100UL; FD_LOG_NOTICE(( "Stake threshold: %lu", stake_threshold )); restart->heaviest_fork_slot = restart->tower_root; @@ -194,6 +223,8 @@ fd_restart_recv_last_voted_fork_slots( fd_restart_t * restart, FD_LOG_ERR(( "Funk root(%lu) is higher than the heaviest fork slot(%lu)", restart->funk_root, restart->heaviest_fork_slot )); } + /* TODO: verify that all slots >=stake_threshold form a single chain in blockstore and contain the funk root */ + /* But, at this point, maybe there are still slots need to be repaired? */ *out_heaviest_fork_found = 1; restart->stage = WR_STAGE_FIND_HEAVIEST_FORK_BANK_HASH; @@ -213,7 +244,7 @@ fd_restart_recv_heaviest_fork( fd_restart_t * restart, sizeof(fd_hash_t) ); restart->coordinator_heaviest_fork_ready = 1; } else { - FD_LOG_WARNING(( "Received a restart_heaviest_fork message from non-coordinator %s", + FD_LOG_WARNING(( "Received and ignored a restart_heaviest_fork message from non-coordinator %s", FD_BASE58_ENC_32_ALLOCA( &msg->from ) )); } } @@ -266,6 +297,7 @@ fd_restart_find_heaviest_fork_bank_hash( fd_restart_t * restart, *out_need_repair = 1; } + /* TODO: if we do the repair along the way, we cannot do this cleanup below... */ /* Cancel txns after the funk root from funk */ fd_funk_start_write( funk ); for( ulong slot=restart->funk_root+1; slot<=restart->heaviest_fork_slot; slot++ ) { @@ -303,8 +335,9 @@ fd_restart_verify_heaviest_fork( fd_restart_t * restart, if( FD_UNLIKELY( memcmp( restart->my_pubkey.key, restart->coordinator_pubkey.key, sizeof(fd_pubkey_t) )==0 ) ) { - // I am the wen-restart coordinator + /* I am the wen-restart coordinator */ if( FD_UNLIKELY( !restart->coordinator_heaviest_fork_sent ) ) { + /* TODO: send this message periodically? */ restart->coordinator_heaviest_fork_sent = 1; fd_gossip_restart_heaviest_fork_t * msg = (fd_gossip_restart_heaviest_fork_t *) fd_type_pun( out_buf ); msg->observed_stake = 0; @@ -313,7 +346,7 @@ fd_restart_verify_heaviest_fork( fd_restart_t * restart, *out_send = 1; } } else if( FD_UNLIKELY( restart->coordinator_heaviest_fork_ready==1 ) ) { - // I am not the wen-restart coordinator + /* I am not the wen-restart coordinator */ if( restart->heaviest_fork_slot!=restart->coordinator_heaviest_fork_slot ) { FD_LOG_ERR(( "Heaviest fork mismatch: my slot=%lu, coordinator slot=%lu", restart->heaviest_fork_slot, restart->coordinator_heaviest_fork_slot )); @@ -326,10 +359,38 @@ fd_restart_verify_heaviest_fork( fd_restart_t * restart, FD_BASE58_ENC_32_ALLOCA( &restart->heaviest_fork_bank_hash ), FD_BASE58_ENC_32_ALLOCA( &restart->coordinator_heaviest_fork_bank_hash ) )); } - /* TODO: generate an incremental snapshot */ + /* TODO: insert a hard fork and generate an incremental snapshot */ restart->stage = WR_STAGE_GENERATE_SNAPSHOT; FD_LOG_ERR(( "Wen-restart succeeds with slot=%lu, bank hash=%s", restart->heaviest_fork_slot, FD_BASE58_ENC_32_ALLOCA( &restart->heaviest_fork_bank_hash ) )); } } } + +void +fd_restart_convert_runlength_to_raw_bitmap( fd_gossip_restart_last_voted_fork_slots_t * msg, + uchar * out_bitmap, + ulong * out_bitmap_len ) { + ulong bit_cnt = 0; + *out_bitmap_len = 0; + fd_memset( out_bitmap, 0, LAST_VOTED_FORK_MAX_BITMAP_BYTES ); + + for ( ulong i=0, bit=1; ioffsets.inner.run_length_encoding.offsets_len; i++ ) { + ushort cnt = msg->offsets.inner.run_length_encoding.offsets[i].bits; + if( FD_UNLIKELY( *out_bitmap_len > LAST_VOTED_FORK_MAX_BITMAP_BYTES ) ) return; + if( bit ) { + for ( ulong pos=bit_cnt; posoffsets.discriminant = fd_restart_slots_offsets_enum_raw_offsets; + msg->offsets.inner.raw_offsets.offsets.has_bits = 1; + msg->offsets.inner.raw_offsets.offsets.len = bit_cnt; + msg->offsets.inner.raw_offsets.offsets.bits.bits_len = *out_bitmap_len; +} + +/* TODO: the coordinator needs to aggregate HeaviestFork messages, just for the information */ diff --git a/src/disco/restart/fd_restart.h b/src/disco/restart/fd_restart.h index 184a761b4e..00f4429244 100644 --- a/src/disco/restart/fd_restart.h +++ b/src/disco/restart/fd_restart.h @@ -14,8 +14,8 @@ #define RESTART_MAGIC_TAG 128UL /* Protocol parameters of wen-restart */ +#define MAX_EPOCHS 2UL #define HEAVIEST_FORK_THRESHOLD_DELTA_PERCENT 38UL -#define REPAIR_THRESHOLD_PERCENT 42UL #define WAIT_FOR_SUPERMAJORITY_THRESHOLD_PERCENT 80UL #define LAST_VOTED_FORK_MAX_SLOTS 0xFFFFUL @@ -42,14 +42,18 @@ struct fd_restart { /* States initialized at the beginning */ ulong funk_root; ulong tower_root; - ulong total_stake; - ulong num_vote_accts; - fd_stake_weight_t stake_weights[ MAX_RESTART_PEERS ]; + ulong root_epoch; + /* TODO: it seems that we can remove tower_root and only use funk_root */ + + ulong total_stake[ MAX_EPOCHS ]; + ulong num_vote_accts[ MAX_EPOCHS ]; + fd_stake_weight_t stake_weights[ MAX_EPOCHS ][ MAX_RESTART_PEERS ]; /* States maintained by the FIND_HEAVIEST_FORK_SLOT_NUM stage */ - ulong total_active_stake; + ulong total_stake_received[ MAX_EPOCHS ]; + ulong total_stake_received_and_voted[ MAX_EPOCHS ]; + uchar last_voted_fork_slots_received[ MAX_EPOCHS ][ MAX_RESTART_PEERS ]; ulong slot_to_stake[ LAST_VOTED_FORK_MAX_SLOTS ]; - uchar last_voted_fork_slots_received[ MAX_RESTART_PEERS ]; /* States maintained by the FIND_HEAVIEST_FORK_BANK_HASH stage */ fd_pubkey_t my_pubkey; @@ -98,7 +102,9 @@ fd_restart_join( void * restart ); in the wen-restart protocol (fd_gossip_restart_last_voted_fork_slots_t). */ void fd_restart_init( fd_restart_t * restart, - fd_vote_accounts_t const * accs, + ulong root_epoch, + fd_vote_accounts_t const * vote_acct_current_epoch, + fd_vote_accounts_t const * vote_acct_next_epoch, fd_tower_t const * tower, fd_slot_history_t const * slot_history, fd_funk_t * funk, @@ -150,4 +156,13 @@ fd_restart_verify_heaviest_fork( fd_restart_t * restart, uchar * out_buf, ulong * out_send ); +/* fd_restart_convert_runlength_to_raw_bitmap converts the bitmap in + a last_voted_fork_slots message from the run length encoding into + raw encoding. It is invoked in the gossip tile before forwarding + this gossip message to the replay tile. Therefore, the replay tile + could assume raw encoding of bitmap when processing the message. */ +void +fd_restart_convert_runlength_to_raw_bitmap( fd_gossip_restart_last_voted_fork_slots_t * msg, + uchar * out_bitmap, + ulong * out_bitmap_len ); #endif