Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use finalized reference block #350

Merged
merged 6 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion contracts/src/core/EigenDAServiceManagerStorage.sol
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ abstract contract EigenDAServiceManagerStorage is IEigenDAServiceManager {
* time that nodes can be active after they have deregistered. The larger it is, the farther back stakes can be used, but the longer operators
* have to serve after they've deregistered.
*/
uint32 public constant BLOCK_STALE_MEASURE = 150;
uint32 public constant BLOCK_STALE_MEASURE = 300;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How was this calculated?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This number has to be greater than FinalizationBlockDelay + BatchInterval/12

If the batch interval is 10 minutes, this evaluates to 150 blocks. I doubled it to ensure we were never running up against this constraint.

I think that 30 minutes has negligible UX implications, given that it is small relative to the 14 days that operators are expected to serve after deregistration.

cc @gpsanant

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, add this to the comment? It's useful to know and to maintain this value in the future.

On the other hand, it raises a maintenance question, since 2 offchain parameters can already affect this onchain parameter (or conversely, this parameter constrains the choice of batch interval). A 10-minute batch interval is 10*60/12=50 blocks, which means the interval cannot be made significantly larger.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default FinalizationBlockDelay is 75


/// @notice The current batchId
uint32 public batchId;
Expand Down
3 changes: 3 additions & 0 deletions disperser/batcher/batcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ type Config struct {
BatchSizeMBLimit uint
MaxNumRetriesPerBlob uint

FinalizationBlockDelay uint

TargetNumChunks uint
MaxBlobsToFetchFromStore int
}
Expand Down Expand Up @@ -110,6 +112,7 @@ func NewBatcher(
EncodingQueueLimit: config.EncodingRequestQueueSize,
TargetNumChunks: config.TargetNumChunks,
MaxBlobsToFetchFromStore: config.MaxBlobsToFetchFromStore,
FinalizationBlockDelay: config.FinalizationBlockDelay,
}
encodingWorkerPool := workerpool.New(config.NumConnections)
encodingStreamer, err := NewEncodingStreamer(streamerConfig, queue, chainState, encoderClient, assignmentCoordinator, batchTrigger, encodingWorkerPool, metrics.EncodingStreamerMetrics, logger)
Expand Down
5 changes: 4 additions & 1 deletion disperser/batcher/batcher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,12 @@ func makeBatcher(t *testing.T) (*batcherComponents, *bat.Batcher, func() []time.
// Common Components
logger := logging.NewNoopLogger()

finalizationBlockDelay := uint(75)

// Core Components
cst, err := coremock.MakeChainDataMock(10)
assert.NoError(t, err)
cst.On("GetCurrentBlockNumber").Return(uint(10), nil)
cst.On("GetCurrentBlockNumber").Return(uint(10)+finalizationBlockDelay, nil)
asgn := &core.StdAssignmentCoordinator{}
transactor := &coremock.MockTransactor{}
transactor.On("OperatorIDToAddress").Return(gethcommon.Address{}, nil)
Expand All @@ -99,6 +101,7 @@ func makeBatcher(t *testing.T) (*batcherComponents, *bat.Batcher, func() []time.
BatchSizeMBLimit: 100,
SRSOrder: 3000,
MaxNumRetriesPerBlob: 2,
FinalizationBlockDelay: finalizationBlockDelay,
}
timeoutConfig := bat.TimeoutConfig{
EncodingTimeout: 10 * time.Second,
Expand Down
6 changes: 6 additions & 0 deletions disperser/batcher/encoding_streamer.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ type StreamerConfig struct {

// Maximum number of Blobs to fetch from store
MaxBlobsToFetchFromStore int

FinalizationBlockDelay uint
}

type EncodingStreamer struct {
Expand Down Expand Up @@ -208,6 +210,10 @@ func (e *EncodingStreamer) RequestEncoding(ctx context.Context, encoderChan chan
if err != nil {
return fmt.Errorf("failed to get current block number, won't request encoding: %w", err)
} else {
if blockNumber > e.FinalizationBlockDelay {
blockNumber -= e.FinalizationBlockDelay
}

e.mu.Lock()
e.ReferenceBlockNumber = blockNumber
e.mu.Unlock()
Expand Down
13 changes: 7 additions & 6 deletions disperser/batcher/encoding_streamer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ var (
EncodingRequestTimeout: 5 * time.Second,
EncodingQueueLimit: 100,
MaxBlobsToFetchFromStore: 10,
FinalizationBlockDelay: 75,
}
)

Expand Down Expand Up @@ -206,7 +207,7 @@ func TestStreamingEncoding(t *testing.T) {
assert.Nil(t, err)
assert.Equal(t, disperser.Processing, metadata.BlobStatus)

c.chainDataMock.On("GetCurrentBlockNumber").Return(uint(10), nil)
c.chainDataMock.On("GetCurrentBlockNumber").Return(uint(10)+encodingStreamer.FinalizationBlockDelay, nil)

out := make(chan batcher.EncodingResultOrStatus)
err = encodingStreamer.RequestEncoding(context.Background(), out)
Expand Down Expand Up @@ -322,7 +323,7 @@ func TestEncodingFailure(t *testing.T) {
metadataKey, err := blobStore.StoreBlob(ctx, &blob, uint64(time.Now().UnixNano()))
assert.Nil(t, err)

cst.On("GetCurrentBlockNumber").Return(uint(10), nil)
cst.On("GetCurrentBlockNumber").Return(uint(10)+encodingStreamer.FinalizationBlockDelay, nil)
encoderClient.On("EncodeBlob", tmock.Anything, tmock.Anything, tmock.Anything).Return(nil, nil, errors.New("errrrr"))
// request encoding
out := make(chan batcher.EncodingResultOrStatus)
Expand Down Expand Up @@ -350,7 +351,7 @@ func TestEncodingFailure(t *testing.T) {
func TestPartialBlob(t *testing.T) {
encodingStreamer, c := createEncodingStreamer(t, 10, 1e12, streamerConfig)

c.chainDataMock.On("GetCurrentBlockNumber").Return(uint(10), nil)
c.chainDataMock.On("GetCurrentBlockNumber").Return(uint(10)+encodingStreamer.FinalizationBlockDelay, nil)

out := make(chan batcher.EncodingResultOrStatus)

Expand Down Expand Up @@ -498,7 +499,7 @@ func TestIncorrectParameters(t *testing.T) {
metadataKey, err := c.blobStore.StoreBlob(ctx, &blob, uint64(time.Now().UnixNano()))
assert.Nil(t, err)

c.chainDataMock.On("GetCurrentBlockNumber").Return(uint(10), nil)
c.chainDataMock.On("GetCurrentBlockNumber").Return(uint(10)+encodingStreamer.FinalizationBlockDelay, nil)

// request encoding
out := make(chan batcher.EncodingResultOrStatus)
Expand All @@ -519,7 +520,7 @@ func TestIncorrectParameters(t *testing.T) {
func TestInvalidQuorum(t *testing.T) {
encodingStreamer, c := createEncodingStreamer(t, 10, 1e12, streamerConfig)

c.chainDataMock.On("GetCurrentBlockNumber").Return(uint(10), nil)
c.chainDataMock.On("GetCurrentBlockNumber").Return(uint(10)+encodingStreamer.FinalizationBlockDelay, nil)

out := make(chan batcher.EncodingResultOrStatus)

Expand Down Expand Up @@ -601,7 +602,7 @@ func TestGetBatch(t *testing.T) {
assert.Nil(t, err)
assert.Equal(t, disperser.Processing, metadata2.BlobStatus)

c.chainDataMock.On("GetCurrentBlockNumber").Return(uint(10), nil)
c.chainDataMock.On("GetCurrentBlockNumber").Return(uint(10)+encodingStreamer.FinalizationBlockDelay, nil)

// request encoding
out := make(chan batcher.EncodingResultOrStatus)
Expand Down
1 change: 1 addition & 0 deletions disperser/cmd/batcher/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ func NewConfig(ctx *cli.Context) (Config, error) {
MaxNumRetriesPerBlob: ctx.GlobalUint(flags.MaxNumRetriesPerBlobFlag.Name),
TargetNumChunks: ctx.GlobalUint(flags.TargetNumChunksFlag.Name),
MaxBlobsToFetchFromStore: ctx.GlobalInt(flags.MaxBlobsToFetchFromStoreFlag.Name),
FinalizationBlockDelay: ctx.GlobalUint(flags.FinalizationBlockDelayFlag.Name),
},
TimeoutConfig: batcher.TimeoutConfig{
EncodingTimeout: ctx.GlobalDuration(flags.EncodingTimeoutFlag.Name),
Expand Down
8 changes: 8 additions & 0 deletions disperser/cmd/batcher/flags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,13 @@ var (
EnvVar: common.PrefixEnvVar(envVarPrefix, "MAX_BLOBS_TO_FETCH_FROM_STORE"),
Value: 100,
}
FinalizationBlockDelayFlag = cli.UintFlag{
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we query Beacon chain for finalized epoch to get the finalized block?

Name: common.PrefixFlag(FlagPrefix, "finalization-block-delay"),
Usage: "The block delay to use for pulling operator state in order to ensure the state is finalized",
Required: false,
EnvVar: common.PrefixEnvVar(envVarPrefix, "FINALIZATION_BLOCK_DELAY"),
Value: 75,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to wait for finalization? What's the downside of using a non-final reference block?
My understanding of the reason why we may want to use an older reference block is to prevent requesting newer blocks than what indexer has indexed. For this purpose, 5~10 blocks seems sufficient.
One downside of having this number too big might be a UX concern where operator state is applied that much later. wdyt?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The advantage is that the OperatorState which we use is unlikely to change due to a reorg. Using 15 minutes here makes it very unlikely that we'll ever fail a batch due to a reorg as we were seeing on goerli.

I'm not really sure that a period of 15 minutes has any important UX drawbacks. We are mostly moving in the direction of decreasing the frequency of state updates (AVSSync takes us to stake updates once per week and we may do something similar with operator registrations), so I think 15 minutes here doesn't really pose a big issue.

I may be missing something though?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed on the minimal impact on UX and the advantage of this approach.
UX drawback was the only thing I can think of, but my concern is whether there is anything else we might be missing here, because we've only tested under the scenario where the reference block was very recent, and we haven't seen any issues with operator state being reorg'd.
This is probably fine, but we should definitely test this in the wild for some time.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and we haven't seen any issues with operator state being reorg'd.

Haven't we? I think the assumption has been that the reorg issues we've been seeing on testnet were exactly this.

But yeah, let's see if it causes any unexpected issues. I can't think of any right now.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another part is the operator experience where they will be held longer for dispersal after opt-out.
Overall I think it's an improvement for robustness.

Copy link
Contributor

@bxue-l2 bxue-l2 Mar 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to this twitter thread, it looks like there are on average 20 reorgs per day. So in the worst case, we need to wait 20 blocks. https://twitter.com/terencechain/status/1768664666996355178

Copy link
Collaborator Author

@mooselumph mooselumph Mar 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does 20 reorgs / day mean we need to wait 20 blocks? I don't understand why these numbers would be related.

}
)

var requiredFlags = []cli.Flag{
Expand Down Expand Up @@ -207,6 +214,7 @@ var optionalFlags = []cli.Flag{
MaxNumRetriesPerBlobFlag,
TargetNumChunksFlag,
MaxBlobsToFetchFromStoreFlag,
FinalizationBlockDelayFlag,
}

// Flags contains the list of configuration options available to the binary.
Expand Down
16 changes: 1 addition & 15 deletions inabox/deploy/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,6 @@ func (env *Config) generateChurnerVars(ind int, graphUrl, logPath, grpcPort stri
CHURNER_GRAPH_URL: graphUrl,
CHURNER_INDEXER_PULL_INTERVAL: "1s",

CHURNER_STD_LOG_LEVEL: "debug",
CHURNER_FILE_LOG_LEVEL: "debug",
CHURNER_LOG_PATH: logPath,

CHURNER_ENABLE_METRICS: "true",
CHURNER_METRICS_HTTP_PORT: "9095",
}
Expand Down Expand Up @@ -206,12 +202,8 @@ func (env *Config) generateBatcherVars(ind int, key, graphUrl, logPath string) B
BATCHER_BLS_OPERATOR_STATE_RETRIVER: env.EigenDA.OperatorStateRetreiver,
BATCHER_EIGENDA_SERVICE_MANAGER: env.EigenDA.ServiceManager,
BATCHER_SRS_ORDER: "300000",
BATCHER_SRS_LOAD: "300000",
BATCHER_CHAIN_RPC: "",
BATCHER_PRIVATE_KEY: key[2:],
BATCHER_STD_LOG_LEVEL: "debug",
BATCHER_FILE_LOG_LEVEL: "debug",
BATCHER_LOG_PATH: logPath,
BATCHER_GRAPH_URL: graphUrl,
BATCHER_USE_GRAPH: "true",
BATCHER_BATCH_SIZE_LIMIT: "10240", // 10 GiB
Expand All @@ -224,6 +216,7 @@ func (env *Config) generateBatcherVars(ind int, key, graphUrl, logPath string) B
BATCHER_ENCODING_REQUEST_QUEUE_SIZE: "500",
BATCHER_NUM_CONFIRMATIONS: "0",
BATCHER_MAX_BLOBS_TO_FETCH_FROM_STORE: "100",
BATCHER_FINALIZATION_BLOCK_DELAY: "5",
}

env.applyDefaults(&v, "BATCHER", "batcher", ind)
Expand Down Expand Up @@ -306,9 +299,6 @@ func (env *Config) generateOperatorVars(ind int, name, key, churnerUrl, logPath,
NODE_VERBOSE: "true",
NODE_CHAIN_RPC: "",
NODE_PRIVATE_KEY: key[2:],
NODE_STD_LOG_LEVEL: "debug",
NODE_FILE_LOG_LEVEL: "debug",
NODE_LOG_PATH: logPath,
NODE_NUM_BATCH_VALIDATORS: "128",
NODE_PUBLIC_IP_PROVIDER: "mockip",
NODE_PUBLIC_IP_CHECK_INTERVAL: "10s",
Expand Down Expand Up @@ -345,10 +335,6 @@ func (env *Config) generateRetrieverVars(ind int, key string, graphUrl, logPath,
RETRIEVER_CACHE_ENCODED_BLOBS: "false",

RETRIEVER_INDEXER_PULL_INTERVAL: "1s",

RETRIEVER_STD_LOG_LEVEL: "debug",
RETRIEVER_FILE_LOG_LEVEL: "debug",
RETRIEVER_LOG_PATH: logPath,
}

v.RETRIEVER_G2_PATH = ""
Expand Down
60 changes: 33 additions & 27 deletions inabox/deploy/env_vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ type DisperserVars struct {

DISPERSER_SERVER_NUM_CONFIRMATIONS string

DISPERSER_SERVER_STD_LOG_LEVEL string

DISPERSER_SERVER_FILE_LOG_LEVEL string
DISPERSER_SERVER_LOG_LEVEL string

DISPERSER_SERVER_LOG_PATH string

DISPERSER_SERVER_LOG_FORMAT string

DISPERSER_SERVER_BUCKET_SIZES string

DISPERSER_SERVER_BUCKET_MULTIPLIERS string
Expand All @@ -43,8 +43,6 @@ type DisperserVars struct {

DISPERSER_SERVER_BUCKET_STORE_SIZE string

DISPERSER_SERVER_ALLOWLIST string

DISPERSER_SERVER_AWS_REGION string

DISPERSER_SERVER_AWS_ACCESS_KEY_ID string
Expand All @@ -64,6 +62,8 @@ type DisperserVars struct {
DISPERSER_SERVER_PER_USER_UNAUTH_BLOB_RATE string

DISPERSER_SERVER_CLIENT_IP_HEADER string

DISPERSER_SERVER_ALLOWLIST string
}

func (vars DisperserVars) getEnvMap() map[string]string {
Expand Down Expand Up @@ -98,8 +98,6 @@ type BatcherVars struct {

BATCHER_SRS_ORDER string

BATCHER_SRS_LOAD string

BATCHER_METRICS_HTTP_PORT string

BATCHER_INDEXER_DATA_DIR string
Expand All @@ -116,22 +114,30 @@ type BatcherVars struct {

BATCHER_FINALIZER_INTERVAL string

BATCHER_FINALIZER_POOL_SIZE string

BATCHER_ENCODING_REQUEST_QUEUE_SIZE string

BATCHER_MAX_NUM_RETRIES_PER_BLOB string

BATCHER_TARGET_NUM_CHUNKS string

BATCHER_MAX_BLOBS_TO_FETCH_FROM_STORE string

BATCHER_FINALIZATION_BLOCK_DELAY string

BATCHER_CHAIN_RPC string

BATCHER_PRIVATE_KEY string

BATCHER_NUM_CONFIRMATIONS string

BATCHER_STD_LOG_LEVEL string

BATCHER_FILE_LOG_LEVEL string
BATCHER_LOG_LEVEL string

BATCHER_LOG_PATH string

BATCHER_LOG_FORMAT string

BATCHER_INDEXER_PULL_INTERVAL string

BATCHER_AWS_REGION string
Expand All @@ -141,8 +147,6 @@ type BatcherVars struct {
BATCHER_AWS_SECRET_ACCESS_KEY string

BATCHER_AWS_ENDPOINT_URL string

BATCHER_MAX_BLOBS_TO_FETCH_FROM_STORE string
}

func (vars BatcherVars) getEnvMap() map[string]string {
Expand Down Expand Up @@ -183,11 +187,13 @@ type EncoderVars struct {

DISPERSER_ENCODER_PRELOAD_ENCODER string

DISPERSER_ENCODER_STD_LOG_LEVEL string
DISPERSER_ENCODER_G2_POWER_OF_2_PATH string

DISPERSER_ENCODER_FILE_LOG_LEVEL string
DISPERSER_ENCODER_LOG_LEVEL string

DISPERSER_ENCODER_LOG_PATH string

DISPERSER_ENCODER_LOG_FORMAT string
}

func (vars EncoderVars) getEnvMap() map[string]string {
Expand Down Expand Up @@ -264,8 +270,6 @@ type OperatorVars struct {

NODE_G2_PATH string

NODE_G2_POWER_OF_2_PATH string

NODE_CACHE_PATH string

NODE_SRS_ORDER string
Expand All @@ -280,17 +284,19 @@ type OperatorVars struct {

NODE_PRELOAD_ENCODER string

NODE_G2_POWER_OF_2_PATH string

NODE_CHAIN_RPC string

NODE_PRIVATE_KEY string

NODE_NUM_CONFIRMATIONS string

NODE_STD_LOG_LEVEL string

NODE_FILE_LOG_LEVEL string
NODE_LOG_LEVEL string

NODE_LOG_PATH string

NODE_LOG_FORMAT string
}

func (vars OperatorVars) getEnvMap() map[string]string {
Expand Down Expand Up @@ -327,8 +333,6 @@ type RetrieverVars struct {

RETRIEVER_G2_PATH string

RETRIEVER_G2_POWER_OF_2_PATH string

RETRIEVER_CACHE_PATH string

RETRIEVER_SRS_ORDER string
Expand All @@ -343,18 +347,20 @@ type RetrieverVars struct {

RETRIEVER_PRELOAD_ENCODER string

RETRIEVER_G2_POWER_OF_2_PATH string

RETRIEVER_CHAIN_RPC string

RETRIEVER_PRIVATE_KEY string

RETRIEVER_NUM_CONFIRMATIONS string

RETRIEVER_STD_LOG_LEVEL string

RETRIEVER_FILE_LOG_LEVEL string
RETRIEVER_LOG_LEVEL string

RETRIEVER_LOG_PATH string

RETRIEVER_LOG_FORMAT string

RETRIEVER_INDEXER_PULL_INTERVAL string
}

Expand Down Expand Up @@ -390,12 +396,12 @@ type ChurnerVars struct {

CHURNER_NUM_CONFIRMATIONS string

CHURNER_STD_LOG_LEVEL string

CHURNER_FILE_LOG_LEVEL string
CHURNER_LOG_LEVEL string

CHURNER_LOG_PATH string

CHURNER_LOG_FORMAT string

CHURNER_INDEXER_PULL_INTERVAL string
}

Expand Down
Loading
Loading