From fec73ebb99f8e2221029fb57dbb54a1f46207cd7 Mon Sep 17 00:00:00 2001
From: danwt <30197399+danwt@users.noreply.github.com>
Date: Tue, 16 Apr 2024 15:15:40 +0100
Subject: [PATCH] make sure errors are informational

---
Review notes (text between the three-dash line and the first diff is not part
of the commit message):
- node.go: base layer health errors are wrapped only when non-nil. fmt.Errorf
  with a nil %w operand still returns a non-nil error, which would record a
  healthy event as a failure.
- celestia.go: the availability check does not set err, so the error is built
  from result.Code and attached to the log line.
- dymension.go: a nil error with a non-zero tx response code is reported
  separately instead of wrapping nil.
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy and the index hashes are those of the original patch; apply with
  `git apply --ignore-whitespace` or regenerate with `git format-patch`.

 da/avail/avail.go                 | 14 ++++----
 da/celestia/celestia.go           |  7 ++--
 node/node.go                      | 21 ++++++++---
 settlement/dymension/dymension.go | 59 +++++++++++++++++--------------
 4 files changed, 61 insertions(+), 40 deletions(-)

diff --git a/da/avail/avail.go b/da/avail/avail.go
index 4121e7cec..650f180b1 100644
--- a/da/avail/avail.go
+++ b/da/avail/avail.go
@@ -281,14 +281,14 @@ func (c *DataAvailabilityLayerClient) submitBatchLoop(dataBlob []byte) da.Result
 							Error:   err,
 						},
 					}
-				} else {
-					c.logger.Error("broadcasting batch, emitting DA unhealthy event and trying again", "error", err)
-					res, err := da.SubmitBatchHealthEventHelper(c.pubsubServer, c.ctx, err)
-					if err != nil {
-						return res
-					}
-					continue
 				}
+				err = fmt.Errorf("broadcast data blob: %w", err)
+				c.logger.Error("broadcasting batch, emitting DA unhealthy event and trying again", "error", err)
+				res, err := da.SubmitBatchHealthEventHelper(c.pubsubServer, c.ctx, err)
+				if err != nil {
+					return res
+				}
+				continue
 			}
 
 			c.logger.Debug("Successfully submitted DA batch")
diff --git a/da/celestia/celestia.go b/da/celestia/celestia.go
index f500ee240..9580f613e 100644
--- a/da/celestia/celestia.go
+++ b/da/celestia/celestia.go
@@ -212,10 +212,10 @@ func (c *DataAvailabilityLayerClient) SubmitBatch(batch *types.Batch) da.ResultS
 			return da.ResultSubmitBatch{}
 		default:
 
-			c.logger.Info("Submitting DA batch")
 			// TODO(srene): Split batch in multiple blobs if necessary if supported
 			height, commitment, err := c.submit(data)
 			if err != nil {
+				err = fmt.Errorf("submit batch: %w", err)
 				c.logger.Error("submit DA batch. Emitting health event and trying again", "error", err)
 				res, err := da.SubmitBatchHealthEventHelper(c.pubsubServer, c.ctx, err)
 				if err != nil {
@@ -232,9 +232,12 @@ func (c *DataAvailabilityLayerClient) SubmitBatch(batch *types.Batch) da.ResultS
 				Namespace:  c.config.NamespaceID.Bytes(),
 			}
 
+			c.logger.Info("submitted DA batch")
+
 			result := c.CheckBatchAvailability(daMetaData)
 			if result.Code != da.StatusSuccess {
-				c.logger.Error("Unable to confirm submitted blob availability. Retrying")
+				err = fmt.Errorf("submitted batch but did not get availability success: status code %v", result.Code)
+				c.logger.Error("unable to confirm submitted blob availability, retrying", "error", err)
 				res, err := da.SubmitBatchHealthEventHelper(c.pubsubServer, c.ctx, err)
 				if err != nil {
 					return res
diff --git a/node/node.go b/node/node.go
index 2e7d88f37..54384d4b7 100644
--- a/node/node.go
+++ b/node/node.go
@@ -303,17 +303,17 @@ func (n *Node) GetGenesisChunks() ([]string, error) {
 func (n *Node) OnStop() {
 	err := n.dalc.Stop()
 	if err != nil {
-		n.Logger.Error("while stopping data availability layer client", "error", err)
+		n.Logger.Error("stop data availability layer client", "error", err)
 	}
 
 	err = n.settlementlc.Stop()
 	if err != nil {
-		n.Logger.Error("while stopping settlement layer client", "error", err)
+		n.Logger.Error("stop settlement layer client", "error", err)
 	}
 
 	err = n.P2P.Close()
 	if err != nil {
-		n.Logger.Error("while stopping P2P client", "error", err)
+		n.Logger.Error("stop P2P client", "error", err)
 	}
 }
 
@@ -383,10 +383,21 @@ func (n *Node) onBaseLayerHealthUpdate(event pubsub.Message) {
 	switch e := event.Data().(type) {
 	case *settlement.EventDataHealth:
 		haveNewErr = e.Error != nil
-		n.baseLayerHealth.setSettlement(e.Error)
+		// fmt.Errorf returns a non-nil error even when its %w operand is nil, so only
+		// wrap real failures; a healthy event (nil Error) must be stored as nil.
+		layerErr := e.Error
+		if layerErr != nil {
+			layerErr = fmt.Errorf("settlement layer: %w", layerErr)
+		}
+		n.baseLayerHealth.setSettlement(layerErr)
 	case *da.EventDataHealth:
 		haveNewErr = e.Error != nil
-		n.baseLayerHealth.setDA(e.Error)
+		// Same nil guard as the settlement case: never wrap a nil error.
+		layerErr := e.Error
+		if layerErr != nil {
+			layerErr = fmt.Errorf("data availability layer: %w", layerErr)
+		}
+		n.baseLayerHealth.setDA(layerErr)
 	}
 	newStatus := n.baseLayerHealth.get()
 	newStatusIsDifferentFromOldOne := (oldStatus == nil) != (newStatus == nil)
diff --git a/settlement/dymension/dymension.go b/settlement/dymension/dymension.go
index 16d296fb6..442fd749e 100644
--- a/settlement/dymension/dymension.go
+++ b/settlement/dymension/dymension.go
@@ -230,7 +230,7 @@ func (d *HubClient) PostBatch(batch *types.Batch, daClient da.Client, daResult *
 
 			err := d.submitBatch(msgUpdateState)
 			if err != nil {
-				err = fmt.Errorf("submit batch:%w", err)
+				err = fmt.Errorf("submit batch: %w", err)
 
 				utilevent.MustPublish(d.ctx, d.pubsub, &settlement.EventDataHealth{Error: err}, settlement.EventHealthStatusList)
 
@@ -262,7 +262,7 @@ func (d *HubClient) PostBatch(batch *types.Batch, daClient da.Client, daResult *
 				return fmt.Errorf("subscription canceled: %w", err)
 			case <-subscription.Out():
 				utilevent.MustPublish(d.ctx, d.pubsub, &settlement.EventDataHealth{}, settlement.EventHealthStatusList)
-				d.logger.Debug("batch accepted by settlement layer. emitted healthy event",
+				d.logger.Debug("batch accepted by settlement layer, emitted healthy event",
 					"startHeight", batch.StartHeight, "endHeight", batch.EndHeight)
 				return nil
 			case <-timer.C:
@@ -271,7 +271,7 @@ func (d *HubClient) PostBatch(batch *types.Batch, daClient da.Client, daResult *
 
 				includedBatch, err := d.waitForBatchInclusion(batch.StartHeight)
 				if err != nil {
-					err = fmt.Errorf("%w:%w", settlement.ErrBatchNotAccepted, err)
+					err = fmt.Errorf("wait for batch inclusion: %w: %w", settlement.ErrBatchNotAccepted, err)
 
 					utilevent.MustPublish(d.ctx, d.pubsub, &settlement.EventDataHealth{Error: err}, settlement.EventHealthStatusList)
 
@@ -303,17 +303,19 @@ func (d *HubClient) PostBatch(batch *types.Batch, daClient da.Client, daResult *
 func (d *HubClient) GetLatestBatch(rollappID string) (*settlement.ResultRetrieveBatch, error) {
 	var latestStateInfoIndexResp *rollapptypes.QueryGetLatestStateIndexResponse
 
-	err := d.RunWithRetry(func() error {
-		var err error
-		latestStateInfoIndexResp, err = d.rollappQueryClient.LatestStateIndex(d.ctx,
-			&rollapptypes.QueryGetLatestStateIndexRequest{RollappId: d.config.RollappID})
+	err := d.RunWithRetry(
+		func() error {
+			var err error
+			latestStateInfoIndexResp, err = d.rollappQueryClient.LatestStateIndex(d.ctx,
+				&rollapptypes.QueryGetLatestStateIndexRequest{RollappId: d.config.RollappID})
 
-		if status.Code(err) == codes.NotFound {
-			return retry.Unrecoverable(settlement.ErrBatchNotFound)
-		}
+			if status.Code(err) == codes.NotFound {
+				return retry.Unrecoverable(settlement.ErrBatchNotFound)
+			}
 
-		return err
-	})
+			return err
+		},
+	)
 	if err != nil {
 		return nil, err
 	}
@@ -401,8 +403,11 @@ func (d *HubClient) submitBatch(msgUpdateState *rollapptypes.MsgUpdateState) err
 	err := d.RunWithRetry(func() error {
 		txResp, err := d.client.BroadcastTx(d.config.DymAccountName, msgUpdateState)
-		if err != nil || txResp.Code != 0 {
-			d.logger.Error("sending batch to settlement layer", "error", err)
-			return err
+		if err != nil {
+			return fmt.Errorf("broadcast tx: %w", err)
+		}
+		if txResp.Code != 0 {
+			// err is nil on this path, so report the response code instead of wrapping nil.
+			return fmt.Errorf("broadcast tx: non-zero response code: %d", txResp.Code)
 		}
 		return nil
 	})
@@ -551,17 +556,19 @@ func (d *HubClient) convertStateInfoToResultRetrieveBatch(stateInfo *rollapptype
 // TODO: bullet proof check as theoretically the tx can stay in the mempool longer then our retry attempts.
 func (d *HubClient) waitForBatchInclusion(batchStartHeight uint64) (*settlement.ResultRetrieveBatch, error) {
 	var res *settlement.ResultRetrieveBatch
-	err := d.RunWithRetry(func() error {
-		latestBatch, err := d.GetLatestBatch(d.config.RollappID)
-		if err != nil {
-			return err
-		}
-		if latestBatch.Batch.StartHeight != batchStartHeight {
-			return settlement.ErrBatchNotFound
-		}
-		res = latestBatch
-		return nil
-	})
+	err := d.RunWithRetry(
+		func() error {
+			latestBatch, err := d.GetLatestBatch(d.config.RollappID)
+			if err != nil {
+				return fmt.Errorf("get latest batch: %w", err)
+			}
+			if latestBatch.Batch.StartHeight != batchStartHeight {
+				return fmt.Errorf("latest batch start height not match expected start height: %w", settlement.ErrBatchNotFound)
+			}
+			res = latestBatch
+			return nil
+		},
+	)
 	return res, err
 }
 