From 20e478994592225290d767f717d19b91465f3112 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Wed, 18 Sep 2024 12:15:44 -0500 Subject: [PATCH 1/3] Add state-commitment flag to checkpoint-collect-stats This commit adds optional state-commitment flag to util program checkpoint-collect-stats command. When this new flag is provided, program only collects stats of one trie with specified state commitment, instead of all tries in checkpoint file (currently 500 tries). This is to reduce noise when tracking growth of single trie over time. --- cmd/util/cmd/checkpoint-collect-stats/cmd.go | 47 +++++++++++++++++--- ledger/complete/ledger_stats.go | 13 ++++-- 2 files changed, 51 insertions(+), 9 deletions(-) diff --git a/cmd/util/cmd/checkpoint-collect-stats/cmd.go b/cmd/util/cmd/checkpoint-collect-stats/cmd.go index 29c7bd1c5ef..e454e158504 100644 --- a/cmd/util/cmd/checkpoint-collect-stats/cmd.go +++ b/cmd/util/cmd/checkpoint-collect-stats/cmd.go @@ -17,9 +17,11 @@ import ( "github.com/onflow/atree" + "github.com/onflow/flow-go/cmd/util/ledger/util" "github.com/onflow/flow-go/ledger" "github.com/onflow/flow-go/ledger/common/pathfinder" "github.com/onflow/flow-go/ledger/complete" + "github.com/onflow/flow-go/ledger/complete/mtrie/trie" "github.com/onflow/flow-go/ledger/complete/wal" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/module/metrics" @@ -27,9 +29,14 @@ import ( ) var ( - flagCheckpointDir string - flagOutputDir string - flagMemProfile bool + flagCheckpointDir string + flagStateCommitment string + flagOutputDir string + flagMemProfile bool +) + +const ( + ledgerStatsReportName = "ledger.stats.json" ) var Cmd = &cobra.Command{ @@ -43,6 +50,11 @@ func init() { "Directory to load checkpoint files from") _ = Cmd.MarkFlagRequired("checkpoint-dir") + // state-commitment is optional. + // When provided, this program only gathers stats on trie with matching state commitment. + Cmd.Flags().StringVar(&flagStateCommitment, "state-commitment", "", + "Trie state commitment") + Cmd.Flags().StringVar(&flagOutputDir, "output-dir", "", "Directory to write checkpoint stats to") _ = Cmd.MarkFlagRequired("output-dir") @@ -111,8 +123,33 @@ func run(*cobra.Command, []string) { var key ledger.Key var size, valueSize int + var tries []*trie.MTrie + + if flagStateCommitment != "" { + stateCommitment := util.ParseStateCommitment(flagStateCommitment) + + t, err := led.FindTrieByStateCommit(stateCommitment) + if err != nil { + log.Fatal().Err(err).Msgf("failed to find trie with state commitment %x", stateCommitment) + } + if t == nil { + log.Fatal().Msgf("no trie with state commitment %x", stateCommitment) + } + + tries = append(tries, t) + } else { + ts, err := led.Tries() + if err != nil { + log.Fatal().Err(err).Msg("failed to get tries") + } + + tries = append(tries, ts...) + } + + log.Info().Msgf("collecting stats on %d tries", len(tries)) + valueSizesByType := make(sizesByType, 0) - ledgerStats, err := led.CollectStats(func(p *ledger.Payload) { + ledgerStats, err := complete.CollectStats(tries, func(p *ledger.Payload) { key, err = p.Key() if err != nil { log.Fatal().Err(err).Msg("cannot load a key") @@ -191,7 +228,7 @@ func run(*cobra.Command, []string) { }, } - path := filepath.Join(flagOutputDir, "ledger.stats.json") + path := filepath.Join(flagOutputDir, ledgerStatsReportName) fi, err := os.Create(path) if err != nil { diff --git a/ledger/complete/ledger_stats.go b/ledger/complete/ledger_stats.go index 74062d5718a..c706004fc6c 100644 --- a/ledger/complete/ledger_stats.go +++ b/ledger/complete/ledger_stats.go @@ -6,6 +6,7 @@ import ( "github.com/onflow/flow-go/ledger" "github.com/onflow/flow-go/ledger/complete/mtrie/flattener" "github.com/onflow/flow-go/ledger/complete/mtrie/node" + "github.com/onflow/flow-go/ledger/complete/mtrie/trie" ) type LedgerStats struct { @@ -16,14 +17,18 @@ type LedgerStats struct { } func (l *Ledger) CollectStats(payloadCallBack func(payload *ledger.Payload)) (*LedgerStats, error) { - visitedNodes := make(map[*node.Node]uint64) - var interimNodeCounter, leafNodeCounter, totalNodeCounter uint64 - tries, err := l.Tries() if err != nil { return nil, err } + return CollectStats(tries, payloadCallBack) +} + +func CollectStats(tries []*trie.MTrie, payloadCallBack func(payload *ledger.Payload)) (*LedgerStats, error) { + visitedNodes := make(map[*node.Node]uint64) + var interimNodeCounter, leafNodeCounter, totalNodeCounter uint64 + bar := progressbar.Default(int64(len(tries)), "collecting ledger stats") for _, trie := range tries { for itr := flattener.NewUniqueNodeIterator(trie.RootNode(), visitedNodes); itr.Next(); { @@ -38,7 +43,7 @@ func (l *Ledger) CollectStats(payloadCallBack func(payload *ledger.Payload)) (*L visitedNodes[n] = totalNodeCounter totalNodeCounter++ } - if err = bar.Add(1); err != nil { + if err := bar.Add(1); err != nil { return nil, err } } From 26eed6fba408b536131d211dd1fdd745a0c6f128 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Wed, 18 Sep 2024 14:53:36 -0500 Subject: [PATCH 2/3] Support payloads file in checkpoint-collect-stats This commit adds support for input data from payloads file. Payloads file is an alternative input and output for migration. --- cmd/util/cmd/checkpoint-collect-stats/cmd.go | 124 ++++++++++++++----- 1 file changed, 90 insertions(+), 34 deletions(-) diff --git a/cmd/util/cmd/checkpoint-collect-stats/cmd.go b/cmd/util/cmd/checkpoint-collect-stats/cmd.go index e454e158504..0646c0551c4 100644 --- a/cmd/util/cmd/checkpoint-collect-stats/cmd.go +++ b/cmd/util/cmd/checkpoint-collect-stats/cmd.go @@ -31,6 +31,7 @@ import ( var ( flagCheckpointDir string flagStateCommitment string + flagPayloads string flagOutputDir string flagMemProfile bool ) @@ -41,20 +42,26 @@ const ( var Cmd = &cobra.Command{ Use: "checkpoint-collect-stats", - Short: "collects stats on tries stored in a checkpoint", - Run: run, + Short: "collects stats on tries stored in a checkpoint, or payloads from a payloads file", + Long: `checkpoint-collect-stats collects stats on tries stored in a checkpoint, or payloads from a payloads file. +Two kinds of input data are supported: +- checkpoint file(s) ("--checkpoint-dir" with optional "--state-commitment"), or +- payloads file ("--payload-filename")`, + Run: run, } func init() { Cmd.Flags().StringVar(&flagCheckpointDir, "checkpoint-dir", "", "Directory to load checkpoint files from") - _ = Cmd.MarkFlagRequired("checkpoint-dir") // state-commitment is optional. // When provided, this program only gathers stats on trie with matching state commitment. Cmd.Flags().StringVar(&flagStateCommitment, "state-commitment", "", "Trie state commitment") + Cmd.Flags().StringVar(&flagPayloads, "payload-filename", "", + "Payloads file name to load payloads from") + Cmd.Flags().StringVar(&flagOutputDir, "output-dir", "", "Directory to write checkpoint stats to") _ = Cmd.MarkFlagRequired("output-dir") @@ -64,7 +71,7 @@ func init() { } type Stats struct { - LedgerStats *complete.LedgerStats + LedgerStats *complete.LedgerStats `json:",omitempty"` PayloadStats *PayloadStats } @@ -90,10 +97,79 @@ type sizesByType map[string][]float64 func run(*cobra.Command, []string) { + if flagPayloads == "" && flagCheckpointDir == "" { + log.Fatal().Msg("Either --payload-filename or --checkpoint-dir must be provided") + } + if flagPayloads != "" && flagCheckpointDir != "" { + log.Fatal().Msg("Only one of --payload-filename or --checkpoint-dir must be provided") + } + if flagCheckpointDir == "" && flagStateCommitment != "" { + log.Fatal().Msg("--checkpont-dir must be provided when --state-commitment is provided") + } + if flagMemProfile { defer profile.Start(profile.MemProfile).Stop() } + var totalPayloadSize, totalPayloadValueSize uint64 + + valueSizesByType := make(sizesByType, 0) + + payloadCallback := func(p *ledger.Payload) { + key, err := p.Key() + if err != nil { + log.Fatal().Err(err).Msg("cannot load a key") + } + + size := p.Size() + value := p.Value() + valueSize := value.Size() + totalPayloadSize += uint64(size) + totalPayloadValueSize += uint64(valueSize) + valueSizesByType[getType(key)] = append(valueSizesByType[getType(key)], float64(valueSize)) + } + + var ledgerStats *complete.LedgerStats + + useCheckpointFile := flagPayloads == "" + if useCheckpointFile { + ledgerStats = getPayloadStatsFromCheckpoint(payloadCallback) + } else { + getPayloadStatsFromPayloadFile(payloadCallback) + } + + statsByTypes := getStats(valueSizesByType) + + stats := &Stats{ + LedgerStats: ledgerStats, + PayloadStats: &PayloadStats{ + TotalPayloadSize: totalPayloadSize, + TotalPayloadValueSize: totalPayloadValueSize, + StatsByTypes: statsByTypes, + }, + } + + writeStats(stats) +} + +func getPayloadStatsFromPayloadFile(payloadCallBack func(payload *ledger.Payload)) { + memAllocBefore := debug.GetHeapAllocsBytes() + log.Info().Msgf("loading payloads from %v", flagPayloads) + + _, payloads, err := util.ReadPayloadFile(log.Logger, flagPayloads) + if err != nil { + log.Fatal().Err(err).Msg("failed to read payloads") + } + + memAllocAfter := debug.GetHeapAllocsBytes() + log.Info().Msgf("%d payloads are loaded, mem usage: %d", len(payloads), memAllocAfter-memAllocBefore) + + for _, p := range payloads { + payloadCallBack(p) + } +} + +func getPayloadStatsFromCheckpoint(payloadCallBack func(payload *ledger.Payload)) *complete.LedgerStats { memAllocBefore := debug.GetHeapAllocsBytes() log.Info().Msgf("loading checkpoint(s) from %v", flagCheckpointDir) @@ -118,11 +194,6 @@ func run(*cobra.Command, []string) { memAllocAfter := debug.GetHeapAllocsBytes() log.Info().Msgf("the checkpoint is loaded, mem usage: %d", memAllocAfter-memAllocBefore) - var totalPayloadSize, totalPayloadValueSize uint64 - var value ledger.Value - var key ledger.Key - var size, valueSize int - var tries []*trie.MTrie if flagStateCommitment != "" { @@ -148,21 +219,15 @@ func run(*cobra.Command, []string) { log.Info().Msgf("collecting stats on %d tries", len(tries)) - valueSizesByType := make(sizesByType, 0) - ledgerStats, err := complete.CollectStats(tries, func(p *ledger.Payload) { - key, err = p.Key() - if err != nil { - log.Fatal().Err(err).Msg("cannot load a key") - } + ledgerStats, err := complete.CollectStats(tries, payloadCallBack) + if err != nil { + log.Fatal().Err(err).Msg("failed to collect stats") + } - size = p.Size() - value = p.Value() - valueSize = value.Size() - totalPayloadSize += uint64(size) - totalPayloadValueSize += uint64(valueSize) - valueSizesByType[getType(key)] = append(valueSizesByType[getType(key)], float64(valueSize)) - }) + return ledgerStats +} +func getStats(valueSizesByType sizesByType) []RegisterStatsByTypes { statsByTypes := make([]RegisterStatsByTypes, 0) for t, values := range valueSizesByType { @@ -215,19 +280,10 @@ func run(*cobra.Command, []string) { }) } - if err != nil { - log.Fatal().Err(err).Msg("failed to collect stats") - } - - stats := &Stats{ - LedgerStats: ledgerStats, - PayloadStats: &PayloadStats{ - TotalPayloadSize: totalPayloadSize, - TotalPayloadValueSize: totalPayloadValueSize, - StatsByTypes: statsByTypes, - }, - } + return statsByTypes +} +func writeStats(stats *Stats) { path := filepath.Join(flagOutputDir, ledgerStatsReportName) fi, err := os.Create(path) From 5d4ca962fa2d0613e7b416cfec698c8de3d7d392 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Wed, 18 Sep 2024 15:13:35 -0500 Subject: [PATCH 3/3] Use ReportFileWriterFactory in checkpoint-collect-stats This commit uses ReportFileWriterFactory to write stats output. ReportFileWriterFactory adds datetime suffix to file name to avoid overwriting existing file. --- cmd/util/cmd/checkpoint-collect-stats/cmd.go | 31 +++++--------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/cmd/util/cmd/checkpoint-collect-stats/cmd.go b/cmd/util/cmd/checkpoint-collect-stats/cmd.go index 0646c0551c4..40c6b0f3f4f 100644 --- a/cmd/util/cmd/checkpoint-collect-stats/cmd.go +++ b/cmd/util/cmd/checkpoint-collect-stats/cmd.go @@ -1,11 +1,7 @@ package checkpoint_collect_stats import ( - "bufio" - "encoding/json" "math" - "os" - "path/filepath" "strings" "github.com/montanaflynn/stats" @@ -17,6 +13,7 @@ import ( "github.com/onflow/atree" + "github.com/onflow/flow-go/cmd/util/ledger/reporters" "github.com/onflow/flow-go/cmd/util/ledger/util" "github.com/onflow/flow-go/ledger" "github.com/onflow/flow-go/ledger/common/pathfinder" @@ -37,7 +34,7 @@ var ( ) const ( - ledgerStatsReportName = "ledger.stats.json" + ledgerStatsReportName = "ledger-stats" ) var Cmd = &cobra.Command{ @@ -149,7 +146,7 @@ func run(*cobra.Command, []string) { }, } - writeStats(stats) + writeStats(ledgerStatsReportName, stats) } func getPayloadStatsFromPayloadFile(payloadCallBack func(payload *ledger.Payload)) { @@ -283,24 +280,12 @@ func getStats(valueSizesByType sizesByType) []RegisterStatsByTypes { return statsByTypes } -func writeStats(stats *Stats) { - path := filepath.Join(flagOutputDir, ledgerStatsReportName) +func writeStats(reportName string, stats interface{}) { + rw := reporters.NewReportFileWriterFactory(flagOutputDir, log.Logger). + ReportWriter(reportName) + defer rw.Close() - fi, err := os.Create(path) - if err != nil { - log.Fatal().Err(err).Msg("failed to create path") - } - defer fi.Close() - - writer := bufio.NewWriter(fi) - defer writer.Flush() - - encoder := json.NewEncoder(writer) - - err = encoder.Encode(stats) - if err != nil { - log.Fatal().Err(err).Msg("could not json encode ledger stats") - } + rw.Write(stats) } func getType(key ledger.Key) string {