diff --git a/cli/operator/node.go b/cli/operator/node.go index ec23d34bbd..fa7f8b2326 100644 --- a/cli/operator/node.go +++ b/cli/operator/node.go @@ -268,7 +268,6 @@ var StartNodeCmd = &cobra.Command{ } } - cfg.P2pNetworkConfig.Metrics = metricsReporter cfg.P2pNetworkConfig.MessageValidator = messageValidator cfg.SSVOptions.ValidatorOptions.MessageValidator = messageValidator diff --git a/network/p2p/config.go b/network/p2p/config.go index c659b610ea..dec1102418 100644 --- a/network/p2p/config.go +++ b/network/p2p/config.go @@ -17,7 +17,6 @@ import ( "go.uber.org/zap" "github.com/ssvlabs/ssv/message/validation" - "github.com/ssvlabs/ssv/monitoring/metricsreporter" "github.com/ssvlabs/ssv/network" "github.com/ssvlabs/ssv/network/commons" "github.com/ssvlabs/ssv/networkconfig" @@ -75,8 +74,6 @@ type Config struct { Network networkconfig.NetworkConfig // MessageValidator validates incoming messages. MessageValidator validation.MessageValidator - // Metrics report metrics. - Metrics metricsreporter.MetricsReporter PubsubMsgCacheTTL time.Duration `yaml:"PubsubMsgCacheTTL" env:"PUBSUB_MSG_CACHE_TTL" env-description:"How long a message ID will be remembered as seen"` PubsubOutQueueSize int `yaml:"PubsubOutQueueSize" env:"PUBSUB_OUT_Q_SIZE" env-description:"The size that we assign to the outbound pubsub message queue"` diff --git a/network/p2p/observability.go b/network/p2p/observability.go index 7dcb28501f..b027aa58a5 100644 --- a/network/p2p/observability.go +++ b/network/p2p/observability.go @@ -7,8 +7,9 @@ import ( "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" + "go.uber.org/zap" - "github.com/libp2p/go-libp2p/core/peer" + "github.com/libp2p/go-libp2p/core/host" "github.com/ssvlabs/ssv/network/peers" "github.com/ssvlabs/ssv/network/topics" "github.com/ssvlabs/ssv/observability" @@ -45,8 +46,10 @@ func metricName(name string) string { return fmt.Sprintf("%s.%s", observabilityNamespace, name) } -func recordPeerCount(ctx context.Context, peers []peer.ID) func() { +func recordPeerCount(ctx context.Context, logger *zap.Logger, host host.Host) func() { return func() { + peers := host.Network().Peers() + logger.Info("reporting total number of peers", zap.Int("peer_count", len(peers))) peersConnectedGauge.Record(ctx, int64(len(peers))) } } @@ -63,9 +66,11 @@ func recordPeerCountPerTopic(ctx context.Context, ctrl topics.Controller) func() } } -func recordPeerIdentities(ctx context.Context, peers []peer.ID, index peers.Index) func() { +func recordPeerIdentities(ctx context.Context, logger *zap.Logger, host host.Host, index peers.Index) func() { return func() { - for _, pid := range peers { + peersByVersion := make(map[string]int64) + logger.Info("reporting peer identities", zap.Int("peer_count", len(host.Network().Peers()))) + for _, pid := range host.Network().Peers() { nodeVersion := "unknown" ni := index.NodeInfo(pid) if ni != nil { @@ -73,7 +78,10 @@ func recordPeerIdentities(ctx context.Context, peers []peer.ID, index peers.Inde nodeVersion = ni.Metadata.NodeVersion } } - peerIdentityGauge.Record(ctx, 1, metric.WithAttributes(attribute.String("ssv.node.version", nodeVersion))) + peersByVersion[nodeVersion]++ + } + for version, peerCount := range peersByVersion { + peerIdentityGauge.Record(ctx, peerCount, metric.WithAttributes(attribute.String("ssv.node.version", version))) } } } diff --git a/network/p2p/p2p.go b/network/p2p/p2p.go index 26c9274f7f..37d4e09a68 100644 --- a/network/p2p/p2p.go +++ b/network/p2p/p2p.go @@ -51,7 +51,7 @@ const ( connManagerBalancingInterval = 3 * time.Minute connManagerBalancingTimeout = time.Minute peersReportingInterval = 60 * time.Second - peerIdentitiesReportingInterval = 5 * time.Minute + peerIdentitiesReportingInterval = time.Minute topicsReportingInterval = 180 * time.Second maximumIrrelevantPeersToDisconnect = 3 ) @@ -258,14 +258,11 @@ func (n *p2pNetwork) Start(logger *zap.Logger) error { async.Interval(n.ctx, connManagerBalancingInterval, n.peersBalancing(logger)) - // don't report metrics in tests - if n.cfg.Metrics != nil { - async.Interval(n.ctx, peersReportingInterval, recordPeerCount(n.ctx, n.host.Network().Peers())) + async.Interval(n.ctx, peersReportingInterval, recordPeerCount(n.ctx, logger, n.host)) - async.Interval(n.ctx, peerIdentitiesReportingInterval, recordPeerIdentities(n.ctx, n.host.Network().Peers(), n.idx)) + async.Interval(n.ctx, peerIdentitiesReportingInterval, recordPeerIdentities(n.ctx, logger, n.host, n.idx)) - async.Interval(n.ctx, topicsReportingInterval, recordPeerCountPerTopic(n.ctx, n.topicsCtrl)) - } + async.Interval(n.ctx, topicsReportingInterval, recordPeerCountPerTopic(n.ctx, n.topicsCtrl)) if err := n.subscribeToSubnets(logger); err != nil { return err diff --git a/network/p2p/test_utils.go b/network/p2p/test_utils.go index 07e6fc58bb..3153b96996 100644 --- a/network/p2p/test_utils.go +++ b/network/p2p/test_utils.go @@ -179,7 +179,6 @@ func (ln *LocalNet) NewTestP2pNetwork(ctx context.Context, nodeIndex uint64, key cfg.Ctx = ctx cfg.Subnets = "00000000000000000100000400000400" // calculated for topics 64, 90, 114; PAY ATTENTION for future test scenarios which use more than one eth-validator we need to make this field dynamically changing cfg.NodeStorage = nodeStorage - cfg.Metrics = nil cfg.MessageValidator = validation.New( networkconfig.TestNetwork, nodeStorage.ValidatorStore(),