From c226563eec76caa7e973a0742aeaebb832da2daf Mon Sep 17 00:00:00 2001 From: Brendan Playford <34052452+teslashibe@users.noreply.github.com> Date: Wed, 17 Jul 2024 14:07:47 +0100 Subject: [PATCH] fix: calculation of node uptimes by looking at `recentUptime` (#414) * Update workers.go to handle nil pointers * handle errors from db and update breaking changes * Update monitorworkers and refactor code * Fix sendWork nill pointer * update AccumulatedUptime calculation Fix accumulated uptime calculation in NodeData This commit addresses issues in the accumulated uptime calculation for nodes in the network. Previously, the calculation could lead to inflated uptime values due to repeated additions of time since the last known activity markers, regardless of the node's actual activity status. The updated logic now accurately calculates the uptime based on the node's activity periods, ensuring that only the duration of actual activity is added to the accumulated uptime. For nodes that have left the network, the uptime is calculated as the difference between the last left and last joined timestamps. For active nodes, the uptime since the first join is considered, preventing overestimation. This change ensures a more accurate representation of node uptime, enhancing the reliability of network monitoring metrics. - Add precise calculation for nodes that have left the network - Implement dynamic uptime update for active nodes - Prevent repeated addition of the same uptime duration - Ensure accurate tracking of node activity periods * Update node uptime calculation and event tracking The calculation methods for current uptime and accumulated uptime have been commented out and their calls removed from the join and leave event methods. Instead, they are replaced with an update method called when a node leaves. Additionally, before processing an expired connect event, the node is now forced to leave to ensure correct timestamp updating. * Update nodeData with new data The previous version of the code attempted to persist a wrong instance into the `nodeData` map. The update corrects this issue by ensuring that `nodeData` is updated with the new instance `nd`, which contains all recent modifications. * Remove IsActive attribute from NodeEventTracker The IsActive attribute in the NodeEventTracker has been removed because it should not be set by other nodes. It should only be manipulated by the join/leave events tracked by this node. --------- Co-authored-by: Ettore Di Giacinto Co-authored-by: Bob Stevens <35038919+restevens402@users.noreply.github.com> Co-authored-by: John Dutchak --- pkg/pubsub/node_data.go | 69 +++++++++++++++++++------------- pkg/pubsub/node_event_tracker.go | 5 ++- 2 files changed, 45 insertions(+), 29 deletions(-) diff --git a/pkg/pubsub/node_data.go b/pkg/pubsub/node_data.go index 917bf618..b7a57ee8 100644 --- a/pkg/pubsub/node_data.go +++ b/pkg/pubsub/node_data.go @@ -99,25 +99,25 @@ func NewNodeData(addr multiaddr.Multiaddr, peerId peer.ID, publicKey string, act } } -// CalculateCurrentUptime calculates the current uptime based on Unix timestamps. -func (n *NodeData) CalculateCurrentUptime() { - if n.Activity == ActivityJoined { - n.CurrentUptime = time.Duration(n.LastUpdatedUnix-n.LastJoinedUnix) * time.Second - } else { - n.CurrentUptime = 0 - } - n.CurrentUptimeStr = n.CurrentUptime.String() -} +//// CalculateCurrentUptime calculates the current uptime based on Unix timestamps. +//func (n *NodeData) CalculateCurrentUptime() { +// if n.Activity == ActivityJoined { +// n.CurrentUptime = time.Duration(n.LastUpdatedUnix-n.LastJoinedUnix) * time.Second +// } else { +// n.CurrentUptime = 0 +// } +// n.CurrentUptimeStr = n.CurrentUptime.String() +//} -// CalculateAccumulatedUptime calculates the accumulated uptime based on Unix timestamps. -func (n *NodeData) CalculateAccumulatedUptime() { - if n.FirstJoinedUnix > 0 && n.LastLeftUnix > 0 { - n.AccumulatedUptime = time.Duration(n.LastLeftUnix-n.FirstJoinedUnix) * time.Second - } else { - n.AccumulatedUptime = 0 - } - n.AccumulatedUptimeStr = n.AccumulatedUptime.String() -} +//// CalculateAccumulatedUptime calculates the accumulated uptime based on Unix timestamps. +//func (n *NodeData) CalculateAccumulatedUptime() { +// if n.FirstJoinedUnix > 0 && n.LastLeftUnix > 0 { +// n.AccumulatedUptime = time.Duration(n.LastLeftUnix-n.FirstJoinedUnix) * time.Second +// } else { +// n.AccumulatedUptime = 0 +// } +// n.AccumulatedUptimeStr = n.AccumulatedUptime.String() +//} // Address returns a string representation of the NodeData's multiaddress // and peer ID in the format "/ip4/127.0.0.1/tcp/4001/p2p/QmcgpsyWgH8Y8ajJz1Cu72KnS5uo2Aa2LpzU7kinSupNKC". @@ -159,9 +159,6 @@ func (n *NodeData) Joined() { n.Activity = ActivityJoined n.IsActive = true - n.CalculateCurrentUptime() - n.CalculateAccumulatedUptime() - n.Version = config.Version[1:] logMessage := fmt.Sprintf("[+] %s node joined: %s", map[bool]string{true: "Staked", false: "Unstaked"}[n.IsStaked], n.Address()) @@ -181,13 +178,14 @@ func (n *NodeData) Left() { } n.LastLeftUnix = time.Now().Unix() n.LastUpdatedUnix = n.LastLeftUnix - n.AccumulatedUptime += n.GetCurrentUptime() n.CurrentUptime = 0 n.Activity = ActivityLeft n.IsActive = false + // call this after setting activity flags + n.UpdateAccumulatedUptime() - n.CalculateCurrentUptime() - n.CalculateAccumulatedUptime() + //n.CalculateCurrentUptime() + //n.CalculateAccumulatedUptime() logMessage := fmt.Sprintf("Node left: %s", n.Address()) if n.IsStaked { @@ -220,10 +218,27 @@ func (n *NodeData) GetAccumulatedUptime() time.Duration { // Otherwise, it uses the time since the last joined event. func (n *NodeData) UpdateAccumulatedUptime() { if n.Activity == ActivityLeft { - n.AccumulatedUptime += time.Since(time.Unix(n.LastLeftUnix, 0)) - return + // Calculate the uptime for the most recent active period + recentUptime := n.LastLeftUnix - n.LastJoinedUnix + // Add this to the accumulated uptime + n.AccumulatedUptime += time.Duration(recentUptime) * time.Second + } else if n.Activity == ActivityJoined { + // If the node is currently active, calculate the uptime since it first joined + // This should only be done if the node is active and hasn't been updated yet + currentUptime := time.Now().Unix() - n.FirstJoinedUnix + // Update the accumulated uptime only if it's less than the current uptime + if currentUptime > int64(n.AccumulatedUptime.Seconds()) { + n.AccumulatedUptime = time.Duration(currentUptime) * time.Second + } } - n.AccumulatedUptime += time.Since(time.Unix(n.LastJoinedUnix, 0)) + // Ensure the accumulated uptime does not exceed the maximum possible uptime + if n.FirstJoinedUnix > 0 && n.LastLeftUnix > 0 { + maxAccumulatedUptime := time.Duration(n.LastLeftUnix-n.FirstJoinedUnix) * time.Second + if n.AccumulatedUptime > maxAccumulatedUptime { + n.AccumulatedUptime = maxAccumulatedUptime + } + } + n.AccumulatedUptimeStr = n.AccumulatedUptime.String() } // GetSelfNodeDataJson converts the local node's data into a JSON byte array. diff --git a/pkg/pubsub/node_event_tracker.go b/pkg/pubsub/node_event_tracker.go index 5e809211..f67b9032 100644 --- a/pkg/pubsub/node_event_tracker.go +++ b/pkg/pubsub/node_event_tracker.go @@ -363,7 +363,6 @@ func (net *NodeEventTracker) AddOrUpdateNodeData(nodeData *NodeData, forceGossip nd.Records = nodeData.Records nd.Multiaddrs = nodeData.Multiaddrs nd.EthAddress = nodeData.EthAddress - nd.IsActive = nodeData.IsActive if nd.EthAddress == "" && nodeData.EthAddress != "" { dataChanged = true @@ -386,7 +385,7 @@ func (net *NodeEventTracker) AddOrUpdateNodeData(nodeData *NodeData, forceGossip } nd.LastUpdatedUnix = nodeData.LastUpdatedUnix - net.nodeData.Set(nodeData.PeerId.String(), nodeData) + net.nodeData.Set(nodeData.PeerId.String(), nd) } return nil } @@ -401,6 +400,8 @@ func (net *NodeEventTracker) ClearExpiredBufferEntries() { now := time.Now() for peerID, entry := range net.ConnectBuffer { if now.Sub(entry.ConnectTime) > time.Minute*1 { + // first force a leave event so that timestamps are updated properly + entry.NodeData.Left() // Buffer period expired without a disconnect, process connect entry.NodeData.Joined() net.NodeDataChan <- entry.NodeData