Skip to content

Commit

Permalink
fix: calculation of node uptimes by looking at recentUptime (#414)
Browse files Browse the repository at this point in the history
* Update workers.go to handle nil pointers

* handle errors from db and update breaking changes

* Update monitorworkers and refactor code

* Fix sendWork nill pointer

* update AccumulatedUptime calculation

Fix accumulated uptime calculation in NodeData

This commit addresses issues in the accumulated uptime calculation for nodes in the network. Previously, the calculation could lead to inflated uptime values due to repeated additions of time since the last known activity markers, regardless of the node's actual activity status. The updated logic now accurately calculates the uptime based on the node's activity periods, ensuring that only the duration of actual activity is added to the accumulated uptime. For nodes that have left the network, the uptime is calculated as the difference between the last left and last joined timestamps. For active nodes, the uptime since the first join is considered, preventing overestimation. This change ensures a more accurate representation of node uptime, enhancing the reliability of network monitoring metrics.

- Add precise calculation for nodes that have left the network
- Implement dynamic uptime update for active nodes
- Prevent repeated addition of the same uptime duration
- Ensure accurate tracking of node activity periods

* Update node uptime calculation and event tracking

The calculation methods for current uptime and accumulated uptime have been commented out and their calls removed from the join and leave event methods. Instead, they are replaced with an update method called when a node leaves. Additionally, before processing an expired connect event, the node is now forced to leave to ensure correct timestamp updating.

* Update nodeData with new data

The previous version of the code attempted to persist a wrong instance into the `nodeData` map. The update corrects this issue by ensuring that `nodeData` is updated with the new instance `nd`, which contains all recent modifications.

* Remove IsActive attribute from NodeEventTracker

The IsActive attribute in the NodeEventTracker has been removed because it should not be set by other nodes. It should only be manipulated by the join/leave events tracked by this node.

---------

Co-authored-by: Ettore Di Giacinto <[email protected]>
Co-authored-by: Bob Stevens <[email protected]>
Co-authored-by: John Dutchak <[email protected]>
  • Loading branch information
4 people authored Jul 17, 2024
1 parent 4744de3 commit c226563
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 29 deletions.
69 changes: 42 additions & 27 deletions pkg/pubsub/node_data.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,25 +99,25 @@ func NewNodeData(addr multiaddr.Multiaddr, peerId peer.ID, publicKey string, act
}
}

// CalculateCurrentUptime calculates the current uptime based on Unix timestamps.
func (n *NodeData) CalculateCurrentUptime() {
if n.Activity == ActivityJoined {
n.CurrentUptime = time.Duration(n.LastUpdatedUnix-n.LastJoinedUnix) * time.Second
} else {
n.CurrentUptime = 0
}
n.CurrentUptimeStr = n.CurrentUptime.String()
}
//// CalculateCurrentUptime calculates the current uptime based on Unix timestamps.
//func (n *NodeData) CalculateCurrentUptime() {
// if n.Activity == ActivityJoined {
// n.CurrentUptime = time.Duration(n.LastUpdatedUnix-n.LastJoinedUnix) * time.Second
// } else {
// n.CurrentUptime = 0
// }
// n.CurrentUptimeStr = n.CurrentUptime.String()
//}

// CalculateAccumulatedUptime calculates the accumulated uptime based on Unix timestamps.
func (n *NodeData) CalculateAccumulatedUptime() {
if n.FirstJoinedUnix > 0 && n.LastLeftUnix > 0 {
n.AccumulatedUptime = time.Duration(n.LastLeftUnix-n.FirstJoinedUnix) * time.Second
} else {
n.AccumulatedUptime = 0
}
n.AccumulatedUptimeStr = n.AccumulatedUptime.String()
}
//// CalculateAccumulatedUptime calculates the accumulated uptime based on Unix timestamps.
//func (n *NodeData) CalculateAccumulatedUptime() {
// if n.FirstJoinedUnix > 0 && n.LastLeftUnix > 0 {
// n.AccumulatedUptime = time.Duration(n.LastLeftUnix-n.FirstJoinedUnix) * time.Second
// } else {
// n.AccumulatedUptime = 0
// }
// n.AccumulatedUptimeStr = n.AccumulatedUptime.String()
//}

// Address returns a string representation of the NodeData's multiaddress
// and peer ID in the format "/ip4/127.0.0.1/tcp/4001/p2p/QmcgpsyWgH8Y8ajJz1Cu72KnS5uo2Aa2LpzU7kinSupNKC".
Expand Down Expand Up @@ -159,9 +159,6 @@ func (n *NodeData) Joined() {
n.Activity = ActivityJoined
n.IsActive = true

n.CalculateCurrentUptime()
n.CalculateAccumulatedUptime()

n.Version = config.Version[1:]

logMessage := fmt.Sprintf("[+] %s node joined: %s", map[bool]string{true: "Staked", false: "Unstaked"}[n.IsStaked], n.Address())
Expand All @@ -181,13 +178,14 @@ func (n *NodeData) Left() {
}
n.LastLeftUnix = time.Now().Unix()
n.LastUpdatedUnix = n.LastLeftUnix
n.AccumulatedUptime += n.GetCurrentUptime()
n.CurrentUptime = 0
n.Activity = ActivityLeft
n.IsActive = false
// call this after setting activity flags
n.UpdateAccumulatedUptime()

n.CalculateCurrentUptime()
n.CalculateAccumulatedUptime()
//n.CalculateCurrentUptime()
//n.CalculateAccumulatedUptime()

logMessage := fmt.Sprintf("Node left: %s", n.Address())
if n.IsStaked {
Expand Down Expand Up @@ -220,10 +218,27 @@ func (n *NodeData) GetAccumulatedUptime() time.Duration {
// Otherwise, it uses the time since the last joined event.
func (n *NodeData) UpdateAccumulatedUptime() {
if n.Activity == ActivityLeft {
n.AccumulatedUptime += time.Since(time.Unix(n.LastLeftUnix, 0))
return
// Calculate the uptime for the most recent active period
recentUptime := n.LastLeftUnix - n.LastJoinedUnix
// Add this to the accumulated uptime
n.AccumulatedUptime += time.Duration(recentUptime) * time.Second
} else if n.Activity == ActivityJoined {
// If the node is currently active, calculate the uptime since it first joined
// This should only be done if the node is active and hasn't been updated yet
currentUptime := time.Now().Unix() - n.FirstJoinedUnix
// Update the accumulated uptime only if it's less than the current uptime
if currentUptime > int64(n.AccumulatedUptime.Seconds()) {
n.AccumulatedUptime = time.Duration(currentUptime) * time.Second
}
}
n.AccumulatedUptime += time.Since(time.Unix(n.LastJoinedUnix, 0))
// Ensure the accumulated uptime does not exceed the maximum possible uptime
if n.FirstJoinedUnix > 0 && n.LastLeftUnix > 0 {
maxAccumulatedUptime := time.Duration(n.LastLeftUnix-n.FirstJoinedUnix) * time.Second
if n.AccumulatedUptime > maxAccumulatedUptime {
n.AccumulatedUptime = maxAccumulatedUptime
}
}
n.AccumulatedUptimeStr = n.AccumulatedUptime.String()
}

// GetSelfNodeDataJson converts the local node's data into a JSON byte array.
Expand Down
5 changes: 3 additions & 2 deletions pkg/pubsub/node_event_tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,6 @@ func (net *NodeEventTracker) AddOrUpdateNodeData(nodeData *NodeData, forceGossip
nd.Records = nodeData.Records
nd.Multiaddrs = nodeData.Multiaddrs
nd.EthAddress = nodeData.EthAddress
nd.IsActive = nodeData.IsActive

if nd.EthAddress == "" && nodeData.EthAddress != "" {
dataChanged = true
Expand All @@ -386,7 +385,7 @@ func (net *NodeEventTracker) AddOrUpdateNodeData(nodeData *NodeData, forceGossip
}

nd.LastUpdatedUnix = nodeData.LastUpdatedUnix
net.nodeData.Set(nodeData.PeerId.String(), nodeData)
net.nodeData.Set(nodeData.PeerId.String(), nd)
}
return nil
}
Expand All @@ -401,6 +400,8 @@ func (net *NodeEventTracker) ClearExpiredBufferEntries() {
now := time.Now()
for peerID, entry := range net.ConnectBuffer {
if now.Sub(entry.ConnectTime) > time.Minute*1 {
// first force a leave event so that timestamps are updated properly
entry.NodeData.Left()
// Buffer period expired without a disconnect, process connect
entry.NodeData.Joined()
net.NodeDataChan <- entry.NodeData
Expand Down

0 comments on commit c226563

Please sign in to comment.