Merge pull request #35 from accuracy-maker/master

add sumtree,PER and PESR
kyegomez · Dec 24, 2023 · 1bb7da1 · 1bb7da1
2 parents 288b482 + 07541b2
commit 1bb7da1
Show file tree

Hide file tree

Showing 6 changed files with 476 additions and 0 deletions.
diff --git a/tests/rl/test_prioritizedreplybuffer.py b/tests/rl/test_prioritizedreplybuffer.py
@@ -0,0 +1,61 @@
+import pytest
+import random
+import torch
+from zeta.rl.PrioritizedReplayBuffer import PrioritizedReplayBuffer, SumTree # Replace 'your_module' with the actual module where classes are defined
+
+@pytest.fixture
+def replay_buffer():
+ state_size = 4
+ action_size = 2
+ buffer_size = 100
+ device = torch.device("cpu")
+ return PrioritizedReplayBuffer(state_size, action_size, buffer_size, device)
+
+def test_initialization(replay_buffer):
+ assert replay_buffer.eps == 1e-2
+ assert replay_buffer.alpha == 0.1
+ assert replay_buffer.beta == 0.1
+ assert replay_buffer.max_priority == 1.0
+ assert replay_buffer.count == 0
+ assert replay_buffer.real_size == 0
+ assert replay_buffer.size == 100
+ assert replay_buffer.device == torch.device("cpu")
+
+def test_add(replay_buffer):
+ transition = (torch.rand(4), torch.rand(2), 1.0, torch.rand(4), False)
+ replay_buffer.add(transition)
+ assert replay_buffer.count == 1
+ assert replay_buffer.real_size == 1
+
+def test_sample(replay_buffer):
+ for i in range(10):
+ transition = (torch.rand(4), torch.rand(2), 1.0, torch.rand(4), False)
+ replay_buffer.add(transition)
+
+ batch, weights, tree_idxs = replay_buffer.sample(5)
+ assert len(batch) == 5
+ assert len(weights) == 5
+ assert len(tree_idxs) == 5
+
+def test_update_priorities(replay_buffer):
+ for i in range(10):
+ transition = (torch.rand(4), torch.rand(2), 1.0, torch.rand(4), False)
+ replay_buffer.add(transition)
+
+ batch, weights, tree_idxs = replay_buffer.sample(5)
+ new_priorities = torch.rand(5)
+ replay_buffer.update_priorities(tree_idxs, new_priorities)
+
+def test_sample_with_invalid_batch_size(replay_buffer):
+ with pytest.raises(AssertionError):
+ replay_buffer.sample(101)
+
+def test_add_with_max_size(replay_buffer):
+ for i in range(100):
+ transition = (torch.rand(4), torch.rand(2), 1.0, torch.rand(4), False)
+ replay_buffer.add(transition)
+
+ assert replay_buffer.count == 0
+ assert replay_buffer.real_size == 100
+
+# Additional tests for edge cases, exceptions, and more scenarios can be added as needed.
diff --git a/tests/rl/test_prioritizedsequencereplybuffer.py b/tests/rl/test_prioritizedsequencereplybuffer.py
@@ -0,0 +1,64 @@
+import pytest
+import random
+import torch
+from zeta.rl.PrioritizedSequenceReplayBuffer import PrioritizedSequenceReplayBuffer, SumTree # Replace 'your_module' with the actual module where classes are defined
+
+@pytest.fixture
+def replay_buffer():
+ state_size = 4
+ action_size = 2
+ buffer_size = 100
+ device = torch.device("cpu")
+ return PrioritizedSequenceReplayBuffer(state_size, action_size, buffer_size, device)
+
+def test_initialization(replay_buffer):
+ assert replay_buffer.eps == 1e-5
+ assert replay_buffer.alpha == 0.1
+ assert replay_buffer.beta == 0.1
+ assert replay_buffer.max_priority == 1.0
+ assert replay_buffer.decay_window == 5
+ assert replay_buffer.decay_coff == 0.4
+ assert replay_buffer.pre_priority == 0.7
+ assert replay_buffer.count == 0
+ assert replay_buffer.real_size == 0
+ assert replay_buffer.size == 100
+ assert replay_buffer.device == torch.device("cpu")
+
+def test_add(replay_buffer):
+ transition = (torch.rand(4), torch.rand(2), 1.0, torch.rand(4), False)
+ replay_buffer.add(transition)
+ assert replay_buffer.count == 1
+ assert replay_buffer.real_size == 1
+
+def test_sample(replay_buffer):
+ for i in range(10):
+ transition = (torch.rand(4), torch.rand(2), 1.0, torch.rand(4), False)
+ replay_buffer.add(transition)
+
+ batch, weights, tree_idxs = replay_buffer.sample(5)
+ assert len(batch) == 5
+ assert len(weights) == 5
+ assert len(tree_idxs) == 5
+
+def test_update_priorities(replay_buffer):
+ for i in range(10):
+ transition = (torch.rand(4), torch.rand(2), 1.0, torch.rand(4), False)
+ replay_buffer.add(transition)
+
+ batch, weights, tree_idxs = replay_buffer.sample(5)
+ new_priorities = torch.rand(5)
+ replay_buffer.update_priorities(tree_idxs, new_priorities)
+
+def test_sample_with_invalid_batch_size(replay_buffer):
+ with pytest.raises(AssertionError):
+ replay_buffer.sample(101)
+
+def test_add_with_max_size(replay_buffer):
+ for i in range(100):
+ transition = (torch.rand(4), torch.rand(2), 1.0, torch.rand(4), False)
+ replay_buffer.add(transition)
+
+ assert replay_buffer.count == 0
+ assert replay_buffer.real_size == 100
+
+# Additional tests for edge cases, exceptions, and more scenarios can be added as needed.
diff --git a/tests/rl/test_sumtree.py b/tests/rl/test_sumtree.py
@@ -0,0 +1,56 @@
+import pytest
+from zeta.rl.sumtree import SumTree # Replace 'your_module' with the actual module where SumTree is defined
+
+# Fixture for initializing SumTree instances with a given size
+@pytest.fixture
+def sum_tree():
+ size = 10 # You can change the size as needed
+ return SumTree(size)
+
+# Basic tests
+def test_initialization(sum_tree):
+ assert sum_tree.size == 10
+ assert sum_tree.count == 0
+ assert sum_tree.real_size == 0
+ assert sum_tree.total == 0
+
+def test_update_and_get(sum_tree):
+ sum_tree.add(5, "data1")
+ assert sum_tree.total == 5
+ data_idx, priority, data = sum_tree.get(5)
+ assert data_idx == 0
+ assert priority == 5
+ assert data == "data1"
+
+def test_add_overflow(sum_tree):
+ for i in range(15):
+ sum_tree.add(i, f"data{i}")
+ assert sum_tree.count == 5
+ assert sum_tree.real_size == 10
+
+# Parameterized testing for various scenarios
+@pytest.mark.parametrize("values, expected_total", [
+ ([1, 2, 3, 4, 5], 15),
+ ([10, 20, 30, 40, 50], 150),
+])
+def test_multiple_updates(sum_tree, values, expected_total):
+ for value in values:
+ sum_tree.add(value, None)
+ assert sum_tree.total == expected_total
+
+# Exception testing
+def test_get_with_invalid_cumsum(sum_tree):
+ with pytest.raises(AssertionError):
+ sum_tree.get(20)
+
+# More tests for specific methods
+def test_get_priority(sum_tree):
+ sum_tree.add(10, "data1")
+ priority = sum_tree.get_priority(0)
+ assert priority == 10
+
+def test_repr(sum_tree):
+ expected_repr = f"SumTree(nodes={sum_tree.nodes}, data={sum_tree.data})"
+ assert repr(sum_tree) == expected_repr
+
+# More test cases can be added as needed
diff --git a/zeta/rl/PrioritizedReplayBuffer.py b/zeta/rl/PrioritizedReplayBuffer.py
@@ -0,0 +1,85 @@
+from sumtree import SumTree
+import torch
+import random
+
+class PrioritizedReplayBuffer:
+ def __init__(self, state_size, action_size, buffer_size, device, eps=1e-2, alpha=0.1, beta=0.1):
+ self.tree = SumTree(size=buffer_size)
+
+
+ self.eps = eps 
+ self.alpha = alpha 
+ self.beta = beta 
+ self.max_priority = 1. 
+
+
+ self.state = torch.empty(buffer_size, state_size, dtype=torch.float)
+ self.action = torch.empty(buffer_size, action_size, dtype=torch.float)
+ self.reward = torch.empty(buffer_size, dtype=torch.float)
+ self.next_state = torch.empty(buffer_size, state_size, dtype=torch.float)
+ self.done = torch.empty(buffer_size, dtype=torch.uint8)
+
+ self.count = 0
+ self.real_size = 0
+ self.size = buffer_size
+
+ # device
+ self.device = device
+
+ def add(self, transition):
+ state, action, reward, next_state, done = transition
+
+
+ self.tree.add(self.max_priority, self.count)
+
+ self.state[self.count] = torch.as_tensor(state)
+ self.action[self.count] = torch.as_tensor(action)
+ self.reward[self.count] = torch.as_tensor(reward)
+ self.next_state[self.count] = torch.as_tensor(next_state)
+ self.done[self.count] = torch.as_tensor(done)
+
+
+ self.count = (self.count + 1) % self.size
+ self.real_size = min(self.size, self.real_size + 1)
+
+ def sample(self, batch_size):
+ assert self.real_size >= batch_size, "buffer contains less samples than batch size"
+
+ sample_idxs, tree_idxs = [], []
+ priorities = torch.empty(batch_size, 1, dtype=torch.float)
+
+
+ segment = self.tree.total / batch_size
+ for i in range(batch_size):
+ a, b = segment * i, segment * (i + 1)
+
+ cumsum = random.uniform(a, b)
+
+ tree_idx, priority, sample_idx = self.tree.get(cumsum)
+
+ priorities[i] = priority
+ tree_idxs.append(tree_idx)
+ sample_idxs.append(sample_idx)
+
+ probs = priorities / self.tree.total
+
+ weights = (self.real_size * probs) ** -self.beta
+
+ weights = weights / weights.max()
+ batch = (
+ self.state[sample_idxs].to(self.device),
+ self.action[sample_idxs].to(self.device),
+ self.reward[sample_idxs].to(self.device),
+ self.next_state[sample_idxs].to(self.device),
+ self.done[sample_idxs].to(self.device)
+ )
+ return batch, weights, tree_idxs
+
+ def update_priorities(self, data_idxs, priorities):
+ if isinstance(priorities, torch.Tensor):
+ priorities = priorities.detach().cpu().numpy()
+
+ for data_idx, priority in zip(data_idxs, priorities):
+ priority = (priority + self.eps) ** self.alpha
+ self.tree.update(data_idx, priority)
+ self.max_priority = max(self.max_priority, priority)
diff --git a/zeta/rl/PrioritizedSequenceReplayBuffer.py b/zeta/rl/PrioritizedSequenceReplayBuffer.py
@@ -0,0 +1,112 @@
+from sumtree import SumTree
+import torch
+import random
+
+class PrioritizedSequenceReplayBuffer:
+ def __init__(self,state_size,action_size,buffer_size,device,eps=1e-5,alpha=0.1,beta=0.1,
+ decay_window=5,
+ decay_coff=0.4,
+ pre_priority=0.7):
+ self.tree = SumTree(data_size=buffer_size)
+
+ # PESR params
+ self.eps = eps
+ self.alpha = alpha
+ self.beta = beta
+ self.max_priority = 1.
+ self.decay_window = decay_window
+ self.decay_coff = decay_coff
+ self.pre_priority = pre_priority
+
+ # buffer params
+ self.state = torch.empty(buffer_size, state_size, dtype=torch.float)
+ self.action = torch.empty(buffer_size, action_size, dtype=torch.float)
+ self.reward = torch.empty(buffer_size, dtype=torch.float)
+ self.next_state = torch.empty(buffer_size, state_size, dtype=torch.float)
+ self.done = torch.empty(buffer_size, dtype=torch.uint8)
+
+ self.count = 0
+ self.real_size = 0
+ self.size = buffer_size
+
+ # device
+ self.device = device
+
+ def add(self, transition):
+ state, action, reward, next_state, done = transition
+
+ # store transition index with maximum priority in sum tree
+ self.tree.add(self.max_priority, self.count)
+
+ # store transition in the buffer
+ self.state[self.count] = torch.as_tensor(state)
+ self.action[self.count] = torch.as_tensor(action)
+ self.reward[self.count] = torch.as_tensor(reward)
+ self.next_state[self.count] = torch.as_tensor(next_state)
+ self.done[self.count] = torch.as_tensor(done)
+
+ # update counters
+ self.count = (self.count + 1) % self.size
+ self.real_size = min(self.size, self.real_size + 1)
+
+ def sample(self,batch_size):
+ assert self.real_size >= batch_size, "buffer contains less samples than batch size"
+
+ sample_idxs, tree_idxs = [], []
+ priorities = torch.empty(batch_size, 1, dtype=torch.float)
+
+ segment = self.tree.total_priority / batch_size
+ for i in range(batch_size):
+ a, b = segment * i, segment * (i + 1)
+
+ cumsum = random.uniform(a, b)
+ # sample_idx is a sample index in buffer, needed further to sample actual transitions
+ # tree_idx is a index of a sample in the tree, needed further to update priorities
+ tree_idx, priority, sample_idx = self.tree.get(cumsum)
+
+ priorities[i] = priority
+ tree_idxs.append(tree_idx)
+ sample_idxs.append(sample_idx)
+ """
+ Note:
+ The priorities stored in sumtree are all times alpha
+ """
+ probs = priorities / self.tree.total_priority
+ weights = (self.real_size * probs) ** -self.beta
+ weights = weights / weights.max()
+ batch = (
+ self.state[sample_idxs].to(self.device),
+ self.action[sample_idxs].to(self.device),
+ self.reward[sample_idxs].to(self.device),
+ self.next_state[sample_idxs].to(self.device),
+ self.done[sample_idxs].to(self.device)
+ )
+ return batch, weights, tree_idxs
+
+ def update_priorities(self,data_idxs,abs_td_errors):
+ """
+ when we get the TD-error, we should update the transition priority p_j
+ And update decay_window's transition priorities
+ """
+ if isinstance(abs_td_errors,torch.Tensor):
+ abs_td_errors = abs_td_errors.detach().cpu().numpy()
+
+ for data_idx, td_error in zip(data_idxs,abs_td_errors):
+ # first update the batch: p_j
+ # p_j <- max{|delta_j| + eps, pre_priority * p_j}
+ old_priority = self.pre_priority * self.tree.nodes[data_idx + self.tree.size - 1]
+ priority = (td_error + self.eps) ** self.alpha
+ priority = max(priority,old_priority)
+ self.tree.update(data_idx,priority)
+ self.max_priority = max(self.max_priority,priority)
+
+ # And then apply decay
+ if self.count >= self.decay_window:
+ # count points to the next position
+ # count means the idx in the buffer and number of transition
+ for i in reversed(range(self.decay_window)):
+ idx = (self.count - i - 1) % self.size
+ decayed_priority = priority * (self.decay_coff ** (i + 1))
+ tree_idx = idx + self.tree.size - 1
+ existing_priority = self.tree.nodes[tree_idx]
+ self.tree.update(idx,max(decayed_priority,existing_priority))