Commit: Started Runge Kutta comparison to analytical formula
Louis committed Aug 10, 2023
1 parent dd6c577 commit df071bf
Showing 4 changed files with 45 additions and 18 deletions.
14 changes: 7 additions & 7 deletions DoubleDQNAgent.py
@@ -3,7 +3,7 @@
import matplotlib.pyplot as plt
import pandas as pd
from DoubleDQNReplayBuffer import ReplayBuffer
-import rocket
+import rocketDQN


def DeepQNetwork(lr, num_actions, input_dims, fc1, fc2):
@@ -80,7 +80,7 @@ def train(self):
def train_model(self, env, num_episodes, graph):

scores, episodes, avg_scores, obj = [], [], [], []
-goal = 200
+goal = 100
f = 0
txt = open("saved_networks.txt", "w")

@@ -103,7 +103,7 @@ def train_model(self, env, num_episodes, graph):
avg_scores.append(avg_score)
print("Episode {0}/{1}, Score: {2} ({3}), AVG Score: {4}".format(i, num_episodes, score, self.epsilon,
avg_score))
-if avg_score >= 0.0 and score >= 0.0:
+if avg_score >= 50.0 and score >= 100.0:
self.q_net.save(("saved_networks/dqn_model{0}".format(f)))
self.q_net.save_weights(("saved_networks/dqn_model{0}/net_weights{0}.h5".format(f)))
txt.write("Save {0} - Episode {1}/{2}, Score: {3} ({4}), AVG Score: {5}\n".format(f, i, num_episodes,
@@ -120,7 +120,7 @@ def train_model(self, env, num_episodes, graph):
plt.plot('x', 'Average Score', data=df, marker='', color='orange', linewidth=2, linestyle='dashed',
label='AverageScore')
plt.plot('x', 'Solved Requirement', data=df, marker='', color='red', linewidth=2, linestyle='dashed',
-label='Solved Requirement')
+label='Target Reward')
plt.legend()
plt.savefig('Hopper1D_Train.png')

@@ -132,7 +132,7 @@ def test(self, env, num_episodes, file_type, file, graph):
self.q_net.load_weights(file)
self.epsilon = 0.0
scores, episodes, avg_scores, obj = [], [], [], []
-goal = 0
+goal = 100
score = 0.0
for i in range(num_episodes):
state = env.reset()
@@ -163,7 +163,7 @@ def test(self, env, num_episodes, file_type, file, graph):
avg_score))

if i % 10 == 0:
-plt.plot(np.linspace(0, rocket.HopperEnv.duration, np.size(S1)), S1)
+plt.plot(np.linspace(0, rocketDQN.HopperEnv.duration, np.size(S1)), S1)
plt.savefig('TestRuns.png')
plt.close()
if graph:
@@ -173,7 +173,7 @@ def test(self, env, num_episodes, file_type, file, graph):
plt.plot('x', 'Average Score', data=df, marker='', color='orange', linewidth=2, linestyle='dashed',
label='AverageScore')
plt.plot('x', 'Solved Requirement', data=df, marker='', color='red', linewidth=2, linestyle='dashed',
-label='Solved Requirement')
+label='Target Reward')
plt.legend()
plt.savefig('Hopper1D_Test.png')

31 changes: 29 additions & 2 deletions EnvironmentTest.py
@@ -1,12 +1,12 @@
import numpy as np
import matplotlib.pyplot as plt
-import rocket
+import rocketDQN

# Test of the ODE and step() + reset() fcn
duration = 20
N = 200

-env = environment.HopperEnv()
+env = rocketDQN.HopperEnv()

state = env.reset()
print(f"The initial state of the hopper is: {state}")
@@ -22,3 +22,30 @@
print(S[199])
plt.plot(t, S)
plt.show()

# Comparison plot of Runge-Kutta solver with various step sizes in comparison to the analytical solution
step_size = 0.1
duration = 20
N = duration/step_size
env = rocketDQN.HopperEnv()

state = env.reset()
x0 = state[0]
print(f"The initial state of the hopper is: {state}")

S = []
S_kutta = []
t_kutta = np.linspace(0, duration, int(N))

for i in enumerate(t_kutta):
new_state, reward, done = env.step(0, state)
state = new_state
x = -0.5 * 9.81 * t_kutta[i[0]]**2 + x0
S.append(x)
S_kutta.append(new_state[0])

plt.plot(t_kutta, S_kutta)
plt.plot(t_kutta, S)
print(f"Kutta last value: {S_kutta[-1]}")
print(f"Analytical last value: {S[-1]}")
plt.show()
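The new block above checks the environment's Runge-Kutta integrator against the closed-form free-fall solution x(t) = x0 - 0.5*g*t^2. As a standalone sanity check, the sketch below integrates the same unpowered dynamics with a classic fourth-order Runge-Kutta step and prints the largest deviation from the analytical curve; it is illustrative only: the names freefall and rk4_step, the initial altitude, and the step size are assumptions, not code from this repository.

import numpy as np

G = 9.81  # gravitational acceleration in m/s^2

def freefall(t, y):
    # State y = [altitude, velocity]; unpowered 1-D free fall.
    return np.array([y[1], -G])

def rk4_step(f, y, t, h):
    # One classic fourth-order Runge-Kutta step of size h.
    k1 = f(t, y)
    k2 = f(t + h / 2, y + h / 2 * k1)
    k3 = f(t + h / 2, y + h / 2 * k2)
    k4 = f(t + h, y + h * k3)
    return y + h / 6 * (k1 + 2 * k2 + 2 * k3 + k4)

h = 0.1                   # step size, matching the committed test
duration = 20.0
x0 = 10.0                 # assumed initial altitude
y = np.array([x0, 0.0])

errors = []
for t in np.arange(0.0, duration + h, h):
    analytic = x0 - 0.5 * G * t**2
    errors.append(abs(y[0] - analytic))
    y = rk4_step(freefall, y, t, h)

print(f"max |RK4 - analytic| = {max(errors):.2e}")

Because the acceleration is constant, the RK4 update reproduces the quadratic trajectory to rounding error, so any visible gap in the committed comparison plot would point at the environment's step handling (for instance the boundary clamping changed in rocketDQN.py below) rather than at the integrator itself.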
8 changes: 4 additions & 4 deletions main.py
@@ -2,21 +2,21 @@
# batch size instead of just one experience. Epsilon greedy algorithm is also implemented to allow the network
# to explore more actions in the beginning of the training session

-import rocket
+import rocketDQN
from DoubleDQNAgent import Agent

-env = rocket.HopperEnv()
+env = rocketDQN.HopperEnv()

TRAIN = 0
TEST = 1
-EPISODES = 100
+EPISODES = 50
GRAPH = True

ACTION_SPACE_SIZE = env.action_space
OBSERVATION_SPACE_SIZE = env.observation_space

FILE_TYPE = 'tf'
-FILE = 'saved_networks/dqn_model293'
+FILE = 'saved_networks/dqn_model281'

dqn_agent = Agent(lr=0.00075, discount_factor=0.95, num_actions=ACTION_SPACE_SIZE, epsilon=1.0, batch_size=64,
input_dims=OBSERVATION_SPACE_SIZE)
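The rest of main.py is collapsed in this view. Going by the TRAIN/TEST flags above and the method signatures visible in the DoubleDQNAgent.py diff, the dispatch presumably looks something like the following sketch; this is a guess at the collapsed code, not the committed content.

# Hypothetical continuation of main.py: the visible hunk only defines the flags
# and constructs the Agent; the train_model()/test() signatures are taken from
# the DoubleDQNAgent.py diff above.
if TRAIN:
    dqn_agent.train_model(env, EPISODES, GRAPH)
if TEST:
    dqn_agent.test(env, EPISODES, FILE_TYPE, FILE, GRAPH)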
10 changes: 5 additions & 5 deletions rocket.py → rocketDQN.py
@@ -82,7 +82,7 @@ class HopperEnv:
v0 = 0

# Random target state (altitude)
-xt = random.uniform(0, 8)
+xt = random.uniform(0.5, 4.5)

# Use Hopper Equations in Hopper Environment
rocket = Hopper()
@@ -116,7 +116,7 @@ def ode45_step(self, f, y, t, h, action):

def reset(self):
# New random altitude goal
-self.xt = random.uniform(0, 8)
+self.xt = random.uniform(0.5, 4.5)

# Calculate the new initial state vector
s0 = [self.x0 - self.xt, self.v0]
@@ -141,8 +141,8 @@ def step(self, action, state):
if y[0] + self.xt < 0:
y[0] = self.x0 - self.xt
y[1] = 0
-elif y[0] + self.xt > 10:
-y[0] = 10 - self.xt
+elif y[0] + self.xt > 5:
+y[0] = 5 - self.xt
y[1] = 0

# By giving the action in the state we give the Network the current valve position
@@ -154,7 +154,7 @@ def step(self, action, state):
else:
rew1 = 0

-if y[0] + self.xt == 10 or y[0] + self.xt == 0:
+if y[0] + self.xt == 5 or y[0] + self.xt == 0:
penalty = -self.BOUNDARY_PENALTY
else:
penalty = 0
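The hunks above shrink the flight corridor: the random target altitude now lies in [0.5, 4.5] m and the hopper is clamped to a 5 m ceiling instead of 10 m, with the velocity zeroed and a boundary penalty applied whenever a wall is touched. A minimal standalone restatement of the updated clamp-and-penalise logic, with an assumed penalty constant (the real value lives in HopperEnv):

BOUNDARY_PENALTY = 10.0  # assumed magnitude; the actual constant is defined in HopperEnv

def clamp_and_penalise(y, xt, x0, ceiling=5.0):
    # y = [x - xt, v] is the state relative to the target altitude xt,
    # mirroring the updated step() logic in rocketDQN.py.
    if y[0] + xt < 0:            # below the ground: reset to the launch altitude
        y[0] = x0 - xt
        y[1] = 0.0
    elif y[0] + xt > ceiling:    # above the new 5 m ceiling: pin to the ceiling
        y[0] = ceiling - xt
        y[1] = 0.0

    at_wall = (y[0] + xt == ceiling) or (y[0] + xt == 0)
    penalty = -BOUNDARY_PENALTY if at_wall else 0.0
    return y, penalty

Halving the corridor while keeping the targets between 0.5 m and 4.5 m tightens the exploration space, which presumably makes the boundary penalty and the altitude reward easier for the agent to learn.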
