From dd6c5776f0cffe6d0642d90b9c1ecff95dc4856c Mon Sep 17 00:00:00 2001
From: Louis
Date: Thu, 10 Aug 2023 13:38:18 +0200
Subject: [PATCH] Implemented Hopper class to model the valve actuation more
 accurately

---
 agent.py => DoubleDQNAgent.py                |  4 +-
 replay_buffer.py => DoubleDQNReplayBuffer.py |  0
 EnvironmentTest.py                           |  4 +-
 main.py                                      |  4 +-
 rocket.py                                    | 95 +++++++++++++-------
 5 files changed, 68 insertions(+), 39 deletions(-)
 rename agent.py => DoubleDQNAgent.py (99%)
 rename replay_buffer.py => DoubleDQNReplayBuffer.py (100%)

diff --git a/agent.py b/DoubleDQNAgent.py
similarity index 99%
rename from agent.py
rename to DoubleDQNAgent.py
index a991da5..b419f9c 100644
--- a/agent.py
+++ b/DoubleDQNAgent.py
@@ -2,12 +2,10 @@
 import numpy as np
 import matplotlib.pyplot as plt
 import pandas as pd
-import random
-from replay_buffer import ReplayBuffer
+from DoubleDQNReplayBuffer import ReplayBuffer
 import rocket
 
 
-
 def DeepQNetwork(lr, num_actions, input_dims, fc1, fc2):
     q_net = tf.keras.Sequential([
         tf.keras.layers.Dense(fc1, input_shape=(input_dims,), activation='relu'),
diff --git a/replay_buffer.py b/DoubleDQNReplayBuffer.py
similarity index 100%
rename from replay_buffer.py
rename to DoubleDQNReplayBuffer.py
diff --git a/EnvironmentTest.py b/EnvironmentTest.py
index 4a7a1dd..a989952 100644
--- a/EnvironmentTest.py
+++ b/EnvironmentTest.py
@@ -6,7 +6,7 @@
 duration = 20
 N = 200
 
-env = rocket.HopperEnv()
+env = environment.HopperEnv()
 state = env.reset()
 
 print(f"The initial state of the hopper is: {state}")
@@ -15,7 +15,7 @@
 t = np.linspace(0, duration, N)
 
 for i in enumerate(t):
-    new_state, reward, done = env.step(0.298, state)
+    new_state, reward, done = env.step(59, state)
     state = new_state
 
     S.append(new_state[0])
diff --git a/main.py b/main.py
index 4570841..6fa748d 100644
--- a/main.py
+++ b/main.py
@@ -3,7 +3,7 @@
 # to explore more actions in the beginning of the training session
 
 import rocket
-from agent import Agent
+from DoubleDQNAgent import Agent
 
 env = rocket.HopperEnv()
 
@@ -16,7 +16,7 @@
 OBSERVATION_SPACE_SIZE = env.observation_space
 
 FILE_TYPE = 'tf'
-FILE = 'saved_networks/dqn_model9'
+FILE = 'saved_networks/dqn_model293'
 
 dqn_agent = Agent(lr=0.00075, discount_factor=0.95, num_actions=ACTION_SPACE_SIZE, epsilon=1.0, batch_size=64,
                   input_dims=OBSERVATION_SPACE_SIZE)
diff --git a/rocket.py b/rocket.py
index a06da2b..e38c25e 100644
--- a/rocket.py
+++ b/rocket.py
@@ -1,11 +1,63 @@
 import random
 import numpy as np
 
+
+class Hopper:
+    # Physical Properties of the hopper
+    m = 3                # Hopper mass in [kg]
+    MaxThrust = 50       # Maximum possible thrust in [N]
+    p_amb = 101300       # Ambient pressure in [Pa]
+
+    # Properties of working fluid Nitrogen - [N2] --> Assuming ideal gas behaviour
+    gamma = 1.4          # isentropic exponent [-]
+    p0 = 1100000         # Inlet pressure of valve in [Pa]
+    T1 = 298             # Temperature after valve in [K]
+    R_s = 296.8          # specific gas constant in [J/(kg*K)]
+
+    # Properties of the Nozzle
+    d_th = 0.011                     # Throat diameter in [m]
+    epsilon = 1.35                   # Expansion ratio [-]
+    A_th = np.pi * 0.25 * d_th**2    # Throat Area in [m^2]
+    A_e = epsilon * A_th             # Exit Nozzle area in [m^2]
+
+    def m_dot(self, p1):
+        phi = np.sqrt(self.gamma * (2 / (self.gamma+1))**((self.gamma+1)/(self.gamma-1)))
+        m_dot = (p1 * self.A_th / np.sqrt(self.R_s * self.T1)) * phi
+        return m_dot
+
+    def p_e(self, p1):
+
+        p_e_new = 10000
+        p_e_old = 0
+
+        while abs(p_e_old - p_e_new) > 0.002:
+            p_e_old = p_e_new
+            phi1 = np.sqrt((self.gamma - 1) * 0.5 * (2 / (self.gamma+1))**((self.gamma+1)/(self.gamma-1)))
+            phi2 = self.epsilon * np.sqrt(1 - (p_e_old/p1)**((self.gamma-1)/self.gamma))
+            p_e_new = p1 * (phi1 / phi2)**self.gamma
+
+        p_e = p_e_new
+        return p_e
+
+    def v_e(self, p1, p_e):
+        v_e = np.sqrt(2*self.gamma/(self.gamma-1) * self.R_s * self.T1 * (1 - (p_e/p1)**((self.gamma-1)/self.gamma)))
+        return v_e
+
+    def thrust(self, p1):
+        p_e = self.p_e(p1)
+        thrust = self.m_dot(p1) * self.v_e(p1, p_e) + (p_e - self.p_amb) * self.A_e
+        return thrust
+
+
+# rocket = Hopper()
+#
+# print(rocket.thrust(17 * 10000 + 100000))
+
+
 class HopperEnv:
     # Physical Properties of the Hopper & the Environment
-    m = 3
-    g = 9.81
-    MaxThrust = 50  # Maximum possible thrust in N
+    g = 9.81             # Gravitational acceleration of the Earth [m/s^2]
+    p_amb = 101300       # Ambient pressure in [Pa]
 
     # Time and Step variable
     duration = 20
@@ -16,36 +68,31 @@ class HopperEnv:
 
     # Counters and Variables
    episode_step = 0
-    margin = 0.05  # Allowed stationary offset of 5cm
 
     # RL related constants for the environment
-    # MOVE_PENALTY = 0
     BOUNDARY_PENALTY = 10
     sigma_1 = 2
-    # HEIGHT_REWARD = 10
-    # HEIGHT_STEP_REWARD = 2
 
     # Action Space and Observation Space Sizes
     action_space = 100
     observation_space = 3
 
-    # Initial and target state
+    # Initial state
     x0 = 0
     v0 = 0
 
-    # New random altitude goal
+    # Random target state (altitude)
     xt = random.uniform(0, 8)
 
-    def thrust_eqn(self, action):
-        thrust = action/100 * self.MaxThrust
-        return thrust
+    # Use Hopper Equations in Hopper Environment
+    rocket = Hopper()
 
     def f(self, t, y, action):
         x = y[0]
         v = y[1]
-        thrust = HopperEnv.thrust_eqn(self, action)
+        thrust = self.rocket.thrust(action * 10000 + 100000)
 
         cd = 0.6
         A = 0.1 * 0.3
@@ -53,7 +100,7 @@ def f(self, t, y, action):
         F_aero = 0.5 * cd * rho * A * v * abs(v)
 
         x_dot = v
-        v_dot = thrust / self.m - self.g - F_aero / self.m
+        v_dot = thrust / Hopper.m - self.g - F_aero / Hopper.m
 
         return np.array([x_dot, v_dot])
@@ -98,7 +145,8 @@ def step(self, action, state):
             y[0] = 10 - self.xt
             y[1] = 0
 
-        y = np.concatenate((y, np.array([action])))  # By giving the action in the state we give the Network the current valve position
+        # By giving the action in the state we give the Network the current valve position
+        y = np.concatenate((y, np.array([action])))
 
         # Define the reward
         if abs(y[0]) < self.sigma_1:
@@ -113,25 +161,8 @@ def step(self, action, state):
 
         reward = rew1 + penalty
 
-        # if self.xt + y[0] < 0:
-        #     reward = -self.BOUNDARY_PENALTY
-        # elif self.xt + y[0] > 10:
-        #     reward = -self.BOUNDARY_PENALTY
-        # elif abs(y[0]) < self.margin:
-        #     reward = self.HEIGHT_REWARD
-        # elif abs(y[0]) < 0.5:
-        #     reward = self.HEIGHT_STEP_REWARD
-        # else:
-        #     reward = -self.MOVE_PENALTY
-
         done = False
         if self.episode_step >= 200:
             done = True
 
         return y, reward, done
-
-    # def render(self):
-    #     img = self.get_image()
-    #     img = img.resize((300, 300))  # resizing so we can see our agent in all its glory.
-    #     cv2.imshow("image", np.array(img))  # show it!
-    #     cv2.waitKey(1)
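
A quick way to sanity-check the new thrust model is to call Hopper.thrust() with the same action-to-pressure mapping that HopperEnv.f() now uses, i.e. p1 = action * 10000 + 100000 Pa for valve actions 0..99. The sketch below is not part of the patch; it assumes rocket.py from this commit is importable, and the chosen action values and print format are illustrative only.

    # Illustrative sketch (assumes rocket.py from this commit is on the path)
    import rocket

    hopper = rocket.Hopper()

    # HopperEnv.f() maps the discrete valve action (0..99) to a chamber
    # pressure of action * 10000 + 100000 Pa before calling Hopper.thrust().
    for action in (0, 25, 50, 75, 99):
        p1 = action * 10000 + 100000      # chamber pressure in [Pa]
        thrust = hopper.thrust(p1)        # resulting thrust in [N]
        print(f"action={action:3d}  p1={p1:7d} Pa  thrust={thrust:7.2f} N")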