Commit
- Implemented Hopper class to model the valve actuation more accurately
Louis committed Aug 10, 2023
1 parent 3db4a38 commit dd6c577
Showing 5 changed files with 68 additions and 39 deletions.
4 changes: 1 addition & 3 deletions agent.py → DoubleDQNAgent.py
@@ -2,12 +2,10 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
from replay_buffer import ReplayBuffer
from DoubleDQNReplayBuffer import ReplayBuffer
import rocket



def DeepQNetwork(lr, num_actions, input_dims, fc1, fc2):
q_net = tf.keras.Sequential([
tf.keras.layers.Dense(fc1, input_shape=(input_dims,), activation='relu'),
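
The diff truncates the body of DeepQNetwork. Purely as an illustration, a Q-network builder with this signature might look like the sketch below; the second hidden layer, the linear Q-value head and the Adam/MSE compile step are assumptions, not the committed code.

```python
import tensorflow as tf

def DeepQNetwork(lr, num_actions, input_dims, fc1, fc2):
    # Two ReLU hidden layers followed by a linear head with one Q-value per action.
    q_net = tf.keras.Sequential([
        tf.keras.layers.Dense(fc1, input_shape=(input_dims,), activation='relu'),
        tf.keras.layers.Dense(fc2, activation='relu'),      # assumed second hidden layer
        tf.keras.layers.Dense(num_actions, activation=None)  # assumed linear Q-value output
    ])
    # Assumed training configuration; the actual optimizer and loss are not shown in the diff.
    q_net.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr), loss='mse')
    return q_net
```
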
File renamed without changes.
4 changes: 2 additions & 2 deletions EnvironmentTest.py
@@ -6,7 +6,7 @@
duration = 20
N = 200

env = rocket.HopperEnv()
env = environment.HopperEnv()

state = env.reset()
print(f"The initial state of the hopper is: {state}")
@@ -15,7 +15,7 @@
t = np.linspace(0, duration, N)

for i in enumerate(t):
new_state, reward, done = env.step(0.298, state)
new_state, reward, done = env.step(59, state)
state = new_state
S.append(new_state[0])

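
The test now passes an integer action index instead of the previous 0.298. Under the new Hopper model (see rocket.py below), the index selects the pressure downstream of the valve; the conversion shown here is only an illustration of the mapping used in HopperEnv.f.

```python
# Illustration only: the pressure mapping is taken from HopperEnv.f in this commit.
action = 59
p1 = action * 10000 + 100000   # pressure downstream of the valve in [Pa]
print(p1)                      # 690000 Pa, i.e. 6.9 bar passed to Hopper.thrust
```
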
4 changes: 2 additions & 2 deletions main.py
@@ -3,7 +3,7 @@
# to explore more actions in the beginning of the training session

import rocket
from agent import Agent
from DoubleDQNAgent import Agent

env = rocket.HopperEnv()

@@ -16,7 +16,7 @@
OBSERVATION_SPACE_SIZE = env.observation_space

FILE_TYPE = 'tf'
FILE = 'saved_networks/dqn_model9'
FILE = 'saved_networks/dqn_model293'

dqn_agent = Agent(lr=0.00075, discount_factor=0.95, num_actions=ACTION_SPACE_SIZE, epsilon=1.0, batch_size=64,
input_dims=OBSERVATION_SPACE_SIZE)
95 changes: 63 additions & 32 deletions rocket.py
@@ -1,11 +1,63 @@
import random
import numpy as np


class Hopper:
# Physical Properties of the hopper
m = 3 # Hopper weight in [kg]
MaxThrust = 50 # Maximum possible thrust in [N]
p_amb = 101300 # Ambient pressure in [Pa]

# Properties of working fluid Nitrogen - [N2] --> Assuming ideal gas behaviour
gamma = 1.4 # isentropic exponent [-]
p0 = 1100000 # Inlet pressure of Valve
T1 = 298 # Temperature after valve [K]
R_s = 296.8 # specific gas constant [J/kg*K]

# Properties of the Nozzle
d_th = 0.011 # Throat diameter in [m]
epsilon = 1.35 # Expansion ratio
A_th = np.pi * 0.25 * d_th**2 # Throat Area in [m^2]
A_e = epsilon * A_th # Exit Nozzle area in [m^2]

def m_dot(self, p1):
phi = np.sqrt(self.gamma * (2 / (self.gamma+1))**((self.gamma+1)/(self.gamma-1)))
m_dot = (p1 * self.A_th / np.sqrt(self.R_s * self.T1)) * phi
return m_dot

def p_e(self, p1):

p_e_new = 10000
p_e_old = 0

while abs(p_e_old - p_e_new) > 0.002:
p_e_old = p_e_new
phi1 = np.sqrt((self.gamma - 1) * 0.5) * (2 / (self.gamma+1))**((self.gamma+1)/(self.gamma-1))
phi2 = self.epsilon * np.sqrt(1 - (p_e_old/p1)**((self.gamma-1)/self.gamma))
p_e_new = p1 * (phi1 / phi2)**self.gamma

p_e = p_e_new
return p_e

def v_e(self, p1, p_e):
v_e = np.sqrt(2*self.gamma/(self.gamma-1) * self.R_s * self.T1 * (1 - (p_e/p1)**(self.gamma-1)/self.gamma))
return v_e

def thrust(self, p1):
p_e = self.p_e(p1)
thrust = self.m_dot(p1) * self.v_e(p1, p_e) + (p_e - self.p_amb) * self.A_e
return thrust


# rocket = Hopper()
#
# print(rocket.thrust(17 * 10000 + 100000))


class HopperEnv:
# Physical Properties of the Hopper & the Environment
m = 3
g = 9.81
MaxThrust = 50 # Maximum possible thrust in N
g = 9.81 # Gravitational acceleration of the Earth [m/s^2]
p_amb = 101300 # Ambient pressure in [Pa]

# Time and Step variable
duration = 20
@@ -16,44 +68,39 @@ class HopperEnv:

# Counters and Variables
episode_step = 0
margin = 0.05 # Allowed stationary offset of 5cm

# RL related constants for the environment
# MOVE_PENALTY = 0
BOUNDARY_PENALTY = 10
sigma_1 = 2
# HEIGHT_REWARD = 10
# HEIGHT_STEP_REWARD = 2

# Action Space and Observation Space Sizes
action_space = 100
observation_space = 3

# Initial and target state
# Initial state
x0 = 0
v0 = 0

# New random altitude goal
# Random target state (altitude)
xt = random.uniform(0, 8)

def thrust_eqn(self, action):
thrust = action/100 * self.MaxThrust
return thrust
# Use Hopper Equations in Hopper Environment
rocket = Hopper()

def f(self, t, y, action):

x = y[0]
v = y[1]

thrust = HopperEnv.thrust_eqn(self, action)
thrust = self.rocket.thrust(action * 10000 + 100000)

cd = 0.6
A = 0.1 * 0.3
rho = 1.225
F_aero = 0.5 * cd * rho * A * v * abs(v)

x_dot = v
v_dot = thrust / self.m - self.g - F_aero / self.m
v_dot = thrust / Hopper.m - self.g - F_aero / Hopper.m

return np.array([x_dot, v_dot])

@@ -98,7 +145,8 @@ def step(self, action, state):
y[0] = 10 - self.xt
y[1] = 0

y = np.concatenate((y, np.array([action]))) # By giving the action in the state we give the Network the current valve position
# By giving the action in the state we give the Network the current valve position
y = np.concatenate((y, np.array([action])))

# Define the reward
if abs(y[0]) < self.sigma_1:
@@ -113,25 +161,8 @@

reward = rew1 + penalty

# if self.xt + y[0] < 0:
# reward = -self.BOUNDARY_PENALTY
# elif self.xt + y[0] > 10:
# reward = -self.BOUNDARY_PENALTY
# elif abs(y[0]) < self.margin:
# reward = self.HEIGHT_REWARD
# elif abs(y[0]) < 0.5:
# reward = self.HEIGHT_STEP_REWARD
# else:
# reward = -self.MOVE_PENALTY

done = False
if self.episode_step >= 200:
done = True

return y, reward, done

# def render(self):
# img = self.get_image()
# img = img.resize((300, 300)) # resizing so we can see our agent in all its glory.
# cv2.imshow("image", np.array(img)) # show it!
# cv2.waitKey(1)
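
For reference, a minimal usage sketch of the new Hopper model, assuming rocket.py from this commit is importable: each discrete action is mapped to a downstream valve pressure via action * 10000 + 100000 (as in HopperEnv.f), and Hopper.thrust returns the resulting thrust in newtons. The action values below are illustrative picks from the 0-99 action space.

```python
import rocket

hopper = rocket.Hopper()

# Illustrative action indices: 17 matches the commented-out example in rocket.py,
# 59 matches EnvironmentTest.py, and 99 is the top of the action space.
for action in (17, 59, 99):
    p1 = action * 10000 + 100000  # pressure downstream of the valve in [Pa]
    print(f"action={action:2d}  p1={p1/1e5:.1f} bar  thrust={hopper.thrust(p1):.2f} N")
```
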
