# Patch summary
# -------------
# * agent.py -> DoubleDQNAgent.py and replay_buffer.py -> DoubleDQNReplayBuffer.py
#   (renames); the agent drops `import random` and imports ReplayBuffer from the
#   renamed module.
# * EnvironmentTest.py: builds the environment through `environment.HopperEnv()`
#   and steps it with action 59 instead of 0.298.
# * main.py: imports Agent from DoubleDQNAgent; FILE now names dqn_model293.
# * rocket.py: adds class `Hopper` (nozzle model: mass flow, exit pressure, exit
#   velocity, thrust). HopperEnv.f now gets thrust from
#   `self.rocket.thrust(action * 10000 + 100000)` and reads the mass from
#   `Hopper.m`; `thrust_eqn`, `margin` and the commented-out code are removed.
#
# Review fixes folded into the rocket.py hunk (added lines only, so every
# hunk's line counts are unchanged):
# 1. Hopper.p_e: the (2/(gamma+1))**((gamma+1)/(gamma-1)) factor is moved inside
#    np.sqrt(), matching the form already used in Hopper.m_dot. With it outside
#    the root the area-ratio coefficient is too small (0.1498 instead of 0.2588
#    for gamma = 1.4).
# 2. Hopper.v_e: the pressure-ratio exponent is parenthesised as
#    ((gamma-1)/gamma). Without the parentheses Python evaluates
#    ((p_e/p1)**(gamma-1)) / gamma.
# 3. Comment on Hopper.m: "weight in [kg]" -> "mass in [kg]".
#
# NOTE(review): EnvironmentTest.py now references `environment`, but this patch
# does not touch that file's imports -- confirm the module is imported there.
# NOTE(review): the line breaks of this patch were reconstructed; blank context
# lines were re-inserted to satisfy the @@ counts, so verify them against the
# target tree before applying. The rocket.py `index` hash predates fixes 1-3.
diff --git a/agent.py b/DoubleDQNAgent.py
similarity index 99%
rename from agent.py
rename to DoubleDQNAgent.py
index a991da5..b419f9c 100644
--- a/agent.py
+++ b/DoubleDQNAgent.py
@@ -2,12 +2,10 @@
 import numpy as np
 import matplotlib.pyplot as plt
 import pandas as pd
-import random
-from replay_buffer import ReplayBuffer
+from DoubleDQNReplayBuffer import ReplayBuffer
 import rocket
 
 
-
 def DeepQNetwork(lr, num_actions, input_dims, fc1, fc2):
     q_net = tf.keras.Sequential([
         tf.keras.layers.Dense(fc1, input_shape=(input_dims,), activation='relu'),
diff --git a/replay_buffer.py b/DoubleDQNReplayBuffer.py
similarity index 100%
rename from replay_buffer.py
rename to DoubleDQNReplayBuffer.py
diff --git a/EnvironmentTest.py b/EnvironmentTest.py
index 4a7a1dd..a989952 100644
--- a/EnvironmentTest.py
+++ b/EnvironmentTest.py
@@ -6,7 +6,7 @@ duration = 20
 N = 200
 
 
-env = rocket.HopperEnv()
+env = environment.HopperEnv()
 state = env.reset()
 
 print(f"The initial state of the hopper is: {state}")
@@ -15,7 +15,7 @@ t = np.linspace(0, duration, N)
 
 
 for i in enumerate(t):
-    new_state, reward, done = env.step(0.298, state)
+    new_state, reward, done = env.step(59, state)
     state = new_state
 
     S.append(new_state[0])
diff --git a/main.py b/main.py
index 4570841..6fa748d 100644
--- a/main.py
+++ b/main.py
@@ -3,7 +3,7 @@
 # to explore more actions in the beginning of the training session
 
 import rocket
-from agent import Agent
+from DoubleDQNAgent import Agent
 
 env = rocket.HopperEnv()
 
@@ -16,7 +16,7 @@ OBSERVATION_SPACE_SIZE = env.observation_space
 
 
 FILE_TYPE = 'tf'
-FILE = 'saved_networks/dqn_model9'
+FILE = 'saved_networks/dqn_model293'
 
 dqn_agent = Agent(lr=0.00075, discount_factor=0.95, num_actions=ACTION_SPACE_SIZE,
                   epsilon=1.0, batch_size=64, input_dims=OBSERVATION_SPACE_SIZE)
diff --git a/rocket.py b/rocket.py
index a06da2b..e38c25e 100644
--- a/rocket.py
+++ b/rocket.py
@@ -1,11 +1,63 @@
 import random
 import numpy as np
 
+
+class Hopper:
+    # Physical Properties of the hopper
+    m = 3  # Hopper mass in [kg]
+    MaxThrust = 50  # Maximum possible thrust in [N]
+    p_amb = 101300  # Ambient pressure in [Pa]
+
+    # Properties of working fluid Nitrogen - [N2] --> Assuming ideal gas behaviour
+    gamma = 1.4  # isentropic exponent [-]
+    p0 = 1100000  # Inlet pressure of Valve
+    T1 = 298  # Temperature after valve [K]
+    R_s = 296.8  # specific gas constant [J/kg*K]
+
+    # Properties of the Nozzle
+    d_th = 0.011  # Throat diameter in [m]
+    epsilon = 1.35  # Expansion ratio
+    A_th = np.pi * 0.25 * d_th**2  # Throat Area in [m^2]
+    A_e = epsilon * A_th  # Exit Nozzle area in [m^2]
+
+    def m_dot(self, p1):
+        phi = np.sqrt(self.gamma * (2 / (self.gamma+1))**((self.gamma+1)/(self.gamma-1)))
+        m_dot = (p1 * self.A_th / np.sqrt(self.R_s * self.T1)) * phi
+        return m_dot
+
+    def p_e(self, p1):
+
+        p_e_new = 10000
+        p_e_old = 0
+
+        while abs(p_e_old - p_e_new) > 0.002:
+            p_e_old = p_e_new
+            phi1 = np.sqrt((self.gamma - 1) * 0.5 * (2 / (self.gamma+1))**((self.gamma+1)/(self.gamma-1)))
+            phi2 = self.epsilon * np.sqrt(1 - (p_e_old/p1)**((self.gamma-1)/self.gamma))
+            p_e_new = p1 * (phi1 / phi2)**self.gamma
+
+        p_e = p_e_new
+        return p_e
+
+    def v_e(self, p1, p_e):
+        v_e = np.sqrt(2*self.gamma/(self.gamma-1) * self.R_s * self.T1 * (1 - (p_e/p1)**((self.gamma-1)/self.gamma)))
+        return v_e
+
+    def thrust(self, p1):
+        p_e = self.p_e(p1)
+        thrust = self.m_dot(p1) * self.v_e(p1, p_e) + (p_e - self.p_amb) * self.A_e
+        return thrust
+
+
+# rocket = Hopper()
+#
+# print(rocket.thrust(17 * 10000 + 100000))
+
+
 class HopperEnv:
     # Physical Properties of the Hopper & the Environment
-    m = 3
-    g = 9.81
-    MaxThrust = 50  # Maximum possible thrust in N
+    g = 9.81  # Gravitational acceleration of the Earth [m/s^2]
+    p_amb = 101300  # Ambient pressure in [Pa]
 
     # Time and Step variable
     duration = 20
@@ -16,36 +68,31 @@ class HopperEnv:
 
     # Counters and Variables
     episode_step = 0
-    margin = 0.05  # Allowed stationary offset of 5cm
 
     # RL related constants for the environment
-    # MOVE_PENALTY = 0
     BOUNDARY_PENALTY = 10
     sigma_1 = 2
 
-    # HEIGHT_REWARD = 10
-    # HEIGHT_STEP_REWARD = 2
 
     # Action Space and Observation Space Sizes
     action_space = 100
     observation_space = 3
 
-    # Initial and target state
+    # Initial state
     x0 = 0
     v0 = 0
 
-    # New random altitude goal
+    # Random target state (altitude)
     xt = random.uniform(0, 8)
 
-    def thrust_eqn(self, action):
-        thrust = action/100 * self.MaxThrust
-        return thrust
+    # Use Hopper Equations in Hopper Environment
+    rocket = Hopper()
 
     def f(self, t, y, action):
         x = y[0]
         v = y[1]
 
-        thrust = HopperEnv.thrust_eqn(self, action)
+        thrust = self.rocket.thrust(action * 10000 + 100000)
 
         cd = 0.6
         A = 0.1 * 0.3
@@ -53,7 +100,7 @@ def f(self, t, y, action):
         F_aero = 0.5 * cd * rho * A * v * abs(v)
 
         x_dot = v
-        v_dot = thrust / self.m - self.g - F_aero / self.m
+        v_dot = thrust / Hopper.m - self.g - F_aero / Hopper.m
 
         return np.array([x_dot, v_dot])
 
@@ -98,7 +145,8 @@ def step(self, action, state):
             y[0] = 10 - self.xt
             y[1] = 0
 
-        y = np.concatenate((y, np.array([action])))  # By giving the action in the state we give the Network the current valve position
+        # By giving the action in the state we give the Network the current valve position
+        y = np.concatenate((y, np.array([action])))
 
         # Define the reward
         if abs(y[0]) < self.sigma_1:
@@ -113,25 +161,8 @@ def step(self, action, state):
 
         reward = rew1 + penalty
 
-        # if self.xt + y[0] < 0:
-        #     reward = -self.BOUNDARY_PENALTY
-        # elif self.xt + y[0] > 10:
-        #     reward = -self.BOUNDARY_PENALTY
-        # elif abs(y[0]) < self.margin:
-        #     reward = self.HEIGHT_REWARD
-        # elif abs(y[0]) < 0.5:
-        #     reward = self.HEIGHT_STEP_REWARD
-        # else:
-        #     reward = -self.MOVE_PENALTY
-
         done = False
         if self.episode_step >= 200:
             done = True
 
         return y, reward, done
-
-    # def render(self):
-    #     img = self.get_image()
-    #     img = img.resize((300, 300))  # resizing so we can see our agent in all its glory.
-    #     cv2.imshow("image", np.array(img))  # show it!
-    #     cv2.waitKey(1)