Commit
- Implemented Hopper class to model the valve actuation more accurately
Louis committed Aug 10, 2023
1 parent 3db4a38 commit dd6c577
Showing 5 changed files with 68 additions and 39 deletions.
4 changes: 1 addition & 3 deletions agent.py → DoubleDQNAgent.py
@@ -2,12 +2,10 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
from replay_buffer import ReplayBuffer
from DoubleDQNReplayBuffer import ReplayBuffer
import rocket



def DeepQNetwork(lr, num_actions, input_dims, fc1, fc2):
q_net = tf.keras.Sequential([
tf.keras.layers.Dense(fc1, input_shape=(input_dims,), activation='relu'),
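
The diff truncates the body of DeepQNetwork. Purely as an illustration, a Q-network builder with this signature might look like the sketch below; the second hidden layer, the linear Q-value head and the Adam/MSE compile step are assumptions, not the committed code.

```python
import tensorflow as tf

def DeepQNetwork(lr, num_actions, input_dims, fc1, fc2):
    # Two ReLU hidden layers followed by a linear head with one Q-value per action.
    q_net = tf.keras.Sequential([
        tf.keras.layers.Dense(fc1, input_shape=(input_dims,), activation='relu'),
        tf.keras.layers.Dense(fc2, activation='relu'),      # assumed second hidden layer
        tf.keras.layers.Dense(num_actions, activation=None)  # assumed linear Q-value output
    ])
    # Assumed training configuration; the actual optimizer and loss are not shown in the diff.
    q_net.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr), loss='mse')
    return q_net
```
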
File renamed without changes.
4 changes: 2 additions & 2 deletions EnvironmentTest.py
@@ -6,7 +6,7 @@
duration = 20
N = 200

env = rocket.HopperEnv()
env = environment.HopperEnv()

state = env.reset()
print(f"The initial state of the hopper is: {state}")
@@ -15,7 +15,7 @@
t = np.linspace(0, duration, N)

for i in enumerate(t):
new_state, reward, done = env.step(0.298, state)
new_state, reward, done = env.step(59, state)
state = new_state
S.append(new_state[0])

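
The test now passes an integer action index instead of the previous 0.298. Under the new Hopper model (see rocket.py below), the index selects the pressure downstream of the valve; the conversion shown here is only an illustration of the mapping used in HopperEnv.f.

```python
# Illustration only: the pressure mapping is taken from HopperEnv.f in this commit.
action = 59
p1 = action * 10000 + 100000   # pressure downstream of the valve in [Pa]
print(p1)                      # 690000 Pa, i.e. 6.9 bar passed to Hopper.thrust
```
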
4 changes: 2 additions & 2 deletions main.py
@@ -3,7 +3,7 @@
# to explore more actions in the beginning of the training session

import rocket
from agent import Agent
from DoubleDQNAgent import Agent

env = rocket.HopperEnv()

@@ -16,7 +16,7 @@
OBSERVATION_SPACE_SIZE = env.observation_space

FILE_TYPE = 'tf'
FILE = 'saved_networks/dqn_model9'
FILE = 'saved_networks/dqn_model293'

dqn_agent = Agent(lr=0.00075, discount_factor=0.95, num_actions=ACTION_SPACE_SIZE, epsilon=1.0, batch_size=64,
input_dims=OBSERVATION_SPACE_SIZE)
95 changes: 63 additions & 32 deletions rocket.py
@@ -1,11 +1,63 @@
import random
import numpy as np


class Hopper:
# Physical Properties of the hopper
m = 3 # Hopper weight in [kg]
MaxThrust = 50 # Maximum possible thrust in [N]
p_amb = 101300 # Ambient pressure in [Pa]

# Properties of working fluid Nitrogen - [N2] --> Assuming ideal gas behaviour
gamma = 1.4 # isentropic exponent [-]
p0 = 1100000 # Inlet pressure of Valve
T1 = 298 # Temperature after valve [K]
R_s = 296.8 # specific gas constant [J/kg*K]

# Properties of the Nozzle
d_th = 0.011 # Throat diameter in [m]
epsilon = 1.35 # Expansion ratio
A_th = np.pi * 0.25 * d_th**2 # Throat Area in [m^2]
A_e = epsilon * A_th # Exit Nozzle area in [m^2]

def m_dot(self, p1):
phi = np.sqrt(self.gamma * (2 / (self.gamma+1))**((self.gamma+1)/(self.gamma-1)))
m_dot = (p1 * self.A_th / np.sqrt(self.R_s * self.T1)) * phi
return m_dot

def p_e(self, p1):

p_e_new = 10000
p_e_old = 0

while abs(p_e_old - p_e_new) > 0.002:
p_e_old = p_e_new
phi1 = np.sqrt((self.gamma - 1) * 0.5) * (2 / (self.gamma+1))**((self.gamma+1)/(self.gamma-1))
phi2 = self.epsilon * np.sqrt(1 - (p_e_old/p1)**((self.gamma-1)/self.gamma))
p_e_new = p1 * (phi1 / phi2)**self.gamma

p_e = p_e_new
return p_e

def v_e(self, p1, p_e):
v_e = np.sqrt(2*self.gamma/(self.gamma-1) * self.R_s * self.T1 * (1 - (p_e/p1)**(self.gamma-1)/self.gamma))
return v_e

def thrust(self, p1):
p_e = self.p_e(p1)
thrust = self.m_dot(p1) * self.v_e(p1, p_e) + (p_e - self.p_amb) * self.A_e
return thrust


# rocket = Hopper()
#
# print(rocket.thrust(17 * 10000 + 100000))


class HopperEnv:
# Physical Properties of the Hopper & the Environment
m = 3
g = 9.81
MaxThrust = 50 # Maximum possible thrust in N
g = 9.81 # Gravitational acceleration of the Earth [m/s^2]
p_amb = 101300 # Ambient pressure in [Pa]

# Time and Step variable
duration = 20
@@ -16,44 +68,39 @@ class HopperEnv:

# Counters and Variables
episode_step = 0
margin = 0.05 # Allowed stationary offset of 5cm

# RL related constants for the environment
# MOVE_PENALTY = 0
BOUNDARY_PENALTY = 10
sigma_1 = 2
# HEIGHT_REWARD = 10
# HEIGHT_STEP_REWARD = 2

# Action Space and Observation Space Sizes
action_space = 100
observation_space = 3

# Initial and target state
# Initial state
x0 = 0
v0 = 0

# New random altitude goal
# Random target state (altitude)
xt = random.uniform(0, 8)

def thrust_eqn(self, action):
thrust = action/100 * self.MaxThrust
return thrust
# Use Hopper Equations in Hopper Environment
rocket = Hopper()

def f(self, t, y, action):

x = y[0]
v = y[1]

thrust = HopperEnv.thrust_eqn(self, action)
thrust = self.rocket.thrust(action * 10000 + 100000)

cd = 0.6
A = 0.1 * 0.3
rho = 1.225
F_aero = 0.5 * cd * rho * A * v * abs(v)

x_dot = v
v_dot = thrust / self.m - self.g - F_aero / self.m
v_dot = thrust / Hopper.m - self.g - F_aero / Hopper.m

return np.array([x_dot, v_dot])

@@ -98,7 +145,8 @@ def step(self, action, state):
y[0] = 10 - self.xt
y[1] = 0

y = np.concatenate((y, np.array([action]))) # By giving the action in the state we give the Network the current valve position
# By giving the action in the state we give the Network the current valve position
y = np.concatenate((y, np.array([action])))

# Define the reward
if abs(y[0]) < self.sigma_1:
@@ -113,25 +161,8 @@

reward = rew1 + penalty

# if self.xt + y[0] < 0:
# reward = -self.BOUNDARY_PENALTY
# elif self.xt + y[0] > 10:
# reward = -self.BOUNDARY_PENALTY
# elif abs(y[0]) < self.margin:
# reward = self.HEIGHT_REWARD
# elif abs(y[0]) < 0.5:
# reward = self.HEIGHT_STEP_REWARD
# else:
# reward = -self.MOVE_PENALTY

done = False
if self.episode_step >= 200:
done = True

return y, reward, done

# def render(self):
# img = self.get_image()
# img = img.resize((300, 300)) # resizing so we can see our agent in all its glory.
# cv2.imshow("image", np.array(img)) # show it!
# cv2.waitKey(1)
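
For reference, a minimal usage sketch of the new Hopper model, assuming rocket.py from this commit is importable: each discrete action is mapped to a downstream valve pressure via action * 10000 + 100000 (as in HopperEnv.f), and Hopper.thrust returns the resulting thrust in newtons. The action values below are illustrative picks from the 0-99 action space.

```python
import rocket

hopper = rocket.Hopper()

# Illustrative action indices: 17 matches the commented-out example in rocket.py,
# 59 matches EnvironmentTest.py, and 99 is the top of the action space.
for action in (17, 59, 99):
    p1 = action * 10000 + 100000  # pressure downstream of the valve in [Pa]
    print(f"action={action:2d}  p1={p1/1e5:.1f} bar  thrust={hopper.thrust(p1):.2f} N")
```
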
