Commit: Started Runge Kutta comparison to analytical formula
Louis committed Aug 10, 2023
1 parent dd6c577 commit df071bf
Showing 4 changed files with 45 additions and 18 deletions.
14 changes: 7 additions & 7 deletions DoubleDQNAgent.py
@@ -3,7 +3,7 @@
import matplotlib.pyplot as plt
import pandas as pd
from DoubleDQNReplayBuffer import ReplayBuffer
-import rocket
+import rocketDQN


def DeepQNetwork(lr, num_actions, input_dims, fc1, fc2):
@@ -80,7 +80,7 @@ def train(self):
def train_model(self, env, num_episodes, graph):

scores, episodes, avg_scores, obj = [], [], [], []
-goal = 200
+goal = 100
f = 0
txt = open("saved_networks.txt", "w")

@@ -103,7 +103,7 @@ def train_model(self, env, num_episodes, graph):
avg_scores.append(avg_score)
print("Episode {0}/{1}, Score: {2} ({3}), AVG Score: {4}".format(i, num_episodes, score, self.epsilon,
avg_score))
-if avg_score >= 0.0 and score >= 0.0:
+if avg_score >= 50.0 and score >= 100.0:
self.q_net.save(("saved_networks/dqn_model{0}".format(f)))
self.q_net.save_weights(("saved_networks/dqn_model{0}/net_weights{0}.h5".format(f)))
txt.write("Save {0} - Episode {1}/{2}, Score: {3} ({4}), AVG Score: {5}\n".format(f, i, num_episodes,
@@ -120,7 +120,7 @@ def train_model(self, env, num_episodes, graph):
plt.plot('x', 'Average Score', data=df, marker='', color='orange', linewidth=2, linestyle='dashed',
label='AverageScore')
plt.plot('x', 'Solved Requirement', data=df, marker='', color='red', linewidth=2, linestyle='dashed',
-label='Solved Requirement')
+label='Target Reward')
plt.legend()
plt.savefig('Hopper1D_Train.png')

@@ -132,7 +132,7 @@ def test(self, env, num_episodes, file_type, file, graph):
self.q_net.load_weights(file)
self.epsilon = 0.0
scores, episodes, avg_scores, obj = [], [], [], []
-goal = 0
+goal = 100
score = 0.0
for i in range(num_episodes):
state = env.reset()
@@ -163,7 +163,7 @@ def test(self, env, num_episodes, file_type, file, graph):
avg_score))

if i % 10 == 0:
-plt.plot(np.linspace(0, rocket.HopperEnv.duration, np.size(S1)), S1)
+plt.plot(np.linspace(0, rocketDQN.HopperEnv.duration, np.size(S1)), S1)
plt.savefig('TestRuns.png')
plt.close()
if graph:
@@ -173,7 +173,7 @@ def test(self, env, num_episodes, file_type, file, graph):
plt.plot('x', 'Average Score', data=df, marker='', color='orange', linewidth=2, linestyle='dashed',
label='AverageScore')
plt.plot('x', 'Solved Requirement', data=df, marker='', color='red', linewidth=2, linestyle='dashed',
-label='Solved Requirement')
+label='Target Reward')
plt.legend()
plt.savefig('Hopper1D_Test.png')

31 changes: 29 additions & 2 deletions EnvironmentTest.py
@@ -1,12 +1,12 @@
import numpy as np
import matplotlib.pyplot as plt
-import rocket
+import rocketDQN

# Test of the ODE and step() + reset() fcn
duration = 20
N = 200

-env = environment.HopperEnv()
+env = rocketDQN.HopperEnv()

state = env.reset()
print(f"The initial state of the hopper is: {state}")
@@ -22,3 +22,30 @@
print(S[199])
plt.plot(t, S)
plt.show()

# Comparison plot of Runge-Kutta solver with various step sizes in comparison to the analytical solution
step_size = 0.1
duration = 20
N = duration/step_size
env = rocketDQN.HopperEnv()

state = env.reset()
x0 = state[0]
print(f"The initial state of the hopper is: {state}")

S = []
S_kutta = []
t_kutta = np.linspace(0, duration, int(N))

for i in enumerate(t_kutta):
new_state, reward, done = env.step(0, state)
state = new_state
x = -0.5 * 9.81 * t_kutta[i[0]]**2 + x0
S.append(x)
S_kutta.append(new_state[0])

plt.plot(t_kutta, S_kutta)
plt.plot(t_kutta, S)
print(f"Kutta last value: {S_kutta[-1]}")
print(f"Analytical last value: {S[-1]}")
plt.show()
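The new block above checks the environment's Runge-Kutta integrator against the closed-form free-fall solution x(t) = x0 - 0.5*g*t^2. As a standalone sanity check, the sketch below integrates the same unpowered dynamics with a classic fourth-order Runge-Kutta step and prints the largest deviation from the analytical curve; it is illustrative only: the names freefall and rk4_step, the initial altitude, and the step size are assumptions, not code from this repository.

import numpy as np

G = 9.81  # gravitational acceleration in m/s^2

def freefall(t, y):
    # State y = [altitude, velocity]; unpowered 1-D free fall.
    return np.array([y[1], -G])

def rk4_step(f, y, t, h):
    # One classic fourth-order Runge-Kutta step of size h.
    k1 = f(t, y)
    k2 = f(t + h / 2, y + h / 2 * k1)
    k3 = f(t + h / 2, y + h / 2 * k2)
    k4 = f(t + h, y + h * k3)
    return y + h / 6 * (k1 + 2 * k2 + 2 * k3 + k4)

h = 0.1                   # step size, matching the committed test
duration = 20.0
x0 = 10.0                 # assumed initial altitude
y = np.array([x0, 0.0])

errors = []
for t in np.arange(0.0, duration + h, h):
    analytic = x0 - 0.5 * G * t**2
    errors.append(abs(y[0] - analytic))
    y = rk4_step(freefall, y, t, h)

print(f"max |RK4 - analytic| = {max(errors):.2e}")

Because the acceleration is constant, the RK4 update reproduces the quadratic trajectory to rounding error, so any visible gap in the committed comparison plot would point at the environment's step handling (for instance the boundary clamping changed in rocketDQN.py below) rather than at the integrator itself.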
8 changes: 4 additions & 4 deletions main.py
@@ -2,21 +2,21 @@
# batch size instead of just one experience. Epsilon greedy algorithm is also implemented to allow the network
# to explore more actions in the beginning of the training session

-import rocket
+import rocketDQN
from DoubleDQNAgent import Agent

-env = rocket.HopperEnv()
+env = rocketDQN.HopperEnv()

TRAIN = 0
TEST = 1
-EPISODES = 100
+EPISODES = 50
GRAPH = True

ACTION_SPACE_SIZE = env.action_space
OBSERVATION_SPACE_SIZE = env.observation_space

FILE_TYPE = 'tf'
-FILE = 'saved_networks/dqn_model293'
+FILE = 'saved_networks/dqn_model281'

dqn_agent = Agent(lr=0.00075, discount_factor=0.95, num_actions=ACTION_SPACE_SIZE, epsilon=1.0, batch_size=64,
input_dims=OBSERVATION_SPACE_SIZE)
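The rest of main.py is collapsed in this view. Going by the TRAIN/TEST flags above and the method signatures visible in the DoubleDQNAgent.py diff, the dispatch presumably looks something like the following sketch; this is a guess at the collapsed code, not the committed content.

# Hypothetical continuation of main.py: the visible hunk only defines the flags
# and constructs the Agent; the train_model()/test() signatures are taken from
# the DoubleDQNAgent.py diff above.
if TRAIN:
    dqn_agent.train_model(env, EPISODES, GRAPH)
if TEST:
    dqn_agent.test(env, EPISODES, FILE_TYPE, FILE, GRAPH)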
10 changes: 5 additions & 5 deletions rocket.py → rocketDQN.py
@@ -82,7 +82,7 @@ class HopperEnv:
v0 = 0

# Random target state (altitude)
-xt = random.uniform(0, 8)
+xt = random.uniform(0.5, 4.5)

# Use Hopper Equations in Hopper Environment
rocket = Hopper()
@@ -116,7 +116,7 @@ def ode45_step(self, f, y, t, h, action):

def reset(self):
# New random altitude goal
-self.xt = random.uniform(0, 8)
+self.xt = random.uniform(0.5, 4.5)

# Calculate the new initial state vector
s0 = [self.x0 - self.xt, self.v0]
@@ -141,8 +141,8 @@ def step(self, action, state):
if y[0] + self.xt < 0:
y[0] = self.x0 - self.xt
y[1] = 0
-elif y[0] + self.xt > 10:
-y[0] = 10 - self.xt
+elif y[0] + self.xt > 5:
+y[0] = 5 - self.xt
y[1] = 0

# By giving the action in the state we give the Network the current valve position
@@ -154,7 +154,7 @@ def step(self, action, state):
else:
rew1 = 0

-if y[0] + self.xt == 10 or y[0] + self.xt == 0:
+if y[0] + self.xt == 5 or y[0] + self.xt == 0:
penalty = -self.BOUNDARY_PENALTY
else:
penalty = 0
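The hunks above shrink the flight corridor: the random target altitude now lies in [0.5, 4.5] m and the hopper is clamped to a 5 m ceiling instead of 10 m, with the velocity zeroed and a boundary penalty applied whenever a wall is touched. A minimal standalone restatement of the updated clamp-and-penalise logic, with an assumed penalty constant (the real value lives in HopperEnv):

BOUNDARY_PENALTY = 10.0  # assumed magnitude; the actual constant is defined in HopperEnv

def clamp_and_penalise(y, xt, x0, ceiling=5.0):
    # y = [x - xt, v] is the state relative to the target altitude xt,
    # mirroring the updated step() logic in rocketDQN.py.
    if y[0] + xt < 0:            # below the ground: reset to the launch altitude
        y[0] = x0 - xt
        y[1] = 0.0
    elif y[0] + xt > ceiling:    # above the new 5 m ceiling: pin to the ceiling
        y[0] = ceiling - xt
        y[1] = 0.0

    at_wall = (y[0] + xt == ceiling) or (y[0] + xt == 0)
    penalty = -BOUNDARY_PENALTY if at_wall else 0.0
    return y, penalty

Halving the corridor while keeping the targets between 0.5 m and 4.5 m tightens the exploration space, which presumably makes the boundary penalty and the altitude reward easier for the agent to learn.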
