-
Notifications
You must be signed in to change notification settings - Fork 0
/
Train.py
100 lines (69 loc) · 3.57 KB
/
Train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
'''# Train.py
This is the main training script. It will train a new batch of agents.
Results will be printed to the console and logged.
- Population of 256 agents will be run through a genetic algorithm to create the next generation
- Top Agent is saved every generation; Best Agent is saved whenever a generation matches or beats the all-time high score
- Training will continue until the user quits
'''
from pandas.core.frame import DataFrame
from lib.neural_network import Tools
from concurrent.futures import ProcessPoolExecutor
from time import time
import lib.game_main as br
import pickle
import pandas as pd
def worker(agent, graphics=False) -> int:
    """Play one game with ``agent`` and return the result of ``br.main``.

    Defined at module level so it can be passed to
    ``ProcessPoolExecutor.map`` in ``process_generation``, which treats the
    returned value as the agent's fitness.

    Args:
        agent: The agent forwarded to ``br.main``.
        graphics: Forwarded to ``br.main``; presumably toggles rendering
            (confirm against ``lib.game_main``).

    Returns:
        Whatever ``br.main`` returns (annotated as ``int``).
    """
    result = br.main(agent=agent, graphics=graphics)
    return result
def process_generation(population: list) -> tuple:
    """Evaluate every agent of ``population`` in a pool of worker processes.

    Args:
        population: The agents to evaluate; each one is passed to ``worker``.

    Returns:
        A ``(fitness, population)`` tuple. ``fitness`` is the list of
        ``worker`` results and ``population`` is the list that was passed in.
        (The previous ``-> list`` annotation did not match this tuple.)
    """
    with ProcessPoolExecutor() as executor:
        # Executor.map yields results in input order, so fitness[i] is the
        # result for population[i].
        fitness = list(executor.map(worker, population))
    return fitness, population
def log(training_log, generation, g_top_fit, top_fit, mean_fit, complete_time) -> DataFrame:
    """Record one generation's results, save the log to CSV and print a summary.

    Args:
        training_log: DataFrame holding the rows logged so far.
        generation: Number of the generation being logged.
        g_top_fit: Highest fitness reached in this generation.
        top_fit: All-time top fitness; only printed, not stored in the log.
        mean_fit: Mean fitness of this generation.
        complete_time: Time the generation took, in seconds.

    Returns:
        A DataFrame containing the previous rows plus the new one. If
        ``training_log`` has no rows, the result is just the new row.
    """
    new_row = pd.DataFrame({'Generation': [generation],
                            'Top Fitness': [g_top_fit],
                            'Mean Fitness': [mean_fit],
                            'Run Time (s)': [complete_time]})

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement. An empty log is skipped rather than concatenated so the
    # numeric dtypes of the first row are kept and pandas does not warn about
    # concatenating empty frames.
    if training_log.empty:
        training_log = new_row
    else:
        training_log = pd.concat([training_log, new_row])

    try:
        training_log.to_csv('./logs/training_log.csv', index=False)
    except Exception as err:
        # Writing the CSV is best-effort (missing ./logs directory, file
        # locked by another program, ...): training must go on, but the
        # failure is reported instead of being silently discarded.
        print(f'Warning: could not write ./logs/training_log.csv: {err}')

    print(f'Generation: {generation:03} -- Generation Top Fitness: {g_top_fit} -- All Time Top Fitness: {top_fit} -- Mean Fitness: {mean_fit:.2F} -- Completed in {complete_time:.2f} Seconds')
    return training_log
def main() -> None:
    """Train agents generation after generation until the process is stopped.

    Every pass of the loop evaluates the population, pickles the generation's
    top agent (and the session's best agent when the all-time top fitness is
    matched or beaten), logs the results, then builds the next population
    through crossover and mutation.
    """
    # ---------------- Hyperparameters ----------------
    INPUT_SIZE = 25
    LAYER_CONFIG = [[16, 'relu'],
                    [3, 'softmax']]
    MUTATE_RATE = 0.25
    MUTATE_SCALE = 0.1
    POPULATION_SIZE = 256
    # -------------------------------------------------

    all_time_top_fitness = 0
    generation = 1
    # The timer starts before the population is created, so the first
    # generation's run time includes that setup.
    start_time = time()
    population = Tools.Genetic.create_population(POPULATION_SIZE, INPUT_SIZE, LAYER_CONFIG)
    training_log = pd.DataFrame(columns=['Generation', 'Top Fitness', 'Mean Fitness', 'Run Time (s)'])

    while True:
        fitnesses, population = process_generation(population)
        generation_top_fitness = max(fitnesses)
        # index() returns the first agent that reached the top fitness
        champion = population[fitnesses.index(generation_top_fitness)]

        # Save networks for replay: the top agent (best of this generation)
        # is always written; the best agent (best of the whole session) is
        # written when the all-time top fitness is matched or beaten.
        save_paths = ['./trained_agents/top_agent.pickle']
        if generation_top_fitness >= all_time_top_fitness:
            save_paths.append('./trained_agents/best_agent.pickle')
            all_time_top_fitness = generation_top_fitness
        for path in save_paths:
            with open(path, 'wb') as handle:
                pickle.dump(champion, handle)

        # Log to csv, then restart the timer for the next generation
        mean_fitness = sum(fitnesses) / len(fitnesses)
        elapsed = time() - start_time
        training_log = log(training_log, generation, generation_top_fitness,
                           all_time_top_fitness, mean_fitness, elapsed)
        start_time = time()

        # Crossover mixes the weights and biases of selected networks. It is
        # given the squared fitnesses, presumably to favour the fittest
        # agents more strongly (confirm in Tools.Genetic.crossover).
        selection_weights = [score ** 2 for score in fitnesses]
        children = []
        for _ in range(POPULATION_SIZE):
            children.append(Tools.Genetic.crossover(population, selection_weights))
        population = children

        # Mutate the resulting children; the return value is not used, so
        # mutate presumably works in place (confirm in Tools.Genetic.mutate).
        Tools.Genetic.mutate(population, rate=MUTATE_RATE, scale=MUTATE_SCALE)
        generation += 1
# Start training only when this file is executed as a script. The guard also
# keeps processes that re-import this module (e.g. ProcessPoolExecutor
# workers under the "spawn" start method) from launching training themselves.
if __name__ == "__main__":
    main()