-
Notifications
You must be signed in to change notification settings - Fork 1
/
config.py
89 lines (65 loc) · 2.13 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from pdb import set_trace as T
import os
import nmmo
from config.bases import Base
from scripted import baselines
class Train(Base, nmmo.config.Medium, nmmo.config.AllGameSystems):
    """Training configuration.

    Combines the project ``Base`` config with nmmo's ``Medium`` and
    ``AllGameSystems`` configs, and adds the trainer settings below.
    The hyperparameter names mirror CleanRL-style PPO arguments
    (presumably consumed by the training script -- verify against caller).
    """

    @property
    def PATH_MAPS(self):
        """Return the parent config's map path with a 'training' subdirectory."""
        parent_maps = super().PATH_MAPS
        return os.path.join(parent_maps, 'training')

    @property
    def NUM_ENVS(self):
        """Return NUM_CPUS multiplied by PLAYER_N."""
        cpus = self.NUM_CPUS
        return cpus * self.PLAYER_N

    @property
    def BATCH_SIZE(self):
        """Return NUM_ENVS * NUM_STEPS, coerced to int."""
        total = self.NUM_ENVS * self.NUM_STEPS
        return int(total)

    # Hardcoded for now: number of args to predict
    NUM_ARGUMENTS = 8

    COMBAT = True

    # --- Experiment identification / logging ---
    EXP_NAME = 'CleanRL'
    ENV_ID = 'nmmo'
    WANDB_PROJECT_NAME = 'cleanRL'
    WANDB_ENTITY = None

    # --- Reproducibility and run length ---
    TORCH_DETERMINISTIC = True
    SEED = 1
    TOTAL_TIMESTEPS = 1_000_000_000

    # --- Hardware ---
    CUDA = [0]
    NUM_CPUS = 8

    # --- Rollout sizing (NUM_STEPS feeds BATCH_SIZE above) ---
    HORIZON = 512
    NUM_STEPS = 512
    NUM_MINIBATCHES = 576  # 544

    # --- Optimizer / loss settings ---
    LEARNING_RATE = 5e-5
    UPDATE_EPOCHS = 1
    ANNEAL_LR = False
    GAE = True
    NORM_ADV = True
    CLIP_VLOSS = True
    TARGET_KL = None
    GAE_LAMBDA = 1.0
    GAMMA = 0.99
    CLIP_COEF = 0.3
    ENT_COEF = 0.0
    VF_COEF = 1.0
    MAX_GRAD_NORM = 0.5
class Eval(Train):
    """Evaluation configuration derived from ``Train``.

    Overrides the player roster, CPU count, map path and a few
    environment flags; everything else is inherited from ``Train``.
    """

    SPECIALIZE = True

    # Nine scripted baseline policies followed by seven nmmo.Agent slots.
    PLAYERS = [
        baselines.Meander,
        baselines.Fisher, baselines.Herbalist, baselines.Prospector,
        baselines.Carver, baselines.Alchemist,
        baselines.Melee, baselines.Range, baselines.Mage,
    ] + [nmmo.Agent] * 7

    NUM_CPUS = 4
    TERRAIN_FLIP_SEED = True
    RESPAWN = False

    @property
    def PATH_MAPS(self):
        """Return the map path with 'training' swapped for 'evaluation'.

        The parent property appends a 'training' directory; this drops that
        final path component (when present) and appends 'evaluation'.

        ``str.strip('training')`` is deliberately not used: it removes any of
        the characters t/r/a/i/n/g from *both* ends of the string, so a map
        root that starts with one of those letters would be corrupted.
        """
        train_maps = super().PATH_MAPS
        head, tail = os.path.split(train_maps)
        # Only peel off the last component if it really is the training dir.
        base = head if tail == 'training' else train_maps
        return os.path.join(base, 'evaluation')

    MAP_N = 32
class Debug(Train):
    """Small-scale variant of ``Train`` for quick debugging runs.

    Shrinks the horizon, CPU count, step count and minibatch count.
    """

    HORIZON = 6
    NUM_CPUS = 2
    NUM_MINIBATCHES = 128

    # The original body assigned NUM_STEPS twice (128, then 1). The later
    # binding always won, so only the effective value is kept.
    NUM_STEPS = 1

    # NOTE(review): Train.CUDA is a list ([0]) while this is an empty dict.
    # Left unchanged; confirm consumers rely only on emptiness/iteration.
    CUDA = {}
class DebugEval(Debug, Eval):
    """Debug-sized evaluation config.

    Defines nothing of its own: attributes resolve through ``Debug`` first,
    then ``Eval``, per the base-class order.
    """