-
Notifications
You must be signed in to change notification settings - Fork 27
/
rice_warpdrive.yaml
47 lines (42 loc) · 2.15 KB
/
rice_warpdrive.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Copyright (c) 2022, salesforce.com, inc and MILA.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause
# Checkpoint saving settings
saving:
  metrics_log_freq: 100  # how often (in iterations) to log (and print) the metrics
  model_params_save_freq: 1000  # how often (in iterations) to save the model parameters
  basedir: "/tmp"  # base folder used for saving
  name: "rice"  # experiment name
  tag: "experiments"  # experiment tag
# Trainer settings
trainer:
  num_envs: 100  # number of environment replicas
  num_episodes: 1000  # number of episodes to run the training for
  # total batch size used for training per iteration
  # (across all the environments)
  train_batch_size: 10000
# Environment configuration
env:
  # number of discrete levels for the saving and mitigation actions
  num_discrete_action_levels: 10
  negotiation_on: false  # flag to indicate whether negotiation is allowed or not
# Policy network settings
policy:
  # One entry per policy; "regions" is the only policy defined here.
  # NOTE(review): presumably this name must match the policy tag the trainer
  # maps agents to -- confirm against the training script.
  regions:
    to_train: true  # flag indicating whether the model needs to be trained
    algorithm: "A2C"  # algorithm used to train the policy
    vf_loss_coeff: 0.1  # coefficient for the value function loss (a constant here)
    # loss coefficient schedule for the entropy loss:
    # piecewise linear, specified as (timestep, value)
    entropy_coeff:
      - [0, 0.5]
      - [1000000, 0.1]
      - [5000000, 0.05]
    clip_grad_norm: true  # flag indicating whether to clip the gradient norm or not
    max_grad_norm: 0.5  # when clip_grad_norm is true, the clip level
    normalize_advantage: false  # flag indicating whether to normalize advantage or not
    normalize_return: false  # flag indicating whether to normalize return or not
    gamma: 0.92  # discount factor
    lr: 0.0005  # learning rate
    model:  # policy model settings
      type: "fully_connected"  # model type
      fc_dims: [256, 256]  # dimension(s) of the fully connected layers as a list
      # filepath used to restore a previously saved model; empty string here.
      # NOTE(review): nested under `model` per the "policy model settings"
      # grouping -- confirm the loader reads it from this level.
      model_ckpt_filepath: ""