# scripts/rice_warpdrive.yaml

# Copyright (c) 2022, salesforce.com, inc and MILA.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root
# or https://opensource.org/licenses/BSD-3-Clause


# Checkpoint saving and metric logging settings
saving:
    # Number of iterations between metric logs (the metrics are also printed).
    metrics_log_freq: 100
    # Number of iterations between saves of the model parameters.
    model_params_save_freq: 1000
    # Base folder used for saving.
    basedir: '/tmp'
    # Experiment name.
    name: 'rice'
    # Experiment tag.
    tag: 'experiments'

# Trainer settings
trainer:
    # Number of environment replicas.
    num_envs: 100
    # Number of episodes to run the training for.
    num_episodes: 1000
    # Total batch size used for training per iteration, counted across all
    # the environments.
    train_batch_size: 10000

# Environment configuration
env:
    # Number of discrete levels for the saving and mitigation actions.
    num_discrete_action_levels: 10
    # Whether negotiation is allowed. Spelled as the canonical lowercase
    # boolean, which YAML 1.1 and YAML 1.2 loaders both resolve to a bool.
    negotiation_on: false

# Policy network settings
policy:
    regions:
        # Whether the model needs to be trained.
        to_train: true
        # Algorithm used to train the policy.
        algorithm: 'A2C'
        # Coefficient of the value function loss (a single constant here).
        vf_loss_coeff: 0.1
        # Coefficient of the entropy loss, given as a piecewise linear
        # schedule: each entry is a [timestep, value] pair.
        entropy_coeff:
            - [0, 0.5]
            - [1000000, 0.1]
            - [5000000, 0.05]
        # Whether to clip the gradient norm.
        clip_grad_norm: true
        # Clip level used when clip_grad_norm is true.
        max_grad_norm: 0.5
        # Whether to normalize the advantage.
        normalize_advantage: false
        # Whether to normalize the return.
        normalize_return: false
        # Discount factor.
        gamma: 0.92
        # Learning rate.
        lr: 0.0005
        # Policy model settings.
        model:
            # Model type.
            type: 'fully_connected'
            # Dimension(s) of the fully connected layers, as a list.
            fc_dims: [256, 256]
            # Filepath used to restore a previously saved model; empty here.
            model_ckpt_filepath: ''