# ppo_test.yaml


# Name of the training algorithm this experiment selects.
algorithm: PPO

# Stopping thresholds for the run.
# NOTE(review): presumably the run ends as soon as any one of these is
# reached -- confirm against the script that consumes this file.
stop:
    timesteps_total: 200000000
    training_iteration: 1000
    episode_reward_mean: 5000

# Settings for the algorithm selected by the top-level `algorithm` key.
# NOTE(review): the key names look like Ray RLlib's PPO trainer options --
# confirm against the script that loads this file.
config:
    env: godot
    # NOTE(review): presumably forwarded as-is to the `godot` environment;
    # the null entries likely defer to that environment's defaults -- verify.
    env_config:
        framerate: null
        action_repeat: null
        show_window: false
        seed: 0
    framework: torch
    lambda: 0.95
    gamma: 0.95

    vf_clip_param: 1.0
    clip_param: 0.2
    entropy_coeff: 0.001
    entropy_coeff_schedule: null
    # 4 workers x 16 envs per worker x 16-step fragments = 1024, which equals
    # train_batch_size; 1024 / 128 gives 8 minibatches of sgd_minibatch_size.
    # Keep these values in step with each other when changing any of them.
    train_batch_size: 1024
    sgd_minibatch_size: 128
    num_sgd_iter: 16
    num_workers: 4
    lr: 0.0003
    num_envs_per_worker: 16
    batch_mode: truncate_episodes
    rollout_fragment_length: 16
    num_gpus: 1
    model:
        fcnet_hiddens: [256, 256]
        # NOTE(review): with use_lstm false, lstm_cell_size presumably has no
        # effect -- confirm.
        use_lstm: false
        lstm_cell_size: 32
        # NOTE(review): RLlib's model options appear to document `framestack`
        # as a boolean; 4 may have been intended as a frame count -- confirm.
        framestack: 4
    no_done_at_end: false
    soft_horizon: false