diff --git a/config.yaml b/config.yaml index c6d46e5..768c6cc 100644 --- a/config.yaml +++ b/config.yaml @@ -9,7 +9,7 @@ debug: stream_wrapper: False init_state: "victory_road_5" state_dir: pyboy_states - max_steps: 20480 + max_steps: 21000 log_frequency: 1 disable_ai_actions: True use_global_map: False @@ -97,22 +97,22 @@ train: total_timesteps: 1_000_000_000 # 100_000_000_000 for full games batch_size: 65536 minibatch_size: 2048 - learning_rate: 2.0e-4 anneal_lr: False - gamma: 0.998 - gae_lambda: 0.95 num_minibatches: 4 update_epochs: 3 norm_adv: True + ent_coef: 0.010005 + gae_lambda: 0.9501 + gamma: 0.9980002 clip_coef: 0.1 clip_vloss: True - ent_coef: 0.01 - vf_coef: 0.5 + learning_rate: 2.0e-4 max_grad_norm: 0.5 target_kl: ~ + vf_clip_coef: 0.1 + vf_coef: 0.5 batch_rows: 128 bptt_horizon: 16 - vf_clip_coef: 0.1 num_envs: 288 num_workers: 24 @@ -313,30 +313,30 @@ rewards: baseline.ObjectRewardRequiredEventsMapIds: reward: - event: 1.0 - seen_pokemon: 4.0 - caught_pokemon: 4.0 - moves_obtained: 4.0 - hm_count: 10.0 - level: 1.0 - badges: 5.0 + a_press: 0.0 # 0.00001 + badges: 3.0 + bag_menu: 0.0 + caught_pokemon: 2.5 cut_coords: 0.0 cut_tiles: 0.0 - start_menu: 0.0 - pokemon_menu: 0.0 - stats_menu: 0.0 - bag_menu: 0.0 - explore_hidden_objs: 0.0001 + event: .75 + exploration: 0.019 + explore_hidden_objs: 0.00009999 explore_signs: 0.015 + explore_warps: 0.01006 + hm_count: 7.5 + level: 1.05 + moves_obtained: 4.0 + pokecenter_heal: 0.47 + pokemon_menu: 0.0 + required_event: 7.0 + required_item: 3.0 seen_action_bag_menu: 0.0 - required_event: 5.0 - required_item: 5.0 - useful_item: 1.0 - pokecenter_heal: 0.5 - exploration: 0.02 - a_press: 0.0 # 0.00001 - explore_warps: 0.01 - use_surf: 0.5 + seen_pokemon: 2.5 + start_menu: 0.0 + stats_menu: 0.0 + use_surf: 0.4 + useful_item: 0.825