
Commit 2237ecb — Merge branch 'puffer1.0'
thatguy11325 committed Jun 24, 2024
2 parents afe63df + ca41349
Showing 10 changed files with 1,003 additions and 746 deletions.
config.yaml (27 additions, 14 deletions)
@@ -8,27 +8,31 @@ debug:
     headless: False
     stream_wrapper: False
     init_state: victory_road
-    max_steps: 1_000_000
+    max_steps: 16
+    log_frequency: 1
     disable_wild_encounters: True
     disable_ai_actions: True
     use_global_map: True
   train:
     device: cpu
     compile: False
     compile_mode: default
     num_envs: 1
-    envs_per_worker: 1
-    envs_per_batch: 1
-    batch_size: 16
+    num_workers: 1
+    env_batch_size: 4
+    env_pool: True
+    zero_copy: False
+    batch_size: 4
+    minibatch_size: 4
     batch_rows: 4
     bptt_horizon: 2
     total_timesteps: 100_000_000
     save_checkpoint: True
     checkpoint_interval: 4
-    save_overlay: True
+    save_overlay: False
     overlay_interval: 4
     verbose: False
-    env_pool: False
     log_frequency: 5000
+    load_optimizer_state: False
     # swarm_frequency: 10
     # swarm_keep_pct: .1
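The debug profile above shrinks everything — 16-step episodes, a single CPU env, batch size 4 — so a full train loop can be smoke-tested in seconds. A minimal sketch of pulling that profile out of config.yaml, assuming the debug subsections override the top-level env/train defaults key-by-key (the repo's actual loader may merge differently):

```python
import yaml

with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

# Assumed merge rule: the debug profile overrides the shared defaults.
env_cfg = {**cfg["env"], **cfg["debug"]["env"]}
train_cfg = {**cfg["train"], **cfg["debug"]["train"]}
print(train_cfg["batch_size"], train_cfg["minibatch_size"])  # 4 4
```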
@@ -62,6 +66,7 @@ env:
   auto_pokeflute: True
   infinite_money: True
   use_global_map: False
+  save_state: False


 train:
@@ -73,6 +78,7 @@ train:
   float32_matmul_precision: "high"
   total_timesteps: 100_000_000_000
   batch_size: 65536
+  minibatch_size: 2048
   learning_rate: 2.0e-4
   anneal_lr: False
   gamma: 0.998
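The new minibatch_size knob splits each collected batch into SGD chunks. Assuming the usual PPO-style semantics (not verified against the trainer here), that is 65536 / 2048 = 32 gradient steps per epoch:

```python
batch_size = 65536     # transitions collected per optimization pass
minibatch_size = 2048  # transitions per gradient step
assert batch_size % minibatch_size == 0
print(batch_size // minibatch_size)  # 32 minibatches per epoch
```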
@@ -90,10 +96,11 @@ train:
   bptt_horizon: 16
   vf_clip_coef: 0.1

-  num_envs: 96
-  envs_per_worker: 1
-  envs_per_batch: 32
+  num_envs: 288
+  num_workers: 24
+  env_batch_size: 72
   env_pool: True
+  zero_copy: False

   verbose: True
   data_dir: runs
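For the new vectorization numbers, the arithmetic below is my reading of the config rather than a verified PufferLib contract: 288 envs across 24 workers is 12 emulator instances per worker process, and with env_pool enabled each step should only have to wait for the fastest env_batch_size = 72 of the 288 envs, letting slow emulator frames overlap with learning:

```python
num_envs, num_workers, env_batch_size = 288, 24, 72

assert num_envs % num_workers == 0
print(num_envs // num_workers)     # 12 envs per worker
assert env_batch_size <= num_envs  # the pool serves the fastest 72 each step
```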
@@ -104,11 +111,15 @@ train:
   cpu_offload: True
   pool_kernel: [0]
   load_optimizer_state: False
+  use_rnn: True
+  async_wrapper: False

   # swarm_frequency: 500
   # swarm_keep_pct: .8

 wrappers:
+  empty: []
+
   baseline:
     - stream_wrapper.StreamWrapper:
         user: thatguy
@@ -126,6 +137,7 @@ wrappers:
         forgetting_frequency: 10
     - exploration.OnResetExplorationWrapper:
         full_reset_frequency: 1
+        jitter: 0

   finite_coords:
     - stream_wrapper.StreamWrapper:
@@ -224,9 +236,10 @@ policies:
     policy:
       hidden_size: 512

-    recurrent:
+    rnn:
       # Assumed to be in the same module as the policy
-      name: RecurrentMultiConvolutionalWrapper
-      input_size: 512
-      hidden_size: 512
-      num_layers: 1
+      name: MultiConvolutionalRNN
+      args:
+        input_size: 512
+        hidden_size: 512
+        num_layers: 1
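The recurrent block is renamed to rnn, and its constructor kwargs now live under a dedicated args key, which makes mechanical instantiation straightforward. A hypothetical loader sketch — the module path pokemonred_puffer.policies is my guess based on the "same module as the policy" comment, and the repo's real factory may differ:

```python
import importlib

# The `rnn` block from config.yaml, as reconstructed above.
rnn_cfg = {
    "name": "MultiConvolutionalRNN",
    "args": {"input_size": 512, "hidden_size": 512, "num_layers": 1},
}

# Resolve the class named in the config from the policy's module;
# the trainer would then construct it with the args mapping.
policies = importlib.import_module("pokemonred_puffer.policies")
rnn_cls = getattr(policies, rnn_cfg["name"])
print(rnn_cls.__name__, rnn_cfg["args"])
```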
pokemonred_puffer/c_gae.pyx (new file, 33 additions)
@@ -0,0 +1,33 @@
# distutils: define_macros=NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION
# cython: language_level=3
# cython: boundscheck=False
# cython: initializedcheck=False
# cython: wraparound=False
# cython: nonecheck=False

import numpy as np
cimport numpy as cnp

def compute_gae(cnp.ndarray dones, cnp.ndarray values,
        cnp.ndarray rewards, float gamma, float gae_lambda):
    '''Fast Cython implementation of Generalized Advantage Estimation (GAE)'''
    cdef int num_steps = len(rewards)
    cdef cnp.ndarray advantages = np.zeros(num_steps, dtype=np.float32)
    # Typed memoryviews for fast element access; all inputs must be float32.
    cdef float[:] c_advantages = advantages
    cdef float[:] c_dones = dones
    cdef float[:] c_values = values
    cdef float[:] c_rewards = rewards

    cdef float lastgaelam = 0
    cdef float nextnonterminal, delta
    cdef int t, t_cur, t_next
    # Backward pass: t_cur runs from num_steps-2 down to 0. Note the
    # indexing pairs rewards[t+1] and dones[t+1] with values[t], i.e. the
    # caller is expected to store the reward/done produced by the action at
    # step t in slot t+1 of its buffers.
    for t in range(num_steps-1):
        t_cur = num_steps - 2 - t
        t_next = num_steps - 1 - t
        nextnonterminal = 1.0 - c_dones[t_next]
        delta = c_rewards[t_next] + gamma * c_values[t_next] * nextnonterminal - c_values[t_cur]
        lastgaelam = delta + gamma * gae_lambda * nextnonterminal * lastgaelam
        c_advantages[t_cur] = lastgaelam

    return advantages
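Since the module cimports numpy, building it needs the NumPy headers. One standard setuptools/Cython recipe (a sketch — the repo may wire the build up differently, e.g. through pyproject hooks):

```python
# setup.py — build with: python setup.py build_ext --inplace
import numpy
from Cython.Build import cythonize
from setuptools import Extension, setup

setup(
    ext_modules=cythonize(
        Extension(
            "pokemonred_puffer.c_gae",
            ["pokemonred_puffer/c_gae.pyx"],
            include_dirs=[numpy.get_include()],  # for `cimport numpy`
        )
    ),
)
```

Once built, compute_gae takes float32 arrays (the float[:] memoryview casts reject anything else) and runs the recurrence delta_t = r_{t+1} + gamma * (1 - d_{t+1}) * V_{t+1} - V_t, A_t = delta_t + gamma * lambda * (1 - d_{t+1}) * A_{t+1} backwards over the trajectory. A quick cross-check against a pure-NumPy restatement — gamma = 0.998 comes from the config above, lambda = 0.95 is illustrative:

```python
import numpy as np
from pokemonred_puffer.c_gae import compute_gae  # after building the extension

rng = np.random.default_rng(0)
T = 128
dones = (rng.random(T) < 0.05).astype(np.float32)
values = rng.standard_normal(T).astype(np.float32)
rewards = rng.standard_normal(T).astype(np.float32)

adv = compute_gae(dones, values, rewards, 0.998, 0.95)

# Pure-NumPy version of the same backward recurrence.
ref = np.zeros(T, dtype=np.float32)
lastgaelam = 0.0
for t in reversed(range(T - 1)):
    nonterminal = 1.0 - dones[t + 1]
    delta = rewards[t + 1] + 0.998 * values[t + 1] * nonterminal - values[t]
    lastgaelam = delta + 0.998 * 0.95 * nonterminal * lastgaelam
    ref[t] = lastgaelam

assert np.allclose(adv, ref, atol=1e-4)
```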

