Skip to content

Commit

Permalink
puffer 1.0 port
Browse files Browse the repository at this point in the history
  • Loading branch information
thatguy11325 committed Jun 20, 2024
1 parent afe63df commit 48dbc3d
Show file tree
Hide file tree
Showing 8 changed files with 788 additions and 700 deletions.
28 changes: 17 additions & 11 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ debug:
device: cpu
compile: False
compile_mode: default
num_envs: 1
envs_per_worker: 1
envs_per_batch: 1
batch_size: 16
num_envs: 2
num_workers: 1
env_batch_size: 16
env_pool: True
zero_copy: False
batch_rows: 4
bptt_horizon: 2
total_timesteps: 100_000_000
Expand Down Expand Up @@ -73,6 +74,7 @@ train:
float32_matmul_precision: "high"
total_timesteps: 100_000_000_000
batch_size: 65536
minibatch_size: 32768
learning_rate: 2.0e-4
anneal_lr: False
gamma: 0.998
Expand All @@ -91,9 +93,10 @@ train:
vf_clip_coef: 0.1

num_envs: 96
envs_per_worker: 1
envs_per_batch: 32
num_workers: 96
env_batch_size: 32
env_pool: True
zero_copy: False

verbose: True
data_dir: runs
Expand All @@ -104,6 +107,7 @@ train:
cpu_offload: True
pool_kernel: [0]
load_optimizer_state: False
use_rnn: True

# swarm_frequency: 500
# swarm_keep_pct: .8
Expand All @@ -126,6 +130,7 @@ wrappers:
forgetting_frequency: 10
- exploration.OnResetExplorationWrapper:
full_reset_frequency: 1
jitter: 0

finite_coords:
- stream_wrapper.StreamWrapper:
Expand Down Expand Up @@ -224,9 +229,10 @@ policies:
policy:
hidden_size: 512

recurrent:
rnn:
# Assumed to be in the same module as the policy
name: RecurrentMultiConvolutionalWrapper
input_size: 512
hidden_size: 512
num_layers: 1
name: MultiConvolutionalRNN
args:
input_size: 512
hidden_size: 512
num_layers: 1
33 changes: 33 additions & 0 deletions pokemonred_puffer/c_gae.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# distutils: define_macros=NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION
# cython: language_level=3
# cython: boundscheck=False
# cython: initializedcheck=False
# cython: wraparound=False
# cython: nonecheck=False

import numpy as np
cimport numpy as cnp

def compute_gae(cnp.ndarray dones, cnp.ndarray values,
        cnp.ndarray rewards, float gamma, float gae_lambda):
    '''Fast Cython implementation of Generalized Advantage Estimation (GAE).

    Walks the rollout backwards, accumulating the discounted sum of TD
    residuals. Note the indexing convention used here: the advantage at
    step ``t`` is built from ``rewards[t + 1]``, ``dones[t + 1]`` and
    ``values[t + 1]`` (bootstrapped against ``values[t]``), so the final
    step has no successor and its advantage is left at 0.

    Args:
        dones: 1-D float32 array; 1.0 where the episode terminated.
        values: 1-D float32 array of value estimates.
        rewards: 1-D float32 array of rewards; its length sets the number
            of steps processed.
        gamma: discount factor.
        gae_lambda: GAE smoothing factor.

    Returns:
        A new 1-D float32 array of advantages with ``len(rewards)`` entries.

    Raises:
        ValueError: if ``dones`` or ``values`` has fewer elements than
            ``rewards``. Binding the ``float[:]`` memoryviews also rejects
            inputs that are not 1-D float32 buffers.
    '''
    cdef int num_steps = len(rewards)
    cdef cnp.ndarray advantages = np.zeros(num_steps, dtype=np.float32)
    cdef float[:] c_advantages = advantages
    cdef float[:] c_dones = dones
    cdef float[:] c_values = values
    cdef float[:] c_rewards = rewards

    cdef float lastgaelam = 0
    cdef float nextnonterminal, delta
    cdef int t_cur, t_next

    # boundscheck is disabled for this module, so a too-short input would be
    # read out of bounds silently instead of raising. Validate up front.
    if c_dones.shape[0] < num_steps or c_values.shape[0] < num_steps:
        raise ValueError(
            f"dones ({c_dones.shape[0]}) and values ({c_values.shape[0]}) "
            f"must each have at least len(rewards) = {num_steps} elements"
        )

    # Iterate from the second-to-last step down to 0; the range is empty
    # when there are fewer than two steps.
    for t_cur in range(num_steps - 2, -1, -1):
        t_next = t_cur + 1
        # A terminal flag on the next step cuts both the bootstrap value
        # and the accumulated advantage carried back from the future.
        nextnonterminal = 1.0 - c_dones[t_next]
        delta = c_rewards[t_next] + gamma * c_values[t_next] * nextnonterminal - c_values[t_cur]
        lastgaelam = delta + gamma * gae_lambda * nextnonterminal * lastgaelam
        c_advantages[t_cur] = lastgaelam

    return advantages

Loading

0 comments on commit 48dbc3d

Please sign in to comment.