Move jax.block_until_ready from after every update to just before publishing the network, to accelerate training (#30)

rail-berkeley · Feb 26, 2024 · 3f48278
1 parent 4c1c0cf
commit 3f48278
Show file tree

Hide file tree

Showing 6 changed files with 6 additions and 12 deletions.
diff --git a/examples/async_bin_relocation_fwbw_drq/async_drq_randomized.py b/examples/async_bin_relocation_fwbw_drq/async_drq_randomized.py
@@ -377,17 +377,16 @@ def stats_callback(type: str, payload: dict) -> dict:
                 agent, critics_info = agent.update_critics(
                     batch,
                 )
-                agent = jax.block_until_ready(agent)
 
         with timer.context("train"):
             batch = next(replay_iterator)
             demo_batch = next(demo_iterator)
             batch = concat_batches(batch, demo_batch, axis=0)
             agent, update_info = agent.update_high_utd(batch, utd_ratio=1)
-            agent = jax.block_until_ready(agent)
 
         # publish the updated network
         if update_steps > 0 and update_steps % (FLAGS.steps_per_update) == 0:
+            agent = jax.block_until_ready(agent)
             server.publish_network(agent.state.params)
 
         if update_steps % FLAGS.log_period == 0 and wandb_logger:

diff --git a/examples/async_cable_route_drq/async_drq_randomized.py b/examples/async_cable_route_drq/async_drq_randomized.py
@@ -286,17 +286,16 @@ def stats_callback(type: str, payload: dict) -> dict:
                 agent, critics_info = agent.update_critics(
                     batch,
                 )
-                agent = jax.block_until_ready(agent)
 
         with timer.context("train"):
             batch = next(replay_iterator)
             demo_batch = next(demo_iterator)
             batch = concat_batches(batch, demo_batch, axis=0)
             agent, update_info = agent.update_high_utd(batch, utd_ratio=1)
-            agent = jax.block_until_ready(agent)
 
         # publish the updated network
         if step > 0 and step % (FLAGS.steps_per_update) == 0:
+            agent = jax.block_until_ready(agent)
             server.publish_network(agent.state.params)
 
         if update_steps % FLAGS.log_period == 0 and wandb_logger:

diff --git a/examples/async_drq_sim/async_drq_sim.py b/examples/async_drq_sim/async_drq_sim.py
@@ -229,15 +229,14 @@ def stats_callback(type: str, payload: dict) -> dict:
                 agent, critics_info = agent.update_critics(
                     batch,
                 )
-                agent = jax.block_until_ready(agent)
 
         with timer.context("train"):
             batch = next(replay_iterator)
             agent, update_info = agent.update_high_utd(batch, utd_ratio=1)
-            agent = jax.block_until_ready(agent)
 
         # publish the updated network
         if step > 0 and step % (FLAGS.steps_per_update) == 0:
+            agent = jax.block_until_ready(agent)
             server.publish_network(agent.state.params)
 
         if update_steps % FLAGS.log_period == 0 and wandb_logger:

diff --git a/examples/async_pcb_insert_drq/async_drq_randomized.py b/examples/async_pcb_insert_drq/async_drq_randomized.py
@@ -281,17 +281,16 @@ def stats_callback(type: str, payload: dict) -> dict:
                 agent, critics_info = agent.update_critics(
                     batch,
                 )
-                agent = jax.block_until_ready(agent)
 
         with timer.context("train"):
             batch = next(replay_iterator)
             demo_batch = next(demo_iterator)
             batch = concat_batches(batch, demo_batch, axis=0)
             agent, update_info = agent.update_high_utd(batch, utd_ratio=1)
-            agent = jax.block_until_ready(agent)
 
         # publish the updated network
         if step > 0 and step % (FLAGS.steps_per_update) == 0:
+            agent = jax.block_until_ready(agent)
             server.publish_network(agent.state.params)
 
         if update_steps % FLAGS.log_period == 0 and wandb_logger:

diff --git a/examples/async_peg_insert_drq/async_drq_randomized.py b/examples/async_peg_insert_drq/async_drq_randomized.py
@@ -280,17 +280,16 @@ def stats_callback(type: str, payload: dict) -> dict:
                 agent, critics_info = agent.update_critics(
                     batch,
                 )
-                agent = jax.block_until_ready(agent)
 
         with timer.context("train"):
             batch = next(replay_iterator)
             demo_batch = next(demo_iterator)
             batch = concat_batches(batch, demo_batch, axis=0)
             agent, update_info = agent.update_high_utd(batch, utd_ratio=1)
-            agent = jax.block_until_ready(agent)
 
         # publish the updated network
         if step > 0 and step % (FLAGS.steps_per_update) == 0:
+            agent = jax.block_until_ready(agent)
             server.publish_network(agent.state.params)
 
         if update_steps % FLAGS.log_period == 0 and wandb_logger:

diff --git a/examples/async_rlpd_drq_sim/async_rlpd_drq_sim.py b/examples/async_rlpd_drq_sim/async_rlpd_drq_sim.py
@@ -247,18 +247,17 @@ def stats_callback(type: str, payload: dict) -> dict:
                 agent, critics_info = agent.update_critics(
                     batch,
                 )
-                agent = jax.block_until_ready(agent)
 
         with timer.context("train"):
             batch = next(replay_iterator)
             if demo_iterator is not None:
                 demo_batch = next(demo_iterator)
                 batch = concat_batches(batch, demo_batch, axis=0)
             agent, update_info = agent.update_high_utd(batch, utd_ratio=1)
-            agent = jax.block_until_ready(agent)
 
         # publish the updated network
         if step > 0 and step % (FLAGS.steps_per_update) == 0:
+            agent = jax.block_until_ready(agent)
             server.publish_network(agent.state.params)
 
         if update_steps % FLAGS.log_period == 0 and wandb_logger: