
Commit

Merge pull request #324 from cpnota/develop
v0.9.1
cpnota authored Mar 17, 2024
2 parents 9ce894f + f8073e5 commit ac81d00
Showing 190 changed files with 5,014 additions and 3,118 deletions.
7 changes: 3 additions & 4 deletions .github/workflows/python-package.yml
@@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.8, 3.9]
python-version: [3.8, 3.11]

steps:
- uses: actions/checkout@v2
@@ -25,9 +25,8 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
sudo apt-get install swig
sudo apt-get install unrar
pip install torch~=1.11 --extra-index-url https://download.pytorch.org/whl/cpu
python -m pip install --upgrade pip
pip install torch~=2.0 --extra-index-url https://download.pytorch.org/whl/cpu
make install
- name: Lint code
run: |
35 changes: 18 additions & 17 deletions .github/workflows/python-publish.yml
@@ -1,33 +1,34 @@
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries

name: Upload Python Package

on:
release:
types: [created]
types: [published]

permissions:
contents: read

jobs:
deploy:

runs-on: ubuntu-latest

environment: deployment

environment: publish
permissions:
id-token: write
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v2
uses: actions/setup-python@v3
with:
python-version: '3.x'
python-version: 3.11
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine
- name: Build and publish
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python setup.py sdist bdist_wheel
twine upload dist/*
pip install torch~=2.0 --extra-index-url https://download.pytorch.org/whl/cpu
pip install setuptools wheel
make install
- name: Build package
run: make build
- name: Publish package
uses: pypa/gh-action-pypi-publish@release/v1
24 changes: 7 additions & 17 deletions .readthedocs.yml
@@ -1,26 +1,16 @@
# .readthedocs.yml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Build documentation in the docs/ directory with Sphinx
sphinx:
configuration: docs/source/conf.py

# Build documentation with MkDocs
#mkdocs:
# configuration: mkdocs.yml
build:
os: "ubuntu-22.04"
tools:
python: "3.11"

# Optionally build your docs in additional formats such as PDF and ePub
formats: all

# Optionally set the version of Python and requirements required to build your docs
python:
version: 3.7
install:
- method: pip
path: .
extra_requirements:
- docs

sphinx:
configuration: docs/source/conf.py
7 changes: 5 additions & 2 deletions Makefile
@@ -11,10 +11,13 @@ integration-test:
python -m unittest discover -s integration -p "*test.py"

lint:
flake8 --ignore "E501,E731,E74,E402,F401,W503,E128" all
black --check all benchmarks examples integration setup.py
isort --profile black --check all benchmarks examples integration setup.py
flake8 --select "F401" all benchmarks examples integration setup.py

format:
autopep8 --in-place --aggressive --aggressive --ignore "E501,E731,E74,E402,F401,W503,E128" -r all
black all benchmarks examples integration setup.py
isort --profile black all benchmarks examples integration setup.py

tensorboard:
tensorboard --logdir runs
7 changes: 4 additions & 3 deletions README.md
@@ -21,10 +21,11 @@ Additionally, we provide an [example project](https://github.com/cpnota/all-examples

## High-Quality Reference Implementations

The `autonomous-learning-library` separates reinforcement learning agents into two modules: `all.agents`, which provides flexible, high-level implementations of many common algorithms that can be adapted to new problems and environments, and `all.presets`, which provides specific instantiations of these agents tuned for particular sets of environments, including Atari games, classic control tasks, and PyBullet robotics simulations. Benchmark results on par with published results can be found below:
The `autonomous-learning-library` separates reinforcement learning agents into two modules: `all.agents`, which provides flexible, high-level implementations of many common algorithms that can be adapted to new problems and environments, and `all.presets`, which provides specific instantiations of these agents tuned for particular sets of environments, including Atari games, classic control tasks, and MuJoCo/PyBullet robotics simulations. Benchmark results on par with published results can be found below:

![atari40](benchmarks/atari40.png)
![pybullet](benchmarks/pybullet.png)
![atari40](benchmarks/atari_40m.png)
![mujoco](benchmarks/mujoco_v4.png)
![pybullet](benchmarks/pybullet_v0.png)

As of today, `all` contains implementations of the following deep RL algorithms:

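The paragraph above describes the split between `all.agents` (flexible algorithm implementations) and `all.presets` (environment-tuned instantiations). As a rough illustration of that pattern, the sketch below follows the usage style from the project's documentation; the exact import paths and signatures (`all.presets.atari`, `AtariEnvironment`, `run_experiment`) are assumptions and should be verified against the v0.9.1 docs.

```python
# Hedged usage sketch of the agents/presets split described in the README above.
# Import paths and call signatures are assumptions based on the project's docs,
# not verified against v0.9.1.
from all.environments import AtariEnvironment  # assumed import path
from all.experiments import run_experiment     # assumed import path
from all.presets import atari                  # presets tuned for Atari games

# A preset pairs an agent from all.agents with hyperparameters tuned for a
# family of environments; run_experiment drives the training loop and logging.
run_experiment(
    [atari.dqn.device("cuda")],                # assumed builder-style preset
    [AtariEnvironment("Breakout", device="cuda")],
    10_000_000,                                # training frames
)
```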
34 changes: 12 additions & 22 deletions all/__init__.py
@@ -1,26 +1,16 @@
import all.agents
import all.approximation
import all.core
import all.environments
import all.logging
import all.memory
import all.nn
import all.optim
import all.policies
import all.presets
from all.core import State, StateArray

__all__ = [
'agents',
'approximation',
'core',
'environments',
'logging',
'memory',
'nn',
'optim',
'policies',
'presets',
'State',
'StateArray'
"agents",
"approximation",
"core",
"environments",
"logging",
"memory",
"nn",
"optim",
"policies",
"presets",
"State",
"StateArray",
]
1 change: 0 additions & 1 deletion all/agents/__init__.py
@@ -15,7 +15,6 @@
from .vqn import VQN, VQNTestAgent
from .vsarsa import VSarsa, VSarsaTestAgent


__all__ = [
# Agent interfaces
"Agent",
1 change: 1 addition & 0 deletions all/agents/_agent.py
@@ -1,4 +1,5 @@
from abc import ABC, abstractmethod

from all.optim import Schedulable


1 change: 1 addition & 0 deletions all/agents/_multiagent.py
@@ -1,4 +1,5 @@
from abc import ABC, abstractmethod

from all.optim import Schedulable


1 change: 1 addition & 0 deletions all/agents/_parallel_agent.py
@@ -1,4 +1,5 @@
from abc import ABC, abstractmethod

from all.optim import Schedulable


31 changes: 17 additions & 14 deletions all/agents/a2c.py
@@ -1,7 +1,8 @@
import torch
from torch.nn.functional import mse_loss

from all.logging import DummyLogger
from all.memory import NStepAdvantageBuffer

from ._agent import Agent
from ._parallel_agent import ParallelAgent

@@ -28,15 +29,15 @@ class A2C(ParallelAgent):
"""

def __init__(
self,
features,
v,
policy,
discount_factor=0.99,
entropy_loss_scaling=0.01,
n_envs=None,
n_steps=4,
logger=DummyLogger()
self,
features,
v,
policy,
discount_factor=0.99,
entropy_loss_scaling=0.01,
n_envs=None,
n_steps=4,
logger=DummyLogger(),
):
if n_envs is None:
raise RuntimeError("Must specify n_envs.")
@@ -80,7 +81,9 @@ def _train(self, next_states):
value_loss = mse_loss(values, targets)
policy_gradient_loss = -(distribution.log_prob(actions) * advantages).mean()
entropy_loss = -distribution.entropy().mean()
policy_loss = policy_gradient_loss + self.entropy_loss_scaling * entropy_loss
policy_loss = (
policy_gradient_loss + self.entropy_loss_scaling * entropy_loss
)
loss = value_loss + policy_loss

# backward pass
@@ -90,16 +93,16 @@ def _train(self, next_states):
self.features.step()

# record metrics
self.logger.add_info('entropy', -entropy_loss)
self.logger.add_info('normalized_value_error', value_loss / targets.var())
self.logger.add_info("entropy", -entropy_loss)
self.logger.add_info("normalized_value_error", value_loss / targets.var())

def _make_buffer(self):
return NStepAdvantageBuffer(
self.v,
self.features,
self.n_steps,
self.n_envs,
discount_factor=self.discount_factor
discount_factor=self.discount_factor,
)


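For reference, the loss computed in the `_train` hunk above (value loss plus entropy-regularized policy-gradient loss) can be reproduced in isolation. This is a minimal sketch with dummy tensors standing in for the feature network, value head, and n-step advantage buffer; it is not the library's training loop.

```python
import torch
from torch.nn.functional import mse_loss

# Dummy stand-ins for the quantities A2C._train pulls from its networks/buffer.
torch.manual_seed(0)
values = torch.randn(8)                      # V(s) predictions
targets = values + 0.1 * torch.randn(8)      # n-step return targets
advantages = targets - values                # advantage estimates
logits = torch.randn(8, 4, requires_grad=True)
distribution = torch.distributions.Categorical(logits=logits)
actions = distribution.sample()

entropy_loss_scaling = 0.01                  # same default as in the hunk above
value_loss = mse_loss(values, targets)
policy_gradient_loss = -(distribution.log_prob(actions) * advantages).mean()
entropy_loss = -distribution.entropy().mean()
policy_loss = policy_gradient_loss + entropy_loss_scaling * entropy_loss
loss = value_loss + policy_loss
loss.backward()                              # gradients reach the policy logits
```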
39 changes: 22 additions & 17 deletions all/agents/c51.py
@@ -1,6 +1,8 @@
import torch
import numpy as np
import torch

from all.logging import DummyLogger

from ._agent import Agent


@@ -26,16 +28,16 @@ class C51(Agent):
"""

def __init__(
self,
q_dist,
replay_buffer,
discount_factor=0.99,
eps=1e-5,
exploration=0.02,
minibatch_size=32,
replay_start_size=5000,
update_frequency=1,
logger=DummyLogger(),
self,
q_dist,
replay_buffer,
discount_factor=0.99,
eps=1e-5,
exploration=0.02,
minibatch_size=32,
replay_start_size=5000,
update_frequency=1,
logger=DummyLogger(),
):
# objects
self.q_dist = q_dist
@@ -81,7 +83,9 @@ def _best_actions(self, probs):
def _train(self):
if self._should_train():
# sample transitions from buffer
states, actions, rewards, next_states, weights = self.replay_buffer.sample(self.minibatch_size)
states, actions, rewards, next_states, weights = self.replay_buffer.sample(
self.minibatch_size
)
# forward pass
dist = self.q_dist(states, actions)
# compute target distribution
@@ -100,14 +104,15 @@ def _train(self):

def _should_train(self):
self._frames_seen += 1
return self._frames_seen > self.replay_start_size and self._frames_seen % self.update_frequency == 0
return (
self._frames_seen > self.replay_start_size
and self._frames_seen % self.update_frequency == 0
)

def _compute_target_dist(self, states, rewards):
actions = self._best_actions(self.q_dist.no_grad(states))
dist = self.q_dist.target(states, actions)
shifted_atoms = (
rewards.view((-1, 1)) + self.discount_factor * self.q_dist.atoms
)
shifted_atoms = rewards.view((-1, 1)) + self.discount_factor * self.q_dist.atoms
return self.q_dist.project(dist, shifted_atoms)

def _kl(self, dist, target_dist):
@@ -117,7 +122,7 @@ def _kl(self, dist, target_dist):


class C51TestAgent(Agent):
def __init__(self, q_dist, n_actions, exploration=0.):
def __init__(self, q_dist, n_actions, exploration=0.0):
self.q_dist = q_dist
self.n_actions = n_actions
self.exploration = exploration
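The `_compute_target_dist` hunk above shifts the fixed support by the Bellman backup (`rewards + discount_factor * atoms`) and projects the resulting distribution back onto that support. The sketch below reproduces that shift together with a standard categorical projection on dummy data; the library's `q_dist.project` is assumed to do something equivalent but may differ in details such as terminal-state handling.

```python
import torch

# Fixed support z_1..z_51, as in the standard C51 setup (values are illustrative).
v_min, v_max, n_atoms = -10.0, 10.0, 51
atoms = torch.linspace(v_min, v_max, n_atoms)
delta_z = (v_max - v_min) / (n_atoms - 1)

discount_factor = 0.99
rewards = torch.tensor([1.0, 0.0, -1.0])
target_dist = torch.softmax(torch.randn(3, n_atoms), dim=1)  # stand-in for q_dist.target(...)

# Shift the support by the Bellman backup, mirroring the hunk above.
shifted_atoms = rewards.view((-1, 1)) + discount_factor * atoms
shifted_atoms = shifted_atoms.clamp(v_min, v_max)

# Project each shifted atom's mass onto its two nearest support points.
b = (shifted_atoms - v_min) / delta_z
lower, upper = b.floor().long(), b.ceil().long()
projected = torch.zeros_like(target_dist)
projected.scatter_add_(1, lower, target_dist * (upper.float() - b))
projected.scatter_add_(1, upper, target_dist * (b - lower.float()))
projected.scatter_add_(1, lower, target_dist * (lower == upper).float())  # exact hits
assert torch.allclose(projected.sum(dim=1), torch.ones(3))
```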