Merge pull request #12 from HendrikPN/feature/min_loss

New architecture and results
HendrikPN · Jan 7, 2021 · b57c9d1 · b57c9d1
2 parents b520f0c + 7dad4fb
commit b57c9d1
Show file tree

Hide file tree

Showing 16 changed files with 147 additions and 82 deletions.
diff --git a/README.md b/README.md
@@ -67,7 +67,7 @@ Once you run the program, you should see numbers like the following:
 ```
 What you see is the training of the prediction agents and an autoencoder. Given an environment objective (here `env1`), the policy prediction has a smooth L1 loss of 1.2. The autoencoder trying to reproduce the input has a binary cross entropy loss of 0.297. Over time, this will be reduced while the selection neurons start affecting the latent representation with noise. A log of the amount of noise is being generated at `results_log/selection.txt`. There you find results like this:
 ```
-env1, [-9.83377456665039, -9.83377456665039, -9.83377456665039]
+env1, [-9.865717887878418, -9.865717887878418, -9.865717887878418, -9.865717887878418, -9.865717887878418, -9.865717887878418, -9.865717887878418, -9.865717887878418]
 ```
 These numbers quantify the noise of the selection neurons (one value per latent neuron) for the environment objective (or decoder agent) `env1`. Once one of these values increases above 0, the value of the associated latent neuron cannot be recovered by a decoder.
 
@@ -97,8 +97,20 @@ This is the current setting of the code and can be immediately performed by runn
 in the main directory.
 That is, if you run the code as is, you will start training one autoencoder and three decoders to predict the behavior 
 of the pretrained DPS agents. 
-At the same, selection neurons will pressure the encoder to create a representation of the observation that can be 
+At the same time, selection neurons will pressure the encoder to create a minimal representation of the observation that can be 
 shared among decoders efficiently.
 In obtaining the results in the paper, we observed the following loss:
 
 ![Results Loss](assets/images/results_loss.png)
+
+### Data availability
+
+The data for the plots of the training progress here and in the publication is made available on 
+[Zenodo](https://doi.org/10.5281/zenodo.4425741).
+
+## What's New
+
++ 2021-01-07: Brand new results
++ 2021-01-07: Updated architecture and loss function for automated dimensionality reduction
++ 2020-02-21: Bug fixes
++ 2020-01-06: Hello world :)
diff --git a/assets/images/latent_space.png b/assets/images/latent_space.png
diff --git a/assets/images/results_loss.png b/assets/images/results_loss.png
diff --git a/assets/images/results_rl.png b/assets/images/results_rl.png
diff --git a/assets/images/selection_figure.png b/assets/images/selection_figure.png
diff --git a/reinforced_scinet/analysis/_run_analysis.py b/reinforced_scinet/analysis/_run_analysis.py
@@ -16,18 +16,18 @@
 
 PLOT_LATENT = False # plot the latent variable's behavior
 PLOT_RESULTS = False # plot the performance of the RL agent
-PLOT_RESULTS_LOSS = True # plot the performance of pretrainer
-PLOT_FIGURE = False # plot the figure from the whitepaper
+PLOT_RESULTS_LOSS = False # plot the performance of pretrainer
+PLOT_FIGURE = True # plot the figure from the whitepaper
 
 ENV_ID = 'env2' # the environment id to be used (usually not relevant)
 
 if __name__ == "__main__":
- analyzer = AnalyzerSubGridWorld(ENV_ID, load_model=PLOT_LATENT)
+ analyzer = AnalyzerSubGridWorld(ENV_ID, load_model=True)
  if PLOT_LATENT:
- analyzer.plot_latent_space()
+ analyzer.plot_latent_space(limit=[-0.3, 0.3])
  if PLOT_RESULTS:
- analyzer.plot_results_figure(avg_mod=200)
+ analyzer.plot_results_figure(avg_mod=200, limit=[0., 400.])
  if PLOT_RESULTS_LOSS:
- analyzer.plot_loss_figure(avg_mod=100)
+ analyzer.plot_loss_figure(avg_mod=100, limitDec=[0., 2.], limitAE=[0., 0.02])
  if PLOT_FIGURE:
- analyzer.plot_selection_figure()
+ analyzer.plot_selection_figure(plot_neurons=[6, 1, 0], limit=[-0.3, 0.3])
diff --git a/reinforced_scinet/analysis/analyzer.py b/reinforced_scinet/analysis/analyzer.py
@@ -44,7 +44,7 @@ def __init__(self, env_id, load_model=False):
 
  Args:
  env_id (str): The environment Id specifying which environment to use given a Config. 
- load_model (bool): Whether or not to load the autoencoder as specified in Config. Defaults to False.
+ load_model (bool, optional): Whether or not to load the autoencoder as specified in Config. Default: False.
  """
  if not Config.ENV_NAME == 'subgridworld-v0':
  raise NotImplementedError("This analyzer is tailored to the sub-grid world environment. "+
@@ -61,13 +61,12 @@ def __init__(self, env_id, load_model=False):
  self.encoder.load_state_dict(loaded_dict)
  self.encoder = self.encoder.encoder
 
- def plot_latent_space(self, limit_min=-1., limit_max=1.):
+ def plot_latent_space(self, limit=[-0.5, 0.5]):
  """
  This method plots the responses of all latent neurons to combinations of x,y,z positions.
 
  Args:
- limit_min (float): The minimal value that is being plotted.
- limit_max (float): The maximal value that is being pltoted.
+ limit (list, optional): The minimal and maximal value that are being plotted. Default: [-0.5, 0.5]
  """
  latent_neurons = range(Config.LATENT_SIZE)
  axes_label = ['x', 'y', 'z'] # all axes
@@ -86,24 +85,29 @@ def plot_latent_space(self, limit_min=-1., limit_max=1.):
  # get responses for points
  z = self._get_data_subgridworld(x, y, axes, ignore_axes[i], neuron)
 
- ax = fig.add_subplot(3,3,index, projection='3d')
+ ax = fig.add_subplot(Config.LATENT_SIZE, 3, index, projection='3d')
  surf = ax.plot_surface(x, y, z, cmap=cm.inferno,
- linewidth=0, antialiased=False, vmin=limit_min, vmax=limit_max)
+ linewidth=0, antialiased=False, vmin=limit[0], vmax=limit[1])
 
- ax.set_xlabel(f"{axes_label[axes[0]]}-axis", fontsize=22, labelpad=15)
- ax.set_ylabel(f"{axes_label[axes[1]]}-axis", fontsize=22, labelpad=15)
- ax.set_zlabel(f"Latent variable #{neuron}", fontsize=22, labelpad=15)
- ax.set_zlim(limit_min, limit_max)
+ fs = 22 * 5/Config.LATENT_SIZE
+ ax.set_xlabel(f"{axes_label[axes[0]]}-axis", fontsize=fs, labelpad=15)
+ ax.set_ylabel(f"{axes_label[axes[1]]}-axis", fontsize=fs, labelpad=15)
+ ax.set_zlabel(f"Latent variable #{neuron}", fontsize=fs, labelpad=15)
+ ax.set_zlim(limit[0], limit[1])
 
- plt.tight_layout()
+ plt.tight_layout(pad=1.5)
  plt.savefig('results/latent_space.png')
  plt.show()
 
- def plot_selection_figure(self):
+ def plot_selection_figure(self, plot_neurons=[6, 1, 0], limit=[-0.3, 0.3]):
  """
  Plot for the figure as it appears in the whitepaper. 
+
+ Args:
+ plot_neurons (list): The indices of neurons to be plotted.
+ limit (list): The minimal and maximal value that are being plotted.
  """
- latent_neurons = range(Config.LATENT_SIZE)
+ latent_neurons = plot_neurons # the relevant neurons
  axes_label = ['x', 'y', 'z'] # all axes
  show_axes = [[0,1], [1,2], [0,2]] # combinations of axes to be plotted
  ignore_axes = [2,0,1] # axes which are to be ignored per plot
@@ -127,9 +131,9 @@ def plot_selection_figure(self):
  # get responses for points
  z = self._get_data_subgridworld(x, y, axes, ignore_axes[i], neuron)
  # add plot to figure
- ax = fig.add_subplot(3,3,index, projection='3d')
+ ax = fig.add_subplot(3, 3, index, projection='3d')
  surf = ax.plot_surface(x, y, z, cmap=cm.inferno,
- linewidth=0, antialiased=False, vmin=-1.2, vmax=1.2)
+ linewidth=0, antialiased=False, vmin=limit[0], vmax=limit[1])
 
  # style changes: labels, ticks, limits,...
  ax.set_xlabel(f"{axes_label[axes[0]]}-axis", fontsize=22, labelpad=15)
@@ -145,7 +149,7 @@ def plot_selection_figure(self):
  ax.text2D(0.01, 0.8, 'Latent neuron\nactivation', fontsize=25, transform=plt.gcf().transFigure)
  if index in [4]:
  ax.text2D(0.01, 0.5, 'Latent neuron\nactivation', fontsize=25, transform=plt.gcf().transFigure)
- ax.set_zlim(-1.2, 1.2)
+ ax.set_zlim(limit[0], limit[1])
 
  # create plots of selection noise
  for neuron in latent_neurons:
@@ -154,16 +158,17 @@ def plot_selection_figure(self):
 
  plt.tight_layout()
  plt.subplots_adjust(wspace=0.1, hspace=0.1)
- plt.savefig('results/selection_figure.png')
+ plt.savefig('results/selection_figure.pdf')
  plt.show()
 
- def plot_results_figure(self, avg_mod=200):
+ def plot_results_figure(self, avg_mod=200, limit=[0., 400.]):
  """
  Plots reinforcement learning results for the various agents in the sub-grid world environment.
  Plots the results from one results file.
 
  Args:
- avg_mod (int): The number of elements to be averaged over. Defaults to 200.
+ avg_mod (int, optional): The number of elements to be averaged over. Default: 200.
+ limit (list, optional): The y-limit for the plot. Default: [0., 400.].
  """
  fig = plt.figure("Reinforcement learning results", figsize=(25,6))
 
@@ -173,28 +178,30 @@ def plot_results_figure(self, avg_mod=200):
  plt.rcParams.update(params)
 
  for index, env_id in enumerate(Config.ENV_IDS):
- self._plot_results(fig, index+1, env_id, avg_mod)
+ self._plot_results(fig, index+1, env_id, avg_mod, limit)
  plt.tight_layout()
  plt.savefig('results/results_rl.png')
  plt.show()
 
- def plot_loss_figure(self, avg_mod=100):
+ def plot_loss_figure(self, avg_mod=100, limitDec=[0.,1.], limitAE=[0.,0.01]):
  """
  Plots prediction loss results for the various decoders (and autoencoders) in the sub-grid world environment.
  Plots the results from one loss results file.
 
  Args:
- avg_mod (int): The number of elements to be averaged over. Defaults to 100.
+ avg_mod (int, optional): The number of elements to be averaged over. Default: 100.
+ limitDec (list, optional): The y-limit for the decoders. Default: [0., 1.].
+ limitAE (list, optional): The y-limit for the autoencoder. Default: [0., 0.01].
  """
  fig = plt.figure("Prediction loss", figsize=(25,6))
 
  # style changes
  plt.rc('font', size=20)
  params = {'axes.linewidth': 1.}
  plt.rcParams.update(params)
- self._plot_results_loss(fig, 1, None, 'Autoencoder', avg_mod)
+ self._plot_results_loss(fig, 1, None, 'Autoencoder', avg_mod, limitDec, limitAE)
  for index, env_id in enumerate(Config.ENV_IDS):
- self._plot_results_loss(fig, index+2, env_id, 'Policy', avg_mod)
+ self._plot_results_loss(fig, index+2, env_id, 'Policy', avg_mod, limitDec, limitAE)
  plt.tight_layout()
  plt.savefig('results/results_loss.png')
  plt.show()
@@ -247,20 +254,21 @@ def _get_data_subgridworld(self, x, y, axis_show, axis_ignore, latent_neuron):
 
  return data.numpy()
 
- def _plot_selection(self, fig, selection_neuron, index):
+ def _plot_selection(self, fig, selection_neuron, index, limit=[-10,10]):
  """
  Plots the selection noise in the figure for the whitepaper.
 
  Args:
  fig (plt.figure): The figure in which the plot is to be displayed.
  selection_neuron (int): The index of the selection neuron to be plotted.
  index (int): The index in the figure.
+ limit (list, optional): The y-limit for the figure. Default: [-10, 10].
  """
  # get data points from file
  downsample = 10 # do not take all points from file
  with open('../results_log/selection.txt', 'r') as data:
  results_full = np.array([line.split() for line in data])
- results_id = np.delete(results_full, [1,2,3], 1).flatten()
+ results_id = np.delete(results_full, [i for i in range(1, Config.LATENT_SIZE+1)], 1).flatten()
  results_full = np.delete(results_full, [0], 1)#.flatten()
  data.close()
 
@@ -271,7 +279,7 @@ def _plot_selection(self, fig, selection_neuron, index):
 
  # create plot
  ax = fig.add_subplot(3,3,index)
- for env_id in Config.ENV_IDS:
+ for env_id in Config.ENV_IDS + ['enc']:
  # get lines for specific env ID
  positions = np.where(results_id == env_id+',')
  results = results_full[positions]
@@ -302,15 +310,16 @@ def _plot_selection(self, fig, selection_neuron, index):
  if index in [7]:
  ax.text(0.01, 0.15, 'Selection\nneuron\nactivation', fontsize=25, transform=plt.gcf().transFigure)
  if index in [9]:
- legend = ax.legend(['Decoder 1','Decoder 2', 'Decoder 3'], loc="upper right", bbox_to_anchor=(1.09, 0.55))
+ legend = ax.legend(['Decoder 1','Decoder 2', 'Decoder 3', 'Encoder'], 
+ loc="upper right", bbox_to_anchor=(1.09, 0.58))
  legend.get_frame().set_linewidth(2.0)
  plt.xlabel("Training Episodes", fontsize=25, labelpad=15)
  ax.grid(axis='y')
  ax.yaxis.set_major_locator(plt.MaxNLocator(6))
- plt.ylim(-10,10)
+ plt.ylim(limit[0],limit[1])
 
 
- def _plot_results(self, fig, label, env_id, avg_mod):
+ def _plot_results(self, fig, label, env_id, avg_mod, limit):
  """
  Plots the results for one agent with a specified environment id.
  The plot is compressed by averaging over `avg_mod` number of steps.
@@ -320,6 +329,7 @@ def _plot_results(self, fig, label, env_id, avg_mod):
  label (int): The index of the plot in figure.
  env_id (str): The id of the environment for which the results are to be displayed.
  avg_mod (int): The number of elements to be averaged over.
+ limit (list): The y-limit for the plot.
  """
  ax = fig.add_subplot(1,3,label)
  # get results
@@ -360,9 +370,9 @@ def _plot_results(self, fig, label, env_id, avg_mod):
  plt.setp(ax.get_yticklabels(), visible=False)
  plt.title(f"Agent #{label}")
  plt.grid(True)
- plt.ylim(0,400)
+ plt.ylim(limit[0],limit[1])
 
- def _plot_results_loss(self, fig, label, env_id, loss_type, avg_mod):
+ def _plot_results_loss(self, fig, label, env_id, loss_type, avg_mod, limitDec, limitAE):
  """
  Plots the prediction loss for decoders and autoencoder.
 
@@ -372,6 +382,8 @@ def _plot_results_loss(self, fig, label, env_id, loss_type, avg_mod):
  env_id (str): The id of the environment for which the results are to be displayed.
  loss_type (str): The type of loss coming either from 'Autoencoder' or 'Policy'
  avg_mod (int): The number of elements to be averaged over.
+ limitDec (list): The y-limit for the decoder plot.
+ limitAE (list): The y-limit for the autoencoder plot.
 
  """
  ax = fig.add_subplot(1,4,label)
@@ -423,9 +435,9 @@ def _plot_results_loss(self, fig, label, env_id, loss_type, avg_mod):
  # remove y label
  plt.setp(ax.get_yticklabels(), visible=False)
  plt.title(f"Decoder #{label-1}")
- plt.ylim(0., 0.6)
+ plt.ylim(limitDec[0], limitDec[1])
  if label == 1:
  plt.title(f"Autoencoder")
- plt.ylim(0., 0.002)
+ plt.ylim(limitAE[0], limitAE[1])
  plt.ylabel("BCE loss", labelpad=15)
  plt.grid(True)
diff --git a/reinforced_scinet/config.py b/reinforced_scinet/config.py
@@ -105,7 +105,7 @@ class Config:
 
  INPUT_SIZE = 36 # FROM ENV SPEC: sum(grid_size) 
 
- LATENT_SIZE = 3 # FROM ENV SPEC: minimum number of neurons to reproduce img 
+ LATENT_SIZE = 8 # FROM ENV SPEC: minimum number of neurons to reproduce img 
 
  NUM_ACTIONS = 6 # FROM ENV SPEC: number of available actions 
 
@@ -173,7 +173,9 @@ class Config:
 
  AGENT_PREDICT_DISCOUNT = 1.0 # the discount rate of the agent's loss in 'selection' mode
 
- SELECTION_DISCOUNT = 0.02 # the discount rate of the selection neuron's loss
+ SELECTION_DISCOUNT = 0.04 # the discount rate of the selection neuron's loss
+
+ MIN_DISCOUNT = 0.02 # the discount rate of the representation minimization
 
  REWARD_RESCALE = 10.0 # factor by which the discounted reward is rescaled
 

diff --git a/reinforced_scinet/load_models/ae_select.pth b/reinforced_scinet/load_models/ae_select.pth
diff --git a/reinforced_scinet/memory.py b/reinforced_scinet/memory.py
@@ -64,7 +64,7 @@ def add_event(self, event, flag=True):
 
  Args:
  event (tuple): Tuple consisting of observation, action, h-value and reward.
- flag (bool): Whether or not this event will be used for training. Default is True.
+ flag (bool, optional): Whether or not this event will be used for training. Default: True.
  """
  self.history.append(event)
  self.flags.append(flag)