From e9ab4c1988a262690752eb3b856658b61072348b Mon Sep 17 00:00:00 2001 From: Tyler Morrow Date: Thu, 15 Aug 2024 09:03:41 -0600 Subject: [PATCH] Move common imports to top-level; fix model saving bugs. --- examples/courses/Primer 1/Primer1.ipynb | 68 +- examples/data/conversion/pcf_to_ss.py | 3 +- examples/data/difficulty_score.py | 4 +- .../data/preprocessing/energy_calibration.py | 4 +- examples/data/synthesis/mix_seeds.py | 4 +- examples/data/synthesis/synthesize_passbys.py | 3 +- examples/data/synthesis/synthesize_seeds.py | 2 +- .../synthesis/synthesize_seeds_advanced.py | 2 +- examples/data/synthesis/synthesize_spectra.py | 4 +- examples/modeling/anomaly_detection.py | 4 +- examples/modeling/arad.py | 6 +- examples/modeling/arad_latent_prediction.py | 6 +- examples/modeling/classifier_comparison.py | 7 +- .../modeling/label_proportion_estimation.py | 6 +- .../modeling/neural_network_classifier.py | 6 +- examples/run_examples.py | 1 + examples/visualization/confusion_matrix.py | 6 +- examples/visualization/distance_matrix.py | 2 +- .../plot_sampleset_compare_to.py | 4 +- examples/visualization/plot_spectra.py | 2 +- riid/__init__.py | 12 +- riid/data/converters/aipt.py | 3 +- riid/data/converters/topcoder.py | 3 +- riid/data/synthetic/__init__.py | 374 +--------- riid/data/synthetic/base.py | 290 ++++++++ riid/data/synthetic/passby.py | 4 +- riid/data/synthetic/seed.py | 88 ++- riid/data/synthetic/static.py | 4 +- riid/gadras/api.py | 12 +- riid/metrics.py | 2 +- riid/models/__init__.py | 245 +------ riid/models/base.py | 256 +++++++ riid/models/bayes.py | 4 +- riid/models/neural_nets/__init__.py | 669 +----------------- riid/models/neural_nets/arad.py | 6 +- riid/models/neural_nets/basic.py | 197 ++++++ riid/models/neural_nets/lpe.py | 489 +++++++++++++ riid/visualize.py | 2 +- tests/anomaly_tests.py | 4 +- tests/data_tests.py | 6 +- tests/gadras_tests.py | 3 +- tests/model_tests.py | 15 +- tests/sampleset_tests.py | 4 +- tests/seedmixer_tests.py | 4 +- tests/staticsynth_tests.py | 9 +- tests/visualize_tests.py | 6 +- 46 files changed, 1442 insertions(+), 1413 deletions(-) create mode 100644 riid/data/synthetic/base.py create mode 100644 riid/models/base.py create mode 100644 riid/models/neural_nets/basic.py create mode 100644 riid/models/neural_nets/lpe.py diff --git a/examples/courses/Primer 1/Primer1.ipynb b/examples/courses/Primer 1/Primer1.ipynb index ffe4714d..459e3ef8 100755 --- a/examples/courses/Primer 1/Primer1.ipynb +++ b/examples/courses/Primer 1/Primer1.ipynb @@ -155,7 +155,7 @@ "from riid.gadras.api import GADRAS_API_SEEMINGLY_AVAILABLE\n", "\n", "if GADRAS_API_SEEMINGLY_AVAILABLE:\n", - " from riid.data.synthetic.seed import SeedSynthesizer\n", + " from riid import SeedSynthesizer\n", " seed_syn = SeedSynthesizer()\n", " # The YAML file defining the seed synthesis specification is ultimately parsed into a dictionary.\n", " # You can also load it yourself and pass in the dictionary instead - this is useful for varying detector parameters!\n", @@ -163,7 +163,7 @@ "else:\n", " # If you don't have Windows with GADRAS installed, this will use the dummy seeds below which are not actual gamma spectra.\n", " # Another option would be to load a seeds file obtained elsewhere.\n", - " from riid.data.synthetic import get_dummy_seeds\n", + " from riid import get_dummy_seeds\n", " seeds_ss = get_dummy_seeds()" ] }, @@ -251,7 +251,7 @@ "outputs": [], "source": [ "\"\"\"Seed mixing\"\"\"\n", - "from riid.data.synthetic.seed import SeedMixer\n", + "from riid import SeedMixer\n", "\n", 
"mixed_bg_seeds_ss = SeedMixer(\n", " bg_seeds_ss,\n", @@ -278,7 +278,7 @@ "outputs": [], "source": [ "\"\"\"Combining SampleSets\"\"\"\n", - "from riid.data.sampleset import SampleSet\n", + "from riid import SampleSet\n", "\n", "combined_ss = SampleSet()\n", "combined_ss.concat([fg_seeds_ss, mixed_bg_seeds_ss])\n", @@ -318,14 +318,14 @@ "outputs": [], "source": [ "\"\"\"Static Synthesis\"\"\"\n", - "from riid.data.synthetic.static import StaticSynthesizer\n", + "from riid import StaticSynthesizer\n", "\n", "static_syn = StaticSynthesizer(\n", " samples_per_seed=100,\n", " bg_cps=300,\n", " live_time_function=\"uniform\",\n", " live_time_function_args=(0.25, 8),\n", - " snr_function=\"uniform\",\n", + " snr_function=\"log10\",\n", " snr_function_args=(0.1, 100),\n", " apply_poisson_noise=True,\n", " return_fg=True,\n", @@ -348,8 +348,7 @@ "outputs": [], "source": [ "\"\"\"Normalization\"\"\"\n", - "gross_ss.normalize()\n", - "bg_ss.normalize()" + "fg_ss.normalize()" ] }, { @@ -371,16 +370,10 @@ "outputs": [], "source": [ "\"\"\"Model fitting\"\"\"\n", - "from riid.models.neural_nets import MLPClassifier\n", - "from riid.metrics import single_f1\n", + "from riid.models import MLPClassifier\n", "\n", - "model = MLPClassifier(\n", - " hidden_layers=(256,),\n", - " learning_rate=4e-3,\n", - " metrics=[single_f1]\n", - ")\n", - "\n", - "history = model.fit(gross_ss, bg_ss, epochs=25, patience=5, verbose=True)" + "model = MLPClassifier()\n", + "history = model.fit(fg_ss, epochs=10, verbose=True)" ] }, { @@ -402,12 +395,9 @@ "outputs": [], "source": [ "\"\"\"Generate some in-distribution data the model has not seen.\"\"\"\n", - "test_bg_ss, test_gross_ss = static_syn.generate(fg_seeds_ss, bg_seeds_ss)\n", - "test_bg_ss.normalize()\n", - "test_gross_ss.normalize()\n", - "# Adjust ground truth\n", - "#test_gross_ss.sources.drop(test_bg_ss.sources.columns, axis=1, inplace=True)\n", - "#test_gross_ss.normalize_sources()" + "test_fg_ss, test_gross_ss = static_syn.generate(fg_seeds_ss, bg_seeds_ss)\n", + "test_fg_ss.normalize()\n", + "test_gross_ss.normalize()" ] }, { @@ -417,7 +407,7 @@ "outputs": [], "source": [ "\"\"\"Use the model!\"\"\"\n", - "model.predict(test_gross_ss, test_bg_ss) # Saved in your SampleSet containing non-background sources (the gross spectra)" + "model.predict(test_fg_ss) # Results are saved in the SampleSet's prediction_probas DataFrame" ] }, { @@ -429,8 +419,8 @@ "\"\"\"Calculate performance metric\"\"\"\n", "from sklearn.metrics import f1_score\n", "\n", - "labels = test_gross_ss.get_labels()\n", - "predictions = test_gross_ss.get_predictions()\n", + "labels = test_fg_ss.get_labels()\n", + "predictions = test_fg_ss.get_predictions()\n", "f1_score(labels, predictions, average=\"micro\")" ] }, @@ -443,7 +433,7 @@ "\"\"\"Confusion Matrix\"\"\"\n", "from riid.visualize import confusion_matrix\n", "\n", - "_ = confusion_matrix(test_gross_ss)" + "_ = confusion_matrix(test_fg_ss)" ] }, { @@ -455,7 +445,7 @@ "\"\"\"SNR vs. 
Model Score\"\"\"\n", "from riid.visualize import plot_snr_vs_score\n", "\n", - "_ = plot_snr_vs_score(test_gross_ss, xscale=\"log\")" + "_ = plot_snr_vs_score(test_fg_ss, xscale=\"log\")" ] }, { @@ -465,13 +455,23 @@ "outputs": [], "source": [ "\"\"\"Save model\"\"\"\n", - "import os\n", + "from pathlib import Path\n", + "\n", + "\n", + "def _delete_if_exists(path: Path):\n", + " if path.exists():\n", + " path.unlink()\n", "\n", - "model_path = \"./model.h5\"\n", - "if os.path.exists(model_path):\n", - " os.remove(model_path)\n", + "model_path_json = Path(\"./model.json\")\n", + "model_path_tflite = model_path_json.with_suffix(\".tflite\")\n", + "model_path_onnx = model_path_json.with_suffix(\".onnx\")\n", + "_delete_if_exists(model_path_json)\n", + "_delete_if_exists(model_path_tflite)\n", + "_delete_if_exists(model_path_onnx)\n", "\n", - "model.save(model_path)" + "model.save(str(model_path_json))\n", + "model.to_tflite(str(model_path_tflite))\n", + "model.to_onnx(str(model_path_onnx))" ] }, { @@ -571,7 +571,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.12.4" }, "orig_nbformat": 4, "vscode": { diff --git a/examples/data/conversion/pcf_to_ss.py b/examples/data/conversion/pcf_to_ss.py index 0a5b6d65..e94e0148 100644 --- a/examples/data/conversion/pcf_to_ss.py +++ b/examples/data/conversion/pcf_to_ss.py @@ -12,10 +12,9 @@ import os from pathlib import Path -from riid import SAMPLESET_HDF_FILE_EXTENSION +from riid import SAMPLESET_HDF_FILE_EXTENSION, read_pcf from riid.data.converters import (_validate_and_create_output_dir, convert_directory) -from riid.data.sampleset import read_pcf def convert_and_save(input_file_path: str, output_dir: str = None, diff --git a/examples/data/difficulty_score.py b/examples/data/difficulty_score.py index 103d9e1b..6f838ea6 100644 --- a/examples/data/difficulty_score.py +++ b/examples/data/difficulty_score.py @@ -2,9 +2,7 @@ # Under the terms of Contract DE-NA0003525 with NTESS, # the U.S. Government retains certain rights in this software. """This example demonstrates how to compute the difficulty of a given SampleSet.""" -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds fg_seeds_ss, bg_seeds_ss = get_dummy_seeds().split_fg_and_bg() mixed_bg_seed_ss = SeedMixer(bg_seeds_ss, mixture_size=3)\ diff --git a/examples/data/preprocessing/energy_calibration.py b/examples/data/preprocessing/energy_calibration.py index 25061fea..03cefc28 100644 --- a/examples/data/preprocessing/energy_calibration.py +++ b/examples/data/preprocessing/energy_calibration.py @@ -8,9 +8,7 @@ import matplotlib.pyplot as plt import numpy as np -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds SYNTHETIC_DATA_CONFIG = { "samples_per_seed": 10, diff --git a/examples/data/synthesis/mix_seeds.py b/examples/data/synthesis/mix_seeds.py index 911fc56d..a2a7bbac 100644 --- a/examples/data/synthesis/mix_seeds.py +++ b/examples/data/synthesis/mix_seeds.py @@ -3,8 +3,8 @@ # the U.S. Government retains certain rights in this software. 
"""This example demonstrates how to generate synthetic gamma spectra from seeds.""" import numpy as np -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer + +from riid import SeedMixer, get_dummy_seeds fg_seeds_ss, bg_seeds_ss = get_dummy_seeds().split_fg_and_bg() diff --git a/examples/data/synthesis/synthesize_passbys.py b/examples/data/synthesis/synthesize_passbys.py index 4c9ca023..7b2bbb3e 100644 --- a/examples/data/synthesis/synthesize_passbys.py +++ b/examples/data/synthesis/synthesize_passbys.py @@ -7,8 +7,7 @@ import matplotlib.pyplot as plt import numpy as np -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.passby import PassbySynthesizer +from riid import PassbySynthesizer, get_dummy_seeds if len(sys.argv) == 2: import matplotlib diff --git a/examples/data/synthesis/synthesize_seeds.py b/examples/data/synthesis/synthesize_seeds.py index ab837dca..a33eabfc 100644 --- a/examples/data/synthesis/synthesize_seeds.py +++ b/examples/data/synthesis/synthesize_seeds.py @@ -4,7 +4,7 @@ """This example demonstrates how to generate synthetic seeds from GADRAS.""" import yaml -from riid.data.synthetic.seed import SeedSynthesizer +from riid import SeedSynthesizer seed_synth_config = """ --- diff --git a/examples/data/synthesis/synthesize_seeds_advanced.py b/examples/data/synthesis/synthesize_seeds_advanced.py index 390ac72b..d259b242 100644 --- a/examples/data/synthesis/synthesize_seeds_advanced.py +++ b/examples/data/synthesis/synthesize_seeds_advanced.py @@ -5,7 +5,7 @@ configuration expansion features.""" import yaml -from riid.data.synthetic.seed import SeedSynthesizer +from riid import SeedSynthesizer seed_synth_config = """ --- diff --git a/examples/data/synthesis/synthesize_spectra.py b/examples/data/synthesis/synthesize_spectra.py index 95530244..ce150123 100644 --- a/examples/data/synthesis/synthesize_spectra.py +++ b/examples/data/synthesis/synthesize_spectra.py @@ -2,9 +2,7 @@ # Under the terms of Contract DE-NA0003525 with NTESS, # the U.S. Government retains certain rights in this software. 
"""This example demonstrates how to generate synthetic gamma spectra from seeds.""" -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds SYNTHETIC_DATA_CONFIG = { "samples_per_seed": 10000, diff --git a/examples/modeling/anomaly_detection.py b/examples/modeling/anomaly_detection.py index e3b27a13..503470e2 100644 --- a/examples/modeling/anomaly_detection.py +++ b/examples/modeling/anomaly_detection.py @@ -10,10 +10,8 @@ import numpy as np from matplotlib import cm +from riid import PassbySynthesizer, SeedMixer, get_dummy_seeds from riid.anomaly import PoissonNChannelEventDetector -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.passby import PassbySynthesizer -from riid.data.synthetic.seed import SeedMixer if len(sys.argv) == 2: import matplotlib diff --git a/examples/modeling/arad.py b/examples/modeling/arad.py index f15d2e4e..120b0d07 100644 --- a/examples/modeling/arad.py +++ b/examples/modeling/arad.py @@ -6,10 +6,8 @@ import numpy as np import pandas as pd -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer -from riid.models.neural_nets.arad import ARADv1, ARADv2 +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds +from riid.models import ARADv1, ARADv2 # Config rng = np.random.default_rng(42) diff --git a/examples/modeling/arad_latent_prediction.py b/examples/modeling/arad_latent_prediction.py index 9b233954..4d3b199b 100644 --- a/examples/modeling/arad_latent_prediction.py +++ b/examples/modeling/arad_latent_prediction.py @@ -8,10 +8,8 @@ from keras.api.metrics import Accuracy, CategoricalCrossentropy from sklearn.metrics import f1_score, mean_squared_error -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer -from riid.models.neural_nets.arad import ARADLatentPredictor, ARADv2 +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds +from riid.models import ARADLatentPredictor, ARADv2 # Config rng = np.random.default_rng(42) diff --git a/examples/modeling/classifier_comparison.py b/examples/modeling/classifier_comparison.py index 4fce43e7..d51af336 100644 --- a/examples/modeling/classifier_comparison.py +++ b/examples/modeling/classifier_comparison.py @@ -7,12 +7,9 @@ import matplotlib.pyplot as plt from sklearn.metrics import f1_score -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer -from riid.models.bayes import PoissonBayesClassifier +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds from riid.metrics import precision_recall_curve -from riid.models.neural_nets import MLPClassifier +from riid.models import MLPClassifier, PoissonBayesClassifier from riid.visualize import plot_precision_recall if len(sys.argv) == 2: diff --git a/examples/modeling/label_proportion_estimation.py b/examples/modeling/label_proportion_estimation.py index b4e5e4c4..1cfe30c7 100644 --- a/examples/modeling/label_proportion_estimation.py +++ b/examples/modeling/label_proportion_estimation.py @@ -4,10 +4,8 @@ from sklearn.metrics import mean_absolute_error -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static 
import StaticSynthesizer -from riid.models.neural_nets import LabelProportionEstimator +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds +from riid.models import LabelProportionEstimator # Generate some mixture training data. fg_seeds_ss, bg_seeds_ss = get_dummy_seeds().split_fg_and_bg() diff --git a/examples/modeling/neural_network_classifier.py b/examples/modeling/neural_network_classifier.py index 277a2dc5..831230c2 100644 --- a/examples/modeling/neural_network_classifier.py +++ b/examples/modeling/neural_network_classifier.py @@ -5,10 +5,8 @@ import numpy as np from sklearn.metrics import f1_score -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer -from riid.models.neural_nets import MLPClassifier +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds +from riid.models import MLPClassifier # Generate some training data fg_seeds_ss, bg_seeds_ss = get_dummy_seeds().split_fg_and_bg() diff --git a/examples/run_examples.py b/examples/run_examples.py index aaa29c0f..1424e7de 100644 --- a/examples/run_examples.py +++ b/examples/run_examples.py @@ -6,6 +6,7 @@ import subprocess import sys from pathlib import Path + import pandas as pd from tabulate import tabulate diff --git a/examples/visualization/confusion_matrix.py b/examples/visualization/confusion_matrix.py index b2e25e72..27dd2705 100644 --- a/examples/visualization/confusion_matrix.py +++ b/examples/visualization/confusion_matrix.py @@ -4,10 +4,8 @@ """This example demonstrates how to obtain confusion matrices.""" import sys -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer -from riid.models.neural_nets import MLPClassifier +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds +from riid.models import MLPClassifier from riid.visualize import confusion_matrix if len(sys.argv) == 2: diff --git a/examples/visualization/distance_matrix.py b/examples/visualization/distance_matrix.py index 5f229f25..49e5af33 100644 --- a/examples/visualization/distance_matrix.py +++ b/examples/visualization/distance_matrix.py @@ -9,7 +9,7 @@ import matplotlib.pyplot as plt import seaborn as sns -from riid.data.synthetic import get_dummy_seeds +from riid import get_dummy_seeds if len(sys.argv) == 2: import matplotlib diff --git a/examples/visualization/plot_sampleset_compare_to.py b/examples/visualization/plot_sampleset_compare_to.py index 9ccf94bb..5a99f05d 100644 --- a/examples/visualization/plot_sampleset_compare_to.py +++ b/examples/visualization/plot_sampleset_compare_to.py @@ -4,9 +4,7 @@ """This example demonstrates how to compare sample sets.""" import sys -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds from riid.visualize import plot_ss_comparison if len(sys.argv) == 2: diff --git a/examples/visualization/plot_spectra.py b/examples/visualization/plot_spectra.py index d0f1e1e7..04f71bd1 100644 --- a/examples/visualization/plot_spectra.py +++ b/examples/visualization/plot_spectra.py @@ -4,7 +4,7 @@ """This example demonstrates how to plot gamma spectra.""" import sys -from riid.data.synthetic import get_dummy_seeds +from riid import get_dummy_seeds from riid.visualize import plot_spectra if len(sys.argv) == 2: diff --git a/riid/__init__.py 
b/riid/__init__.py index d19e9880..44fe4e4b 100644 --- a/riid/__init__.py +++ b/riid/__init__.py @@ -7,9 +7,15 @@ import logging import os import sys - from importlib.metadata import version +from riid.data.sampleset import (SampleSet, SpectraState, SpectraType, + read_hdf, read_json, read_pcf) +from riid.data.synthetic.passby import PassbySynthesizer +from riid.data.synthetic.seed import (SeedMixer, SeedSynthesizer, + get_dummy_seeds) +from riid.data.synthetic.static import StaticSynthesizer + HANDLER = logging.StreamHandler(sys.stdout) logging.root.addHandler(HANDLER) logging.root.setLevel(logging.DEBUG) @@ -31,3 +37,7 @@ "riid.data.synthetic.passby.PassbySynthesizer._generate_single_passby": True, "riid.data.sampleset.SampleSet._channels_to_energies": True, } + +__all__ = ["SampleSet", "SpectraState", "SpectraType", + "read_hdf", "read_json", "read_pcf", "get_dummy_seeds", + "PassbySynthesizer", "SeedSynthesizer", "StaticSynthesizer", "SeedMixer"] diff --git a/riid/data/converters/aipt.py b/riid/data/converters/aipt.py index d890a440..06f7c91a 100644 --- a/riid/data/converters/aipt.py +++ b/riid/data/converters/aipt.py @@ -10,9 +10,8 @@ import pandas as pd -from riid import SAMPLESET_HDF_FILE_EXTENSION +from riid import SAMPLESET_HDF_FILE_EXTENSION, SampleSet from riid.data.converters import _validate_and_create_output_dir -from riid.data.sampleset import SampleSet ELEMENT_IDS_PER_FILE = [0, 1, 2, 3] DEFAULT_ECAL = [ diff --git a/riid/data/converters/topcoder.py b/riid/data/converters/topcoder.py index e75b07a7..fffd8ada 100644 --- a/riid/data/converters/topcoder.py +++ b/riid/data/converters/topcoder.py @@ -13,10 +13,9 @@ import numpy as np import pandas as pd -from riid import SAMPLESET_HDF_FILE_EXTENSION +from riid import SAMPLESET_HDF_FILE_EXTENSION, SampleSet from riid.data.converters import _validate_and_create_output_dir from riid.data.labeling import label_to_index_element -from riid.data.sampleset import SampleSet SOURCE_ID_TO_LABEL = { 0: "Background", diff --git a/riid/data/synthetic/__init__.py b/riid/data/synthetic/__init__.py index 015c491d..bc07729c 100644 --- a/riid/data/synthetic/__init__.py +++ b/riid/data/synthetic/__init__.py @@ -1,373 +1,9 @@ # Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS). # Under the terms of Contract DE-NA0003525 with NTESS, # the U.S. Government retains certain rights in this software. 
-"""This modules contains utilities for synthesizing gamma spectra.""" -from collections import Counter -from typing import Any +"""This module contains utilities for synthesizing gamma spectra.""" +# The following imports are left to not break previous imports; remove in v3 +from riid.data.synthetic.base import Synthesizer, get_distribution_values +from riid.data.synthetic.seed import get_dummy_seeds -import numpy as np -import pandas as pd -from numpy.random import Generator - -from riid.data import get_expected_spectra -from riid.data.sampleset import (SampleSet, SpectraState, SpectraType, - _get_utc_timestamp) - - -class Synthesizer(): - """Base class for synthesizers.""" - - SYNTHETIC_STR = "synthetic" - SUPPORTED_SAMPLING_FUNCTIONS = ["uniform", "log10", "discrete", "list"] - - def __init__(self, bg_cps: float = 300.0, long_bg_live_time: float = 120.0, - apply_poisson_noise: bool = True, - normalize_sources: bool = True, - return_fg: bool = True, - return_gross: bool = False, - rng: Generator = np.random.default_rng()): - """ - Args: - bg_cps: constant rate of gammas from background - long_bg_live_time: live time on which to base background subtractions - apply_poisson_noise: whether to apply Poisson noise to spectra - normalize_sources: whether to normalize ground truth proportions to sum to 1 - return_fg: whether to compute and return background subtracted spectra - return_gross: whether to return gross spectra (always computed) - rng: NumPy random number generator, useful for experiment repeatability - """ - self.bg_cps = bg_cps - self.long_bg_live_time = long_bg_live_time - self.apply_poisson_noise = apply_poisson_noise - self.normalize_sources = normalize_sources - self.return_fg = return_fg - self.return_gross = return_gross - self._rng = rng - self._synthesis_start_dt = None - self._n_samples_synthesized = 0 - - def __str__(self): - output = "SynthesizerConfig" - for k, v in sorted(vars(self).items()): - output += " {}: {}".format(k, str(v)) - return output - - def _reset_progress(self): - self._n_samples_synthesized = 0 - self._synthesis_start_dt = _get_utc_timestamp() - - def _report_progress(self, n_samples_expected, batch_name): - percent_complete = 100 * self._n_samples_synthesized / n_samples_expected - msg = ( - f"Synthesizing ... {percent_complete:.0f}% " - f"(currently on {batch_name}" - ) - MAX_MSG_LEN = 80 - msg = (msg[:MAX_MSG_LEN] + "...") if len(msg) > MAX_MSG_LEN else msg - msg += ")" - print("\033[K" + msg, end="\r") - - def _report_completion(self, delay): - summary = ( - f"Synthesis complete!\n" - f"Generated {self._n_samples_synthesized} samples in ~{delay:.2f}s " - f"(~{(self._n_samples_synthesized / delay):.2f} samples/sec)." - ) - print("\033[K" + summary) - - def _verify_n_samples_synthesized(self, actual: int, expected: int): - assert expected == actual, ( - f"{actual} generated, but {expected} were expected. 
" - "Be sure to remove any columns from your seeds' sources DataFrame that " - "contain all zeroes.") - - def _get_batch(self, fg_seed, fg_sources, bg_seed, bg_sources, ecal, - lt_targets, snr_targets, rt_targets=None, distance_cm=None): - if not (self.return_fg or self.return_gross): - raise ValueError("Computing to return nothing.") - - bg_counts_expected = lt_targets * self.bg_cps - fg_counts_expected = snr_targets * np.sqrt(bg_counts_expected) - - fg_spectra = get_expected_spectra(fg_seed.values, fg_counts_expected) - bg_spectra = get_expected_spectra(bg_seed.values, bg_counts_expected) - - long_bg_counts_expected = self.long_bg_live_time * self.bg_cps - long_bg_spectrum_expected = bg_seed.values * long_bg_counts_expected - - gross_spectra = None - long_bg_spectra = None - fg_counts = 0 - bg_counts = 0 - long_bg_counts = 0 - fg_ss = None - gross_ss = None - - # Spectra - if self.apply_poisson_noise: - gross_spectra = self._rng.poisson(fg_spectra + bg_spectra) - if self.return_fg: - long_bg_spectrum = self._rng.poisson(long_bg_spectrum_expected) - long_bg_seed = long_bg_spectrum / long_bg_spectrum.sum() - long_bg_spectra = get_expected_spectra(long_bg_seed, bg_counts_expected) - fg_spectra = gross_spectra - long_bg_spectra - else: - gross_spectra = fg_spectra + bg_spectra - if self.return_fg: - long_bg_spectra = bg_spectra - fg_spectra = gross_spectra - long_bg_spectra - - # Counts - fg_counts = fg_spectra.sum(axis=1, dtype=float) - if self.return_fg: - long_bg_counts = long_bg_spectra.sum(axis=1, dtype=float) - if self.return_gross: - bg_counts = bg_spectra.sum(axis=1, dtype=float) - - # Sample sets - if self.return_fg: - snrs = fg_counts / np.sqrt(long_bg_counts.clip(1)) - fg_ss = get_fg_sample_set(fg_spectra, fg_sources, ecal, lt_targets, - snrs=snrs, total_counts=fg_counts, - real_times=rt_targets, distance_cm=distance_cm, - timestamps=self._synthesis_start_dt) - self._n_samples_synthesized += fg_ss.n_samples - if self.return_gross: - tiled_fg_sources = _tile_sources_and_scale( - fg_sources, - gross_spectra.shape[0], - fg_counts, - ) - tiled_bg_sources = _tile_sources_and_scale( - bg_sources, - gross_spectra.shape[0], - bg_counts, - ) - gross_sources = get_merged_sources_samplewise(tiled_fg_sources, tiled_bg_sources) - gross_counts = gross_spectra.sum(axis=1) - snrs = fg_counts / np.sqrt(bg_counts.clip(1)) - gross_ss = get_gross_sample_set(gross_spectra, gross_sources, ecal, - lt_targets, snrs, gross_counts, - real_times=rt_targets, distance_cm=distance_cm, - timestamps=self._synthesis_start_dt) - self._n_samples_synthesized += gross_ss.n_samples - - return fg_ss, gross_ss - - -def get_sample_set(spectra, sources, ecal, live_times, snrs, total_counts=None, - real_times=None, distance_cm=None, timestamps=None, - descriptions=None) -> SampleSet: - n_samples = spectra.shape[0] - - ss = SampleSet() - ss.spectra_state = SpectraState.Counts - ss.spectra = pd.DataFrame(spectra) - ss.sources = sources - ss.info.description = np.full(n_samples, "") # Ensures the length of info equal n_samples - if descriptions: - ss.info.description = descriptions - ss.info.snr = snrs - ss.info.timestamp = timestamps - ss.info.total_counts = total_counts if total_counts is not None else spectra.sum(axis=1) - ss.info.ecal_order_0 = ecal[0] - ss.info.ecal_order_1 = ecal[1] - ss.info.ecal_order_2 = ecal[2] - ss.info.ecal_order_3 = ecal[3] - ss.info.ecal_low_e = ecal[4] - ss.info.live_time = live_times - ss.info.real_time = real_times if real_times is not None else live_times - ss.info.distance_cm = distance_cm 
- ss.info.occupancy_flag = 0 - ss.info.tag = " " # TODO: test if this can be empty string - - return ss - - -def _tile_sources_and_scale(sources, n_samples, scalars) -> pd.DataFrame: - tiled_sources = pd.DataFrame( - np.tile(sources.values, (n_samples, 1)), - columns=sources.index - ) - # Multiplying normalized source values by spectrum counts. - # This is REQUIRED for properly merging sources DataFrames later when synthesizing - # multiple isotopes. - tiled_sources = tiled_sources.multiply(scalars, axis="index") - return tiled_sources - - -def get_fg_sample_set(spectra, sources, ecal, live_times, snrs, total_counts, - real_times=None, distance_cm=None, timestamps=None, - descriptions=None) -> SampleSet: - tiled_sources = _tile_sources_and_scale( - sources, - spectra.shape[0], - spectra.sum(axis=1) - ) - ss = get_sample_set( - spectra=spectra, - sources=tiled_sources, - ecal=ecal, - live_times=live_times, - snrs=snrs, - total_counts=total_counts, - real_times=real_times, - distance_cm=distance_cm, - timestamps=timestamps, - descriptions=descriptions - ) - ss.spectra_type = SpectraType.Foreground - return ss - - -def get_gross_sample_set(spectra, sources, ecal, live_times, snrs, total_counts, - real_times=None, distance_cm=None, timestamps=None, - descriptions=None) -> SampleSet: - ss = get_sample_set( - spectra=spectra, - sources=sources, - ecal=ecal, - live_times=live_times, - snrs=snrs, - total_counts=total_counts, - real_times=real_times, - distance_cm=distance_cm, - timestamps=timestamps, - descriptions=descriptions - ) - ss.spectra_type = SpectraType.Gross - return ss - - -def get_distribution_values(function: str, function_args: Any, n_values: int, - rng: Generator = np.random.default_rng()): - """Randomly sample a list of values based one of many distributions. - - Args: - function: name of the distribution function - function_args: argument or collection of arguments to be - passed to the function, if any. 
- n_values: size of the distribution - rng: NumPy random number generator, useful for experiment repeatability - - Returns: - Value or collection of sampled values - - Raises: - `ValueError` when an unsupported function type is provided - """ - values = None - if function == "uniform": - values = rng.uniform(*function_args, size=n_values) - elif function == "log10": - log10_args = tuple(map(np.log10, function_args)) - values = np.power(10, rng.uniform(*log10_args, size=n_values)) - elif function == "discrete": - values = rng.choice(function_args, size=n_values) - elif function == "list": - values = np.array(function_args) - else: - raise ValueError(f"{function} function not supported for sampling.") - - return values - - -def get_merged_sources_samplewise(sources1: pd.DataFrame, sources2: pd.DataFrame) -> pd.DataFrame: - merged_sources_df = sources1.add(sources2, axis=1, fill_value=0) - return merged_sources_df - - -def get_samples_per_seed(columns: pd.MultiIndex, min_samples_per_seed: int, balance_level: int): - level_values = columns.get_level_values(level=balance_level) - level_value_to_n_seeds = Counter(level_values) - unique_level_values = list(level_value_to_n_seeds.keys()) - occurences = np.array(list(level_value_to_n_seeds.values())) - max_samples_per_level_value = occurences.max() * min_samples_per_seed - samples_per_level_value = np.ceil(max_samples_per_level_value / occurences).astype(int) - lv_to_samples_per_seed = {k: v for (k, v) in zip(unique_level_values, samples_per_level_value)} - total_samples_expected = sum([x * y for x, y in zip(occurences, samples_per_level_value)]) - - return lv_to_samples_per_seed, total_samples_expected - - -def get_dummy_seeds(n_channels: int = 512, live_time: float = 600.0, - count_rate: float = 1000.0, normalize: bool = True, - rng: Generator = np.random.default_rng()) -> SampleSet: - """Get a random, dummy `SampleSet` of ideal seeds. - - WARNING: the spectra returned by this function each contain one gaussian peak that does - not overlap with the peaks of other spectra. Such data is about as *ideal* as one - could hope to be working with and does not represent anything real. - Therefore, **do not** use this data for any purpose other than testing, debugging, or - examples where code, not results, is being demonstrated. Any use in scientific studies - does not make sense. 
- - Args: - n_channels: number of channels in the spectra DataFrame - live_time: collection time on which to base seeds - (higher creates a less noisy shape) - count_rate: count rate on which to base seeds - (higher creates a less noisy shape) - normalize: whether to apply an L1-norm to the spectra - rng: NumPy random number generator, useful for experiment repeatability - - Returns: - `SampleSet` with randomly generated spectra - """ - ss = SampleSet() - ss.measured_or_synthetic = "synthetic" - ss.spectra_state = SpectraState.Counts - ss.spectra_type = SpectraType.BackgroundForeground - ss.synthesis_info = { - "subtract_background": True, - } - sources = [ - ("Industrial", "Am241", "Unshielded Am241"), - ("Industrial", "Ba133", "Unshielded Ba133"), - ("NORM", "K40", "PotassiumInSoil"), - ("NORM", "K40", "Moderately Shielded K40"), - ("NORM", "Ra226", "UraniumInSoil"), - ("NORM", "Th232", "ThoriumInSoil"), - ("SNM", "U238", "Unshielded U238"), - ("SNM", "Pu239", "Unshielded Pu239"), - ("SNM", "Pu239", "Moderately Shielded Pu239"), - ("SNM", "Pu239", "Heavily Shielded Pu239"), - ] - n_sources = len(sources) - n_fg_sources = n_sources - sources_cols = pd.MultiIndex.from_tuples( - sources, - names=SampleSet.SOURCES_MULTI_INDEX_NAMES - ) - sources_data = np.identity(n_sources) - ss.sources = pd.DataFrame(data=sources_data, columns=sources_cols) - - histograms = [] - N_FG_COUNTS = int(count_rate * live_time) - fg_std = np.sqrt(n_channels / n_sources) - channels_per_sources = n_channels / n_fg_sources - for i in range(n_fg_sources): - mu = i * channels_per_sources + channels_per_sources / 2 - counts = rng.normal(mu, fg_std, size=N_FG_COUNTS) - fg_histogram, _ = np.histogram(counts, bins=n_channels, range=(0, n_channels)) - histograms.append(fg_histogram) - histograms = np.array(histograms) - - ss.spectra = pd.DataFrame(data=histograms) - - ss.info.total_counts = ss.spectra.sum(axis=1) - ss.info.live_time = live_time - ss.info.real_time = live_time - ss.info.snr = None - ss.info.ecal_order_0 = 0 - ss.info.ecal_order_1 = 3000 - ss.info.ecal_order_2 = 100 - ss.info.ecal_order_3 = 0 - ss.info.ecal_low_e = 0 - ss.info.description = "" - ss.update_timestamp() - - if normalize: - ss.normalize() - - return ss +__all__ = ["get_dummy_seeds", "Synthesizer", "get_distribution_values"] diff --git a/riid/data/synthetic/base.py b/riid/data/synthetic/base.py new file mode 100644 index 00000000..9209c7f3 --- /dev/null +++ b/riid/data/synthetic/base.py @@ -0,0 +1,290 @@ +# Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS). +# Under the terms of Contract DE-NA0003525 with NTESS, +# the U.S. Government retains certain rights in this software. 
+"""This module contains utilities for synthesizing gamma spectra.""" +from collections import Counter +from typing import Any + +import numpy as np +import pandas as pd +from numpy.random import Generator + +from riid.data import get_expected_spectra +from riid.data.sampleset import (SampleSet, SpectraState, SpectraType, + _get_utc_timestamp) + + +class Synthesizer(): + """Base class for synthesizers.""" + + SYNTHETIC_STR = "synthetic" + SUPPORTED_SAMPLING_FUNCTIONS = ["uniform", "log10", "discrete", "list"] + + def __init__(self, bg_cps: float = 300.0, long_bg_live_time: float = 120.0, + apply_poisson_noise: bool = True, + normalize_sources: bool = True, + return_fg: bool = True, + return_gross: bool = False, + rng: Generator = np.random.default_rng()): + """ + Args: + bg_cps: constant rate of gammas from background + long_bg_live_time: live time on which to base background subtractions + apply_poisson_noise: whether to apply Poisson noise to spectra + normalize_sources: whether to normalize ground truth proportions to sum to 1 + return_fg: whether to compute and return background subtracted spectra + return_gross: whether to return gross spectra (always computed) + rng: NumPy random number generator, useful for experiment repeatability + """ + self.bg_cps = bg_cps + self.long_bg_live_time = long_bg_live_time + self.apply_poisson_noise = apply_poisson_noise + self.normalize_sources = normalize_sources + self.return_fg = return_fg + self.return_gross = return_gross + self._rng = rng + self._synthesis_start_dt = None + self._n_samples_synthesized = 0 + + def __str__(self): + output = "SynthesizerConfig" + for k, v in sorted(vars(self).items()): + output += " {}: {}".format(k, str(v)) + return output + + def _reset_progress(self): + self._n_samples_synthesized = 0 + self._synthesis_start_dt = _get_utc_timestamp() + + def _report_progress(self, n_samples_expected, batch_name): + percent_complete = 100 * self._n_samples_synthesized / n_samples_expected + msg = ( + f"Synthesizing ... {percent_complete:.0f}% " + f"(currently on {batch_name}" + ) + MAX_MSG_LEN = 80 + msg = (msg[:MAX_MSG_LEN] + "...") if len(msg) > MAX_MSG_LEN else msg + msg += ")" + print("\033[K" + msg, end="\r") + + def _report_completion(self, delay): + summary = ( + f"Synthesis complete!\n" + f"Generated {self._n_samples_synthesized} samples in ~{delay:.2f}s " + f"(~{(self._n_samples_synthesized / delay):.2f} samples/sec)." + ) + print("\033[K" + summary) + + def _verify_n_samples_synthesized(self, actual: int, expected: int): + assert expected == actual, ( + f"{actual} generated, but {expected} were expected. 
" + "Be sure to remove any columns from your seeds' sources DataFrame that " + "contain all zeroes.") + + def _get_batch(self, fg_seed, fg_sources, bg_seed, bg_sources, ecal, + lt_targets, snr_targets, rt_targets=None, distance_cm=None): + if not (self.return_fg or self.return_gross): + raise ValueError("Computing to return nothing.") + + bg_counts_expected = lt_targets * self.bg_cps + fg_counts_expected = snr_targets * np.sqrt(bg_counts_expected) + + fg_spectra = get_expected_spectra(fg_seed.values, fg_counts_expected) + bg_spectra = get_expected_spectra(bg_seed.values, bg_counts_expected) + + long_bg_counts_expected = self.long_bg_live_time * self.bg_cps + long_bg_spectrum_expected = bg_seed.values * long_bg_counts_expected + + gross_spectra = None + long_bg_spectra = None + fg_counts = 0 + bg_counts = 0 + long_bg_counts = 0 + fg_ss = None + gross_ss = None + + # Spectra + if self.apply_poisson_noise: + gross_spectra = self._rng.poisson(fg_spectra + bg_spectra) + if self.return_fg: + long_bg_spectrum = self._rng.poisson(long_bg_spectrum_expected) + long_bg_seed = long_bg_spectrum / long_bg_spectrum.sum() + long_bg_spectra = get_expected_spectra(long_bg_seed, bg_counts_expected) + fg_spectra = gross_spectra - long_bg_spectra + else: + gross_spectra = fg_spectra + bg_spectra + if self.return_fg: + long_bg_spectra = bg_spectra + fg_spectra = gross_spectra - long_bg_spectra + + # Counts + fg_counts = fg_spectra.sum(axis=1, dtype=float) + if self.return_fg: + long_bg_counts = long_bg_spectra.sum(axis=1, dtype=float) + if self.return_gross: + bg_counts = bg_spectra.sum(axis=1, dtype=float) + + # Sample sets + if self.return_fg: + snrs = fg_counts / np.sqrt(long_bg_counts.clip(1)) + fg_ss = get_fg_sample_set(fg_spectra, fg_sources, ecal, lt_targets, + snrs=snrs, total_counts=fg_counts, + real_times=rt_targets, distance_cm=distance_cm, + timestamps=self._synthesis_start_dt) + self._n_samples_synthesized += fg_ss.n_samples + if self.return_gross: + tiled_fg_sources = _tile_sources_and_scale( + fg_sources, + gross_spectra.shape[0], + fg_counts, + ) + tiled_bg_sources = _tile_sources_and_scale( + bg_sources, + gross_spectra.shape[0], + bg_counts, + ) + gross_sources = get_merged_sources_samplewise(tiled_fg_sources, tiled_bg_sources) + gross_counts = gross_spectra.sum(axis=1) + snrs = fg_counts / np.sqrt(bg_counts.clip(1)) + gross_ss = get_gross_sample_set(gross_spectra, gross_sources, ecal, + lt_targets, snrs, gross_counts, + real_times=rt_targets, distance_cm=distance_cm, + timestamps=self._synthesis_start_dt) + self._n_samples_synthesized += gross_ss.n_samples + + return fg_ss, gross_ss + + +def get_sample_set(spectra, sources, ecal, live_times, snrs, total_counts=None, + real_times=None, distance_cm=None, timestamps=None, + descriptions=None) -> SampleSet: + n_samples = spectra.shape[0] + + ss = SampleSet() + ss.spectra_state = SpectraState.Counts + ss.spectra = pd.DataFrame(spectra) + ss.sources = sources + ss.info.description = np.full(n_samples, "") # Ensures the length of info equal n_samples + if descriptions: + ss.info.description = descriptions + ss.info.snr = snrs + ss.info.timestamp = timestamps + ss.info.total_counts = total_counts if total_counts is not None else spectra.sum(axis=1) + ss.info.ecal_order_0 = ecal[0] + ss.info.ecal_order_1 = ecal[1] + ss.info.ecal_order_2 = ecal[2] + ss.info.ecal_order_3 = ecal[3] + ss.info.ecal_low_e = ecal[4] + ss.info.live_time = live_times + ss.info.real_time = real_times if real_times is not None else live_times + ss.info.distance_cm = distance_cm 
+ ss.info.occupancy_flag = 0 + ss.info.tag = " " # TODO: test if this can be empty string + + return ss + + +def _tile_sources_and_scale(sources, n_samples, scalars) -> pd.DataFrame: + tiled_sources = pd.DataFrame( + np.tile(sources.values, (n_samples, 1)), + columns=sources.index + ) + # Multiplying normalized source values by spectrum counts. + # This is REQUIRED for properly merging sources DataFrames later when synthesizing + # multiple isotopes. + tiled_sources = tiled_sources.multiply(scalars, axis="index") + return tiled_sources + + +def get_fg_sample_set(spectra, sources, ecal, live_times, snrs, total_counts, + real_times=None, distance_cm=None, timestamps=None, + descriptions=None) -> SampleSet: + tiled_sources = _tile_sources_and_scale( + sources, + spectra.shape[0], + spectra.sum(axis=1) + ) + ss = get_sample_set( + spectra=spectra, + sources=tiled_sources, + ecal=ecal, + live_times=live_times, + snrs=snrs, + total_counts=total_counts, + real_times=real_times, + distance_cm=distance_cm, + timestamps=timestamps, + descriptions=descriptions + ) + ss.spectra_type = SpectraType.Foreground + return ss + + +def get_gross_sample_set(spectra, sources, ecal, live_times, snrs, total_counts, + real_times=None, distance_cm=None, timestamps=None, + descriptions=None) -> SampleSet: + ss = get_sample_set( + spectra=spectra, + sources=sources, + ecal=ecal, + live_times=live_times, + snrs=snrs, + total_counts=total_counts, + real_times=real_times, + distance_cm=distance_cm, + timestamps=timestamps, + descriptions=descriptions + ) + ss.spectra_type = SpectraType.Gross + return ss + + +def get_distribution_values(function: str, function_args: Any, n_values: int, + rng: Generator = np.random.default_rng()): + """Randomly sample a list of values based one of many distributions. + + Args: + function: name of the distribution function + function_args: argument or collection of arguments to be + passed to the function, if any. 
+ n_values: size of the distribution + rng: NumPy random number generator, useful for experiment repeatability + + Returns: + Value or collection of sampled values + + Raises: + `ValueError` when an unsupported function type is provided + """ + values = None + if function == "uniform": + values = rng.uniform(*function_args, size=n_values) + elif function == "log10": + log10_args = tuple(map(np.log10, function_args)) + values = np.power(10, rng.uniform(*log10_args, size=n_values)) + elif function == "discrete": + values = rng.choice(function_args, size=n_values) + elif function == "list": + values = np.array(function_args) + else: + raise ValueError(f"{function} function not supported for sampling.") + + return values + + +def get_merged_sources_samplewise(sources1: pd.DataFrame, sources2: pd.DataFrame) -> pd.DataFrame: + merged_sources_df = sources1.add(sources2, axis=1, fill_value=0) + return merged_sources_df + + +def get_samples_per_seed(columns: pd.MultiIndex, min_samples_per_seed: int, balance_level: int): + level_values = columns.get_level_values(level=balance_level) + level_value_to_n_seeds = Counter(level_values) + unique_level_values = list(level_value_to_n_seeds.keys()) + occurences = np.array(list(level_value_to_n_seeds.values())) + max_samples_per_level_value = occurences.max() * min_samples_per_seed + samples_per_level_value = np.ceil(max_samples_per_level_value / occurences).astype(int) + lv_to_samples_per_seed = {k: v for (k, v) in zip(unique_level_values, samples_per_level_value)} + total_samples_expected = sum([x * y for x, y in zip(occurences, samples_per_level_value)]) + + return lv_to_samples_per_seed, total_samples_expected diff --git a/riid/data/synthetic/passby.py b/riid/data/synthetic/passby.py index 12b1a68f..84234618 100644 --- a/riid/data/synthetic/passby.py +++ b/riid/data/synthetic/passby.py @@ -11,8 +11,8 @@ import pandas as pd from numpy.random import Generator -from riid.data.sampleset import SampleSet -from riid.data.synthetic import Synthesizer, get_distribution_values +from riid import SampleSet +from riid.data.synthetic.base import Synthesizer, get_distribution_values class PassbySynthesizer(Synthesizer): diff --git a/riid/data/synthetic/seed.py b/riid/data/synthetic/seed.py index 9fd1b2c4..02f70412 100644 --- a/riid/data/synthetic/seed.py +++ b/riid/data/synthetic/seed.py @@ -1,7 +1,7 @@ # Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS). # Under the terms of Contract DE-NA0003525 with NTESS, # the U.S. Government retains certain rights in this software. 
-"""This modules contains utilities for generating synthetic gamma spectrum templates from GADRAS.""" +"""This module contains utilities for generating synthetic gamma spectrum templates from GADRAS.""" import os from contextlib import contextmanager from copy import deepcopy @@ -12,7 +12,8 @@ import yaml from numpy.random import Generator -from riid.data.sampleset import SampleSet, _get_utc_timestamp, read_pcf +from riid import SampleSet, SpectraState, SpectraType, read_pcf +from riid.data.sampleset import _get_utc_timestamp from riid.gadras.api import (DETECTOR_PARAMS, GADRAS_ASSEMBLY_PATH, INJECT_PARAMS, SourceInjector, get_gadras_api, get_inject_setups, validate_inject_config) @@ -405,3 +406,86 @@ def get_choices(choices_so_far: list, options: list, options_probas: np.array, n_choices_remaining -= 1 return get_choices(choices_so_far, options, options_probas, restricted_pairs, n_choices_remaining, rng) + + +def get_dummy_seeds(n_channels: int = 512, live_time: float = 600.0, + count_rate: float = 1000.0, normalize: bool = True, + rng: Generator = np.random.default_rng()) -> SampleSet: + """Get a random, dummy `SampleSet` of ideal seeds. + + WARNING: the spectra returned by this function each contain one gaussian peak that does + not overlap with the peaks of other spectra. Such data is about as *ideal* as one + could hope to be working with and does not represent anything real. + Therefore, **do not** use this data for any purpose other than testing, debugging, or + examples where code, not results, is being demonstrated. Any use in scientific studies + does not make sense. + + Args: + n_channels: number of channels in the spectra DataFrame + live_time: collection time on which to base seeds + (higher creates a less noisy shape) + count_rate: count rate on which to base seeds + (higher creates a less noisy shape) + normalize: whether to apply an L1-norm to the spectra + rng: NumPy random number generator, useful for experiment repeatability + + Returns: + `SampleSet` with randomly generated spectra + """ + ss = SampleSet() + ss.measured_or_synthetic = "synthetic" + ss.spectra_state = SpectraState.Counts + ss.spectra_type = SpectraType.BackgroundForeground + ss.synthesis_info = { + "subtract_background": True, + } + sources = [ + ("Industrial", "Am241", "Unshielded Am241"), + ("Industrial", "Ba133", "Unshielded Ba133"), + ("NORM", "K40", "PotassiumInSoil"), + ("NORM", "K40", "Moderately Shielded K40"), + ("NORM", "Ra226", "UraniumInSoil"), + ("NORM", "Th232", "ThoriumInSoil"), + ("SNM", "U238", "Unshielded U238"), + ("SNM", "Pu239", "Unshielded Pu239"), + ("SNM", "Pu239", "Moderately Shielded Pu239"), + ("SNM", "Pu239", "Heavily Shielded Pu239"), + ] + n_sources = len(sources) + n_fg_sources = n_sources + sources_cols = pd.MultiIndex.from_tuples( + sources, + names=SampleSet.SOURCES_MULTI_INDEX_NAMES + ) + sources_data = np.identity(n_sources) + ss.sources = pd.DataFrame(data=sources_data, columns=sources_cols) + + histograms = [] + N_FG_COUNTS = int(count_rate * live_time) + fg_std = np.sqrt(n_channels / n_sources) + channels_per_sources = n_channels / n_fg_sources + for i in range(n_fg_sources): + mu = i * channels_per_sources + channels_per_sources / 2 + counts = rng.normal(mu, fg_std, size=N_FG_COUNTS) + fg_histogram, _ = np.histogram(counts, bins=n_channels, range=(0, n_channels)) + histograms.append(fg_histogram) + histograms = np.array(histograms) + + ss.spectra = pd.DataFrame(data=histograms) + + ss.info.total_counts = ss.spectra.sum(axis=1) + ss.info.live_time = live_time + 
ss.info.real_time = live_time + ss.info.snr = None + ss.info.ecal_order_0 = 0 + ss.info.ecal_order_1 = 3000 + ss.info.ecal_order_2 = 100 + ss.info.ecal_order_3 = 0 + ss.info.ecal_low_e = 0 + ss.info.description = "" + ss.update_timestamp() + + if normalize: + ss.normalize() + + return ss diff --git a/riid/data/synthetic/static.py b/riid/data/synthetic/static.py index f0fd82dd..cdf0aed0 100644 --- a/riid/data/synthetic/static.py +++ b/riid/data/synthetic/static.py @@ -10,8 +10,8 @@ import numpy as np from numpy.random import Generator -from riid.data.sampleset import SampleSet, SpectraState, SpectraType -from riid.data.synthetic import Synthesizer, get_distribution_values +from riid import SampleSet, SpectraState, SpectraType +from riid.data.synthetic.base import Synthesizer, get_distribution_values class StaticSynthesizer(Synthesizer): diff --git a/riid/gadras/api.py b/riid/gadras/api.py index 187dc95a..c7af0e94 100644 --- a/riid/gadras/api.py +++ b/riid/gadras/api.py @@ -2,20 +2,20 @@ # Under the terms of Contract DE-NA0003525 with NTESS, # the U.S. Government retains certain rights in this software. """This module contains utilities for working with the GADRAS API.""" +import copy +import itertools import json import os import sys -import numpy as np -from numpy.random import Generator from typing import List -import copy -import itertools +import numpy as np import tqdm from jsonschema import validate +from numpy.random import Generator -from riid.data.sampleset import SampleSet, read_pcf -from riid.data.synthetic import get_distribution_values +from riid import SampleSet, read_pcf +from riid.data.synthetic.base import get_distribution_values GADRAS_API_SEEMINGLY_AVAILABLE = False GADRAS_DIR_ENV_VAR_KEY = "GADRAS_DIR" diff --git a/riid/metrics.py b/riid/metrics.py index aa677448..81b5a9c9 100644 --- a/riid/metrics.py +++ b/riid/metrics.py @@ -5,7 +5,7 @@ import numpy as np import sklearn -from riid.data.sampleset import SampleSet +from riid import SampleSet def multi_f1(y_true: np.ndarray, y_pred: np.ndarray) -> float: diff --git a/riid/models/__init__.py b/riid/models/__init__.py index 0825b251..3a73b7a0 100644 --- a/riid/models/__init__.py +++ b/riid/models/__init__.py @@ -1,243 +1,10 @@ # Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS). # Under the terms of Contract DE-NA0003525 with NTESS, # the U.S. Government retains certain rights in this software. 
-"""This module contains functionality shared across all PyRIID models.""" -import json -import os -import uuid -from abc import abstractmethod -from enum import Enum +"""This module contains PyRIID models.""" +from riid.models.bayes import PoissonBayesClassifier +from riid.models.neural_nets import LabelProportionEstimator, MLPClassifier +from riid.models.neural_nets.arad import ARADLatentPredictor, ARADv1, ARADv2 -import numpy as np -import tensorflow as tf -import tf2onnx -from keras.api.models import Model -from keras.api.utils import get_custom_objects - -import riid -from riid.data.labeling import label_to_index_element -from riid.data.sampleset import SampleSet, SpectraState -from riid.losses import mish -from riid.metrics import multi_f1, single_f1 - -get_custom_objects().update({ - "multi_f1": multi_f1, - "single_f1": single_f1, - "mish": mish, -}) - - -class ModelInput(int, Enum): - """Enumerates the potential input sources for a model.""" - GrossSpectrum = 0 - BackgroundSpectrum = 1 - ForegroundSpectrum = 2 - - -class PyRIIDModel: - """Base class for PyRIID models.""" - - def __init__(self, *args, **kwargs): - self._info = {} - self._temp_file_path = "temp_model.json" - self._custom_objects = {} - self._initialize_info() - - @property - def seeds(self): - return self._info["seeds"] - - @seeds.setter - def seeds(self, value): - self._info["seeds"] = value - - @property - def info(self): - return self._info - - @info.setter - def info(self, value): - self._info = value - - @property - def target_level(self): - return self._info["target_level"] - - @target_level.setter - def target_level(self, value): - if value in SampleSet.SOURCES_MULTI_INDEX_NAMES: - self._info["target_level"] = value - else: - msg = ( - f"Target level '{value}' is invalid. " - f"Acceptable levels: {SampleSet.SOURCES_MULTI_INDEX_NAMES}" - ) - raise ValueError(msg) - - @property - def model(self) -> Model: - return self._model - - @model.setter - def model(self, value: Model): - self._model = value - - @property - def model_id(self): - return self._info["model_id"] - - @model_id.setter - def model_id(self, value): - self._info["model_id"] = value - - @property - def model_inputs(self): - return self._info["model_inputs"] - - @model_inputs.setter - def model_inputs(self, value): - self._info["model_inputs"] = value - - @property - def model_outputs(self): - return self._info["model_outputs"] - - @model_outputs.setter - def model_outputs(self, value): - self._info["model_outputs"] = value - - def get_model_outputs_as_label_tuples(self): - return [ - label_to_index_element(v, self.target_level) for v in self.model_outputs - ] - - def _get_model_dict(self) -> dict: - model_json = self.model.to_json() - model_dict = json.loads(model_json) - model_weights = self.model.get_weights() - model_dict = { - "info": self._info, - "model": model_dict, - "weights": model_weights, - } - return model_dict - - def _get_model_str(self) -> str: - model_dict = self._get_model_dict() - model_str = json.dumps(model_dict, indent=4, cls=PyRIIDModelJsonEncoder) - return model_str - - def _initialize_info(self): - init_info = { - "model_id": str(uuid.uuid4()), - "model_type": self.__class__.__name__, - "normalization": SpectraState.Unknown, - "pyriid_version": riid.__version__, - } - self._update_info(**init_info) - - def _update_info(self, **kwargs): - self._info.update(kwargs) - - def _update_custom_objects(self, key, value): - self._custom_objects.update({key: value}) - - def load(self, model_path: str): - """Load the model from a path. 
- - Args: - model_path: path from which to load the model. - """ - if not os.path.exists(model_path): - raise ValueError("Model file does not exist.") - - with open(model_path) as fin: - model = json.load(fin) - - model_str = json.dumps(model["model"]) - self.model = tf.keras.models.model_from_json(model_str, custom_objects=self._custom_objects) - self.model.set_weights([np.array(x) for x in model["weights"]]) - self.info = model["info"] - - def save(self, model_path: str, overwrite=False): - """Save the model to a path. - - Args: - model_path: path at which to save the model. - overwrite: whether to overwrite an existing file if it already exists. - - Raises: - `ValueError` when the given path already exists - """ - if os.path.exists(model_path) and not overwrite: - raise ValueError("Model file already exists.") - - model_str = self._get_model_str() - with open(model_path, "w") as fout: - fout.write(model_str) - - def to_onnx(self, model_path: str = None, **tf2onnx_kwargs: dict): - """Convert the model to an ONNX model. - - Args: - model_path: path at which to save the model - tf2onnx_kwargs: additional kwargs to pass to the conversion - """ - if not model_path.endswith(riid.ONNX_MODEL_FILE_EXTENSION): - raise ValueError(f"ONNX file path must end with {riid.ONNX_MODEL_FILE_EXTENSION}") - if os.path.exists(model_path): - raise ValueError("Model file already exists.") - - tf2onnx.convert.from_keras( - self.model, - output_path=model_path, - **tf2onnx_kwargs - ) - - def to_tflite(self, model_path: str, quantize: bool = False, prune: bool = False): - """Convert the model to a TFLite model and optionally applying quantization or pruning. - - Args: - model_path: file path at which to save the model - quantize: whether to apply quantization - prune: whether to apply pruning - """ - if not model_path.endswith(riid.TFLITE_MODEL_FILE_EXTENSION): - raise ValueError(f"TFLite file path must end with {riid.TFLITE_MODEL_FILE_EXTENSION}") - if os.path.exists(model_path): - raise ValueError("Model file already exists.") - - optimizations = [] - if quantize: - optimizations.append(tf.lite.Optimize.DEFAULT) - if prune: - optimizations.append(tf.lite.Optimize.EXPERIMENTAL_SPARSITY) - - converter = tf.lite.TFLiteConverter.from_keras_model(self.model) - converter.optimizations = optimizations - tflite_model = converter.convert() - - with open(model_path, "wb") as fout: - fout.write(tflite_model) - - @abstractmethod - def fit(self): - pass - - @abstractmethod - def predict(self): - pass - - -class PyRIIDModelJsonEncoder(json.JSONEncoder): - """Custom JSON encoder for saving models. - """ - def default(self, o): - """Converts certain types to JSON-compatible types. - """ - if isinstance(o, np.ndarray): - return o.tolist() - elif isinstance(o, np.float32): - return o.astype(float) - - return super().default(o) +__all__ = ["PoissonBayesClassifier", "LabelProportionEstimator", "MLPClassifier", + "ARADLatentPredictor", "ARADv1", "ARADv2"] diff --git a/riid/models/base.py b/riid/models/base.py new file mode 100644 index 00000000..8f4e0329 --- /dev/null +++ b/riid/models/base.py @@ -0,0 +1,256 @@ +# Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS). +# Under the terms of Contract DE-NA0003525 with NTESS, +# the U.S. Government retains certain rights in this software. 
+"""This module contains functionality shared across all PyRIID models.""" +import json +import os +from pathlib import Path +import uuid +from abc import abstractmethod +from enum import Enum + +import numpy as np +import tensorflow as tf +import tf2onnx +from keras.api.models import Model +from keras.api.utils import get_custom_objects + +import riid +from riid import SampleSet, SpectraState +from riid.data.labeling import label_to_index_element +from riid.losses import mish +from riid.metrics import multi_f1, single_f1 + +get_custom_objects().update({ + "multi_f1": multi_f1, + "single_f1": single_f1, + "mish": mish, +}) + + +class ModelInput(int, Enum): + """Enumerates the potential input sources for a model.""" + GrossSpectrum = 0 + BackgroundSpectrum = 1 + ForegroundSpectrum = 2 + + +class PyRIIDModel: + """Base class for PyRIID models.""" + + def __init__(self, *args, **kwargs): + self._info = {} + self._temp_file_path = "temp_model.json" + self._custom_objects = {} + self._initialize_info() + + @property + def seeds(self): + return self._info["seeds"] + + @seeds.setter + def seeds(self, value): + self._info["seeds"] = value + + @property + def info(self): + return self._info + + @info.setter + def info(self, value): + self._info = value + + @property + def target_level(self): + return self._info["target_level"] + + @target_level.setter + def target_level(self, value): + if value in SampleSet.SOURCES_MULTI_INDEX_NAMES: + self._info["target_level"] = value + else: + msg = ( + f"Target level '{value}' is invalid. " + f"Acceptable levels: {SampleSet.SOURCES_MULTI_INDEX_NAMES}" + ) + raise ValueError(msg) + + @property + def model(self) -> Model: + return self._model + + @model.setter + def model(self, value: Model): + self._model = value + + @property + def model_id(self): + return self._info["model_id"] + + @model_id.setter + def model_id(self, value): + self._info["model_id"] = value + + @property + def model_inputs(self): + return self._info["model_inputs"] + + @model_inputs.setter + def model_inputs(self, value): + self._info["model_inputs"] = value + + @property + def model_outputs(self): + return self._info["model_outputs"] + + @model_outputs.setter + def model_outputs(self, value): + self._info["model_outputs"] = value + + def get_model_outputs_as_label_tuples(self): + return [ + label_to_index_element(v, self.target_level) for v in self.model_outputs + ] + + def _get_model_dict(self) -> dict: + model_json = self.model.to_json() + model_dict = json.loads(model_json) + model_weights = self.model.get_weights() + model_dict = { + "info": self._info, + "model": model_dict, + "weights": model_weights, + } + return model_dict + + def _get_model_str(self) -> str: + model_dict = self._get_model_dict() + model_str = json.dumps(model_dict, indent=4, cls=PyRIIDModelJsonEncoder) + return model_str + + def _initialize_info(self): + init_info = { + "model_id": str(uuid.uuid4()), + "model_type": self.__class__.__name__, + "normalization": SpectraState.Unknown, + "pyriid_version": riid.__version__, + } + self._update_info(**init_info) + + def _update_info(self, **kwargs): + self._info.update(kwargs) + + def _update_custom_objects(self, key, value): + self._custom_objects.update({key: value}) + + def load(self, model_path: str): + """Load the model from a path. + + Args: + model_path: path from which to load the model. 
+ """ + if not os.path.exists(model_path): + raise ValueError("Model file does not exist.") + + with open(model_path) as fin: + model = json.load(fin) + + model_str = json.dumps(model["model"]) + self.model = tf.keras.models.model_from_json(model_str, custom_objects=self._custom_objects) + self.model.set_weights([np.array(x) for x in model["weights"]]) + self.info = model["info"] + + def save(self, model_path: str, overwrite=False): + """Save the model to a path. + + Args: + model_path: path at which to save the model. + overwrite: whether to overwrite an existing file if it already exists. + + Raises: + `ValueError` when the given path already exists + """ + if os.path.exists(model_path) and not overwrite: + raise ValueError("Model file already exists.") + + model_str = self._get_model_str() + with open(model_path, "w") as fout: + fout.write(model_str) + + def to_onnx(self, model_path, **tf2onnx_kwargs: dict): + """Convert the model to an ONNX model. + + Args: + model_path: path at which to save the model + tf2onnx_kwargs: additional kwargs to pass to the conversion + """ + model_path = Path(model_path) + if not str(model_path).endswith(riid.ONNX_MODEL_FILE_EXTENSION): + raise ValueError(f"ONNX file path must end with {riid.ONNX_MODEL_FILE_EXTENSION}") + if model_path.exists(): + raise ValueError("Model file already exists.") + + tf2onnx.convert.from_keras( + self.model, + input_signature=[ + tf.TensorSpec( + shape=input_tensor.shape, + dtype=input_tensor.dtype, + name=input_tensor.name + ) + for input_tensor in self.model.inputs + ], + output_path=str(model_path), + **tf2onnx_kwargs + ) + + def to_tflite(self, model_path, quantize: bool = False, prune: bool = False): + """Convert the model to a TFLite model and optionally applying quantization or pruning. + + Args: + model_path: file path at which to save the model + quantize: whether to apply quantization + prune: whether to apply pruning + """ + model_path = Path(model_path) + if not str(model_path).endswith(riid.TFLITE_MODEL_FILE_EXTENSION): + raise ValueError(f"TFLite file path must end with {riid.TFLITE_MODEL_FILE_EXTENSION}") + if model_path.exists(): + raise ValueError("Model file already exists.") + + optimizations = [] + if quantize: + optimizations.append(tf.lite.Optimize.DEFAULT) + if prune: + optimizations.append(tf.lite.Optimize.EXPERIMENTAL_SPARSITY) + + saved_model_dir = model_path.stem + self.model.export(saved_model_dir) + converter = tf.lite.TFLiteConverter.from_saved_model(str(saved_model_dir)) + converter.optimizations = optimizations + tflite_model = converter.convert() + + with open(model_path, "wb") as fout: + fout.write(tflite_model) + + @abstractmethod + def fit(self): + pass + + @abstractmethod + def predict(self): + pass + + +class PyRIIDModelJsonEncoder(json.JSONEncoder): + """Custom JSON encoder for saving models. + """ + def default(self, o): + """Converts certain types to JSON-compatible types. 
+ """ + if isinstance(o, np.ndarray): + return o.tolist() + elif isinstance(o, np.float32): + return o.astype(float) + + return super().default(o) diff --git a/riid/models/bayes.py b/riid/models/bayes.py index 6850cbb1..762d38c8 100644 --- a/riid/models/bayes.py +++ b/riid/models/bayes.py @@ -8,8 +8,8 @@ from keras.api.layers import Add, Input, Multiply, Subtract from keras.api.models import Model -from riid.data.sampleset import SampleSet -from riid.models import PyRIIDModel +from riid import SampleSet +from riid.models.base import PyRIIDModel from riid.models.layers import (ClipByValueLayer, DivideLayer, ExpandDimsLayer, PoissonLogProbabilityLayer, ReduceMaxLayer, ReduceSumLayer, SeedLayer) diff --git a/riid/models/neural_nets/__init__.py b/riid/models/neural_nets/__init__.py index 42443f88..1dd06420 100644 --- a/riid/models/neural_nets/__init__.py +++ b/riid/models/neural_nets/__init__.py @@ -2,670 +2,7 @@ # Under the terms of Contract DE-NA0003525 with NTESS, # the U.S. Government retains certain rights in this software. """This module contains neural network-based classifiers and regressors.""" -import keras -import numpy as np -import pandas as pd -import tensorflow as tf -from keras.api.activations import sigmoid, softmax -from keras.api.callbacks import EarlyStopping -from keras.api.layers import Dense, Dropout, Input -from keras.api.losses import CategoricalCrossentropy, MeanSquaredError -from keras.api.metrics import F1Score, Precision, Recall -from keras.api.models import Model -from keras.api.optimizers import Adam -from keras.api.regularizers import L1L2, l1, l2 -from keras.api.utils import split_dataset -from scipy.interpolate import UnivariateSpline +from riid.models.neural_nets.basic import MLPClassifier +from riid.models.neural_nets.lpe import LabelProportionEstimator -from riid.data.sampleset import SampleSet, SpectraType -from riid.losses import (build_keras_semisupervised_loss_func, - chi_squared_diff, jensen_shannon_divergence, - normal_nll_diff, poisson_nll_diff, - reconstruction_error, sse_diff, weighted_sse_diff) -from riid.losses.sparsemax import SparsemaxLoss, sparsemax -from riid.metrics import build_keras_semisupervised_metric_func -from riid.models import ModelInput, PyRIIDModel -from riid.models.layers import L1NormLayer - - -class MLPClassifier(PyRIIDModel): - """Multi-layer perceptron classifier.""" - def __init__(self, activation=None, loss=None, optimizer=None, - metrics=None, l2_alpha: float = 1e-4, - activity_regularizer=None, final_activation=None): - """ - Args: - activation: activate function to use for each dense layer - loss: loss function to use for training - optimizer: tensorflow optimizer or optimizer name to use for training - metrics: list of metrics to be evaluating during training - l2_alpha: alpha value for the L2 regularization of each dense layer - activity_regularizer: regularizer function applied each dense layer output - final_activation: final activation function to apply to model output - """ - super().__init__() - - self.activation = activation - self.loss = loss - self.optimizer = optimizer - self.final_activation = final_activation - self.metrics = metrics - self.l2_alpha = l2_alpha - self.activity_regularizer = activity_regularizer - self.final_activation = final_activation - - if self.activation is None: - self.activation = "relu" - if self.loss is None: - self.loss = CategoricalCrossentropy() - if optimizer is None: - self.optimizer = Adam(learning_rate=0.01, clipnorm=0.001) - if self.metrics is None: - self.metrics = 
[F1Score(), Precision(), Recall()] - if self.activity_regularizer is None: - self.activity_regularizer = l1(0.0) - if self.final_activation is None: - self.final_activation = "softmax" - self.model = None - self._predict_fn = None - - def fit(self, ss: SampleSet, batch_size: int = 200, epochs: int = 20, - validation_split: float = 0.2, callbacks=None, - patience: int = 15, es_monitor: str = "val_loss", - es_mode: str = "min", es_verbose=0, target_level="Isotope", verbose: bool = False): - """Fit a model to the given `SampleSet`(s). - - Args: - ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are either - foreground (AKA, "net") or gross. - batch_size: number of samples per gradient update - epochs: maximum number of training iterations - validation_split: percentage of the training data to use as validation data - callbacks: list of callbacks to be passed to the TensorFlow `Model.fit()` method - patience: number of epochs to wait for `EarlyStopping` object - es_monitor: quantity to be monitored for `EarlyStopping` object - es_mode: mode for `EarlyStopping` object - es_verbose: verbosity level for `EarlyStopping` object - target_level: `SampleSet.sources` column level to use - verbose: whether to show detailed model training output - - Returns: - `tf.History` object. - - Raises: - `ValueError` when no spectra are provided as input - """ - if ss.n_samples <= 0: - raise ValueError("No spectr[a|um] provided!") - - if ss.spectra_type == SpectraType.Gross: - self.model_inputs = (ModelInput.GrossSpectrum,) - elif ss.spectra_type == SpectraType.Foreground: - self.model_inputs = (ModelInput.ForegroundSpectrum,) - elif ss.spectra_type == SpectraType.Background: - self.model_inputs = (ModelInput.BackgroundSpectrum,) - else: - raise ValueError(f"{ss.spectra_type} is not supported in this model.") - - X = ss.get_samples() - source_contributions_df = ss.sources.T.groupby(target_level, sort=False).sum().T - model_outputs = source_contributions_df.columns.values.tolist() - Y = source_contributions_df.values - - spectra_tensor = tf.convert_to_tensor(X, dtype=tf.float32) - labels_tensor = tf.convert_to_tensor(Y, dtype=tf.float32) - training_dataset = tf.data.Dataset.from_tensor_slices((spectra_tensor, labels_tensor)) - training_dataset, validation_dataset = split_dataset( - training_dataset, - left_size=validation_split, - shuffle=True - ) - training_dataset = training_dataset.batch(batch_size=batch_size) - validation_dataset = validation_dataset.batch(batch_size=batch_size) - - if not self.model: - inputs = Input(shape=(X.shape[1],), name="Spectrum") - dense_layer_size = X.shape[1] // 2 - dense_layer = Dense( - dense_layer_size, - activation=self.activation, - activity_regularizer=self.activity_regularizer, - kernel_regularizer=l2(self.l2_alpha), - )(inputs) - outputs = Dense(Y.shape[1], activation=self.final_activation)(dense_layer) - self.model = Model(inputs, outputs) - self.model.compile(loss=self.loss, optimizer=self.optimizer, - metrics=self.metrics) - - es = EarlyStopping( - monitor=es_monitor, - patience=patience, - verbose=es_verbose, - restore_best_weights=True, - mode=es_mode, - ) - if callbacks: - callbacks.append(es) - else: - callbacks = [es] - - history = self.model.fit( - training_dataset, - epochs=epochs, - verbose=verbose, - validation_data=validation_dataset, - callbacks=callbacks, - ) - - # Update model information - self._update_info( - target_level=target_level, - model_outputs=model_outputs, - normalization=ss.spectra_state, - ) - - # Define the predict function with 
tf.function and input_signature - self._predict_fn = tf.function( - self._predict, - # input_signature=[tf.TensorSpec(shape=[None, X.shape[1]], dtype=tf.float32)] - experimental_relax_shapes=True - ) - - return history - - def _predict(self, input_tensor): - return self.model(input_tensor, training=False) - - def predict(self, ss: SampleSet, bg_ss: SampleSet = None): - """Classify the spectra in the provided `SampleSet`(s). - - Results are stored inside the first SampleSet's prediction-related properties. - - Args: - ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are either - foreground (AKA, "net") or gross - bg_ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are background - """ - x_test = ss.get_samples().astype(float) - if bg_ss: - X = [x_test, bg_ss.get_samples().astype(float)] - else: - X = x_test - - spectra_tensor = tf.convert_to_tensor(X, dtype=tf.float32) - results = self._predict_fn(spectra_tensor) - - col_level_idx = SampleSet.SOURCES_MULTI_INDEX_NAMES.index(self.target_level) - col_level_subset = SampleSet.SOURCES_MULTI_INDEX_NAMES[:col_level_idx+1] - ss.prediction_probas = pd.DataFrame( - data=results, - columns=pd.MultiIndex.from_tuples( - self.get_model_outputs_as_label_tuples(), - names=col_level_subset - ) - ) - - ss.classified_by = self.model_id - - -class LabelProportionEstimator(PyRIIDModel): - """Regressor for predicting label proportions that uses a semi-supervised loss. - - Optionally, a U-spline-based out-of-distribution detection model can be fit to target a desired - false positive rate. - """ - UNSUPERVISED_LOSS_FUNCS = { - "poisson_nll": poisson_nll_diff, - "normal_nll": normal_nll_diff, - "sse": sse_diff, - "weighted_sse": weighted_sse_diff, - "jsd": jensen_shannon_divergence, - "chi_squared": chi_squared_diff - } - SUPERVISED_LOSS_FUNCS = { - "sparsemax": ( - SparsemaxLoss, - { - "from_logits": True, - "reduction": tf.keras.losses.Reduction.NONE, - }, - sparsemax, - ), - "categorical_crossentropy": ( - CategoricalCrossentropy, - { - "from_logits": True, - "reduction": tf.keras.losses.Reduction.NONE, - }, - softmax, - ), - "mse": ( - MeanSquaredError, - { - "reduction": tf.keras.losses.Reduction.NONE, - }, - sigmoid, - ) - } - INFO_KEYS = ( - # model architecture - "hidden_layers", - "learning_rate", - "epsilon", - "sup_loss", - "unsup_loss", - "metrics", - "beta", - "hidden_layer_activation", - "kernel_l1_regularization", - "kernel_l2_regularization", - "bias_l1_regularization", - "bias_l2_regularization", - "activity_l1_regularization", - "activity_l2_regularization", - "dropout", - "ood_fp_rate", - "fit_spline", - "spline_bins", - "spline_k", - "spline_s", - # dictionaries - "source_dict", - # populated when loading model - "spline_snrs", - "spline_recon_errors", - ) - - def __init__(self, hidden_layers: tuple = (256,), sup_loss="sparsemax", unsup_loss="sse", - metrics: list = ["mae", "categorical_crossentropy"], beta=0.9, source_dict=None, - optimizer="adam", optimizer_kwargs=None, learning_rate: float = 1e-3, - hidden_layer_activation: str = "mish", - kernel_l1_regularization: float = 0.0, kernel_l2_regularization: float = 0.0, - bias_l1_regularization: float = 0.0, bias_l2_regularization: float = 0.0, - activity_l1_regularization: float = 0.0, activity_l2_regularization: float = 0.0, - dropout: float = 0.0, ood_fp_rate: float = 0.05, - fit_spline: bool = True, spline_bins: int = 15, spline_k: int = 3, - spline_s: int = 0, spline_snrs=None, spline_recon_errors=None): - """ - Args: - hidden_layers: tuple defining the number and 
size of dense layers - sup_loss: supervised loss function to use for training - unsup_loss: unsupervised loss function to use for training the - foreground branch of the network (options: "sse", "poisson_nll", - "normal_nll", "weighted_sse", "jsd", or "chi_squared") - metrics: list of metrics to be evaluating during training - beta: tradeoff parameter between the supervised and unsupervised foreground loss - source_dict: 2D array of pure, long-collect foreground spectra - optimizer: tensorflow optimizer or optimizer name to use for training - optimizer_kwargs: kwargs for optimizer - learning_rate: learning rate for the optimizer - hidden_layer_activation: activation function to use for each dense layer - kernel_l1_regularization: l1 regularization value for the kernel regularizer - kernel_l2_regularization: l2 regularization value for the kernel regularizer - bias_l1_regularization: l1 regularization value for the bias regularizer - bias_l2_regularization: l2 regularization value for the bias regularizer - activity_l1_regularization: l1 regularization value for the activity regularizer - activity_l2_regularization: l2 regularization value for the activity regularizer - dropout: amount of dropout to apply to each dense layer - ood_fp_rate: false positive rate used to determine threshold for - out-of-distribution (OOD) detection - fit_spline: whether or not to fit UnivariateSpline for OOD threshold function - spline_bins: number of bins used when fitting the UnivariateSpline threshold - function for OOD detection - spline_k: degree of smoothing for the UnivariateSpline - spline_s: positive smoothing factor used to choose the number of knots in the - UnivariateSpline (s=0 forces the spline through all the datapoints, equivalent to - InterpolatedUnivariateSpline) - spline_snrs: SNRs from training used as the x-values to fit the UnivariateSpline - spline_recon_errors: reconstruction errors from training used as the y-values to - fit the UnivariateSpline - """ - super().__init__() - - self.hidden_layers = hidden_layers - self.sup_loss = sup_loss - self.unsup_loss = unsup_loss - self.sup_loss_func, self.activation = self._get_sup_loss_func( - sup_loss, - prefix="sup" - ) - self.sup_loss_func_name = self.sup_loss_func.name - - self.optimizer = optimizer - if isinstance(optimizer, str): - self.optimizer = keras.optimizers.get(optimizer) - if optimizer_kwargs is not None: - for key, value in optimizer_kwargs.items(): - setattr(self.optimizer, key, value) - self.optimizer.learning_rate = learning_rate - - self.unsup_loss_func = self._get_unsup_loss_func(unsup_loss) - self.unsup_loss_func_name = f"unsup_{unsup_loss}_loss" - self.metrics = metrics - self.beta = beta - self.source_dict = source_dict - self.semisup_loss_func_name = "semisup_loss" - self.hidden_layer_activation = hidden_layer_activation - self.kernel_l1_regularization = kernel_l1_regularization - self.kernel_l2_regularization = kernel_l2_regularization - self.bias_l1_regularization = bias_l1_regularization - self.bias_l2_regularization = bias_l2_regularization - self.activity_l1_regularization = activity_l1_regularization - self.activity_l2_regularization = activity_l2_regularization - self.dropout = dropout - self.ood_fp_rate = ood_fp_rate - self.fit_spline = fit_spline - self.spline_bins = spline_bins - self.spline_k = spline_k - self.spline_s = spline_s - self.spline_snrs = spline_snrs - self.spline_recon_errors = spline_recon_errors - self.model = None - - self._update_custom_objects("L1NormLayer", L1NormLayer) - - @property - def 
source_dict(self) -> dict: - return self.info["source_dict"] - - @source_dict.setter - def source_dict(self, value: dict): - self.info["source_dict"] = value - - def _get_sup_loss_func(self, loss_func_str, prefix): - if loss_func_str not in self.SUPERVISED_LOSS_FUNCS: - raise KeyError(f"'{loss_func_str}' is not a supported supervised loss function.") - func, kwargs, activation = self.SUPERVISED_LOSS_FUNCS[loss_func_str] - loss_func_name = f"{prefix}_{loss_func_str}_loss" - return func(name=loss_func_name, **kwargs), activation - - def _get_unsup_loss_func(self, loss_func_str): - if loss_func_str not in self.UNSUPERVISED_LOSS_FUNCS: - raise KeyError(f"'{loss_func_str}' is not a supported unsupervised loss function.") - return self.UNSUPERVISED_LOSS_FUNCS[loss_func_str] - - def _initialize_model(self, input_size, output_size): - spectra_input = Input(input_size, name="input_spectrum") - spectra_norm = L1NormLayer(name="normalized_input_spectrum")(spectra_input) - x = spectra_norm - for layer, nodes in enumerate(self.hidden_layers): - x = Dense( - nodes, - activation=self.hidden_layer_activation, - kernel_regularizer=L1L2( - l1=self.kernel_l1_regularization, - l2=self.kernel_l2_regularization - ), - bias_regularizer=L1L2( - l1=self.bias_l1_regularization, - l2=self.bias_l2_regularization - ), - activity_regularizer=L1L2( - l1=self.activity_l1_regularization, - l2=self.activity_l2_regularization - ), - name=f"dense_{layer}" - )(x) - - if self.dropout > 0: - x = Dropout(self.dropout)(x) - output = Dense( - output_size, - activation="linear", - name="output" - )(x) - - self.model = Model(inputs=[spectra_input], outputs=[output]) - - def _get_info_as_dict(self): - info_dict = {} - for k, v in vars(self).items(): - if k not in self.INFO_KEYS: - continue - if isinstance(v, np.ndarray): - info_dict[k] = v.tolist() - else: - info_dict[k] = v - return info_dict - - def _get_spline_threshold_func(self): - return UnivariateSpline( - self.info["avg_snrs"], - self.info["thresholds"], - k=self.spline_k, - s=self.spline_s - ) - - def _fit_spline_threshold_func(self): - out = pd.qcut( - np.array(self.spline_snrs), - self.spline_bins, - labels=False, - ) - thresholds = [ - np.quantile(np.array(self.spline_recon_errors)[out == int(i)], 1-self.ood_fp_rate) - for i in range(self.spline_bins) - ] - avg_snrs = [ - np.mean(np.array(self.spline_snrs)[out == int(i)]) for i in range(self.spline_bins) - ] - self._update_info( - avg_snrs=avg_snrs, - thresholds=thresholds, - spline_k=self.spline_k, - spline_s=self.spline_s, - ) - - def _get_snrs(self, ss: SampleSet, bg_cps: float, is_gross: bool) -> np.ndarray: - fg_counts = ss.info.total_counts.values.astype("float64") - bg_counts = ss.info.live_time.values * bg_cps - if is_gross: - fg_counts = fg_counts - bg_counts - snrs = fg_counts / np.sqrt(bg_counts) - return snrs - - def fit(self, seeds_ss: SampleSet, ss: SampleSet, bg_cps: int = 300, is_gross: bool = False, - batch_size: int = 10, epochs: int = 20, validation_split: float = 0.2, - callbacks=None, patience: int = 15, es_monitor: str = "val_loss", - es_mode: str = "min", es_verbose=0, es_min_delta: float = 0.0, - normalize_sup_loss: bool = True, normalize_func=tf.math.tanh, - normalize_scaler: float = 1.0, target_level="Isotope", verbose: bool = False): - """Fit a model to the given SampleSet(s). 
- - Args: - seeds_ss: `SampleSet` of pure, long-collect spectra - ss: `SampleSet` of `n` gross or foreground spectra where `n` >= 1 - bg_cps: background rate assumption used for calculating SNR in spline function - using in OOD detection - is_gross: whether `ss` contains gross spectra - batch_size: number of samples per gradient update - epochs: maximum number of training iterations - validation_split: proportion of training data to use as validation data - callbacks: list of callbacks to be passed to TensorFlow Model.fit() method - patience: number of epochs to wait for `EarlyStopping` object - es_monitor: quantity to be monitored for `EarlyStopping` object - es_mode: mode for `EarlyStopping` object - es_verbose: verbosity level for `EarlyStopping` object - es_min_delta: minimum change to count as an improvement for early stopping - normalize_sup_loss: whether to normalize the supervised loss term - normalize_func: normalization function used for supervised loss term - normalize_scaler: scalar that sets the steepness of the normalization function - target_level: source level to target for model output - verbose: whether model training output is printed to the terminal - """ - spectra = ss.get_samples().astype(float) - sources_df = ss.sources.T.groupby(target_level, sort=False).sum().T - sources = sources_df.values.astype(float) - self.sources_columns = sources_df.columns - - if verbose: - print("Building dictionary...") - - if self.source_dict is None: - self.source_dict = _get_reordered_spectra( - seeds_ss.spectra, - seeds_ss.sources, - self.sources_columns, - target_level=target_level - ).values - - if not self.model: - if verbose: - print("Initializing model...") - self._initialize_model( - (ss.n_channels,), - sources.shape[1], - ) - elif verbose: - print("Model already initialized.") - - if verbose: - print("Building loss functions...") - - self.semisup_loss_func = build_keras_semisupervised_loss_func( - self.sup_loss_func, - self.unsup_loss_func, - self.source_dict, - self.beta, - self.activation, - n_labels=sources.shape[1], - normalize=normalize_sup_loss, - normalize_func=normalize_func, - normalize_scaler=normalize_scaler - ) - - semisup_metrics = None - if self.metrics: - if verbose: - print("Building metric functions...") - semisup_metrics = [] - for each in self.metrics: - if isinstance(each, str): - semisup_metrics.append( - build_keras_semisupervised_metric_func( - tf.keras.metrics.get(each), - self.activation, - sources.shape[1] - ) - ) - else: - semisup_metrics.append( - build_keras_semisupervised_metric_func( - each, - self.activation, - sources.shape[1] - ) - ) - - self.model.compile( - loss=self.semisup_loss_func, - optimizer=self.optimizer, - metrics=semisup_metrics - ) - - es = EarlyStopping( - monitor=es_monitor, - patience=patience, - verbose=es_verbose, - restore_best_weights=True, - mode=es_mode, - min_delta=es_min_delta, - ) - - if callbacks: - callbacks.append(es) - else: - callbacks = [es] - - history = self.model.fit( - spectra, - np.append(sources, spectra, axis=1), - epochs=epochs, - verbose=verbose, - validation_split=validation_split, - callbacks=callbacks, - shuffle=True, - batch_size=batch_size - ) - - if self.fit_spline: - if verbose: - print("Finding OOD detection threshold function...") - - train_logits = self.model.predict(spectra, verbose=0) - train_lpes = self.activation(tf.convert_to_tensor(train_logits, dtype=tf.float32)) - self.spline_recon_errors = reconstruction_error( - tf.convert_to_tensor(spectra, dtype=tf.float32), - train_lpes, - 
self.source_dict, - self.unsup_loss_func - ).numpy() - self.spline_snrs = self._get_snrs(ss, bg_cps, is_gross) - self._fit_spline_threshold_func() - - info = self._get_info_as_dict() - self._update_info( - target_level=target_level, - model_outputs=sources_df.columns.values.tolist(), - normalization=ss.spectra_state, - **info, - ) - - return history - - def predict(self, ss: SampleSet, bg_cps: int = 300, is_gross: bool = False, verbose=False): - """Estimate the proportions of counts present in each sample of the provided SampleSet. - - Results are stored inside the SampleSet's prediction_probas property. - - Args: - ss: `SampleSet` of `n` foreground or gross spectra where `n` >= 1 - bg_cps: background rate used for estimating sample SNRs. - If background rate varies to a significant degree, split up sampleset - by SNR and make multiple calls to this method. - is_gross: whether `ss` contains gross spectra - """ - test_spectra = ss.get_samples().astype(float) - - logits = self.model.predict(test_spectra, verbose=verbose) - lpes = self.activation(tf.convert_to_tensor(logits, dtype=tf.float32)) - - col_level_idx = SampleSet.SOURCES_MULTI_INDEX_NAMES.index(self.target_level) - col_level_subset = SampleSet.SOURCES_MULTI_INDEX_NAMES[:col_level_idx+1] - ss.prediction_probas = pd.DataFrame( - data=lpes, - columns=pd.MultiIndex.from_tuples( - self.get_model_outputs_as_label_tuples(), - names=col_level_subset - ) - ) - - # Fill in unsupervised losses - recon_errors = reconstruction_error( - tf.convert_to_tensor(test_spectra, dtype=tf.float32), - lpes, - self.source_dict, - self.unsup_loss_func - ).numpy() - - if self.fit_spline: - snrs = self._get_snrs(ss, bg_cps, is_gross) - thresholds = self._get_spline_threshold_func()(snrs) - is_ood = recon_errors > thresholds - ss.info["ood"] = is_ood - - ss.info["recon_error"] = recon_errors - - -def _get_reordered_spectra(old_spectra_df: pd.DataFrame, old_sources_df: pd.DataFrame, - new_sources_columns, target_level) -> pd.DataFrame: - collapsed_sources_df = old_sources_df\ - .T.groupby(target_level)\ - .sum().T - reordered_spectra_df = old_spectra_df.iloc[ - collapsed_sources_df[ - new_sources_columns - ].idxmax() - ].reset_index(drop=True) - - return reordered_spectra_df +__all__ = ["LabelProportionEstimator", "MLPClassifier"] diff --git a/riid/models/neural_nets/arad.py b/riid/models/neural_nets/arad.py index 3679bfe6..0fd6b321 100644 --- a/riid/models/neural_nets/arad.py +++ b/riid/models/neural_nets/arad.py @@ -21,10 +21,10 @@ from scipy.spatial.distance import jensenshannon from scipy.stats import entropy -from riid.data.sampleset import SampleSet, SpectraState +from riid import SampleSet, SpectraState from riid.losses import mish -from riid.models import PyRIIDModel -from riid.models.bayes import ExpandDimsLayer +from riid.models.base import PyRIIDModel +from riid.models.layers import ExpandDimsLayer class ARADv1TF(Model): diff --git a/riid/models/neural_nets/basic.py b/riid/models/neural_nets/basic.py new file mode 100644 index 00000000..81db0962 --- /dev/null +++ b/riid/models/neural_nets/basic.py @@ -0,0 +1,197 @@ +# Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS). +# Under the terms of Contract DE-NA0003525 with NTESS, +# the U.S. Government retains certain rights in this software. 
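`MLPClassifier` moves into this new module essentially unchanged apart from the trimmed imports. A minimal usage sketch against the `fit`/`predict` signatures below, assuming `train_ss` is an existing gross-spectra `SampleSet` (hypothetical name) whose `sources` are populated at the Isotope level:

    from riid.models import MLPClassifier

    model = MLPClassifier()
    model.fit(train_ss, epochs=10, patience=5, target_level="Isotope", verbose=True)
    model.predict(train_ss)
    # Per-label probabilities are written back onto the SampleSet.
    print(train_ss.prediction_probas.head())
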
+"""This module contains a simple neural network.""" +import pandas as pd +import tensorflow as tf +from keras.api.callbacks import EarlyStopping +from keras.api.layers import Dense, Input +from keras.api.losses import CategoricalCrossentropy +from keras.api.metrics import F1Score, Precision, Recall +from keras.api.models import Model +from keras.api.optimizers import Adam +from keras.api.regularizers import l1, l2 +from keras.api.utils import split_dataset + +from riid import SampleSet, SpectraType +from riid.models.base import ModelInput, PyRIIDModel + + +class MLPClassifier(PyRIIDModel): + """Multi-layer perceptron classifier.""" + def __init__(self, activation=None, loss=None, optimizer=None, + metrics=None, l2_alpha: float = 1e-4, + activity_regularizer=None, final_activation=None): + """ + Args: + activation: activate function to use for each dense layer + loss: loss function to use for training + optimizer: tensorflow optimizer or optimizer name to use for training + metrics: list of metrics to be evaluating during training + l2_alpha: alpha value for the L2 regularization of each dense layer + activity_regularizer: regularizer function applied each dense layer output + final_activation: final activation function to apply to model output + """ + super().__init__() + + self.activation = activation + self.loss = loss + self.optimizer = optimizer + self.final_activation = final_activation + self.metrics = metrics + self.l2_alpha = l2_alpha + self.activity_regularizer = activity_regularizer + self.final_activation = final_activation + + if self.activation is None: + self.activation = "relu" + if self.loss is None: + self.loss = CategoricalCrossentropy() + if optimizer is None: + self.optimizer = Adam(learning_rate=0.01, clipnorm=0.001) + if self.metrics is None: + self.metrics = [F1Score(), Precision(), Recall()] + if self.activity_regularizer is None: + self.activity_regularizer = l1(0.0) + if self.final_activation is None: + self.final_activation = "softmax" + self.model = None + self._predict_fn = None + + def fit(self, ss: SampleSet, batch_size: int = 200, epochs: int = 20, + validation_split: float = 0.2, callbacks=None, + patience: int = 15, es_monitor: str = "val_loss", + es_mode: str = "min", es_verbose=0, target_level="Isotope", verbose: bool = False): + """Fit a model to the given `SampleSet`(s). + + Args: + ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are either + foreground (AKA, "net") or gross. + batch_size: number of samples per gradient update + epochs: maximum number of training iterations + validation_split: percentage of the training data to use as validation data + callbacks: list of callbacks to be passed to the TensorFlow `Model.fit()` method + patience: number of epochs to wait for `EarlyStopping` object + es_monitor: quantity to be monitored for `EarlyStopping` object + es_mode: mode for `EarlyStopping` object + es_verbose: verbosity level for `EarlyStopping` object + target_level: `SampleSet.sources` column level to use + verbose: whether to show detailed model training output + + Returns: + `tf.History` object. 
+ + Raises: + `ValueError` when no spectra are provided as input + """ + if ss.n_samples <= 0: + raise ValueError("No spectr[a|um] provided!") + + if ss.spectra_type == SpectraType.Gross: + self.model_inputs = (ModelInput.GrossSpectrum,) + elif ss.spectra_type == SpectraType.Foreground: + self.model_inputs = (ModelInput.ForegroundSpectrum,) + elif ss.spectra_type == SpectraType.Background: + self.model_inputs = (ModelInput.BackgroundSpectrum,) + else: + raise ValueError(f"{ss.spectra_type} is not supported in this model.") + + X = ss.get_samples() + source_contributions_df = ss.sources.T.groupby(target_level, sort=False).sum().T + model_outputs = source_contributions_df.columns.values.tolist() + Y = source_contributions_df.values + + spectra_tensor = tf.convert_to_tensor(X, dtype=tf.float32) + labels_tensor = tf.convert_to_tensor(Y, dtype=tf.float32) + training_dataset = tf.data.Dataset.from_tensor_slices((spectra_tensor, labels_tensor)) + training_dataset, validation_dataset = split_dataset( + training_dataset, + left_size=validation_split, + shuffle=True + ) + training_dataset = training_dataset.batch(batch_size=batch_size) + validation_dataset = validation_dataset.batch(batch_size=batch_size) + + if not self.model: + inputs = Input(shape=(X.shape[1],), name="Spectrum") + dense_layer_size = X.shape[1] // 2 + dense_layer = Dense( + dense_layer_size, + activation=self.activation, + activity_regularizer=self.activity_regularizer, + kernel_regularizer=l2(self.l2_alpha), + )(inputs) + outputs = Dense(Y.shape[1], activation=self.final_activation)(dense_layer) + self.model = Model(inputs, outputs) + self.model.compile(loss=self.loss, optimizer=self.optimizer, + metrics=self.metrics) + + es = EarlyStopping( + monitor=es_monitor, + patience=patience, + verbose=es_verbose, + restore_best_weights=True, + mode=es_mode, + ) + if callbacks: + callbacks.append(es) + else: + callbacks = [es] + + history = self.model.fit( + training_dataset, + epochs=epochs, + verbose=verbose, + validation_data=validation_dataset, + callbacks=callbacks, + ) + + # Update model information + self._update_info( + target_level=target_level, + model_outputs=model_outputs, + normalization=ss.spectra_state, + ) + + # Define the predict function with tf.function and input_signature + self._predict_fn = tf.function( + self._predict, + # input_signature=[tf.TensorSpec(shape=[None, X.shape[1]], dtype=tf.float32)] + experimental_relax_shapes=True + ) + + return history + + def _predict(self, input_tensor): + return self.model(input_tensor, training=False) + + def predict(self, ss: SampleSet, bg_ss: SampleSet = None): + """Classify the spectra in the provided `SampleSet`(s). + + Results are stored inside the first SampleSet's prediction-related properties. 
+ + Args: + ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are either + foreground (AKA, "net") or gross + bg_ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are background + """ + x_test = ss.get_samples().astype(float) + if bg_ss: + X = [x_test, bg_ss.get_samples().astype(float)] + else: + X = x_test + + spectra_tensor = tf.convert_to_tensor(X, dtype=tf.float32) + results = self._predict_fn(spectra_tensor) + + col_level_idx = SampleSet.SOURCES_MULTI_INDEX_NAMES.index(self.target_level) + col_level_subset = SampleSet.SOURCES_MULTI_INDEX_NAMES[:col_level_idx+1] + ss.prediction_probas = pd.DataFrame( + data=results, + columns=pd.MultiIndex.from_tuples( + self.get_model_outputs_as_label_tuples(), + names=col_level_subset + ) + ) + + ss.classified_by = self.model_id diff --git a/riid/models/neural_nets/lpe.py b/riid/models/neural_nets/lpe.py new file mode 100644 index 00000000..346d5b3f --- /dev/null +++ b/riid/models/neural_nets/lpe.py @@ -0,0 +1,489 @@ +# Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS). +# Under the terms of Contract DE-NA0003525 with NTESS, +# the U.S. Government retains certain rights in this software. +"""This module contains the label proportion estimator.""" + +import keras +import numpy as np +import pandas as pd +import tensorflow as tf +from keras.api.activations import sigmoid, softmax +from keras.api.callbacks import EarlyStopping +from keras.api.layers import Dense, Dropout, Input +from keras.api.losses import CategoricalCrossentropy, MeanSquaredError +from keras.api.models import Model +from keras.api.regularizers import L1L2 +from scipy.interpolate import UnivariateSpline + +from riid import SampleSet +from riid.losses import (build_keras_semisupervised_loss_func, + chi_squared_diff, jensen_shannon_divergence, + normal_nll_diff, poisson_nll_diff, + reconstruction_error, sse_diff, weighted_sse_diff) +from riid.losses.sparsemax import SparsemaxLoss, sparsemax +from riid.metrics import build_keras_semisupervised_metric_func +from riid.models.base import PyRIIDModel +from riid.models.layers import L1NormLayer + + +class LabelProportionEstimator(PyRIIDModel): + """Regressor for predicting label proportions that uses a semi-supervised loss. + + Optionally, a U-spline-based out-of-distribution detection model can be fit to target a desired + false positive rate. 
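A hypothetical usage sketch of the class described above, assuming `seeds_ss` holds pure, long-collect seed spectra and `train_ss` holds labeled gross spectra (both names are placeholders):

    from riid.models import LabelProportionEstimator

    lpe = LabelProportionEstimator(hidden_layers=(256,), sup_loss="sparsemax", unsup_loss="sse")
    lpe.fit(seeds_ss, train_ss, bg_cps=300, is_gross=True, epochs=10)
    lpe.predict(train_ss, bg_cps=300, is_gross=True)
    # train_ss.prediction_probas holds the estimated proportions;
    # train_ss.info["ood"] flags spectra whose reconstruction error exceeds the fitted spline threshold.
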
+ """ + UNSUPERVISED_LOSS_FUNCS = { + "poisson_nll": poisson_nll_diff, + "normal_nll": normal_nll_diff, + "sse": sse_diff, + "weighted_sse": weighted_sse_diff, + "jsd": jensen_shannon_divergence, + "chi_squared": chi_squared_diff + } + SUPERVISED_LOSS_FUNCS = { + "sparsemax": ( + SparsemaxLoss, + { + "from_logits": True, + "reduction": tf.keras.losses.Reduction.NONE, + }, + sparsemax, + ), + "categorical_crossentropy": ( + CategoricalCrossentropy, + { + "from_logits": True, + "reduction": tf.keras.losses.Reduction.NONE, + }, + softmax, + ), + "mse": ( + MeanSquaredError, + { + "reduction": tf.keras.losses.Reduction.NONE, + }, + sigmoid, + ) + } + INFO_KEYS = ( + # model architecture + "hidden_layers", + "learning_rate", + "epsilon", + "sup_loss", + "unsup_loss", + "metrics", + "beta", + "hidden_layer_activation", + "kernel_l1_regularization", + "kernel_l2_regularization", + "bias_l1_regularization", + "bias_l2_regularization", + "activity_l1_regularization", + "activity_l2_regularization", + "dropout", + "ood_fp_rate", + "fit_spline", + "spline_bins", + "spline_k", + "spline_s", + # dictionaries + "source_dict", + # populated when loading model + "spline_snrs", + "spline_recon_errors", + ) + + def __init__(self, hidden_layers: tuple = (256,), sup_loss="sparsemax", unsup_loss="sse", + metrics: list = ["mae", "categorical_crossentropy"], beta=0.9, source_dict=None, + optimizer="adam", optimizer_kwargs=None, learning_rate: float = 1e-3, + hidden_layer_activation: str = "mish", + kernel_l1_regularization: float = 0.0, kernel_l2_regularization: float = 0.0, + bias_l1_regularization: float = 0.0, bias_l2_regularization: float = 0.0, + activity_l1_regularization: float = 0.0, activity_l2_regularization: float = 0.0, + dropout: float = 0.0, ood_fp_rate: float = 0.05, + fit_spline: bool = True, spline_bins: int = 15, spline_k: int = 3, + spline_s: int = 0, spline_snrs=None, spline_recon_errors=None): + """ + Args: + hidden_layers: tuple defining the number and size of dense layers + sup_loss: supervised loss function to use for training + unsup_loss: unsupervised loss function to use for training the + foreground branch of the network (options: "sse", "poisson_nll", + "normal_nll", "weighted_sse", "jsd", or "chi_squared") + metrics: list of metrics to be evaluating during training + beta: tradeoff parameter between the supervised and unsupervised foreground loss + source_dict: 2D array of pure, long-collect foreground spectra + optimizer: tensorflow optimizer or optimizer name to use for training + optimizer_kwargs: kwargs for optimizer + learning_rate: learning rate for the optimizer + hidden_layer_activation: activation function to use for each dense layer + kernel_l1_regularization: l1 regularization value for the kernel regularizer + kernel_l2_regularization: l2 regularization value for the kernel regularizer + bias_l1_regularization: l1 regularization value for the bias regularizer + bias_l2_regularization: l2 regularization value for the bias regularizer + activity_l1_regularization: l1 regularization value for the activity regularizer + activity_l2_regularization: l2 regularization value for the activity regularizer + dropout: amount of dropout to apply to each dense layer + ood_fp_rate: false positive rate used to determine threshold for + out-of-distribution (OOD) detection + fit_spline: whether or not to fit UnivariateSpline for OOD threshold function + spline_bins: number of bins used when fitting the UnivariateSpline threshold + function for OOD detection + spline_k: degree of smoothing 
for the UnivariateSpline + spline_s: positive smoothing factor used to choose the number of knots in the + UnivariateSpline (s=0 forces the spline through all the datapoints, equivalent to + InterpolatedUnivariateSpline) + spline_snrs: SNRs from training used as the x-values to fit the UnivariateSpline + spline_recon_errors: reconstruction errors from training used as the y-values to + fit the UnivariateSpline + """ + super().__init__() + + self.hidden_layers = hidden_layers + self.sup_loss = sup_loss + self.unsup_loss = unsup_loss + self.sup_loss_func, self.activation = self._get_sup_loss_func( + sup_loss, + prefix="sup" + ) + self.sup_loss_func_name = self.sup_loss_func.name + + self.optimizer = optimizer + if isinstance(optimizer, str): + self.optimizer = keras.optimizers.get(optimizer) + if optimizer_kwargs is not None: + for key, value in optimizer_kwargs.items(): + setattr(self.optimizer, key, value) + self.optimizer.learning_rate = learning_rate + + self.unsup_loss_func = self._get_unsup_loss_func(unsup_loss) + self.unsup_loss_func_name = f"unsup_{unsup_loss}_loss" + self.metrics = metrics + self.beta = beta + self.source_dict = source_dict + self.semisup_loss_func_name = "semisup_loss" + self.hidden_layer_activation = hidden_layer_activation + self.kernel_l1_regularization = kernel_l1_regularization + self.kernel_l2_regularization = kernel_l2_regularization + self.bias_l1_regularization = bias_l1_regularization + self.bias_l2_regularization = bias_l2_regularization + self.activity_l1_regularization = activity_l1_regularization + self.activity_l2_regularization = activity_l2_regularization + self.dropout = dropout + self.ood_fp_rate = ood_fp_rate + self.fit_spline = fit_spline + self.spline_bins = spline_bins + self.spline_k = spline_k + self.spline_s = spline_s + self.spline_snrs = spline_snrs + self.spline_recon_errors = spline_recon_errors + self.model = None + + self._update_custom_objects("L1NormLayer", L1NormLayer) + + @property + def source_dict(self) -> dict: + return self.info["source_dict"] + + @source_dict.setter + def source_dict(self, value: dict): + self.info["source_dict"] = value + + def _get_sup_loss_func(self, loss_func_str, prefix): + if loss_func_str not in self.SUPERVISED_LOSS_FUNCS: + raise KeyError(f"'{loss_func_str}' is not a supported supervised loss function.") + func, kwargs, activation = self.SUPERVISED_LOSS_FUNCS[loss_func_str] + loss_func_name = f"{prefix}_{loss_func_str}_loss" + return func(name=loss_func_name, **kwargs), activation + + def _get_unsup_loss_func(self, loss_func_str): + if loss_func_str not in self.UNSUPERVISED_LOSS_FUNCS: + raise KeyError(f"'{loss_func_str}' is not a supported unsupervised loss function.") + return self.UNSUPERVISED_LOSS_FUNCS[loss_func_str] + + def _initialize_model(self, input_size, output_size): + spectra_input = Input(input_size, name="input_spectrum") + spectra_norm = L1NormLayer(name="normalized_input_spectrum")(spectra_input) + x = spectra_norm + for layer, nodes in enumerate(self.hidden_layers): + x = Dense( + nodes, + activation=self.hidden_layer_activation, + kernel_regularizer=L1L2( + l1=self.kernel_l1_regularization, + l2=self.kernel_l2_regularization + ), + bias_regularizer=L1L2( + l1=self.bias_l1_regularization, + l2=self.bias_l2_regularization + ), + activity_regularizer=L1L2( + l1=self.activity_l1_regularization, + l2=self.activity_l2_regularization + ), + name=f"dense_{layer}" + )(x) + + if self.dropout > 0: + x = Dropout(self.dropout)(x) + output = Dense( + output_size, + activation="linear", + 
name="output" + )(x) + + self.model = Model(inputs=[spectra_input], outputs=[output]) + + def _get_info_as_dict(self): + info_dict = {} + for k, v in vars(self).items(): + if k not in self.INFO_KEYS: + continue + if isinstance(v, np.ndarray): + info_dict[k] = v.tolist() + else: + info_dict[k] = v + return info_dict + + def _get_spline_threshold_func(self): + return UnivariateSpline( + self.info["avg_snrs"], + self.info["thresholds"], + k=self.spline_k, + s=self.spline_s + ) + + def _fit_spline_threshold_func(self): + out = pd.qcut( + np.array(self.spline_snrs), + self.spline_bins, + labels=False, + ) + thresholds = [ + np.quantile(np.array(self.spline_recon_errors)[out == int(i)], 1-self.ood_fp_rate) + for i in range(self.spline_bins) + ] + avg_snrs = [ + np.mean(np.array(self.spline_snrs)[out == int(i)]) for i in range(self.spline_bins) + ] + self._update_info( + avg_snrs=avg_snrs, + thresholds=thresholds, + spline_k=self.spline_k, + spline_s=self.spline_s, + ) + + def _get_snrs(self, ss: SampleSet, bg_cps: float, is_gross: bool) -> np.ndarray: + fg_counts = ss.info.total_counts.values.astype("float64") + bg_counts = ss.info.live_time.values * bg_cps + if is_gross: + fg_counts = fg_counts - bg_counts + snrs = fg_counts / np.sqrt(bg_counts) + return snrs + + def fit(self, seeds_ss: SampleSet, ss: SampleSet, bg_cps: int = 300, is_gross: bool = False, + batch_size: int = 10, epochs: int = 20, validation_split: float = 0.2, + callbacks=None, patience: int = 15, es_monitor: str = "val_loss", + es_mode: str = "min", es_verbose=0, es_min_delta: float = 0.0, + normalize_sup_loss: bool = True, normalize_func=tf.math.tanh, + normalize_scaler: float = 1.0, target_level="Isotope", verbose: bool = False): + """Fit a model to the given SampleSet(s). + + Args: + seeds_ss: `SampleSet` of pure, long-collect spectra + ss: `SampleSet` of `n` gross or foreground spectra where `n` >= 1 + bg_cps: background rate assumption used for calculating SNR in spline function + using in OOD detection + is_gross: whether `ss` contains gross spectra + batch_size: number of samples per gradient update + epochs: maximum number of training iterations + validation_split: proportion of training data to use as validation data + callbacks: list of callbacks to be passed to TensorFlow Model.fit() method + patience: number of epochs to wait for `EarlyStopping` object + es_monitor: quantity to be monitored for `EarlyStopping` object + es_mode: mode for `EarlyStopping` object + es_verbose: verbosity level for `EarlyStopping` object + es_min_delta: minimum change to count as an improvement for early stopping + normalize_sup_loss: whether to normalize the supervised loss term + normalize_func: normalization function used for supervised loss term + normalize_scaler: scalar that sets the steepness of the normalization function + target_level: source level to target for model output + verbose: whether model training output is printed to the terminal + """ + spectra = ss.get_samples().astype(float) + sources_df = ss.sources.T.groupby(target_level, sort=False).sum().T + sources = sources_df.values.astype(float) + self.sources_columns = sources_df.columns + + if verbose: + print("Building dictionary...") + + if self.source_dict is None: + self.source_dict = _get_reordered_spectra( + seeds_ss.spectra, + seeds_ss.sources, + self.sources_columns, + target_level=target_level + ).values + + if not self.model: + if verbose: + print("Initializing model...") + self._initialize_model( + (ss.n_channels,), + sources.shape[1], + ) + elif verbose: + 
print("Model already initialized.") + + if verbose: + print("Building loss functions...") + + self.semisup_loss_func = build_keras_semisupervised_loss_func( + self.sup_loss_func, + self.unsup_loss_func, + self.source_dict, + self.beta, + self.activation, + n_labels=sources.shape[1], + normalize=normalize_sup_loss, + normalize_func=normalize_func, + normalize_scaler=normalize_scaler + ) + + semisup_metrics = None + if self.metrics: + if verbose: + print("Building metric functions...") + semisup_metrics = [] + for each in self.metrics: + if isinstance(each, str): + semisup_metrics.append( + build_keras_semisupervised_metric_func( + tf.keras.metrics.get(each), + self.activation, + sources.shape[1] + ) + ) + else: + semisup_metrics.append( + build_keras_semisupervised_metric_func( + each, + self.activation, + sources.shape[1] + ) + ) + + self.model.compile( + loss=self.semisup_loss_func, + optimizer=self.optimizer, + metrics=semisup_metrics + ) + + es = EarlyStopping( + monitor=es_monitor, + patience=patience, + verbose=es_verbose, + restore_best_weights=True, + mode=es_mode, + min_delta=es_min_delta, + ) + + if callbacks: + callbacks.append(es) + else: + callbacks = [es] + + history = self.model.fit( + spectra, + np.append(sources, spectra, axis=1), + epochs=epochs, + verbose=verbose, + validation_split=validation_split, + callbacks=callbacks, + shuffle=True, + batch_size=batch_size + ) + + if self.fit_spline: + if verbose: + print("Finding OOD detection threshold function...") + + train_logits = self.model.predict(spectra, verbose=0) + train_lpes = self.activation(tf.convert_to_tensor(train_logits, dtype=tf.float32)) + self.spline_recon_errors = reconstruction_error( + tf.convert_to_tensor(spectra, dtype=tf.float32), + train_lpes, + self.source_dict, + self.unsup_loss_func + ).numpy() + self.spline_snrs = self._get_snrs(ss, bg_cps, is_gross) + self._fit_spline_threshold_func() + + info = self._get_info_as_dict() + self._update_info( + target_level=target_level, + model_outputs=sources_df.columns.values.tolist(), + normalization=ss.spectra_state, + **info, + ) + + return history + + def predict(self, ss: SampleSet, bg_cps: int = 300, is_gross: bool = False, verbose=False): + """Estimate the proportions of counts present in each sample of the provided SampleSet. + + Results are stored inside the SampleSet's prediction_probas property. + + Args: + ss: `SampleSet` of `n` foreground or gross spectra where `n` >= 1 + bg_cps: background rate used for estimating sample SNRs. + If background rate varies to a significant degree, split up sampleset + by SNR and make multiple calls to this method. 
+ is_gross: whether `ss` contains gross spectra + """ + test_spectra = ss.get_samples().astype(float) + + logits = self.model.predict(test_spectra, verbose=verbose) + lpes = self.activation(tf.convert_to_tensor(logits, dtype=tf.float32)) + + col_level_idx = SampleSet.SOURCES_MULTI_INDEX_NAMES.index(self.target_level) + col_level_subset = SampleSet.SOURCES_MULTI_INDEX_NAMES[:col_level_idx+1] + ss.prediction_probas = pd.DataFrame( + data=lpes, + columns=pd.MultiIndex.from_tuples( + self.get_model_outputs_as_label_tuples(), + names=col_level_subset + ) + ) + + # Fill in unsupervised losses + recon_errors = reconstruction_error( + tf.convert_to_tensor(test_spectra, dtype=tf.float32), + lpes, + self.source_dict, + self.unsup_loss_func + ).numpy() + + if self.fit_spline: + snrs = self._get_snrs(ss, bg_cps, is_gross) + thresholds = self._get_spline_threshold_func()(snrs) + is_ood = recon_errors > thresholds + ss.info["ood"] = is_ood + + ss.info["recon_error"] = recon_errors + + +def _get_reordered_spectra(old_spectra_df: pd.DataFrame, old_sources_df: pd.DataFrame, + new_sources_columns, target_level) -> pd.DataFrame: + collapsed_sources_df = old_sources_df\ + .T.groupby(target_level)\ + .sum().T + reordered_spectra_df = old_spectra_df.iloc[ + collapsed_sources_df[ + new_sources_columns + ].idxmax() + ].reset_index(drop=True) + + return reordered_spectra_df diff --git a/riid/visualize.py b/riid/visualize.py index 81dc3c3d..ef72aa93 100644 --- a/riid/visualize.py +++ b/riid/visualize.py @@ -15,7 +15,7 @@ from seaborn import heatmap from sklearn.metrics import confusion_matrix as confusion_matrix_sklearn -from riid.data.sampleset import SampleSet +from riid import SampleSet # DO NOT TOUCH what is set below nor override them inside a function. plt.style.use("default") diff --git a/tests/anomaly_tests.py b/tests/anomaly_tests.py index 35143a1d..daddb95d 100644 --- a/tests/anomaly_tests.py +++ b/tests/anomaly_tests.py @@ -6,10 +6,8 @@ import numpy as np +from riid import PassbySynthesizer, SeedMixer, get_dummy_seeds from riid.anomaly import PoissonNChannelEventDetector -from riid.data.synthetic.passby import PassbySynthesizer -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic import get_dummy_seeds class TestAnomaly(unittest.TestCase): diff --git a/tests/data_tests.py b/tests/data_tests.py index 022bedb7..cc8a1c90 100644 --- a/tests/data_tests.py +++ b/tests/data_tests.py @@ -6,10 +6,10 @@ import tempfile import unittest -from riid import SAMPLESET_HDF_FILE_EXTENSION +from riid import (SAMPLESET_HDF_FILE_EXTENSION, SampleSet, get_dummy_seeds, + read_hdf) from riid.data.labeling import label_to_index_element -from riid.data.sampleset import SampleSet, _write_hdf, read_hdf -from riid.data.synthetic import get_dummy_seeds +from riid.data.sampleset import _write_hdf class TestData(unittest.TestCase): diff --git a/tests/gadras_tests.py b/tests/gadras_tests.py index 9210312d..680048eb 100644 --- a/tests/gadras_tests.py +++ b/tests/gadras_tests.py @@ -5,7 +5,8 @@ import unittest import pandas as pd -from riid.data.synthetic import get_dummy_seeds + +from riid import get_dummy_seeds from riid.gadras.pcf import (_pack_compressed_text_buffer, _unpack_compressed_text_buffer) diff --git a/tests/model_tests.py b/tests/model_tests.py index ea430abc..95ff6897 100644 --- a/tests/model_tests.py +++ b/tests/model_tests.py @@ -8,15 +8,12 @@ import numpy as np import pandas as pd -from riid.data.sampleset import SampleSet -from riid.data.synthetic import get_dummy_seeds -from 
riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer -from riid.models import PyRIIDModel -from riid.models.bayes import (NegativeSpectrumError, PoissonBayesClassifier, - ZeroTotalCountsError) -from riid.models.neural_nets import (LabelProportionEstimator, MLPClassifier) -from riid.models.neural_nets.arad import ARADLatentPredictor, ARADv1, ARADv2 +from riid import SampleSet, SeedMixer, StaticSynthesizer, get_dummy_seeds +from riid.models import (ARADLatentPredictor, ARADv1, ARADv2, + LabelProportionEstimator, MLPClassifier, + PoissonBayesClassifier) +from riid.models.base import PyRIIDModel +from riid.models.bayes import NegativeSpectrumError, ZeroTotalCountsError class TestModels(unittest.TestCase): diff --git a/tests/sampleset_tests.py b/tests/sampleset_tests.py index 63a5a969..887f5308 100644 --- a/tests/sampleset_tests.py +++ b/tests/sampleset_tests.py @@ -8,13 +8,11 @@ import numpy as np import pandas as pd +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds from riid.data.sampleset import (ChannelCountMismatchError, InvalidSampleCountError, SampleSet, SpectraState, SpectraStateMismatchError, SpectraType, _get_row_labels) -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer class TestSampleSet(unittest.TestCase): diff --git a/tests/seedmixer_tests.py b/tests/seedmixer_tests.py index e986f891..00ac6280 100644 --- a/tests/seedmixer_tests.py +++ b/tests/seedmixer_tests.py @@ -6,10 +6,8 @@ import numpy as np from scipy.spatial.distance import jensenshannon -from riid.data.sampleset import SampleSet -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer +from riid import SampleSet, SeedMixer, get_dummy_seeds class TestSeedMixer(unittest.TestCase): diff --git a/tests/staticsynth_tests.py b/tests/staticsynth_tests.py index 91684708..afbd297a 100644 --- a/tests/staticsynth_tests.py +++ b/tests/staticsynth_tests.py @@ -7,12 +7,11 @@ import numpy as np import pandas as pd +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds from riid.data import InvalidSeedError, get_expected_spectra -from riid.data.synthetic import (Synthesizer, get_dummy_seeds, - get_merged_sources_samplewise, - get_samples_per_seed) -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer +from riid.data.synthetic.base import (Synthesizer, + get_merged_sources_samplewise, + get_samples_per_seed) class TestStaticSynthesis(unittest.TestCase): diff --git a/tests/visualize_tests.py b/tests/visualize_tests.py index 151e9f48..ec318227 100644 --- a/tests/visualize_tests.py +++ b/tests/visualize_tests.py @@ -6,11 +6,9 @@ import numpy as np -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds from riid.metrics import precision_recall_curve -from riid.models.neural_nets import MLPClassifier +from riid.models import MLPClassifier from riid.visualize import (plot_correlation_between_all_labels, plot_count_rate_history, plot_label_and_prediction_distributions,