From e9ab4c1988a262690752eb3b856658b61072348b Mon Sep 17 00:00:00 2001 From: Tyler Morrow Date: Thu, 15 Aug 2024 09:03:41 -0600 Subject: [PATCH] Move common imports to top-level; fix model saving bugs. --- examples/courses/Primer 1/Primer1.ipynb | 68 +- examples/data/conversion/pcf_to_ss.py | 3 +- examples/data/difficulty_score.py | 4 +- .../data/preprocessing/energy_calibration.py | 4 +- examples/data/synthesis/mix_seeds.py | 4 +- examples/data/synthesis/synthesize_passbys.py | 3 +- examples/data/synthesis/synthesize_seeds.py | 2 +- .../synthesis/synthesize_seeds_advanced.py | 2 +- examples/data/synthesis/synthesize_spectra.py | 4 +- examples/modeling/anomaly_detection.py | 4 +- examples/modeling/arad.py | 6 +- examples/modeling/arad_latent_prediction.py | 6 +- examples/modeling/classifier_comparison.py | 7 +- .../modeling/label_proportion_estimation.py | 6 +- .../modeling/neural_network_classifier.py | 6 +- examples/run_examples.py | 1 + examples/visualization/confusion_matrix.py | 6 +- examples/visualization/distance_matrix.py | 2 +- .../plot_sampleset_compare_to.py | 4 +- examples/visualization/plot_spectra.py | 2 +- riid/__init__.py | 12 +- riid/data/converters/aipt.py | 3 +- riid/data/converters/topcoder.py | 3 +- riid/data/synthetic/__init__.py | 374 +--------- riid/data/synthetic/base.py | 290 ++++++++ riid/data/synthetic/passby.py | 4 +- riid/data/synthetic/seed.py | 88 ++- riid/data/synthetic/static.py | 4 +- riid/gadras/api.py | 12 +- riid/metrics.py | 2 +- riid/models/__init__.py | 245 +------ riid/models/base.py | 256 +++++++ riid/models/bayes.py | 4 +- riid/models/neural_nets/__init__.py | 669 +----------------- riid/models/neural_nets/arad.py | 6 +- riid/models/neural_nets/basic.py | 197 ++++++ riid/models/neural_nets/lpe.py | 489 +++++++++++++ riid/visualize.py | 2 +- tests/anomaly_tests.py | 4 +- tests/data_tests.py | 6 +- tests/gadras_tests.py | 3 +- tests/model_tests.py | 15 +- tests/sampleset_tests.py | 4 +- tests/seedmixer_tests.py | 4 +- tests/staticsynth_tests.py | 9 +- tests/visualize_tests.py | 6 +- 46 files changed, 1442 insertions(+), 1413 deletions(-) create mode 100644 riid/data/synthetic/base.py create mode 100644 riid/models/base.py create mode 100644 riid/models/neural_nets/basic.py create mode 100644 riid/models/neural_nets/lpe.py diff --git a/examples/courses/Primer 1/Primer1.ipynb b/examples/courses/Primer 1/Primer1.ipynb index ffe4714d..459e3ef8 100755 --- a/examples/courses/Primer 1/Primer1.ipynb +++ b/examples/courses/Primer 1/Primer1.ipynb @@ -155,7 +155,7 @@ "from riid.gadras.api import GADRAS_API_SEEMINGLY_AVAILABLE\n", "\n", "if GADRAS_API_SEEMINGLY_AVAILABLE:\n", - " from riid.data.synthetic.seed import SeedSynthesizer\n", + " from riid import SeedSynthesizer\n", " seed_syn = SeedSynthesizer()\n", " # The YAML file defining the seed synthesis specification is ultimately parsed into a dictionary.\n", " # You can also load it yourself and pass in the dictionary instead - this is useful for varying detector parameters!\n", @@ -163,7 +163,7 @@ "else:\n", " # If you don't have Windows with GADRAS installed, this will use the dummy seeds below which are not actual gamma spectra.\n", " # Another option would be to load a seeds file obtained elsewhere.\n", - " from riid.data.synthetic import get_dummy_seeds\n", + " from riid import get_dummy_seeds\n", " seeds_ss = get_dummy_seeds()" ] }, @@ -251,7 +251,7 @@ "outputs": [], "source": [ "\"\"\"Seed mixing\"\"\"\n", - "from riid.data.synthetic.seed import SeedMixer\n", + "from riid import SeedMixer\n", "\n", 
"mixed_bg_seeds_ss = SeedMixer(\n", " bg_seeds_ss,\n", @@ -278,7 +278,7 @@ "outputs": [], "source": [ "\"\"\"Combining SampleSets\"\"\"\n", - "from riid.data.sampleset import SampleSet\n", + "from riid import SampleSet\n", "\n", "combined_ss = SampleSet()\n", "combined_ss.concat([fg_seeds_ss, mixed_bg_seeds_ss])\n", @@ -318,14 +318,14 @@ "outputs": [], "source": [ "\"\"\"Static Synthesis\"\"\"\n", - "from riid.data.synthetic.static import StaticSynthesizer\n", + "from riid import StaticSynthesizer\n", "\n", "static_syn = StaticSynthesizer(\n", " samples_per_seed=100,\n", " bg_cps=300,\n", " live_time_function=\"uniform\",\n", " live_time_function_args=(0.25, 8),\n", - " snr_function=\"uniform\",\n", + " snr_function=\"log10\",\n", " snr_function_args=(0.1, 100),\n", " apply_poisson_noise=True,\n", " return_fg=True,\n", @@ -348,8 +348,7 @@ "outputs": [], "source": [ "\"\"\"Normalization\"\"\"\n", - "gross_ss.normalize()\n", - "bg_ss.normalize()" + "fg_ss.normalize()" ] }, { @@ -371,16 +370,10 @@ "outputs": [], "source": [ "\"\"\"Model fitting\"\"\"\n", - "from riid.models.neural_nets import MLPClassifier\n", - "from riid.metrics import single_f1\n", + "from riid.models import MLPClassifier\n", "\n", - "model = MLPClassifier(\n", - " hidden_layers=(256,),\n", - " learning_rate=4e-3,\n", - " metrics=[single_f1]\n", - ")\n", - "\n", - "history = model.fit(gross_ss, bg_ss, epochs=25, patience=5, verbose=True)" + "model = MLPClassifier()\n", + "history = model.fit(fg_ss, epochs=10, verbose=True)" ] }, { @@ -402,12 +395,9 @@ "outputs": [], "source": [ "\"\"\"Generate some in-distribution data the model has not seen.\"\"\"\n", - "test_bg_ss, test_gross_ss = static_syn.generate(fg_seeds_ss, bg_seeds_ss)\n", - "test_bg_ss.normalize()\n", - "test_gross_ss.normalize()\n", - "# Adjust ground truth\n", - "#test_gross_ss.sources.drop(test_bg_ss.sources.columns, axis=1, inplace=True)\n", - "#test_gross_ss.normalize_sources()" + "test_fg_ss, test_gross_ss = static_syn.generate(fg_seeds_ss, bg_seeds_ss)\n", + "test_fg_ss.normalize()\n", + "test_gross_ss.normalize()" ] }, { @@ -417,7 +407,7 @@ "outputs": [], "source": [ "\"\"\"Use the model!\"\"\"\n", - "model.predict(test_gross_ss, test_bg_ss) # Saved in your SampleSet containing non-background sources (the gross spectra)" + "model.predict(test_fg_ss) # Results are saved in the SampleSet's prediction_probas DataFrame" ] }, { @@ -429,8 +419,8 @@ "\"\"\"Calculate performance metric\"\"\"\n", "from sklearn.metrics import f1_score\n", "\n", - "labels = test_gross_ss.get_labels()\n", - "predictions = test_gross_ss.get_predictions()\n", + "labels = test_fg_ss.get_labels()\n", + "predictions = test_fg_ss.get_predictions()\n", "f1_score(labels, predictions, average=\"micro\")" ] }, @@ -443,7 +433,7 @@ "\"\"\"Confusion Matrix\"\"\"\n", "from riid.visualize import confusion_matrix\n", "\n", - "_ = confusion_matrix(test_gross_ss)" + "_ = confusion_matrix(test_fg_ss)" ] }, { @@ -455,7 +445,7 @@ "\"\"\"SNR vs. 
Model Score\"\"\"\n", "from riid.visualize import plot_snr_vs_score\n", "\n", - "_ = plot_snr_vs_score(test_gross_ss, xscale=\"log\")" + "_ = plot_snr_vs_score(test_fg_ss, xscale=\"log\")" ] }, { @@ -465,13 +455,23 @@ "outputs": [], "source": [ "\"\"\"Save model\"\"\"\n", - "import os\n", + "from pathlib import Path\n", + "\n", + "\n", + "def _delete_if_exists(path: Path):\n", + " if path.exists():\n", + " path.unlink()\n", "\n", - "model_path = \"./model.h5\"\n", - "if os.path.exists(model_path):\n", - " os.remove(model_path)\n", + "model_path_json = Path(\"./model.json\")\n", + "model_path_tflite = model_path_json.with_suffix(\".tflite\")\n", + "model_path_onnx = model_path_json.with_suffix(\".onnx\")\n", + "_delete_if_exists(model_path_json)\n", + "_delete_if_exists(model_path_tflite)\n", + "_delete_if_exists(model_path_onnx)\n", "\n", - "model.save(model_path)" + "model.save(str(model_path_json))\n", + "model.to_tflite(str(model_path_tflite))\n", + "model.to_onnx(str(model_path_onnx))" ] }, { @@ -571,7 +571,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.12.4" }, "orig_nbformat": 4, "vscode": { diff --git a/examples/data/conversion/pcf_to_ss.py b/examples/data/conversion/pcf_to_ss.py index 0a5b6d65..e94e0148 100644 --- a/examples/data/conversion/pcf_to_ss.py +++ b/examples/data/conversion/pcf_to_ss.py @@ -12,10 +12,9 @@ import os from pathlib import Path -from riid import SAMPLESET_HDF_FILE_EXTENSION +from riid import SAMPLESET_HDF_FILE_EXTENSION, read_pcf from riid.data.converters import (_validate_and_create_output_dir, convert_directory) -from riid.data.sampleset import read_pcf def convert_and_save(input_file_path: str, output_dir: str = None, diff --git a/examples/data/difficulty_score.py b/examples/data/difficulty_score.py index 103d9e1b..6f838ea6 100644 --- a/examples/data/difficulty_score.py +++ b/examples/data/difficulty_score.py @@ -2,9 +2,7 @@ # Under the terms of Contract DE-NA0003525 with NTESS, # the U.S. Government retains certain rights in this software. """This example demonstrates how to compute the difficulty of a given SampleSet.""" -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds fg_seeds_ss, bg_seeds_ss = get_dummy_seeds().split_fg_and_bg() mixed_bg_seed_ss = SeedMixer(bg_seeds_ss, mixture_size=3)\ diff --git a/examples/data/preprocessing/energy_calibration.py b/examples/data/preprocessing/energy_calibration.py index 25061fea..03cefc28 100644 --- a/examples/data/preprocessing/energy_calibration.py +++ b/examples/data/preprocessing/energy_calibration.py @@ -8,9 +8,7 @@ import matplotlib.pyplot as plt import numpy as np -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds SYNTHETIC_DATA_CONFIG = { "samples_per_seed": 10, diff --git a/examples/data/synthesis/mix_seeds.py b/examples/data/synthesis/mix_seeds.py index 911fc56d..a2a7bbac 100644 --- a/examples/data/synthesis/mix_seeds.py +++ b/examples/data/synthesis/mix_seeds.py @@ -3,8 +3,8 @@ # the U.S. Government retains certain rights in this software. 
"""This example demonstrates how to generate synthetic gamma spectra from seeds.""" import numpy as np -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer + +from riid import SeedMixer, get_dummy_seeds fg_seeds_ss, bg_seeds_ss = get_dummy_seeds().split_fg_and_bg() diff --git a/examples/data/synthesis/synthesize_passbys.py b/examples/data/synthesis/synthesize_passbys.py index 4c9ca023..7b2bbb3e 100644 --- a/examples/data/synthesis/synthesize_passbys.py +++ b/examples/data/synthesis/synthesize_passbys.py @@ -7,8 +7,7 @@ import matplotlib.pyplot as plt import numpy as np -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.passby import PassbySynthesizer +from riid import PassbySynthesizer, get_dummy_seeds if len(sys.argv) == 2: import matplotlib diff --git a/examples/data/synthesis/synthesize_seeds.py b/examples/data/synthesis/synthesize_seeds.py index ab837dca..a33eabfc 100644 --- a/examples/data/synthesis/synthesize_seeds.py +++ b/examples/data/synthesis/synthesize_seeds.py @@ -4,7 +4,7 @@ """This example demonstrates how to generate synthetic seeds from GADRAS.""" import yaml -from riid.data.synthetic.seed import SeedSynthesizer +from riid import SeedSynthesizer seed_synth_config = """ --- diff --git a/examples/data/synthesis/synthesize_seeds_advanced.py b/examples/data/synthesis/synthesize_seeds_advanced.py index 390ac72b..d259b242 100644 --- a/examples/data/synthesis/synthesize_seeds_advanced.py +++ b/examples/data/synthesis/synthesize_seeds_advanced.py @@ -5,7 +5,7 @@ configuration expansion features.""" import yaml -from riid.data.synthetic.seed import SeedSynthesizer +from riid import SeedSynthesizer seed_synth_config = """ --- diff --git a/examples/data/synthesis/synthesize_spectra.py b/examples/data/synthesis/synthesize_spectra.py index 95530244..ce150123 100644 --- a/examples/data/synthesis/synthesize_spectra.py +++ b/examples/data/synthesis/synthesize_spectra.py @@ -2,9 +2,7 @@ # Under the terms of Contract DE-NA0003525 with NTESS, # the U.S. Government retains certain rights in this software. 
"""This example demonstrates how to generate synthetic gamma spectra from seeds.""" -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds SYNTHETIC_DATA_CONFIG = { "samples_per_seed": 10000, diff --git a/examples/modeling/anomaly_detection.py b/examples/modeling/anomaly_detection.py index e3b27a13..503470e2 100644 --- a/examples/modeling/anomaly_detection.py +++ b/examples/modeling/anomaly_detection.py @@ -10,10 +10,8 @@ import numpy as np from matplotlib import cm +from riid import PassbySynthesizer, SeedMixer, get_dummy_seeds from riid.anomaly import PoissonNChannelEventDetector -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.passby import PassbySynthesizer -from riid.data.synthetic.seed import SeedMixer if len(sys.argv) == 2: import matplotlib diff --git a/examples/modeling/arad.py b/examples/modeling/arad.py index f15d2e4e..120b0d07 100644 --- a/examples/modeling/arad.py +++ b/examples/modeling/arad.py @@ -6,10 +6,8 @@ import numpy as np import pandas as pd -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer -from riid.models.neural_nets.arad import ARADv1, ARADv2 +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds +from riid.models import ARADv1, ARADv2 # Config rng = np.random.default_rng(42) diff --git a/examples/modeling/arad_latent_prediction.py b/examples/modeling/arad_latent_prediction.py index 9b233954..4d3b199b 100644 --- a/examples/modeling/arad_latent_prediction.py +++ b/examples/modeling/arad_latent_prediction.py @@ -8,10 +8,8 @@ from keras.api.metrics import Accuracy, CategoricalCrossentropy from sklearn.metrics import f1_score, mean_squared_error -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer -from riid.models.neural_nets.arad import ARADLatentPredictor, ARADv2 +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds +from riid.models import ARADLatentPredictor, ARADv2 # Config rng = np.random.default_rng(42) diff --git a/examples/modeling/classifier_comparison.py b/examples/modeling/classifier_comparison.py index 4fce43e7..d51af336 100644 --- a/examples/modeling/classifier_comparison.py +++ b/examples/modeling/classifier_comparison.py @@ -7,12 +7,9 @@ import matplotlib.pyplot as plt from sklearn.metrics import f1_score -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer -from riid.models.bayes import PoissonBayesClassifier +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds from riid.metrics import precision_recall_curve -from riid.models.neural_nets import MLPClassifier +from riid.models import MLPClassifier, PoissonBayesClassifier from riid.visualize import plot_precision_recall if len(sys.argv) == 2: diff --git a/examples/modeling/label_proportion_estimation.py b/examples/modeling/label_proportion_estimation.py index b4e5e4c4..1cfe30c7 100644 --- a/examples/modeling/label_proportion_estimation.py +++ b/examples/modeling/label_proportion_estimation.py @@ -4,10 +4,8 @@ from sklearn.metrics import mean_absolute_error -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static 
import StaticSynthesizer -from riid.models.neural_nets import LabelProportionEstimator +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds +from riid.models import LabelProportionEstimator # Generate some mixture training data. fg_seeds_ss, bg_seeds_ss = get_dummy_seeds().split_fg_and_bg() diff --git a/examples/modeling/neural_network_classifier.py b/examples/modeling/neural_network_classifier.py index 277a2dc5..831230c2 100644 --- a/examples/modeling/neural_network_classifier.py +++ b/examples/modeling/neural_network_classifier.py @@ -5,10 +5,8 @@ import numpy as np from sklearn.metrics import f1_score -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer -from riid.models.neural_nets import MLPClassifier +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds +from riid.models import MLPClassifier # Generate some training data fg_seeds_ss, bg_seeds_ss = get_dummy_seeds().split_fg_and_bg() diff --git a/examples/run_examples.py b/examples/run_examples.py index aaa29c0f..1424e7de 100644 --- a/examples/run_examples.py +++ b/examples/run_examples.py @@ -6,6 +6,7 @@ import subprocess import sys from pathlib import Path + import pandas as pd from tabulate import tabulate diff --git a/examples/visualization/confusion_matrix.py b/examples/visualization/confusion_matrix.py index b2e25e72..27dd2705 100644 --- a/examples/visualization/confusion_matrix.py +++ b/examples/visualization/confusion_matrix.py @@ -4,10 +4,8 @@ """This example demonstrates how to obtain confusion matrices.""" import sys -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer -from riid.models.neural_nets import MLPClassifier +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds +from riid.models import MLPClassifier from riid.visualize import confusion_matrix if len(sys.argv) == 2: diff --git a/examples/visualization/distance_matrix.py b/examples/visualization/distance_matrix.py index 5f229f25..49e5af33 100644 --- a/examples/visualization/distance_matrix.py +++ b/examples/visualization/distance_matrix.py @@ -9,7 +9,7 @@ import matplotlib.pyplot as plt import seaborn as sns -from riid.data.synthetic import get_dummy_seeds +from riid import get_dummy_seeds if len(sys.argv) == 2: import matplotlib diff --git a/examples/visualization/plot_sampleset_compare_to.py b/examples/visualization/plot_sampleset_compare_to.py index 9ccf94bb..5a99f05d 100644 --- a/examples/visualization/plot_sampleset_compare_to.py +++ b/examples/visualization/plot_sampleset_compare_to.py @@ -4,9 +4,7 @@ """This example demonstrates how to compare sample sets.""" import sys -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds from riid.visualize import plot_ss_comparison if len(sys.argv) == 2: diff --git a/examples/visualization/plot_spectra.py b/examples/visualization/plot_spectra.py index d0f1e1e7..04f71bd1 100644 --- a/examples/visualization/plot_spectra.py +++ b/examples/visualization/plot_spectra.py @@ -4,7 +4,7 @@ """This example demonstrates how to plot gamma spectra.""" import sys -from riid.data.synthetic import get_dummy_seeds +from riid import get_dummy_seeds from riid.visualize import plot_spectra if len(sys.argv) == 2: diff --git a/riid/__init__.py 
b/riid/__init__.py index d19e9880..44fe4e4b 100644 --- a/riid/__init__.py +++ b/riid/__init__.py @@ -7,9 +7,15 @@ import logging import os import sys - from importlib.metadata import version +from riid.data.sampleset import (SampleSet, SpectraState, SpectraType, + read_hdf, read_json, read_pcf) +from riid.data.synthetic.passby import PassbySynthesizer +from riid.data.synthetic.seed import (SeedMixer, SeedSynthesizer, + get_dummy_seeds) +from riid.data.synthetic.static import StaticSynthesizer + HANDLER = logging.StreamHandler(sys.stdout) logging.root.addHandler(HANDLER) logging.root.setLevel(logging.DEBUG) @@ -31,3 +37,7 @@ "riid.data.synthetic.passby.PassbySynthesizer._generate_single_passby": True, "riid.data.sampleset.SampleSet._channels_to_energies": True, } + +__all__ = ["SampleSet", "SpectraState", "SpectraType", + "read_hdf", "read_json", "read_pcf", "get_dummy_seeds", + "PassbySynthesizer", "SeedSynthesizer", "StaticSynthesizer", "SeedMixer"] diff --git a/riid/data/converters/aipt.py b/riid/data/converters/aipt.py index d890a440..06f7c91a 100644 --- a/riid/data/converters/aipt.py +++ b/riid/data/converters/aipt.py @@ -10,9 +10,8 @@ import pandas as pd -from riid import SAMPLESET_HDF_FILE_EXTENSION +from riid import SAMPLESET_HDF_FILE_EXTENSION, SampleSet from riid.data.converters import _validate_and_create_output_dir -from riid.data.sampleset import SampleSet ELEMENT_IDS_PER_FILE = [0, 1, 2, 3] DEFAULT_ECAL = [ diff --git a/riid/data/converters/topcoder.py b/riid/data/converters/topcoder.py index e75b07a7..fffd8ada 100644 --- a/riid/data/converters/topcoder.py +++ b/riid/data/converters/topcoder.py @@ -13,10 +13,9 @@ import numpy as np import pandas as pd -from riid import SAMPLESET_HDF_FILE_EXTENSION +from riid import SAMPLESET_HDF_FILE_EXTENSION, SampleSet from riid.data.converters import _validate_and_create_output_dir from riid.data.labeling import label_to_index_element -from riid.data.sampleset import SampleSet SOURCE_ID_TO_LABEL = { 0: "Background", diff --git a/riid/data/synthetic/__init__.py b/riid/data/synthetic/__init__.py index 015c491d..bc07729c 100644 --- a/riid/data/synthetic/__init__.py +++ b/riid/data/synthetic/__init__.py @@ -1,373 +1,9 @@ # Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS). # Under the terms of Contract DE-NA0003525 with NTESS, # the U.S. Government retains certain rights in this software. 
-"""This modules contains utilities for synthesizing gamma spectra.""" -from collections import Counter -from typing import Any +"""This module contains utilities for synthesizing gamma spectra.""" +# The following imports are left to not break previous imports; remove in v3 +from riid.data.synthetic.base import Synthesizer, get_distribution_values +from riid.data.synthetic.seed import get_dummy_seeds -import numpy as np -import pandas as pd -from numpy.random import Generator - -from riid.data import get_expected_spectra -from riid.data.sampleset import (SampleSet, SpectraState, SpectraType, - _get_utc_timestamp) - - -class Synthesizer(): - """Base class for synthesizers.""" - - SYNTHETIC_STR = "synthetic" - SUPPORTED_SAMPLING_FUNCTIONS = ["uniform", "log10", "discrete", "list"] - - def __init__(self, bg_cps: float = 300.0, long_bg_live_time: float = 120.0, - apply_poisson_noise: bool = True, - normalize_sources: bool = True, - return_fg: bool = True, - return_gross: bool = False, - rng: Generator = np.random.default_rng()): - """ - Args: - bg_cps: constant rate of gammas from background - long_bg_live_time: live time on which to base background subtractions - apply_poisson_noise: whether to apply Poisson noise to spectra - normalize_sources: whether to normalize ground truth proportions to sum to 1 - return_fg: whether to compute and return background subtracted spectra - return_gross: whether to return gross spectra (always computed) - rng: NumPy random number generator, useful for experiment repeatability - """ - self.bg_cps = bg_cps - self.long_bg_live_time = long_bg_live_time - self.apply_poisson_noise = apply_poisson_noise - self.normalize_sources = normalize_sources - self.return_fg = return_fg - self.return_gross = return_gross - self._rng = rng - self._synthesis_start_dt = None - self._n_samples_synthesized = 0 - - def __str__(self): - output = "SynthesizerConfig" - for k, v in sorted(vars(self).items()): - output += " {}: {}".format(k, str(v)) - return output - - def _reset_progress(self): - self._n_samples_synthesized = 0 - self._synthesis_start_dt = _get_utc_timestamp() - - def _report_progress(self, n_samples_expected, batch_name): - percent_complete = 100 * self._n_samples_synthesized / n_samples_expected - msg = ( - f"Synthesizing ... {percent_complete:.0f}% " - f"(currently on {batch_name}" - ) - MAX_MSG_LEN = 80 - msg = (msg[:MAX_MSG_LEN] + "...") if len(msg) > MAX_MSG_LEN else msg - msg += ")" - print("\033[K" + msg, end="\r") - - def _report_completion(self, delay): - summary = ( - f"Synthesis complete!\n" - f"Generated {self._n_samples_synthesized} samples in ~{delay:.2f}s " - f"(~{(self._n_samples_synthesized / delay):.2f} samples/sec)." - ) - print("\033[K" + summary) - - def _verify_n_samples_synthesized(self, actual: int, expected: int): - assert expected == actual, ( - f"{actual} generated, but {expected} were expected. 
" - "Be sure to remove any columns from your seeds' sources DataFrame that " - "contain all zeroes.") - - def _get_batch(self, fg_seed, fg_sources, bg_seed, bg_sources, ecal, - lt_targets, snr_targets, rt_targets=None, distance_cm=None): - if not (self.return_fg or self.return_gross): - raise ValueError("Computing to return nothing.") - - bg_counts_expected = lt_targets * self.bg_cps - fg_counts_expected = snr_targets * np.sqrt(bg_counts_expected) - - fg_spectra = get_expected_spectra(fg_seed.values, fg_counts_expected) - bg_spectra = get_expected_spectra(bg_seed.values, bg_counts_expected) - - long_bg_counts_expected = self.long_bg_live_time * self.bg_cps - long_bg_spectrum_expected = bg_seed.values * long_bg_counts_expected - - gross_spectra = None - long_bg_spectra = None - fg_counts = 0 - bg_counts = 0 - long_bg_counts = 0 - fg_ss = None - gross_ss = None - - # Spectra - if self.apply_poisson_noise: - gross_spectra = self._rng.poisson(fg_spectra + bg_spectra) - if self.return_fg: - long_bg_spectrum = self._rng.poisson(long_bg_spectrum_expected) - long_bg_seed = long_bg_spectrum / long_bg_spectrum.sum() - long_bg_spectra = get_expected_spectra(long_bg_seed, bg_counts_expected) - fg_spectra = gross_spectra - long_bg_spectra - else: - gross_spectra = fg_spectra + bg_spectra - if self.return_fg: - long_bg_spectra = bg_spectra - fg_spectra = gross_spectra - long_bg_spectra - - # Counts - fg_counts = fg_spectra.sum(axis=1, dtype=float) - if self.return_fg: - long_bg_counts = long_bg_spectra.sum(axis=1, dtype=float) - if self.return_gross: - bg_counts = bg_spectra.sum(axis=1, dtype=float) - - # Sample sets - if self.return_fg: - snrs = fg_counts / np.sqrt(long_bg_counts.clip(1)) - fg_ss = get_fg_sample_set(fg_spectra, fg_sources, ecal, lt_targets, - snrs=snrs, total_counts=fg_counts, - real_times=rt_targets, distance_cm=distance_cm, - timestamps=self._synthesis_start_dt) - self._n_samples_synthesized += fg_ss.n_samples - if self.return_gross: - tiled_fg_sources = _tile_sources_and_scale( - fg_sources, - gross_spectra.shape[0], - fg_counts, - ) - tiled_bg_sources = _tile_sources_and_scale( - bg_sources, - gross_spectra.shape[0], - bg_counts, - ) - gross_sources = get_merged_sources_samplewise(tiled_fg_sources, tiled_bg_sources) - gross_counts = gross_spectra.sum(axis=1) - snrs = fg_counts / np.sqrt(bg_counts.clip(1)) - gross_ss = get_gross_sample_set(gross_spectra, gross_sources, ecal, - lt_targets, snrs, gross_counts, - real_times=rt_targets, distance_cm=distance_cm, - timestamps=self._synthesis_start_dt) - self._n_samples_synthesized += gross_ss.n_samples - - return fg_ss, gross_ss - - -def get_sample_set(spectra, sources, ecal, live_times, snrs, total_counts=None, - real_times=None, distance_cm=None, timestamps=None, - descriptions=None) -> SampleSet: - n_samples = spectra.shape[0] - - ss = SampleSet() - ss.spectra_state = SpectraState.Counts - ss.spectra = pd.DataFrame(spectra) - ss.sources = sources - ss.info.description = np.full(n_samples, "") # Ensures the length of info equal n_samples - if descriptions: - ss.info.description = descriptions - ss.info.snr = snrs - ss.info.timestamp = timestamps - ss.info.total_counts = total_counts if total_counts is not None else spectra.sum(axis=1) - ss.info.ecal_order_0 = ecal[0] - ss.info.ecal_order_1 = ecal[1] - ss.info.ecal_order_2 = ecal[2] - ss.info.ecal_order_3 = ecal[3] - ss.info.ecal_low_e = ecal[4] - ss.info.live_time = live_times - ss.info.real_time = real_times if real_times is not None else live_times - ss.info.distance_cm = distance_cm 
- ss.info.occupancy_flag = 0 - ss.info.tag = " " # TODO: test if this can be empty string - - return ss - - -def _tile_sources_and_scale(sources, n_samples, scalars) -> pd.DataFrame: - tiled_sources = pd.DataFrame( - np.tile(sources.values, (n_samples, 1)), - columns=sources.index - ) - # Multiplying normalized source values by spectrum counts. - # This is REQUIRED for properly merging sources DataFrames later when synthesizing - # multiple isotopes. - tiled_sources = tiled_sources.multiply(scalars, axis="index") - return tiled_sources - - -def get_fg_sample_set(spectra, sources, ecal, live_times, snrs, total_counts, - real_times=None, distance_cm=None, timestamps=None, - descriptions=None) -> SampleSet: - tiled_sources = _tile_sources_and_scale( - sources, - spectra.shape[0], - spectra.sum(axis=1) - ) - ss = get_sample_set( - spectra=spectra, - sources=tiled_sources, - ecal=ecal, - live_times=live_times, - snrs=snrs, - total_counts=total_counts, - real_times=real_times, - distance_cm=distance_cm, - timestamps=timestamps, - descriptions=descriptions - ) - ss.spectra_type = SpectraType.Foreground - return ss - - -def get_gross_sample_set(spectra, sources, ecal, live_times, snrs, total_counts, - real_times=None, distance_cm=None, timestamps=None, - descriptions=None) -> SampleSet: - ss = get_sample_set( - spectra=spectra, - sources=sources, - ecal=ecal, - live_times=live_times, - snrs=snrs, - total_counts=total_counts, - real_times=real_times, - distance_cm=distance_cm, - timestamps=timestamps, - descriptions=descriptions - ) - ss.spectra_type = SpectraType.Gross - return ss - - -def get_distribution_values(function: str, function_args: Any, n_values: int, - rng: Generator = np.random.default_rng()): - """Randomly sample a list of values based one of many distributions. - - Args: - function: name of the distribution function - function_args: argument or collection of arguments to be - passed to the function, if any. 
- n_values: size of the distribution - rng: NumPy random number generator, useful for experiment repeatability - - Returns: - Value or collection of sampled values - - Raises: - `ValueError` when an unsupported function type is provided - """ - values = None - if function == "uniform": - values = rng.uniform(*function_args, size=n_values) - elif function == "log10": - log10_args = tuple(map(np.log10, function_args)) - values = np.power(10, rng.uniform(*log10_args, size=n_values)) - elif function == "discrete": - values = rng.choice(function_args, size=n_values) - elif function == "list": - values = np.array(function_args) - else: - raise ValueError(f"{function} function not supported for sampling.") - - return values - - -def get_merged_sources_samplewise(sources1: pd.DataFrame, sources2: pd.DataFrame) -> pd.DataFrame: - merged_sources_df = sources1.add(sources2, axis=1, fill_value=0) - return merged_sources_df - - -def get_samples_per_seed(columns: pd.MultiIndex, min_samples_per_seed: int, balance_level: int): - level_values = columns.get_level_values(level=balance_level) - level_value_to_n_seeds = Counter(level_values) - unique_level_values = list(level_value_to_n_seeds.keys()) - occurences = np.array(list(level_value_to_n_seeds.values())) - max_samples_per_level_value = occurences.max() * min_samples_per_seed - samples_per_level_value = np.ceil(max_samples_per_level_value / occurences).astype(int) - lv_to_samples_per_seed = {k: v for (k, v) in zip(unique_level_values, samples_per_level_value)} - total_samples_expected = sum([x * y for x, y in zip(occurences, samples_per_level_value)]) - - return lv_to_samples_per_seed, total_samples_expected - - -def get_dummy_seeds(n_channels: int = 512, live_time: float = 600.0, - count_rate: float = 1000.0, normalize: bool = True, - rng: Generator = np.random.default_rng()) -> SampleSet: - """Get a random, dummy `SampleSet` of ideal seeds. - - WARNING: the spectra returned by this function each contain one gaussian peak that does - not overlap with the peaks of other spectra. Such data is about as *ideal* as one - could hope to be working with and does not represent anything real. - Therefore, **do not** use this data for any purpose other than testing, debugging, or - examples where code, not results, is being demonstrated. Any use in scientific studies - does not make sense. 
- - Args: - n_channels: number of channels in the spectra DataFrame - live_time: collection time on which to base seeds - (higher creates a less noisy shape) - count_rate: count rate on which to base seeds - (higher creates a less noisy shape) - normalize: whether to apply an L1-norm to the spectra - rng: NumPy random number generator, useful for experiment repeatability - - Returns: - `SampleSet` with randomly generated spectra - """ - ss = SampleSet() - ss.measured_or_synthetic = "synthetic" - ss.spectra_state = SpectraState.Counts - ss.spectra_type = SpectraType.BackgroundForeground - ss.synthesis_info = { - "subtract_background": True, - } - sources = [ - ("Industrial", "Am241", "Unshielded Am241"), - ("Industrial", "Ba133", "Unshielded Ba133"), - ("NORM", "K40", "PotassiumInSoil"), - ("NORM", "K40", "Moderately Shielded K40"), - ("NORM", "Ra226", "UraniumInSoil"), - ("NORM", "Th232", "ThoriumInSoil"), - ("SNM", "U238", "Unshielded U238"), - ("SNM", "Pu239", "Unshielded Pu239"), - ("SNM", "Pu239", "Moderately Shielded Pu239"), - ("SNM", "Pu239", "Heavily Shielded Pu239"), - ] - n_sources = len(sources) - n_fg_sources = n_sources - sources_cols = pd.MultiIndex.from_tuples( - sources, - names=SampleSet.SOURCES_MULTI_INDEX_NAMES - ) - sources_data = np.identity(n_sources) - ss.sources = pd.DataFrame(data=sources_data, columns=sources_cols) - - histograms = [] - N_FG_COUNTS = int(count_rate * live_time) - fg_std = np.sqrt(n_channels / n_sources) - channels_per_sources = n_channels / n_fg_sources - for i in range(n_fg_sources): - mu = i * channels_per_sources + channels_per_sources / 2 - counts = rng.normal(mu, fg_std, size=N_FG_COUNTS) - fg_histogram, _ = np.histogram(counts, bins=n_channels, range=(0, n_channels)) - histograms.append(fg_histogram) - histograms = np.array(histograms) - - ss.spectra = pd.DataFrame(data=histograms) - - ss.info.total_counts = ss.spectra.sum(axis=1) - ss.info.live_time = live_time - ss.info.real_time = live_time - ss.info.snr = None - ss.info.ecal_order_0 = 0 - ss.info.ecal_order_1 = 3000 - ss.info.ecal_order_2 = 100 - ss.info.ecal_order_3 = 0 - ss.info.ecal_low_e = 0 - ss.info.description = "" - ss.update_timestamp() - - if normalize: - ss.normalize() - - return ss +__all__ = ["get_dummy_seeds", "Synthesizer", "get_distribution_values"] diff --git a/riid/data/synthetic/base.py b/riid/data/synthetic/base.py new file mode 100644 index 00000000..9209c7f3 --- /dev/null +++ b/riid/data/synthetic/base.py @@ -0,0 +1,290 @@ +# Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS). +# Under the terms of Contract DE-NA0003525 with NTESS, +# the U.S. Government retains certain rights in this software. 
+"""This module contains utilities for synthesizing gamma spectra.""" +from collections import Counter +from typing import Any + +import numpy as np +import pandas as pd +from numpy.random import Generator + +from riid.data import get_expected_spectra +from riid.data.sampleset import (SampleSet, SpectraState, SpectraType, + _get_utc_timestamp) + + +class Synthesizer(): + """Base class for synthesizers.""" + + SYNTHETIC_STR = "synthetic" + SUPPORTED_SAMPLING_FUNCTIONS = ["uniform", "log10", "discrete", "list"] + + def __init__(self, bg_cps: float = 300.0, long_bg_live_time: float = 120.0, + apply_poisson_noise: bool = True, + normalize_sources: bool = True, + return_fg: bool = True, + return_gross: bool = False, + rng: Generator = np.random.default_rng()): + """ + Args: + bg_cps: constant rate of gammas from background + long_bg_live_time: live time on which to base background subtractions + apply_poisson_noise: whether to apply Poisson noise to spectra + normalize_sources: whether to normalize ground truth proportions to sum to 1 + return_fg: whether to compute and return background subtracted spectra + return_gross: whether to return gross spectra (always computed) + rng: NumPy random number generator, useful for experiment repeatability + """ + self.bg_cps = bg_cps + self.long_bg_live_time = long_bg_live_time + self.apply_poisson_noise = apply_poisson_noise + self.normalize_sources = normalize_sources + self.return_fg = return_fg + self.return_gross = return_gross + self._rng = rng + self._synthesis_start_dt = None + self._n_samples_synthesized = 0 + + def __str__(self): + output = "SynthesizerConfig" + for k, v in sorted(vars(self).items()): + output += " {}: {}".format(k, str(v)) + return output + + def _reset_progress(self): + self._n_samples_synthesized = 0 + self._synthesis_start_dt = _get_utc_timestamp() + + def _report_progress(self, n_samples_expected, batch_name): + percent_complete = 100 * self._n_samples_synthesized / n_samples_expected + msg = ( + f"Synthesizing ... {percent_complete:.0f}% " + f"(currently on {batch_name}" + ) + MAX_MSG_LEN = 80 + msg = (msg[:MAX_MSG_LEN] + "...") if len(msg) > MAX_MSG_LEN else msg + msg += ")" + print("\033[K" + msg, end="\r") + + def _report_completion(self, delay): + summary = ( + f"Synthesis complete!\n" + f"Generated {self._n_samples_synthesized} samples in ~{delay:.2f}s " + f"(~{(self._n_samples_synthesized / delay):.2f} samples/sec)." + ) + print("\033[K" + summary) + + def _verify_n_samples_synthesized(self, actual: int, expected: int): + assert expected == actual, ( + f"{actual} generated, but {expected} were expected. 
" + "Be sure to remove any columns from your seeds' sources DataFrame that " + "contain all zeroes.") + + def _get_batch(self, fg_seed, fg_sources, bg_seed, bg_sources, ecal, + lt_targets, snr_targets, rt_targets=None, distance_cm=None): + if not (self.return_fg or self.return_gross): + raise ValueError("Computing to return nothing.") + + bg_counts_expected = lt_targets * self.bg_cps + fg_counts_expected = snr_targets * np.sqrt(bg_counts_expected) + + fg_spectra = get_expected_spectra(fg_seed.values, fg_counts_expected) + bg_spectra = get_expected_spectra(bg_seed.values, bg_counts_expected) + + long_bg_counts_expected = self.long_bg_live_time * self.bg_cps + long_bg_spectrum_expected = bg_seed.values * long_bg_counts_expected + + gross_spectra = None + long_bg_spectra = None + fg_counts = 0 + bg_counts = 0 + long_bg_counts = 0 + fg_ss = None + gross_ss = None + + # Spectra + if self.apply_poisson_noise: + gross_spectra = self._rng.poisson(fg_spectra + bg_spectra) + if self.return_fg: + long_bg_spectrum = self._rng.poisson(long_bg_spectrum_expected) + long_bg_seed = long_bg_spectrum / long_bg_spectrum.sum() + long_bg_spectra = get_expected_spectra(long_bg_seed, bg_counts_expected) + fg_spectra = gross_spectra - long_bg_spectra + else: + gross_spectra = fg_spectra + bg_spectra + if self.return_fg: + long_bg_spectra = bg_spectra + fg_spectra = gross_spectra - long_bg_spectra + + # Counts + fg_counts = fg_spectra.sum(axis=1, dtype=float) + if self.return_fg: + long_bg_counts = long_bg_spectra.sum(axis=1, dtype=float) + if self.return_gross: + bg_counts = bg_spectra.sum(axis=1, dtype=float) + + # Sample sets + if self.return_fg: + snrs = fg_counts / np.sqrt(long_bg_counts.clip(1)) + fg_ss = get_fg_sample_set(fg_spectra, fg_sources, ecal, lt_targets, + snrs=snrs, total_counts=fg_counts, + real_times=rt_targets, distance_cm=distance_cm, + timestamps=self._synthesis_start_dt) + self._n_samples_synthesized += fg_ss.n_samples + if self.return_gross: + tiled_fg_sources = _tile_sources_and_scale( + fg_sources, + gross_spectra.shape[0], + fg_counts, + ) + tiled_bg_sources = _tile_sources_and_scale( + bg_sources, + gross_spectra.shape[0], + bg_counts, + ) + gross_sources = get_merged_sources_samplewise(tiled_fg_sources, tiled_bg_sources) + gross_counts = gross_spectra.sum(axis=1) + snrs = fg_counts / np.sqrt(bg_counts.clip(1)) + gross_ss = get_gross_sample_set(gross_spectra, gross_sources, ecal, + lt_targets, snrs, gross_counts, + real_times=rt_targets, distance_cm=distance_cm, + timestamps=self._synthesis_start_dt) + self._n_samples_synthesized += gross_ss.n_samples + + return fg_ss, gross_ss + + +def get_sample_set(spectra, sources, ecal, live_times, snrs, total_counts=None, + real_times=None, distance_cm=None, timestamps=None, + descriptions=None) -> SampleSet: + n_samples = spectra.shape[0] + + ss = SampleSet() + ss.spectra_state = SpectraState.Counts + ss.spectra = pd.DataFrame(spectra) + ss.sources = sources + ss.info.description = np.full(n_samples, "") # Ensures the length of info equal n_samples + if descriptions: + ss.info.description = descriptions + ss.info.snr = snrs + ss.info.timestamp = timestamps + ss.info.total_counts = total_counts if total_counts is not None else spectra.sum(axis=1) + ss.info.ecal_order_0 = ecal[0] + ss.info.ecal_order_1 = ecal[1] + ss.info.ecal_order_2 = ecal[2] + ss.info.ecal_order_3 = ecal[3] + ss.info.ecal_low_e = ecal[4] + ss.info.live_time = live_times + ss.info.real_time = real_times if real_times is not None else live_times + ss.info.distance_cm = distance_cm 
+ ss.info.occupancy_flag = 0 + ss.info.tag = " " # TODO: test if this can be empty string + + return ss + + +def _tile_sources_and_scale(sources, n_samples, scalars) -> pd.DataFrame: + tiled_sources = pd.DataFrame( + np.tile(sources.values, (n_samples, 1)), + columns=sources.index + ) + # Multiplying normalized source values by spectrum counts. + # This is REQUIRED for properly merging sources DataFrames later when synthesizing + # multiple isotopes. + tiled_sources = tiled_sources.multiply(scalars, axis="index") + return tiled_sources + + +def get_fg_sample_set(spectra, sources, ecal, live_times, snrs, total_counts, + real_times=None, distance_cm=None, timestamps=None, + descriptions=None) -> SampleSet: + tiled_sources = _tile_sources_and_scale( + sources, + spectra.shape[0], + spectra.sum(axis=1) + ) + ss = get_sample_set( + spectra=spectra, + sources=tiled_sources, + ecal=ecal, + live_times=live_times, + snrs=snrs, + total_counts=total_counts, + real_times=real_times, + distance_cm=distance_cm, + timestamps=timestamps, + descriptions=descriptions + ) + ss.spectra_type = SpectraType.Foreground + return ss + + +def get_gross_sample_set(spectra, sources, ecal, live_times, snrs, total_counts, + real_times=None, distance_cm=None, timestamps=None, + descriptions=None) -> SampleSet: + ss = get_sample_set( + spectra=spectra, + sources=sources, + ecal=ecal, + live_times=live_times, + snrs=snrs, + total_counts=total_counts, + real_times=real_times, + distance_cm=distance_cm, + timestamps=timestamps, + descriptions=descriptions + ) + ss.spectra_type = SpectraType.Gross + return ss + + +def get_distribution_values(function: str, function_args: Any, n_values: int, + rng: Generator = np.random.default_rng()): + """Randomly sample a list of values based one of many distributions. + + Args: + function: name of the distribution function + function_args: argument or collection of arguments to be + passed to the function, if any. 
+ n_values: size of the distribution + rng: NumPy random number generator, useful for experiment repeatability + + Returns: + Value or collection of sampled values + + Raises: + `ValueError` when an unsupported function type is provided + """ + values = None + if function == "uniform": + values = rng.uniform(*function_args, size=n_values) + elif function == "log10": + log10_args = tuple(map(np.log10, function_args)) + values = np.power(10, rng.uniform(*log10_args, size=n_values)) + elif function == "discrete": + values = rng.choice(function_args, size=n_values) + elif function == "list": + values = np.array(function_args) + else: + raise ValueError(f"{function} function not supported for sampling.") + + return values + + +def get_merged_sources_samplewise(sources1: pd.DataFrame, sources2: pd.DataFrame) -> pd.DataFrame: + merged_sources_df = sources1.add(sources2, axis=1, fill_value=0) + return merged_sources_df + + +def get_samples_per_seed(columns: pd.MultiIndex, min_samples_per_seed: int, balance_level: int): + level_values = columns.get_level_values(level=balance_level) + level_value_to_n_seeds = Counter(level_values) + unique_level_values = list(level_value_to_n_seeds.keys()) + occurences = np.array(list(level_value_to_n_seeds.values())) + max_samples_per_level_value = occurences.max() * min_samples_per_seed + samples_per_level_value = np.ceil(max_samples_per_level_value / occurences).astype(int) + lv_to_samples_per_seed = {k: v for (k, v) in zip(unique_level_values, samples_per_level_value)} + total_samples_expected = sum([x * y for x, y in zip(occurences, samples_per_level_value)]) + + return lv_to_samples_per_seed, total_samples_expected diff --git a/riid/data/synthetic/passby.py b/riid/data/synthetic/passby.py index 12b1a68f..84234618 100644 --- a/riid/data/synthetic/passby.py +++ b/riid/data/synthetic/passby.py @@ -11,8 +11,8 @@ import pandas as pd from numpy.random import Generator -from riid.data.sampleset import SampleSet -from riid.data.synthetic import Synthesizer, get_distribution_values +from riid import SampleSet +from riid.data.synthetic.base import Synthesizer, get_distribution_values class PassbySynthesizer(Synthesizer): diff --git a/riid/data/synthetic/seed.py b/riid/data/synthetic/seed.py index 9fd1b2c4..02f70412 100644 --- a/riid/data/synthetic/seed.py +++ b/riid/data/synthetic/seed.py @@ -1,7 +1,7 @@ # Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS). # Under the terms of Contract DE-NA0003525 with NTESS, # the U.S. Government retains certain rights in this software. 
-"""This modules contains utilities for generating synthetic gamma spectrum templates from GADRAS.""" +"""This module contains utilities for generating synthetic gamma spectrum templates from GADRAS.""" import os from contextlib import contextmanager from copy import deepcopy @@ -12,7 +12,8 @@ import yaml from numpy.random import Generator -from riid.data.sampleset import SampleSet, _get_utc_timestamp, read_pcf +from riid import SampleSet, SpectraState, SpectraType, read_pcf +from riid.data.sampleset import _get_utc_timestamp from riid.gadras.api import (DETECTOR_PARAMS, GADRAS_ASSEMBLY_PATH, INJECT_PARAMS, SourceInjector, get_gadras_api, get_inject_setups, validate_inject_config) @@ -405,3 +406,86 @@ def get_choices(choices_so_far: list, options: list, options_probas: np.array, n_choices_remaining -= 1 return get_choices(choices_so_far, options, options_probas, restricted_pairs, n_choices_remaining, rng) + + +def get_dummy_seeds(n_channels: int = 512, live_time: float = 600.0, + count_rate: float = 1000.0, normalize: bool = True, + rng: Generator = np.random.default_rng()) -> SampleSet: + """Get a random, dummy `SampleSet` of ideal seeds. + + WARNING: the spectra returned by this function each contain one gaussian peak that does + not overlap with the peaks of other spectra. Such data is about as *ideal* as one + could hope to be working with and does not represent anything real. + Therefore, **do not** use this data for any purpose other than testing, debugging, or + examples where code, not results, is being demonstrated. Any use in scientific studies + does not make sense. + + Args: + n_channels: number of channels in the spectra DataFrame + live_time: collection time on which to base seeds + (higher creates a less noisy shape) + count_rate: count rate on which to base seeds + (higher creates a less noisy shape) + normalize: whether to apply an L1-norm to the spectra + rng: NumPy random number generator, useful for experiment repeatability + + Returns: + `SampleSet` with randomly generated spectra + """ + ss = SampleSet() + ss.measured_or_synthetic = "synthetic" + ss.spectra_state = SpectraState.Counts + ss.spectra_type = SpectraType.BackgroundForeground + ss.synthesis_info = { + "subtract_background": True, + } + sources = [ + ("Industrial", "Am241", "Unshielded Am241"), + ("Industrial", "Ba133", "Unshielded Ba133"), + ("NORM", "K40", "PotassiumInSoil"), + ("NORM", "K40", "Moderately Shielded K40"), + ("NORM", "Ra226", "UraniumInSoil"), + ("NORM", "Th232", "ThoriumInSoil"), + ("SNM", "U238", "Unshielded U238"), + ("SNM", "Pu239", "Unshielded Pu239"), + ("SNM", "Pu239", "Moderately Shielded Pu239"), + ("SNM", "Pu239", "Heavily Shielded Pu239"), + ] + n_sources = len(sources) + n_fg_sources = n_sources + sources_cols = pd.MultiIndex.from_tuples( + sources, + names=SampleSet.SOURCES_MULTI_INDEX_NAMES + ) + sources_data = np.identity(n_sources) + ss.sources = pd.DataFrame(data=sources_data, columns=sources_cols) + + histograms = [] + N_FG_COUNTS = int(count_rate * live_time) + fg_std = np.sqrt(n_channels / n_sources) + channels_per_sources = n_channels / n_fg_sources + for i in range(n_fg_sources): + mu = i * channels_per_sources + channels_per_sources / 2 + counts = rng.normal(mu, fg_std, size=N_FG_COUNTS) + fg_histogram, _ = np.histogram(counts, bins=n_channels, range=(0, n_channels)) + histograms.append(fg_histogram) + histograms = np.array(histograms) + + ss.spectra = pd.DataFrame(data=histograms) + + ss.info.total_counts = ss.spectra.sum(axis=1) + ss.info.live_time = live_time + 
ss.info.real_time = live_time + ss.info.snr = None + ss.info.ecal_order_0 = 0 + ss.info.ecal_order_1 = 3000 + ss.info.ecal_order_2 = 100 + ss.info.ecal_order_3 = 0 + ss.info.ecal_low_e = 0 + ss.info.description = "" + ss.update_timestamp() + + if normalize: + ss.normalize() + + return ss diff --git a/riid/data/synthetic/static.py b/riid/data/synthetic/static.py index f0fd82dd..cdf0aed0 100644 --- a/riid/data/synthetic/static.py +++ b/riid/data/synthetic/static.py @@ -10,8 +10,8 @@ import numpy as np from numpy.random import Generator -from riid.data.sampleset import SampleSet, SpectraState, SpectraType -from riid.data.synthetic import Synthesizer, get_distribution_values +from riid import SampleSet, SpectraState, SpectraType +from riid.data.synthetic.base import Synthesizer, get_distribution_values class StaticSynthesizer(Synthesizer): diff --git a/riid/gadras/api.py b/riid/gadras/api.py index 187dc95a..c7af0e94 100644 --- a/riid/gadras/api.py +++ b/riid/gadras/api.py @@ -2,20 +2,20 @@ # Under the terms of Contract DE-NA0003525 with NTESS, # the U.S. Government retains certain rights in this software. """This module contains utilities for working with the GADRAS API.""" +import copy +import itertools import json import os import sys -import numpy as np -from numpy.random import Generator from typing import List -import copy -import itertools +import numpy as np import tqdm from jsonschema import validate +from numpy.random import Generator -from riid.data.sampleset import SampleSet, read_pcf -from riid.data.synthetic import get_distribution_values +from riid import SampleSet, read_pcf +from riid.data.synthetic.base import get_distribution_values GADRAS_API_SEEMINGLY_AVAILABLE = False GADRAS_DIR_ENV_VAR_KEY = "GADRAS_DIR" diff --git a/riid/metrics.py b/riid/metrics.py index aa677448..81b5a9c9 100644 --- a/riid/metrics.py +++ b/riid/metrics.py @@ -5,7 +5,7 @@ import numpy as np import sklearn -from riid.data.sampleset import SampleSet +from riid import SampleSet def multi_f1(y_true: np.ndarray, y_pred: np.ndarray) -> float: diff --git a/riid/models/__init__.py b/riid/models/__init__.py index 0825b251..3a73b7a0 100644 --- a/riid/models/__init__.py +++ b/riid/models/__init__.py @@ -1,243 +1,10 @@ # Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS). # Under the terms of Contract DE-NA0003525 with NTESS, # the U.S. Government retains certain rights in this software. 
-"""This module contains functionality shared across all PyRIID models.""" -import json -import os -import uuid -from abc import abstractmethod -from enum import Enum +"""This module contains PyRIID models.""" +from riid.models.bayes import PoissonBayesClassifier +from riid.models.neural_nets import LabelProportionEstimator, MLPClassifier +from riid.models.neural_nets.arad import ARADLatentPredictor, ARADv1, ARADv2 -import numpy as np -import tensorflow as tf -import tf2onnx -from keras.api.models import Model -from keras.api.utils import get_custom_objects - -import riid -from riid.data.labeling import label_to_index_element -from riid.data.sampleset import SampleSet, SpectraState -from riid.losses import mish -from riid.metrics import multi_f1, single_f1 - -get_custom_objects().update({ - "multi_f1": multi_f1, - "single_f1": single_f1, - "mish": mish, -}) - - -class ModelInput(int, Enum): - """Enumerates the potential input sources for a model.""" - GrossSpectrum = 0 - BackgroundSpectrum = 1 - ForegroundSpectrum = 2 - - -class PyRIIDModel: - """Base class for PyRIID models.""" - - def __init__(self, *args, **kwargs): - self._info = {} - self._temp_file_path = "temp_model.json" - self._custom_objects = {} - self._initialize_info() - - @property - def seeds(self): - return self._info["seeds"] - - @seeds.setter - def seeds(self, value): - self._info["seeds"] = value - - @property - def info(self): - return self._info - - @info.setter - def info(self, value): - self._info = value - - @property - def target_level(self): - return self._info["target_level"] - - @target_level.setter - def target_level(self, value): - if value in SampleSet.SOURCES_MULTI_INDEX_NAMES: - self._info["target_level"] = value - else: - msg = ( - f"Target level '{value}' is invalid. " - f"Acceptable levels: {SampleSet.SOURCES_MULTI_INDEX_NAMES}" - ) - raise ValueError(msg) - - @property - def model(self) -> Model: - return self._model - - @model.setter - def model(self, value: Model): - self._model = value - - @property - def model_id(self): - return self._info["model_id"] - - @model_id.setter - def model_id(self, value): - self._info["model_id"] = value - - @property - def model_inputs(self): - return self._info["model_inputs"] - - @model_inputs.setter - def model_inputs(self, value): - self._info["model_inputs"] = value - - @property - def model_outputs(self): - return self._info["model_outputs"] - - @model_outputs.setter - def model_outputs(self, value): - self._info["model_outputs"] = value - - def get_model_outputs_as_label_tuples(self): - return [ - label_to_index_element(v, self.target_level) for v in self.model_outputs - ] - - def _get_model_dict(self) -> dict: - model_json = self.model.to_json() - model_dict = json.loads(model_json) - model_weights = self.model.get_weights() - model_dict = { - "info": self._info, - "model": model_dict, - "weights": model_weights, - } - return model_dict - - def _get_model_str(self) -> str: - model_dict = self._get_model_dict() - model_str = json.dumps(model_dict, indent=4, cls=PyRIIDModelJsonEncoder) - return model_str - - def _initialize_info(self): - init_info = { - "model_id": str(uuid.uuid4()), - "model_type": self.__class__.__name__, - "normalization": SpectraState.Unknown, - "pyriid_version": riid.__version__, - } - self._update_info(**init_info) - - def _update_info(self, **kwargs): - self._info.update(kwargs) - - def _update_custom_objects(self, key, value): - self._custom_objects.update({key: value}) - - def load(self, model_path: str): - """Load the model from a path. 
- - Args: - model_path: path from which to load the model. - """ - if not os.path.exists(model_path): - raise ValueError("Model file does not exist.") - - with open(model_path) as fin: - model = json.load(fin) - - model_str = json.dumps(model["model"]) - self.model = tf.keras.models.model_from_json(model_str, custom_objects=self._custom_objects) - self.model.set_weights([np.array(x) for x in model["weights"]]) - self.info = model["info"] - - def save(self, model_path: str, overwrite=False): - """Save the model to a path. - - Args: - model_path: path at which to save the model. - overwrite: whether to overwrite an existing file if it already exists. - - Raises: - `ValueError` when the given path already exists - """ - if os.path.exists(model_path) and not overwrite: - raise ValueError("Model file already exists.") - - model_str = self._get_model_str() - with open(model_path, "w") as fout: - fout.write(model_str) - - def to_onnx(self, model_path: str = None, **tf2onnx_kwargs: dict): - """Convert the model to an ONNX model. - - Args: - model_path: path at which to save the model - tf2onnx_kwargs: additional kwargs to pass to the conversion - """ - if not model_path.endswith(riid.ONNX_MODEL_FILE_EXTENSION): - raise ValueError(f"ONNX file path must end with {riid.ONNX_MODEL_FILE_EXTENSION}") - if os.path.exists(model_path): - raise ValueError("Model file already exists.") - - tf2onnx.convert.from_keras( - self.model, - output_path=model_path, - **tf2onnx_kwargs - ) - - def to_tflite(self, model_path: str, quantize: bool = False, prune: bool = False): - """Convert the model to a TFLite model and optionally applying quantization or pruning. - - Args: - model_path: file path at which to save the model - quantize: whether to apply quantization - prune: whether to apply pruning - """ - if not model_path.endswith(riid.TFLITE_MODEL_FILE_EXTENSION): - raise ValueError(f"TFLite file path must end with {riid.TFLITE_MODEL_FILE_EXTENSION}") - if os.path.exists(model_path): - raise ValueError("Model file already exists.") - - optimizations = [] - if quantize: - optimizations.append(tf.lite.Optimize.DEFAULT) - if prune: - optimizations.append(tf.lite.Optimize.EXPERIMENTAL_SPARSITY) - - converter = tf.lite.TFLiteConverter.from_keras_model(self.model) - converter.optimizations = optimizations - tflite_model = converter.convert() - - with open(model_path, "wb") as fout: - fout.write(tflite_model) - - @abstractmethod - def fit(self): - pass - - @abstractmethod - def predict(self): - pass - - -class PyRIIDModelJsonEncoder(json.JSONEncoder): - """Custom JSON encoder for saving models. - """ - def default(self, o): - """Converts certain types to JSON-compatible types. - """ - if isinstance(o, np.ndarray): - return o.tolist() - elif isinstance(o, np.float32): - return o.astype(float) - - return super().default(o) +__all__ = ["PoissonBayesClassifier", "LabelProportionEstimator", "MLPClassifier", + "ARADLatentPredictor", "ARADv1", "ARADv2"] diff --git a/riid/models/base.py b/riid/models/base.py new file mode 100644 index 00000000..8f4e0329 --- /dev/null +++ b/riid/models/base.py @@ -0,0 +1,256 @@ +# Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS). +# Under the terms of Contract DE-NA0003525 with NTESS, +# the U.S. Government retains certain rights in this software. 
+"""This module contains functionality shared across all PyRIID models.""" +import json +import os +from pathlib import Path +import uuid +from abc import abstractmethod +from enum import Enum + +import numpy as np +import tensorflow as tf +import tf2onnx +from keras.api.models import Model +from keras.api.utils import get_custom_objects + +import riid +from riid import SampleSet, SpectraState +from riid.data.labeling import label_to_index_element +from riid.losses import mish +from riid.metrics import multi_f1, single_f1 + +get_custom_objects().update({ + "multi_f1": multi_f1, + "single_f1": single_f1, + "mish": mish, +}) + + +class ModelInput(int, Enum): + """Enumerates the potential input sources for a model.""" + GrossSpectrum = 0 + BackgroundSpectrum = 1 + ForegroundSpectrum = 2 + + +class PyRIIDModel: + """Base class for PyRIID models.""" + + def __init__(self, *args, **kwargs): + self._info = {} + self._temp_file_path = "temp_model.json" + self._custom_objects = {} + self._initialize_info() + + @property + def seeds(self): + return self._info["seeds"] + + @seeds.setter + def seeds(self, value): + self._info["seeds"] = value + + @property + def info(self): + return self._info + + @info.setter + def info(self, value): + self._info = value + + @property + def target_level(self): + return self._info["target_level"] + + @target_level.setter + def target_level(self, value): + if value in SampleSet.SOURCES_MULTI_INDEX_NAMES: + self._info["target_level"] = value + else: + msg = ( + f"Target level '{value}' is invalid. " + f"Acceptable levels: {SampleSet.SOURCES_MULTI_INDEX_NAMES}" + ) + raise ValueError(msg) + + @property + def model(self) -> Model: + return self._model + + @model.setter + def model(self, value: Model): + self._model = value + + @property + def model_id(self): + return self._info["model_id"] + + @model_id.setter + def model_id(self, value): + self._info["model_id"] = value + + @property + def model_inputs(self): + return self._info["model_inputs"] + + @model_inputs.setter + def model_inputs(self, value): + self._info["model_inputs"] = value + + @property + def model_outputs(self): + return self._info["model_outputs"] + + @model_outputs.setter + def model_outputs(self, value): + self._info["model_outputs"] = value + + def get_model_outputs_as_label_tuples(self): + return [ + label_to_index_element(v, self.target_level) for v in self.model_outputs + ] + + def _get_model_dict(self) -> dict: + model_json = self.model.to_json() + model_dict = json.loads(model_json) + model_weights = self.model.get_weights() + model_dict = { + "info": self._info, + "model": model_dict, + "weights": model_weights, + } + return model_dict + + def _get_model_str(self) -> str: + model_dict = self._get_model_dict() + model_str = json.dumps(model_dict, indent=4, cls=PyRIIDModelJsonEncoder) + return model_str + + def _initialize_info(self): + init_info = { + "model_id": str(uuid.uuid4()), + "model_type": self.__class__.__name__, + "normalization": SpectraState.Unknown, + "pyriid_version": riid.__version__, + } + self._update_info(**init_info) + + def _update_info(self, **kwargs): + self._info.update(kwargs) + + def _update_custom_objects(self, key, value): + self._custom_objects.update({key: value}) + + def load(self, model_path: str): + """Load the model from a path. + + Args: + model_path: path from which to load the model. 
+ """ + if not os.path.exists(model_path): + raise ValueError("Model file does not exist.") + + with open(model_path) as fin: + model = json.load(fin) + + model_str = json.dumps(model["model"]) + self.model = tf.keras.models.model_from_json(model_str, custom_objects=self._custom_objects) + self.model.set_weights([np.array(x) for x in model["weights"]]) + self.info = model["info"] + + def save(self, model_path: str, overwrite=False): + """Save the model to a path. + + Args: + model_path: path at which to save the model. + overwrite: whether to overwrite an existing file if it already exists. + + Raises: + `ValueError` when the given path already exists + """ + if os.path.exists(model_path) and not overwrite: + raise ValueError("Model file already exists.") + + model_str = self._get_model_str() + with open(model_path, "w") as fout: + fout.write(model_str) + + def to_onnx(self, model_path, **tf2onnx_kwargs: dict): + """Convert the model to an ONNX model. + + Args: + model_path: path at which to save the model + tf2onnx_kwargs: additional kwargs to pass to the conversion + """ + model_path = Path(model_path) + if not str(model_path).endswith(riid.ONNX_MODEL_FILE_EXTENSION): + raise ValueError(f"ONNX file path must end with {riid.ONNX_MODEL_FILE_EXTENSION}") + if model_path.exists(): + raise ValueError("Model file already exists.") + + tf2onnx.convert.from_keras( + self.model, + input_signature=[ + tf.TensorSpec( + shape=input_tensor.shape, + dtype=input_tensor.dtype, + name=input_tensor.name + ) + for input_tensor in self.model.inputs + ], + output_path=str(model_path), + **tf2onnx_kwargs + ) + + def to_tflite(self, model_path, quantize: bool = False, prune: bool = False): + """Convert the model to a TFLite model and optionally applying quantization or pruning. + + Args: + model_path: file path at which to save the model + quantize: whether to apply quantization + prune: whether to apply pruning + """ + model_path = Path(model_path) + if not str(model_path).endswith(riid.TFLITE_MODEL_FILE_EXTENSION): + raise ValueError(f"TFLite file path must end with {riid.TFLITE_MODEL_FILE_EXTENSION}") + if model_path.exists(): + raise ValueError("Model file already exists.") + + optimizations = [] + if quantize: + optimizations.append(tf.lite.Optimize.DEFAULT) + if prune: + optimizations.append(tf.lite.Optimize.EXPERIMENTAL_SPARSITY) + + saved_model_dir = model_path.stem + self.model.export(saved_model_dir) + converter = tf.lite.TFLiteConverter.from_saved_model(str(saved_model_dir)) + converter.optimizations = optimizations + tflite_model = converter.convert() + + with open(model_path, "wb") as fout: + fout.write(tflite_model) + + @abstractmethod + def fit(self): + pass + + @abstractmethod + def predict(self): + pass + + +class PyRIIDModelJsonEncoder(json.JSONEncoder): + """Custom JSON encoder for saving models. + """ + def default(self, o): + """Converts certain types to JSON-compatible types. 
+ """ + if isinstance(o, np.ndarray): + return o.tolist() + elif isinstance(o, np.float32): + return o.astype(float) + + return super().default(o) diff --git a/riid/models/bayes.py b/riid/models/bayes.py index 6850cbb1..762d38c8 100644 --- a/riid/models/bayes.py +++ b/riid/models/bayes.py @@ -8,8 +8,8 @@ from keras.api.layers import Add, Input, Multiply, Subtract from keras.api.models import Model -from riid.data.sampleset import SampleSet -from riid.models import PyRIIDModel +from riid import SampleSet +from riid.models.base import PyRIIDModel from riid.models.layers import (ClipByValueLayer, DivideLayer, ExpandDimsLayer, PoissonLogProbabilityLayer, ReduceMaxLayer, ReduceSumLayer, SeedLayer) diff --git a/riid/models/neural_nets/__init__.py b/riid/models/neural_nets/__init__.py index 42443f88..1dd06420 100644 --- a/riid/models/neural_nets/__init__.py +++ b/riid/models/neural_nets/__init__.py @@ -2,670 +2,7 @@ # Under the terms of Contract DE-NA0003525 with NTESS, # the U.S. Government retains certain rights in this software. """This module contains neural network-based classifiers and regressors.""" -import keras -import numpy as np -import pandas as pd -import tensorflow as tf -from keras.api.activations import sigmoid, softmax -from keras.api.callbacks import EarlyStopping -from keras.api.layers import Dense, Dropout, Input -from keras.api.losses import CategoricalCrossentropy, MeanSquaredError -from keras.api.metrics import F1Score, Precision, Recall -from keras.api.models import Model -from keras.api.optimizers import Adam -from keras.api.regularizers import L1L2, l1, l2 -from keras.api.utils import split_dataset -from scipy.interpolate import UnivariateSpline +from riid.models.neural_nets.basic import MLPClassifier +from riid.models.neural_nets.lpe import LabelProportionEstimator -from riid.data.sampleset import SampleSet, SpectraType -from riid.losses import (build_keras_semisupervised_loss_func, - chi_squared_diff, jensen_shannon_divergence, - normal_nll_diff, poisson_nll_diff, - reconstruction_error, sse_diff, weighted_sse_diff) -from riid.losses.sparsemax import SparsemaxLoss, sparsemax -from riid.metrics import build_keras_semisupervised_metric_func -from riid.models import ModelInput, PyRIIDModel -from riid.models.layers import L1NormLayer - - -class MLPClassifier(PyRIIDModel): - """Multi-layer perceptron classifier.""" - def __init__(self, activation=None, loss=None, optimizer=None, - metrics=None, l2_alpha: float = 1e-4, - activity_regularizer=None, final_activation=None): - """ - Args: - activation: activate function to use for each dense layer - loss: loss function to use for training - optimizer: tensorflow optimizer or optimizer name to use for training - metrics: list of metrics to be evaluating during training - l2_alpha: alpha value for the L2 regularization of each dense layer - activity_regularizer: regularizer function applied each dense layer output - final_activation: final activation function to apply to model output - """ - super().__init__() - - self.activation = activation - self.loss = loss - self.optimizer = optimizer - self.final_activation = final_activation - self.metrics = metrics - self.l2_alpha = l2_alpha - self.activity_regularizer = activity_regularizer - self.final_activation = final_activation - - if self.activation is None: - self.activation = "relu" - if self.loss is None: - self.loss = CategoricalCrossentropy() - if optimizer is None: - self.optimizer = Adam(learning_rate=0.01, clipnorm=0.001) - if self.metrics is None: - self.metrics = 
[F1Score(), Precision(), Recall()] - if self.activity_regularizer is None: - self.activity_regularizer = l1(0.0) - if self.final_activation is None: - self.final_activation = "softmax" - self.model = None - self._predict_fn = None - - def fit(self, ss: SampleSet, batch_size: int = 200, epochs: int = 20, - validation_split: float = 0.2, callbacks=None, - patience: int = 15, es_monitor: str = "val_loss", - es_mode: str = "min", es_verbose=0, target_level="Isotope", verbose: bool = False): - """Fit a model to the given `SampleSet`(s). - - Args: - ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are either - foreground (AKA, "net") or gross. - batch_size: number of samples per gradient update - epochs: maximum number of training iterations - validation_split: percentage of the training data to use as validation data - callbacks: list of callbacks to be passed to the TensorFlow `Model.fit()` method - patience: number of epochs to wait for `EarlyStopping` object - es_monitor: quantity to be monitored for `EarlyStopping` object - es_mode: mode for `EarlyStopping` object - es_verbose: verbosity level for `EarlyStopping` object - target_level: `SampleSet.sources` column level to use - verbose: whether to show detailed model training output - - Returns: - `tf.History` object. - - Raises: - `ValueError` when no spectra are provided as input - """ - if ss.n_samples <= 0: - raise ValueError("No spectr[a|um] provided!") - - if ss.spectra_type == SpectraType.Gross: - self.model_inputs = (ModelInput.GrossSpectrum,) - elif ss.spectra_type == SpectraType.Foreground: - self.model_inputs = (ModelInput.ForegroundSpectrum,) - elif ss.spectra_type == SpectraType.Background: - self.model_inputs = (ModelInput.BackgroundSpectrum,) - else: - raise ValueError(f"{ss.spectra_type} is not supported in this model.") - - X = ss.get_samples() - source_contributions_df = ss.sources.T.groupby(target_level, sort=False).sum().T - model_outputs = source_contributions_df.columns.values.tolist() - Y = source_contributions_df.values - - spectra_tensor = tf.convert_to_tensor(X, dtype=tf.float32) - labels_tensor = tf.convert_to_tensor(Y, dtype=tf.float32) - training_dataset = tf.data.Dataset.from_tensor_slices((spectra_tensor, labels_tensor)) - training_dataset, validation_dataset = split_dataset( - training_dataset, - left_size=validation_split, - shuffle=True - ) - training_dataset = training_dataset.batch(batch_size=batch_size) - validation_dataset = validation_dataset.batch(batch_size=batch_size) - - if not self.model: - inputs = Input(shape=(X.shape[1],), name="Spectrum") - dense_layer_size = X.shape[1] // 2 - dense_layer = Dense( - dense_layer_size, - activation=self.activation, - activity_regularizer=self.activity_regularizer, - kernel_regularizer=l2(self.l2_alpha), - )(inputs) - outputs = Dense(Y.shape[1], activation=self.final_activation)(dense_layer) - self.model = Model(inputs, outputs) - self.model.compile(loss=self.loss, optimizer=self.optimizer, - metrics=self.metrics) - - es = EarlyStopping( - monitor=es_monitor, - patience=patience, - verbose=es_verbose, - restore_best_weights=True, - mode=es_mode, - ) - if callbacks: - callbacks.append(es) - else: - callbacks = [es] - - history = self.model.fit( - training_dataset, - epochs=epochs, - verbose=verbose, - validation_data=validation_dataset, - callbacks=callbacks, - ) - - # Update model information - self._update_info( - target_level=target_level, - model_outputs=model_outputs, - normalization=ss.spectra_state, - ) - - # Define the predict function with 
tf.function and input_signature - self._predict_fn = tf.function( - self._predict, - # input_signature=[tf.TensorSpec(shape=[None, X.shape[1]], dtype=tf.float32)] - experimental_relax_shapes=True - ) - - return history - - def _predict(self, input_tensor): - return self.model(input_tensor, training=False) - - def predict(self, ss: SampleSet, bg_ss: SampleSet = None): - """Classify the spectra in the provided `SampleSet`(s). - - Results are stored inside the first SampleSet's prediction-related properties. - - Args: - ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are either - foreground (AKA, "net") or gross - bg_ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are background - """ - x_test = ss.get_samples().astype(float) - if bg_ss: - X = [x_test, bg_ss.get_samples().astype(float)] - else: - X = x_test - - spectra_tensor = tf.convert_to_tensor(X, dtype=tf.float32) - results = self._predict_fn(spectra_tensor) - - col_level_idx = SampleSet.SOURCES_MULTI_INDEX_NAMES.index(self.target_level) - col_level_subset = SampleSet.SOURCES_MULTI_INDEX_NAMES[:col_level_idx+1] - ss.prediction_probas = pd.DataFrame( - data=results, - columns=pd.MultiIndex.from_tuples( - self.get_model_outputs_as_label_tuples(), - names=col_level_subset - ) - ) - - ss.classified_by = self.model_id - - -class LabelProportionEstimator(PyRIIDModel): - """Regressor for predicting label proportions that uses a semi-supervised loss. - - Optionally, a U-spline-based out-of-distribution detection model can be fit to target a desired - false positive rate. - """ - UNSUPERVISED_LOSS_FUNCS = { - "poisson_nll": poisson_nll_diff, - "normal_nll": normal_nll_diff, - "sse": sse_diff, - "weighted_sse": weighted_sse_diff, - "jsd": jensen_shannon_divergence, - "chi_squared": chi_squared_diff - } - SUPERVISED_LOSS_FUNCS = { - "sparsemax": ( - SparsemaxLoss, - { - "from_logits": True, - "reduction": tf.keras.losses.Reduction.NONE, - }, - sparsemax, - ), - "categorical_crossentropy": ( - CategoricalCrossentropy, - { - "from_logits": True, - "reduction": tf.keras.losses.Reduction.NONE, - }, - softmax, - ), - "mse": ( - MeanSquaredError, - { - "reduction": tf.keras.losses.Reduction.NONE, - }, - sigmoid, - ) - } - INFO_KEYS = ( - # model architecture - "hidden_layers", - "learning_rate", - "epsilon", - "sup_loss", - "unsup_loss", - "metrics", - "beta", - "hidden_layer_activation", - "kernel_l1_regularization", - "kernel_l2_regularization", - "bias_l1_regularization", - "bias_l2_regularization", - "activity_l1_regularization", - "activity_l2_regularization", - "dropout", - "ood_fp_rate", - "fit_spline", - "spline_bins", - "spline_k", - "spline_s", - # dictionaries - "source_dict", - # populated when loading model - "spline_snrs", - "spline_recon_errors", - ) - - def __init__(self, hidden_layers: tuple = (256,), sup_loss="sparsemax", unsup_loss="sse", - metrics: list = ["mae", "categorical_crossentropy"], beta=0.9, source_dict=None, - optimizer="adam", optimizer_kwargs=None, learning_rate: float = 1e-3, - hidden_layer_activation: str = "mish", - kernel_l1_regularization: float = 0.0, kernel_l2_regularization: float = 0.0, - bias_l1_regularization: float = 0.0, bias_l2_regularization: float = 0.0, - activity_l1_regularization: float = 0.0, activity_l2_regularization: float = 0.0, - dropout: float = 0.0, ood_fp_rate: float = 0.05, - fit_spline: bool = True, spline_bins: int = 15, spline_k: int = 3, - spline_s: int = 0, spline_snrs=None, spline_recon_errors=None): - """ - Args: - hidden_layers: tuple defining the number and 
size of dense layers - sup_loss: supervised loss function to use for training - unsup_loss: unsupervised loss function to use for training the - foreground branch of the network (options: "sse", "poisson_nll", - "normal_nll", "weighted_sse", "jsd", or "chi_squared") - metrics: list of metrics to be evaluating during training - beta: tradeoff parameter between the supervised and unsupervised foreground loss - source_dict: 2D array of pure, long-collect foreground spectra - optimizer: tensorflow optimizer or optimizer name to use for training - optimizer_kwargs: kwargs for optimizer - learning_rate: learning rate for the optimizer - hidden_layer_activation: activation function to use for each dense layer - kernel_l1_regularization: l1 regularization value for the kernel regularizer - kernel_l2_regularization: l2 regularization value for the kernel regularizer - bias_l1_regularization: l1 regularization value for the bias regularizer - bias_l2_regularization: l2 regularization value for the bias regularizer - activity_l1_regularization: l1 regularization value for the activity regularizer - activity_l2_regularization: l2 regularization value for the activity regularizer - dropout: amount of dropout to apply to each dense layer - ood_fp_rate: false positive rate used to determine threshold for - out-of-distribution (OOD) detection - fit_spline: whether or not to fit UnivariateSpline for OOD threshold function - spline_bins: number of bins used when fitting the UnivariateSpline threshold - function for OOD detection - spline_k: degree of smoothing for the UnivariateSpline - spline_s: positive smoothing factor used to choose the number of knots in the - UnivariateSpline (s=0 forces the spline through all the datapoints, equivalent to - InterpolatedUnivariateSpline) - spline_snrs: SNRs from training used as the x-values to fit the UnivariateSpline - spline_recon_errors: reconstruction errors from training used as the y-values to - fit the UnivariateSpline - """ - super().__init__() - - self.hidden_layers = hidden_layers - self.sup_loss = sup_loss - self.unsup_loss = unsup_loss - self.sup_loss_func, self.activation = self._get_sup_loss_func( - sup_loss, - prefix="sup" - ) - self.sup_loss_func_name = self.sup_loss_func.name - - self.optimizer = optimizer - if isinstance(optimizer, str): - self.optimizer = keras.optimizers.get(optimizer) - if optimizer_kwargs is not None: - for key, value in optimizer_kwargs.items(): - setattr(self.optimizer, key, value) - self.optimizer.learning_rate = learning_rate - - self.unsup_loss_func = self._get_unsup_loss_func(unsup_loss) - self.unsup_loss_func_name = f"unsup_{unsup_loss}_loss" - self.metrics = metrics - self.beta = beta - self.source_dict = source_dict - self.semisup_loss_func_name = "semisup_loss" - self.hidden_layer_activation = hidden_layer_activation - self.kernel_l1_regularization = kernel_l1_regularization - self.kernel_l2_regularization = kernel_l2_regularization - self.bias_l1_regularization = bias_l1_regularization - self.bias_l2_regularization = bias_l2_regularization - self.activity_l1_regularization = activity_l1_regularization - self.activity_l2_regularization = activity_l2_regularization - self.dropout = dropout - self.ood_fp_rate = ood_fp_rate - self.fit_spline = fit_spline - self.spline_bins = spline_bins - self.spline_k = spline_k - self.spline_s = spline_s - self.spline_snrs = spline_snrs - self.spline_recon_errors = spline_recon_errors - self.model = None - - self._update_custom_objects("L1NormLayer", L1NormLayer) - - @property - def 
source_dict(self) -> dict: - return self.info["source_dict"] - - @source_dict.setter - def source_dict(self, value: dict): - self.info["source_dict"] = value - - def _get_sup_loss_func(self, loss_func_str, prefix): - if loss_func_str not in self.SUPERVISED_LOSS_FUNCS: - raise KeyError(f"'{loss_func_str}' is not a supported supervised loss function.") - func, kwargs, activation = self.SUPERVISED_LOSS_FUNCS[loss_func_str] - loss_func_name = f"{prefix}_{loss_func_str}_loss" - return func(name=loss_func_name, **kwargs), activation - - def _get_unsup_loss_func(self, loss_func_str): - if loss_func_str not in self.UNSUPERVISED_LOSS_FUNCS: - raise KeyError(f"'{loss_func_str}' is not a supported unsupervised loss function.") - return self.UNSUPERVISED_LOSS_FUNCS[loss_func_str] - - def _initialize_model(self, input_size, output_size): - spectra_input = Input(input_size, name="input_spectrum") - spectra_norm = L1NormLayer(name="normalized_input_spectrum")(spectra_input) - x = spectra_norm - for layer, nodes in enumerate(self.hidden_layers): - x = Dense( - nodes, - activation=self.hidden_layer_activation, - kernel_regularizer=L1L2( - l1=self.kernel_l1_regularization, - l2=self.kernel_l2_regularization - ), - bias_regularizer=L1L2( - l1=self.bias_l1_regularization, - l2=self.bias_l2_regularization - ), - activity_regularizer=L1L2( - l1=self.activity_l1_regularization, - l2=self.activity_l2_regularization - ), - name=f"dense_{layer}" - )(x) - - if self.dropout > 0: - x = Dropout(self.dropout)(x) - output = Dense( - output_size, - activation="linear", - name="output" - )(x) - - self.model = Model(inputs=[spectra_input], outputs=[output]) - - def _get_info_as_dict(self): - info_dict = {} - for k, v in vars(self).items(): - if k not in self.INFO_KEYS: - continue - if isinstance(v, np.ndarray): - info_dict[k] = v.tolist() - else: - info_dict[k] = v - return info_dict - - def _get_spline_threshold_func(self): - return UnivariateSpline( - self.info["avg_snrs"], - self.info["thresholds"], - k=self.spline_k, - s=self.spline_s - ) - - def _fit_spline_threshold_func(self): - out = pd.qcut( - np.array(self.spline_snrs), - self.spline_bins, - labels=False, - ) - thresholds = [ - np.quantile(np.array(self.spline_recon_errors)[out == int(i)], 1-self.ood_fp_rate) - for i in range(self.spline_bins) - ] - avg_snrs = [ - np.mean(np.array(self.spline_snrs)[out == int(i)]) for i in range(self.spline_bins) - ] - self._update_info( - avg_snrs=avg_snrs, - thresholds=thresholds, - spline_k=self.spline_k, - spline_s=self.spline_s, - ) - - def _get_snrs(self, ss: SampleSet, bg_cps: float, is_gross: bool) -> np.ndarray: - fg_counts = ss.info.total_counts.values.astype("float64") - bg_counts = ss.info.live_time.values * bg_cps - if is_gross: - fg_counts = fg_counts - bg_counts - snrs = fg_counts / np.sqrt(bg_counts) - return snrs - - def fit(self, seeds_ss: SampleSet, ss: SampleSet, bg_cps: int = 300, is_gross: bool = False, - batch_size: int = 10, epochs: int = 20, validation_split: float = 0.2, - callbacks=None, patience: int = 15, es_monitor: str = "val_loss", - es_mode: str = "min", es_verbose=0, es_min_delta: float = 0.0, - normalize_sup_loss: bool = True, normalize_func=tf.math.tanh, - normalize_scaler: float = 1.0, target_level="Isotope", verbose: bool = False): - """Fit a model to the given SampleSet(s). 
- - Args: - seeds_ss: `SampleSet` of pure, long-collect spectra - ss: `SampleSet` of `n` gross or foreground spectra where `n` >= 1 - bg_cps: background rate assumption used for calculating SNR in spline function - using in OOD detection - is_gross: whether `ss` contains gross spectra - batch_size: number of samples per gradient update - epochs: maximum number of training iterations - validation_split: proportion of training data to use as validation data - callbacks: list of callbacks to be passed to TensorFlow Model.fit() method - patience: number of epochs to wait for `EarlyStopping` object - es_monitor: quantity to be monitored for `EarlyStopping` object - es_mode: mode for `EarlyStopping` object - es_verbose: verbosity level for `EarlyStopping` object - es_min_delta: minimum change to count as an improvement for early stopping - normalize_sup_loss: whether to normalize the supervised loss term - normalize_func: normalization function used for supervised loss term - normalize_scaler: scalar that sets the steepness of the normalization function - target_level: source level to target for model output - verbose: whether model training output is printed to the terminal - """ - spectra = ss.get_samples().astype(float) - sources_df = ss.sources.T.groupby(target_level, sort=False).sum().T - sources = sources_df.values.astype(float) - self.sources_columns = sources_df.columns - - if verbose: - print("Building dictionary...") - - if self.source_dict is None: - self.source_dict = _get_reordered_spectra( - seeds_ss.spectra, - seeds_ss.sources, - self.sources_columns, - target_level=target_level - ).values - - if not self.model: - if verbose: - print("Initializing model...") - self._initialize_model( - (ss.n_channels,), - sources.shape[1], - ) - elif verbose: - print("Model already initialized.") - - if verbose: - print("Building loss functions...") - - self.semisup_loss_func = build_keras_semisupervised_loss_func( - self.sup_loss_func, - self.unsup_loss_func, - self.source_dict, - self.beta, - self.activation, - n_labels=sources.shape[1], - normalize=normalize_sup_loss, - normalize_func=normalize_func, - normalize_scaler=normalize_scaler - ) - - semisup_metrics = None - if self.metrics: - if verbose: - print("Building metric functions...") - semisup_metrics = [] - for each in self.metrics: - if isinstance(each, str): - semisup_metrics.append( - build_keras_semisupervised_metric_func( - tf.keras.metrics.get(each), - self.activation, - sources.shape[1] - ) - ) - else: - semisup_metrics.append( - build_keras_semisupervised_metric_func( - each, - self.activation, - sources.shape[1] - ) - ) - - self.model.compile( - loss=self.semisup_loss_func, - optimizer=self.optimizer, - metrics=semisup_metrics - ) - - es = EarlyStopping( - monitor=es_monitor, - patience=patience, - verbose=es_verbose, - restore_best_weights=True, - mode=es_mode, - min_delta=es_min_delta, - ) - - if callbacks: - callbacks.append(es) - else: - callbacks = [es] - - history = self.model.fit( - spectra, - np.append(sources, spectra, axis=1), - epochs=epochs, - verbose=verbose, - validation_split=validation_split, - callbacks=callbacks, - shuffle=True, - batch_size=batch_size - ) - - if self.fit_spline: - if verbose: - print("Finding OOD detection threshold function...") - - train_logits = self.model.predict(spectra, verbose=0) - train_lpes = self.activation(tf.convert_to_tensor(train_logits, dtype=tf.float32)) - self.spline_recon_errors = reconstruction_error( - tf.convert_to_tensor(spectra, dtype=tf.float32), - train_lpes, - 
self.source_dict, - self.unsup_loss_func - ).numpy() - self.spline_snrs = self._get_snrs(ss, bg_cps, is_gross) - self._fit_spline_threshold_func() - - info = self._get_info_as_dict() - self._update_info( - target_level=target_level, - model_outputs=sources_df.columns.values.tolist(), - normalization=ss.spectra_state, - **info, - ) - - return history - - def predict(self, ss: SampleSet, bg_cps: int = 300, is_gross: bool = False, verbose=False): - """Estimate the proportions of counts present in each sample of the provided SampleSet. - - Results are stored inside the SampleSet's prediction_probas property. - - Args: - ss: `SampleSet` of `n` foreground or gross spectra where `n` >= 1 - bg_cps: background rate used for estimating sample SNRs. - If background rate varies to a significant degree, split up sampleset - by SNR and make multiple calls to this method. - is_gross: whether `ss` contains gross spectra - """ - test_spectra = ss.get_samples().astype(float) - - logits = self.model.predict(test_spectra, verbose=verbose) - lpes = self.activation(tf.convert_to_tensor(logits, dtype=tf.float32)) - - col_level_idx = SampleSet.SOURCES_MULTI_INDEX_NAMES.index(self.target_level) - col_level_subset = SampleSet.SOURCES_MULTI_INDEX_NAMES[:col_level_idx+1] - ss.prediction_probas = pd.DataFrame( - data=lpes, - columns=pd.MultiIndex.from_tuples( - self.get_model_outputs_as_label_tuples(), - names=col_level_subset - ) - ) - - # Fill in unsupervised losses - recon_errors = reconstruction_error( - tf.convert_to_tensor(test_spectra, dtype=tf.float32), - lpes, - self.source_dict, - self.unsup_loss_func - ).numpy() - - if self.fit_spline: - snrs = self._get_snrs(ss, bg_cps, is_gross) - thresholds = self._get_spline_threshold_func()(snrs) - is_ood = recon_errors > thresholds - ss.info["ood"] = is_ood - - ss.info["recon_error"] = recon_errors - - -def _get_reordered_spectra(old_spectra_df: pd.DataFrame, old_sources_df: pd.DataFrame, - new_sources_columns, target_level) -> pd.DataFrame: - collapsed_sources_df = old_sources_df\ - .T.groupby(target_level)\ - .sum().T - reordered_spectra_df = old_spectra_df.iloc[ - collapsed_sources_df[ - new_sources_columns - ].idxmax() - ].reset_index(drop=True) - - return reordered_spectra_df +__all__ = ["LabelProportionEstimator", "MLPClassifier"] diff --git a/riid/models/neural_nets/arad.py b/riid/models/neural_nets/arad.py index 3679bfe6..0fd6b321 100644 --- a/riid/models/neural_nets/arad.py +++ b/riid/models/neural_nets/arad.py @@ -21,10 +21,10 @@ from scipy.spatial.distance import jensenshannon from scipy.stats import entropy -from riid.data.sampleset import SampleSet, SpectraState +from riid import SampleSet, SpectraState from riid.losses import mish -from riid.models import PyRIIDModel -from riid.models.bayes import ExpandDimsLayer +from riid.models.base import PyRIIDModel +from riid.models.layers import ExpandDimsLayer class ARADv1TF(Model): diff --git a/riid/models/neural_nets/basic.py b/riid/models/neural_nets/basic.py new file mode 100644 index 00000000..81db0962 --- /dev/null +++ b/riid/models/neural_nets/basic.py @@ -0,0 +1,197 @@ +# Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS). +# Under the terms of Contract DE-NA0003525 with NTESS, +# the U.S. Government retains certain rights in this software. 
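`MLPClassifier` moves into this new module essentially unchanged apart from the trimmed imports. A minimal usage sketch against the `fit`/`predict` signatures below, assuming `train_ss` is an existing gross-spectra `SampleSet` (hypothetical name) whose `sources` are populated at the Isotope level:

    from riid.models import MLPClassifier

    model = MLPClassifier()
    model.fit(train_ss, epochs=10, patience=5, target_level="Isotope", verbose=True)
    model.predict(train_ss)
    # Per-label probabilities are written back onto the SampleSet.
    print(train_ss.prediction_probas.head())
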
+"""This module contains a simple neural network.""" +import pandas as pd +import tensorflow as tf +from keras.api.callbacks import EarlyStopping +from keras.api.layers import Dense, Input +from keras.api.losses import CategoricalCrossentropy +from keras.api.metrics import F1Score, Precision, Recall +from keras.api.models import Model +from keras.api.optimizers import Adam +from keras.api.regularizers import l1, l2 +from keras.api.utils import split_dataset + +from riid import SampleSet, SpectraType +from riid.models.base import ModelInput, PyRIIDModel + + +class MLPClassifier(PyRIIDModel): + """Multi-layer perceptron classifier.""" + def __init__(self, activation=None, loss=None, optimizer=None, + metrics=None, l2_alpha: float = 1e-4, + activity_regularizer=None, final_activation=None): + """ + Args: + activation: activate function to use for each dense layer + loss: loss function to use for training + optimizer: tensorflow optimizer or optimizer name to use for training + metrics: list of metrics to be evaluating during training + l2_alpha: alpha value for the L2 regularization of each dense layer + activity_regularizer: regularizer function applied each dense layer output + final_activation: final activation function to apply to model output + """ + super().__init__() + + self.activation = activation + self.loss = loss + self.optimizer = optimizer + self.final_activation = final_activation + self.metrics = metrics + self.l2_alpha = l2_alpha + self.activity_regularizer = activity_regularizer + self.final_activation = final_activation + + if self.activation is None: + self.activation = "relu" + if self.loss is None: + self.loss = CategoricalCrossentropy() + if optimizer is None: + self.optimizer = Adam(learning_rate=0.01, clipnorm=0.001) + if self.metrics is None: + self.metrics = [F1Score(), Precision(), Recall()] + if self.activity_regularizer is None: + self.activity_regularizer = l1(0.0) + if self.final_activation is None: + self.final_activation = "softmax" + self.model = None + self._predict_fn = None + + def fit(self, ss: SampleSet, batch_size: int = 200, epochs: int = 20, + validation_split: float = 0.2, callbacks=None, + patience: int = 15, es_monitor: str = "val_loss", + es_mode: str = "min", es_verbose=0, target_level="Isotope", verbose: bool = False): + """Fit a model to the given `SampleSet`(s). + + Args: + ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are either + foreground (AKA, "net") or gross. + batch_size: number of samples per gradient update + epochs: maximum number of training iterations + validation_split: percentage of the training data to use as validation data + callbacks: list of callbacks to be passed to the TensorFlow `Model.fit()` method + patience: number of epochs to wait for `EarlyStopping` object + es_monitor: quantity to be monitored for `EarlyStopping` object + es_mode: mode for `EarlyStopping` object + es_verbose: verbosity level for `EarlyStopping` object + target_level: `SampleSet.sources` column level to use + verbose: whether to show detailed model training output + + Returns: + `tf.History` object. 
+ + Raises: + `ValueError` when no spectra are provided as input + """ + if ss.n_samples <= 0: + raise ValueError("No spectr[a|um] provided!") + + if ss.spectra_type == SpectraType.Gross: + self.model_inputs = (ModelInput.GrossSpectrum,) + elif ss.spectra_type == SpectraType.Foreground: + self.model_inputs = (ModelInput.ForegroundSpectrum,) + elif ss.spectra_type == SpectraType.Background: + self.model_inputs = (ModelInput.BackgroundSpectrum,) + else: + raise ValueError(f"{ss.spectra_type} is not supported in this model.") + + X = ss.get_samples() + source_contributions_df = ss.sources.T.groupby(target_level, sort=False).sum().T + model_outputs = source_contributions_df.columns.values.tolist() + Y = source_contributions_df.values + + spectra_tensor = tf.convert_to_tensor(X, dtype=tf.float32) + labels_tensor = tf.convert_to_tensor(Y, dtype=tf.float32) + training_dataset = tf.data.Dataset.from_tensor_slices((spectra_tensor, labels_tensor)) + training_dataset, validation_dataset = split_dataset( + training_dataset, + left_size=validation_split, + shuffle=True + ) + training_dataset = training_dataset.batch(batch_size=batch_size) + validation_dataset = validation_dataset.batch(batch_size=batch_size) + + if not self.model: + inputs = Input(shape=(X.shape[1],), name="Spectrum") + dense_layer_size = X.shape[1] // 2 + dense_layer = Dense( + dense_layer_size, + activation=self.activation, + activity_regularizer=self.activity_regularizer, + kernel_regularizer=l2(self.l2_alpha), + )(inputs) + outputs = Dense(Y.shape[1], activation=self.final_activation)(dense_layer) + self.model = Model(inputs, outputs) + self.model.compile(loss=self.loss, optimizer=self.optimizer, + metrics=self.metrics) + + es = EarlyStopping( + monitor=es_monitor, + patience=patience, + verbose=es_verbose, + restore_best_weights=True, + mode=es_mode, + ) + if callbacks: + callbacks.append(es) + else: + callbacks = [es] + + history = self.model.fit( + training_dataset, + epochs=epochs, + verbose=verbose, + validation_data=validation_dataset, + callbacks=callbacks, + ) + + # Update model information + self._update_info( + target_level=target_level, + model_outputs=model_outputs, + normalization=ss.spectra_state, + ) + + # Define the predict function with tf.function and input_signature + self._predict_fn = tf.function( + self._predict, + # input_signature=[tf.TensorSpec(shape=[None, X.shape[1]], dtype=tf.float32)] + experimental_relax_shapes=True + ) + + return history + + def _predict(self, input_tensor): + return self.model(input_tensor, training=False) + + def predict(self, ss: SampleSet, bg_ss: SampleSet = None): + """Classify the spectra in the provided `SampleSet`(s). + + Results are stored inside the first SampleSet's prediction-related properties. 
+ + Args: + ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are either + foreground (AKA, "net") or gross + bg_ss: `SampleSet` of `n` spectra where `n` >= 1 and the spectra are background + """ + x_test = ss.get_samples().astype(float) + if bg_ss: + X = [x_test, bg_ss.get_samples().astype(float)] + else: + X = x_test + + spectra_tensor = tf.convert_to_tensor(X, dtype=tf.float32) + results = self._predict_fn(spectra_tensor) + + col_level_idx = SampleSet.SOURCES_MULTI_INDEX_NAMES.index(self.target_level) + col_level_subset = SampleSet.SOURCES_MULTI_INDEX_NAMES[:col_level_idx+1] + ss.prediction_probas = pd.DataFrame( + data=results, + columns=pd.MultiIndex.from_tuples( + self.get_model_outputs_as_label_tuples(), + names=col_level_subset + ) + ) + + ss.classified_by = self.model_id diff --git a/riid/models/neural_nets/lpe.py b/riid/models/neural_nets/lpe.py new file mode 100644 index 00000000..346d5b3f --- /dev/null +++ b/riid/models/neural_nets/lpe.py @@ -0,0 +1,489 @@ +# Copyright 2021 National Technology & Engineering Solutions of Sandia, LLC (NTESS). +# Under the terms of Contract DE-NA0003525 with NTESS, +# the U.S. Government retains certain rights in this software. +"""This module contains the label proportion estimator.""" + +import keras +import numpy as np +import pandas as pd +import tensorflow as tf +from keras.api.activations import sigmoid, softmax +from keras.api.callbacks import EarlyStopping +from keras.api.layers import Dense, Dropout, Input +from keras.api.losses import CategoricalCrossentropy, MeanSquaredError +from keras.api.models import Model +from keras.api.regularizers import L1L2 +from scipy.interpolate import UnivariateSpline + +from riid import SampleSet +from riid.losses import (build_keras_semisupervised_loss_func, + chi_squared_diff, jensen_shannon_divergence, + normal_nll_diff, poisson_nll_diff, + reconstruction_error, sse_diff, weighted_sse_diff) +from riid.losses.sparsemax import SparsemaxLoss, sparsemax +from riid.metrics import build_keras_semisupervised_metric_func +from riid.models.base import PyRIIDModel +from riid.models.layers import L1NormLayer + + +class LabelProportionEstimator(PyRIIDModel): + """Regressor for predicting label proportions that uses a semi-supervised loss. + + Optionally, a U-spline-based out-of-distribution detection model can be fit to target a desired + false positive rate. 
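A hypothetical usage sketch of the class described above, assuming `seeds_ss` holds pure, long-collect seed spectra and `train_ss` holds labeled gross spectra (both names are placeholders):

    from riid.models import LabelProportionEstimator

    lpe = LabelProportionEstimator(hidden_layers=(256,), sup_loss="sparsemax", unsup_loss="sse")
    lpe.fit(seeds_ss, train_ss, bg_cps=300, is_gross=True, epochs=10)
    lpe.predict(train_ss, bg_cps=300, is_gross=True)
    # train_ss.prediction_probas holds the estimated proportions;
    # train_ss.info["ood"] flags spectra whose reconstruction error exceeds the fitted spline threshold.
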
+ """ + UNSUPERVISED_LOSS_FUNCS = { + "poisson_nll": poisson_nll_diff, + "normal_nll": normal_nll_diff, + "sse": sse_diff, + "weighted_sse": weighted_sse_diff, + "jsd": jensen_shannon_divergence, + "chi_squared": chi_squared_diff + } + SUPERVISED_LOSS_FUNCS = { + "sparsemax": ( + SparsemaxLoss, + { + "from_logits": True, + "reduction": tf.keras.losses.Reduction.NONE, + }, + sparsemax, + ), + "categorical_crossentropy": ( + CategoricalCrossentropy, + { + "from_logits": True, + "reduction": tf.keras.losses.Reduction.NONE, + }, + softmax, + ), + "mse": ( + MeanSquaredError, + { + "reduction": tf.keras.losses.Reduction.NONE, + }, + sigmoid, + ) + } + INFO_KEYS = ( + # model architecture + "hidden_layers", + "learning_rate", + "epsilon", + "sup_loss", + "unsup_loss", + "metrics", + "beta", + "hidden_layer_activation", + "kernel_l1_regularization", + "kernel_l2_regularization", + "bias_l1_regularization", + "bias_l2_regularization", + "activity_l1_regularization", + "activity_l2_regularization", + "dropout", + "ood_fp_rate", + "fit_spline", + "spline_bins", + "spline_k", + "spline_s", + # dictionaries + "source_dict", + # populated when loading model + "spline_snrs", + "spline_recon_errors", + ) + + def __init__(self, hidden_layers: tuple = (256,), sup_loss="sparsemax", unsup_loss="sse", + metrics: list = ["mae", "categorical_crossentropy"], beta=0.9, source_dict=None, + optimizer="adam", optimizer_kwargs=None, learning_rate: float = 1e-3, + hidden_layer_activation: str = "mish", + kernel_l1_regularization: float = 0.0, kernel_l2_regularization: float = 0.0, + bias_l1_regularization: float = 0.0, bias_l2_regularization: float = 0.0, + activity_l1_regularization: float = 0.0, activity_l2_regularization: float = 0.0, + dropout: float = 0.0, ood_fp_rate: float = 0.05, + fit_spline: bool = True, spline_bins: int = 15, spline_k: int = 3, + spline_s: int = 0, spline_snrs=None, spline_recon_errors=None): + """ + Args: + hidden_layers: tuple defining the number and size of dense layers + sup_loss: supervised loss function to use for training + unsup_loss: unsupervised loss function to use for training the + foreground branch of the network (options: "sse", "poisson_nll", + "normal_nll", "weighted_sse", "jsd", or "chi_squared") + metrics: list of metrics to be evaluating during training + beta: tradeoff parameter between the supervised and unsupervised foreground loss + source_dict: 2D array of pure, long-collect foreground spectra + optimizer: tensorflow optimizer or optimizer name to use for training + optimizer_kwargs: kwargs for optimizer + learning_rate: learning rate for the optimizer + hidden_layer_activation: activation function to use for each dense layer + kernel_l1_regularization: l1 regularization value for the kernel regularizer + kernel_l2_regularization: l2 regularization value for the kernel regularizer + bias_l1_regularization: l1 regularization value for the bias regularizer + bias_l2_regularization: l2 regularization value for the bias regularizer + activity_l1_regularization: l1 regularization value for the activity regularizer + activity_l2_regularization: l2 regularization value for the activity regularizer + dropout: amount of dropout to apply to each dense layer + ood_fp_rate: false positive rate used to determine threshold for + out-of-distribution (OOD) detection + fit_spline: whether or not to fit UnivariateSpline for OOD threshold function + spline_bins: number of bins used when fitting the UnivariateSpline threshold + function for OOD detection + spline_k: degree of smoothing 
for the UnivariateSpline + spline_s: positive smoothing factor used to choose the number of knots in the + UnivariateSpline (s=0 forces the spline through all the datapoints, equivalent to + InterpolatedUnivariateSpline) + spline_snrs: SNRs from training used as the x-values to fit the UnivariateSpline + spline_recon_errors: reconstruction errors from training used as the y-values to + fit the UnivariateSpline + """ + super().__init__() + + self.hidden_layers = hidden_layers + self.sup_loss = sup_loss + self.unsup_loss = unsup_loss + self.sup_loss_func, self.activation = self._get_sup_loss_func( + sup_loss, + prefix="sup" + ) + self.sup_loss_func_name = self.sup_loss_func.name + + self.optimizer = optimizer + if isinstance(optimizer, str): + self.optimizer = keras.optimizers.get(optimizer) + if optimizer_kwargs is not None: + for key, value in optimizer_kwargs.items(): + setattr(self.optimizer, key, value) + self.optimizer.learning_rate = learning_rate + + self.unsup_loss_func = self._get_unsup_loss_func(unsup_loss) + self.unsup_loss_func_name = f"unsup_{unsup_loss}_loss" + self.metrics = metrics + self.beta = beta + self.source_dict = source_dict + self.semisup_loss_func_name = "semisup_loss" + self.hidden_layer_activation = hidden_layer_activation + self.kernel_l1_regularization = kernel_l1_regularization + self.kernel_l2_regularization = kernel_l2_regularization + self.bias_l1_regularization = bias_l1_regularization + self.bias_l2_regularization = bias_l2_regularization + self.activity_l1_regularization = activity_l1_regularization + self.activity_l2_regularization = activity_l2_regularization + self.dropout = dropout + self.ood_fp_rate = ood_fp_rate + self.fit_spline = fit_spline + self.spline_bins = spline_bins + self.spline_k = spline_k + self.spline_s = spline_s + self.spline_snrs = spline_snrs + self.spline_recon_errors = spline_recon_errors + self.model = None + + self._update_custom_objects("L1NormLayer", L1NormLayer) + + @property + def source_dict(self) -> dict: + return self.info["source_dict"] + + @source_dict.setter + def source_dict(self, value: dict): + self.info["source_dict"] = value + + def _get_sup_loss_func(self, loss_func_str, prefix): + if loss_func_str not in self.SUPERVISED_LOSS_FUNCS: + raise KeyError(f"'{loss_func_str}' is not a supported supervised loss function.") + func, kwargs, activation = self.SUPERVISED_LOSS_FUNCS[loss_func_str] + loss_func_name = f"{prefix}_{loss_func_str}_loss" + return func(name=loss_func_name, **kwargs), activation + + def _get_unsup_loss_func(self, loss_func_str): + if loss_func_str not in self.UNSUPERVISED_LOSS_FUNCS: + raise KeyError(f"'{loss_func_str}' is not a supported unsupervised loss function.") + return self.UNSUPERVISED_LOSS_FUNCS[loss_func_str] + + def _initialize_model(self, input_size, output_size): + spectra_input = Input(input_size, name="input_spectrum") + spectra_norm = L1NormLayer(name="normalized_input_spectrum")(spectra_input) + x = spectra_norm + for layer, nodes in enumerate(self.hidden_layers): + x = Dense( + nodes, + activation=self.hidden_layer_activation, + kernel_regularizer=L1L2( + l1=self.kernel_l1_regularization, + l2=self.kernel_l2_regularization + ), + bias_regularizer=L1L2( + l1=self.bias_l1_regularization, + l2=self.bias_l2_regularization + ), + activity_regularizer=L1L2( + l1=self.activity_l1_regularization, + l2=self.activity_l2_regularization + ), + name=f"dense_{layer}" + )(x) + + if self.dropout > 0: + x = Dropout(self.dropout)(x) + output = Dense( + output_size, + activation="linear", + 
name="output" + )(x) + + self.model = Model(inputs=[spectra_input], outputs=[output]) + + def _get_info_as_dict(self): + info_dict = {} + for k, v in vars(self).items(): + if k not in self.INFO_KEYS: + continue + if isinstance(v, np.ndarray): + info_dict[k] = v.tolist() + else: + info_dict[k] = v + return info_dict + + def _get_spline_threshold_func(self): + return UnivariateSpline( + self.info["avg_snrs"], + self.info["thresholds"], + k=self.spline_k, + s=self.spline_s + ) + + def _fit_spline_threshold_func(self): + out = pd.qcut( + np.array(self.spline_snrs), + self.spline_bins, + labels=False, + ) + thresholds = [ + np.quantile(np.array(self.spline_recon_errors)[out == int(i)], 1-self.ood_fp_rate) + for i in range(self.spline_bins) + ] + avg_snrs = [ + np.mean(np.array(self.spline_snrs)[out == int(i)]) for i in range(self.spline_bins) + ] + self._update_info( + avg_snrs=avg_snrs, + thresholds=thresholds, + spline_k=self.spline_k, + spline_s=self.spline_s, + ) + + def _get_snrs(self, ss: SampleSet, bg_cps: float, is_gross: bool) -> np.ndarray: + fg_counts = ss.info.total_counts.values.astype("float64") + bg_counts = ss.info.live_time.values * bg_cps + if is_gross: + fg_counts = fg_counts - bg_counts + snrs = fg_counts / np.sqrt(bg_counts) + return snrs + + def fit(self, seeds_ss: SampleSet, ss: SampleSet, bg_cps: int = 300, is_gross: bool = False, + batch_size: int = 10, epochs: int = 20, validation_split: float = 0.2, + callbacks=None, patience: int = 15, es_monitor: str = "val_loss", + es_mode: str = "min", es_verbose=0, es_min_delta: float = 0.0, + normalize_sup_loss: bool = True, normalize_func=tf.math.tanh, + normalize_scaler: float = 1.0, target_level="Isotope", verbose: bool = False): + """Fit a model to the given SampleSet(s). + + Args: + seeds_ss: `SampleSet` of pure, long-collect spectra + ss: `SampleSet` of `n` gross or foreground spectra where `n` >= 1 + bg_cps: background rate assumption used for calculating SNR in spline function + using in OOD detection + is_gross: whether `ss` contains gross spectra + batch_size: number of samples per gradient update + epochs: maximum number of training iterations + validation_split: proportion of training data to use as validation data + callbacks: list of callbacks to be passed to TensorFlow Model.fit() method + patience: number of epochs to wait for `EarlyStopping` object + es_monitor: quantity to be monitored for `EarlyStopping` object + es_mode: mode for `EarlyStopping` object + es_verbose: verbosity level for `EarlyStopping` object + es_min_delta: minimum change to count as an improvement for early stopping + normalize_sup_loss: whether to normalize the supervised loss term + normalize_func: normalization function used for supervised loss term + normalize_scaler: scalar that sets the steepness of the normalization function + target_level: source level to target for model output + verbose: whether model training output is printed to the terminal + """ + spectra = ss.get_samples().astype(float) + sources_df = ss.sources.T.groupby(target_level, sort=False).sum().T + sources = sources_df.values.astype(float) + self.sources_columns = sources_df.columns + + if verbose: + print("Building dictionary...") + + if self.source_dict is None: + self.source_dict = _get_reordered_spectra( + seeds_ss.spectra, + seeds_ss.sources, + self.sources_columns, + target_level=target_level + ).values + + if not self.model: + if verbose: + print("Initializing model...") + self._initialize_model( + (ss.n_channels,), + sources.shape[1], + ) + elif verbose: + 
print("Model already initialized.") + + if verbose: + print("Building loss functions...") + + self.semisup_loss_func = build_keras_semisupervised_loss_func( + self.sup_loss_func, + self.unsup_loss_func, + self.source_dict, + self.beta, + self.activation, + n_labels=sources.shape[1], + normalize=normalize_sup_loss, + normalize_func=normalize_func, + normalize_scaler=normalize_scaler + ) + + semisup_metrics = None + if self.metrics: + if verbose: + print("Building metric functions...") + semisup_metrics = [] + for each in self.metrics: + if isinstance(each, str): + semisup_metrics.append( + build_keras_semisupervised_metric_func( + tf.keras.metrics.get(each), + self.activation, + sources.shape[1] + ) + ) + else: + semisup_metrics.append( + build_keras_semisupervised_metric_func( + each, + self.activation, + sources.shape[1] + ) + ) + + self.model.compile( + loss=self.semisup_loss_func, + optimizer=self.optimizer, + metrics=semisup_metrics + ) + + es = EarlyStopping( + monitor=es_monitor, + patience=patience, + verbose=es_verbose, + restore_best_weights=True, + mode=es_mode, + min_delta=es_min_delta, + ) + + if callbacks: + callbacks.append(es) + else: + callbacks = [es] + + history = self.model.fit( + spectra, + np.append(sources, spectra, axis=1), + epochs=epochs, + verbose=verbose, + validation_split=validation_split, + callbacks=callbacks, + shuffle=True, + batch_size=batch_size + ) + + if self.fit_spline: + if verbose: + print("Finding OOD detection threshold function...") + + train_logits = self.model.predict(spectra, verbose=0) + train_lpes = self.activation(tf.convert_to_tensor(train_logits, dtype=tf.float32)) + self.spline_recon_errors = reconstruction_error( + tf.convert_to_tensor(spectra, dtype=tf.float32), + train_lpes, + self.source_dict, + self.unsup_loss_func + ).numpy() + self.spline_snrs = self._get_snrs(ss, bg_cps, is_gross) + self._fit_spline_threshold_func() + + info = self._get_info_as_dict() + self._update_info( + target_level=target_level, + model_outputs=sources_df.columns.values.tolist(), + normalization=ss.spectra_state, + **info, + ) + + return history + + def predict(self, ss: SampleSet, bg_cps: int = 300, is_gross: bool = False, verbose=False): + """Estimate the proportions of counts present in each sample of the provided SampleSet. + + Results are stored inside the SampleSet's prediction_probas property. + + Args: + ss: `SampleSet` of `n` foreground or gross spectra where `n` >= 1 + bg_cps: background rate used for estimating sample SNRs. + If background rate varies to a significant degree, split up sampleset + by SNR and make multiple calls to this method. 
+ is_gross: whether `ss` contains gross spectra + """ + test_spectra = ss.get_samples().astype(float) + + logits = self.model.predict(test_spectra, verbose=verbose) + lpes = self.activation(tf.convert_to_tensor(logits, dtype=tf.float32)) + + col_level_idx = SampleSet.SOURCES_MULTI_INDEX_NAMES.index(self.target_level) + col_level_subset = SampleSet.SOURCES_MULTI_INDEX_NAMES[:col_level_idx+1] + ss.prediction_probas = pd.DataFrame( + data=lpes, + columns=pd.MultiIndex.from_tuples( + self.get_model_outputs_as_label_tuples(), + names=col_level_subset + ) + ) + + # Fill in unsupervised losses + recon_errors = reconstruction_error( + tf.convert_to_tensor(test_spectra, dtype=tf.float32), + lpes, + self.source_dict, + self.unsup_loss_func + ).numpy() + + if self.fit_spline: + snrs = self._get_snrs(ss, bg_cps, is_gross) + thresholds = self._get_spline_threshold_func()(snrs) + is_ood = recon_errors > thresholds + ss.info["ood"] = is_ood + + ss.info["recon_error"] = recon_errors + + +def _get_reordered_spectra(old_spectra_df: pd.DataFrame, old_sources_df: pd.DataFrame, + new_sources_columns, target_level) -> pd.DataFrame: + collapsed_sources_df = old_sources_df\ + .T.groupby(target_level)\ + .sum().T + reordered_spectra_df = old_spectra_df.iloc[ + collapsed_sources_df[ + new_sources_columns + ].idxmax() + ].reset_index(drop=True) + + return reordered_spectra_df diff --git a/riid/visualize.py b/riid/visualize.py index 81dc3c3d..ef72aa93 100644 --- a/riid/visualize.py +++ b/riid/visualize.py @@ -15,7 +15,7 @@ from seaborn import heatmap from sklearn.metrics import confusion_matrix as confusion_matrix_sklearn -from riid.data.sampleset import SampleSet +from riid import SampleSet # DO NOT TOUCH what is set below nor override them inside a function. plt.style.use("default") diff --git a/tests/anomaly_tests.py b/tests/anomaly_tests.py index 35143a1d..daddb95d 100644 --- a/tests/anomaly_tests.py +++ b/tests/anomaly_tests.py @@ -6,10 +6,8 @@ import numpy as np +from riid import PassbySynthesizer, SeedMixer, get_dummy_seeds from riid.anomaly import PoissonNChannelEventDetector -from riid.data.synthetic.passby import PassbySynthesizer -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic import get_dummy_seeds class TestAnomaly(unittest.TestCase): diff --git a/tests/data_tests.py b/tests/data_tests.py index 022bedb7..cc8a1c90 100644 --- a/tests/data_tests.py +++ b/tests/data_tests.py @@ -6,10 +6,10 @@ import tempfile import unittest -from riid import SAMPLESET_HDF_FILE_EXTENSION +from riid import (SAMPLESET_HDF_FILE_EXTENSION, SampleSet, get_dummy_seeds, + read_hdf) from riid.data.labeling import label_to_index_element -from riid.data.sampleset import SampleSet, _write_hdf, read_hdf -from riid.data.synthetic import get_dummy_seeds +from riid.data.sampleset import _write_hdf class TestData(unittest.TestCase): diff --git a/tests/gadras_tests.py b/tests/gadras_tests.py index 9210312d..680048eb 100644 --- a/tests/gadras_tests.py +++ b/tests/gadras_tests.py @@ -5,7 +5,8 @@ import unittest import pandas as pd -from riid.data.synthetic import get_dummy_seeds + +from riid import get_dummy_seeds from riid.gadras.pcf import (_pack_compressed_text_buffer, _unpack_compressed_text_buffer) diff --git a/tests/model_tests.py b/tests/model_tests.py index ea430abc..95ff6897 100644 --- a/tests/model_tests.py +++ b/tests/model_tests.py @@ -8,15 +8,12 @@ import numpy as np import pandas as pd -from riid.data.sampleset import SampleSet -from riid.data.synthetic import get_dummy_seeds -from 
riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer -from riid.models import PyRIIDModel -from riid.models.bayes import (NegativeSpectrumError, PoissonBayesClassifier, - ZeroTotalCountsError) -from riid.models.neural_nets import (LabelProportionEstimator, MLPClassifier) -from riid.models.neural_nets.arad import ARADLatentPredictor, ARADv1, ARADv2 +from riid import SampleSet, SeedMixer, StaticSynthesizer, get_dummy_seeds +from riid.models import (ARADLatentPredictor, ARADv1, ARADv2, + LabelProportionEstimator, MLPClassifier, + PoissonBayesClassifier) +from riid.models.base import PyRIIDModel +from riid.models.bayes import NegativeSpectrumError, ZeroTotalCountsError class TestModels(unittest.TestCase): diff --git a/tests/sampleset_tests.py b/tests/sampleset_tests.py index 63a5a969..887f5308 100644 --- a/tests/sampleset_tests.py +++ b/tests/sampleset_tests.py @@ -8,13 +8,11 @@ import numpy as np import pandas as pd +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds from riid.data.sampleset import (ChannelCountMismatchError, InvalidSampleCountError, SampleSet, SpectraState, SpectraStateMismatchError, SpectraType, _get_row_labels) -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer class TestSampleSet(unittest.TestCase): diff --git a/tests/seedmixer_tests.py b/tests/seedmixer_tests.py index e986f891..00ac6280 100644 --- a/tests/seedmixer_tests.py +++ b/tests/seedmixer_tests.py @@ -6,10 +6,8 @@ import numpy as np from scipy.spatial.distance import jensenshannon -from riid.data.sampleset import SampleSet -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer +from riid import SampleSet, SeedMixer, get_dummy_seeds class TestSeedMixer(unittest.TestCase): diff --git a/tests/staticsynth_tests.py b/tests/staticsynth_tests.py index 91684708..afbd297a 100644 --- a/tests/staticsynth_tests.py +++ b/tests/staticsynth_tests.py @@ -7,12 +7,11 @@ import numpy as np import pandas as pd +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds from riid.data import InvalidSeedError, get_expected_spectra -from riid.data.synthetic import (Synthesizer, get_dummy_seeds, - get_merged_sources_samplewise, - get_samples_per_seed) -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer +from riid.data.synthetic.base import (Synthesizer, + get_merged_sources_samplewise, + get_samples_per_seed) class TestStaticSynthesis(unittest.TestCase): diff --git a/tests/visualize_tests.py b/tests/visualize_tests.py index 151e9f48..ec318227 100644 --- a/tests/visualize_tests.py +++ b/tests/visualize_tests.py @@ -6,11 +6,9 @@ import numpy as np -from riid.data.synthetic import get_dummy_seeds -from riid.data.synthetic.seed import SeedMixer -from riid.data.synthetic.static import StaticSynthesizer +from riid import SeedMixer, StaticSynthesizer, get_dummy_seeds from riid.metrics import precision_recall_curve -from riid.models.neural_nets import MLPClassifier +from riid.models import MLPClassifier from riid.visualize import (plot_correlation_between_all_labels, plot_count_rate_history, plot_label_and_prediction_distributions,