From 9fa268e4119e5fcf2de644ebd061361360836417 Mon Sep 17 00:00:00 2001 From: Jon Barker Date: Thu, 4 Apr 2024 20:41:30 +0100 Subject: [PATCH 1/2] Fix to the CEC3 baseline recipes The amplifier was appearing in the 'evaluate' stage, which was how things were done for ICASSP 2023 where teams were not expected to change it. It has now been moved into the 'enhance' stage so that it can be changed by the teams. Note, this has not changed the scores. The enhance + evaluate still perform the same operations as before. ie, enhance + (amplify + score) => (enhance + amplify) + score --- recipes/cec3/README.md | 19 ++++++++++---- recipes/cec3/baseline/enhance.py | 39 +++++++++++++++++++++++----- recipes/cec3/baseline/evaluate.py | 42 +++---------------------------- 3 files changed, 51 insertions(+), 49 deletions(-) diff --git a/recipes/cec3/README.md b/recipes/cec3/README.md index 8d8b6c4d..2b51b0f5 100644 --- a/recipes/cec3/README.md +++ b/recipes/cec3/README.md @@ -65,22 +65,31 @@ These can be set in the `config.yaml` file or provided on the command line. In t The baseline enhancement simply takes the 6-channel hearing aid inputs and reduces this to a stereo hearing aid output by passing through the 'front' microphone signal of the left and right ear. -Alternatively, you can provide the root variable on the command line, e.g., +The stereo pair is then passed through a provided hearing aid amplification stage using a NAL-R [[1](#references)] fitting amplification and a simple automatic gain compressor. The amplification is determined by the audiograms defined by the scene-listener pairs in `clarity_data/metadata/scenes_listeners.dev.json` for the development set. After amplification, the evaluate function calculates the better-ear HASPI [[2](#references)]. 
+ +To run the baseline enhancement system, first set the `task`, `path.root` and `path.exp` variables in the `config.yaml` file and then run, + +```bash +python enhance.py +``` + +Alternatively, you can provide the task and paths on the command line, e.g., ```bash python enhance.py task=task1 path.root=/Users/jon/clarity_CEC3_data path.exp=/Users/jon/exp ``` -Where '/Users/jon' is replaced with the path to the root of the clarity data and the experiment folder. +Where `/Users/jon` is replaced with the path to the root of the clarity data and the experiment folder. -The folder `enhanced_signals` will appear in the `exp` folder. Note, the experiment folder will be created if it does not already exist. +The folders `enhanced_signals` and `amplified_signals` will appear in the `exp` folder. Note, the experiment folder will be created if it does not already exist. ### Evaluation -The `evaluate.py` will first pass signals through a provided hearing aid amplification stage using a NAL-R [[1](#references)] fitting amplification and a simple automatic gain compressor. The amplification is determined by the audiograms defined by the scene-listener pairs in `clarity_data/metadata/scenes_listeners.dev.json` for the development set. After amplification, the evaluate function calculates the better-ear HASPI [[2](#references)]. +The evaluate script computes the HASPI scores for the signals stored in the `amplified_signals` folder. The script will read the scene-listener pairs from the development set and calculate the HASPI score for each pair. The final score is the mean HASPI score across all pairs. It can be run as, ```bash -python evaluate.py +python evaluate.py task=task1 path.root=/Users/jon/clarity_CEC3_data path.exp=/Users/jon/exp + ``` The full evaluation set is 7500 scene-listener pairs and will take a long time to run, i.e., around 8 hours on a MacBook Pro. A standard small set which uses 1/15 of the data has been defined. 
This takes around 30 minutes to evaluate and can be run with, diff --git a/recipes/cec3/baseline/enhance.py b/recipes/cec3/baseline/enhance.py index 626f6ecd..b6f0464f 100644 --- a/recipes/cec3/baseline/enhance.py +++ b/recipes/cec3/baseline/enhance.py @@ -10,12 +10,22 @@ from scipy.io import wavfile from tqdm import tqdm -from clarity.utils.audiogram import Listener +from clarity.enhancer.compressor import Compressor +from clarity.enhancer.nalr import NALR +from clarity.utils.audiogram import Audiogram, Listener from recipes.icassp_2023.baseline.evaluate import make_scene_listener_list logger = logging.getLogger(__name__) +def amplify_signal(signal, audiogram: Audiogram, enhancer, compressor): + """Amplify signal for a given audiogram""" + nalr_fir, _ = enhancer.build(audiogram) + out = enhancer.apply(nalr_fir, signal) + out, _, _ = compressor.process(out) + return out + + @hydra.main(config_path=".", config_name="config") def enhance(cfg: DictConfig) -> None: """Run the dummy enhancement.""" @@ -27,6 +37,10 @@ def enhance(cfg: DictConfig) -> None: scenes_listeners = json.load(fp) listener_dict = Listener.load_listener_dict(cfg.path.listeners_file) + enhancer = NALR(**cfg.nalr) + compressor = Compressor(**cfg.compressor) + amplified_folder = pathlib.Path(cfg.path.exp) / "amplified_signals" + amplified_folder.mkdir(parents=True, exist_ok=True) # Make list of all scene listener pairs that will be run scene_listener_pairs = make_scene_listener_list( @@ -56,14 +70,27 @@ def enhance(cfg: DictConfig) -> None: # pylint: disable=unused-variable listener = listener_dict[listener_id] # noqa: F841 - # Note: The audiograms are stored in the listener object, - # but they are not needed for the baseline + wavfile.write( + enhanced_folder / f"{scene}_{listener_id}_enhanced.wav", sample_rate, signal + ) + + # Apply the baseline NALR amplification - # Baseline just reads the signal from the front microphone pair - # and write it out as the enhanced signal + out_l = amplify_signal( 
+ signal[:, 0], listener.audiogram_left, enhancer, compressor + ) + out_r = amplify_signal( + signal[:, 1], listener.audiogram_right, enhancer, compressor + ) + amplified = np.stack([out_l, out_r], axis=1) + + if cfg.soft_clip: + amplified = np.tanh(amplified) wavfile.write( - enhanced_folder / f"{scene}_{listener_id}_enhanced.wav", sample_rate, signal + amplified_folder / f"{scene}_{listener_id}_HA-output.wav", + sample_rate, + amplified.astype(np.float32), ) diff --git a/recipes/cec3/baseline/evaluate.py b/recipes/cec3/baseline/evaluate.py index c3b9d620..c352a697 100644 --- a/recipes/cec3/baseline/evaluate.py +++ b/recipes/cec3/baseline/evaluate.py @@ -13,22 +13,12 @@ from scipy.io import wavfile from tqdm import tqdm -from clarity.enhancer.compressor import Compressor -from clarity.enhancer.nalr import NALR from clarity.evaluator.haspi import haspi_v2_be -from clarity.utils.audiogram import Audiogram, Listener +from clarity.utils.audiogram import Listener logger = logging.getLogger(__name__) -def amplify_signal(signal, audiogram: Audiogram, enhancer, compressor): - """Amplify signal for a given audiogram""" - nalr_fir, _ = enhancer.build(audiogram) - out = enhancer.apply(nalr_fir, signal) - out, _, _ = compressor.process(out) - return out - - def set_scene_seed(scene): """Set a seed that is unique for the given scene""" scene_encoded = hashlib.md5(scene.encode("utf-8")).hexdigest() @@ -100,16 +90,12 @@ def run_calculate_si(cfg: DictConfig) -> None: scenes_listeners = json.load(fp) listeners_dict = Listener.load_listener_dict(cfg.path.listeners_file) - enhancer = NALR(**cfg.nalr) - compressor = Compressor(**cfg.compressor) - enhanced_folder = pathlib.Path(cfg.path.exp) / "enhanced_signals" amplified_folder = pathlib.Path(cfg.path.exp) / "amplified_signals" scenes_folder = pathlib.Path(cfg.path.scenes_folder) amplified_folder.mkdir(parents=True, exist_ok=True) # Make list of all scene listener pairs that will be run - scene_listener_pairs = 
make_scene_listener_list( scenes_listeners, cfg.evaluate.small_test ) @@ -135,9 +121,8 @@ def run_calculate_si(cfg: DictConfig) -> None: set_scene_seed(scene) # Read signals - sr_signal, signal = wavfile.read( - enhanced_folder / f"{scene}_{listener_id}_enhanced.wav" + amplified_folder / f"{scene}_{listener_id}_HA-output.wav", ) _, reference = wavfile.read(scenes_folder / f"{scene}_reference.wav") @@ -147,30 +132,11 @@ def run_calculate_si(cfg: DictConfig) -> None: reference = reference / 32768.0 - # amplify left and right ear signals + # Evaluate the HA-output signals listener = listeners_dict[listener_id] - out_l = amplify_signal( - signal[:, 0], listener.audiogram_left, enhancer, compressor - ) - out_r = amplify_signal( - signal[:, 1], listener.audiogram_right, enhancer, compressor - ) - amplified = np.stack([out_l, out_r], axis=1) - - if cfg.soft_clip: - amplified = np.tanh(amplified) - - wavfile.write( - amplified_folder / f"{scene}_{listener_id}_HA-output.wav", - sr_signal, - amplified.astype(np.float32), - ) - - # Evaluate the amplified signal - haspi_score = compute_metric( - haspi_v2_be, amplified, reference, listener, sr_signal + haspi_v2_be, signal, reference, listener, sr_signal ) results_file.add_result(scene, listener_id, haspi_score) From 98e615aa705c19709a2b0b0662bdb6bdcce24c18 Mon Sep 17 00:00:00 2001 From: Jon Barker Date: Thu, 4 Apr 2024 21:13:15 +0100 Subject: [PATCH 2/2] the new scipy 1.13 has breaking changes. Block for now --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1f9fa9d2..5f364040 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ dependencies = [ "pytorch-lightning", "resampy", "scikit-learn>=1.0.2", - "scipy>=1.7.3", + "scipy>=1.7.3, <1.13.0", "SoundFile>=0.10.3.post1", "soxr", "torch>=2",