From 27ba9178115dc68b100e0b756bce92c72e157a55 Mon Sep 17 00:00:00 2001 From: styagi130 Date: Thu, 24 Aug 2023 22:58:12 +0530 Subject: [PATCH] specify explicitly to set pretrained model paths (#7305) Signed-off-by: Siddharth Tyagi Co-authored-by: Siddharth Tyagi --- .../tts/FastPitch_Adapter_Finetuning.ipynb | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb b/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb index 0499c12c90ec1..263d22b60599d 100644 --- a/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb +++ b/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb @@ -80,6 +80,16 @@ "!wandb login #PASTE_WANDB_APIKEY_HERE" ] }, + { + "cell_type": "markdown", + "id": "b73283fc", + "metadata": {}, + "source": [ + "## Set finetuning params\n", + "\n", + "This notebook expects a pretrained model to finetune. If you have a pretrained multispeaker checkpoint, set the path in the next cell to the path of the pretrained checkpoint. You can also pretrain a multispeaker adapter checkpoint using the [FastPitch_MultiSpeaker_Pretraining tutorial](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb)." + ] + }, { "cell_type": "code", "execution_count": null, @@ -88,8 +98,9 @@ "outputs": [], "source": [ "# .nemo files for your pre-trained FastPitch and HiFiGAN\n", - "pretrained_fastpitch_checkpoint = \"\"\n", - "finetuned_hifigan_on_multispeaker_checkpoint = \"\"" + "pretrained_fastpitch_checkpoint = \"\"\n", + "finetuned_hifigan_on_multispeaker_checkpoint = \"\"\n", + "use_ipa = True #Set to False while using Arpabet." 
] }, { @@ -430,12 +441,15 @@ "metadata": {}, "outputs": [], "source": [ - "phoneme_dict_path = os.path.abspath(os.path.join(code_dir, \"scripts\", \"tts_dataset_files\", \"ipa_cmudict-0.7b_nv23.01.txt\"))\n", + "phone_dict_name = \"ipa_cmudict-0.7b_nv23.01.txt\" if use_ipa else \"cmudict-0.7b_nv22.10\"\n", + "phoneme_dict_path = os.path.abspath(os.path.join(code_dir, \"scripts\", \"tts_dataset_files\", phone_dict_name))\n", "heteronyms_path = os.path.abspath(os.path.join(code_dir, \"scripts\", \"tts_dataset_files\", \"heteronyms-052722\"))\n", "\n", "# Copy and Paste the PITCH_MEAN and PITCH_STD from previous steps (train_manifest) to override pitch_mean and pitch_std configs below.\n", "PITCH_MEAN=175.48513793945312\n", - "PITCH_STD=42.3786735534668" + "PITCH_STD=42.3786735534668\n", + "\n", + "config_filename = \"fastpitch_align_ipa_adapter.yaml\" if use_ipa else \"fastpitch_align_44100_adapter.yaml\"" ] }, { @@ -468,7 +482,7 @@ "source": [ "# Normally 200 epochs\n", "!cd {code_dir} && python examples/tts/fastpitch_finetune_adapters.py \\\n", - "--config-name=fastpitch_align_ipa_adapter.yaml \\\n", + "--config-name={config_filename} \\\n", "+init_from_nemo_model={pretrained_fastpitch_checkpoint} \\\n", "train_dataset={train_manifest} \\\n", "validation_datasets={valid_manifest} \\\n",