diff --git a/colab.ipynb b/colab.ipynb index 33b0697..af154c6 100644 --- a/colab.ipynb +++ b/colab.ipynb @@ -1 +1 @@ -{"cells":[{"cell_type":"markdown","metadata":{"id":"__mzyUl4-S-B"},"source":["# Omniscient Mozart\n","\n","This is a colab for demonstrating the python package `omnizart` developed by [MCTLab](https://sites.google.com/view/mctl/home).\n","\n","Github repository can be found in [Music-and-Culture-Technology-Lab/omnizart](https://github.com/Music-and-Culture-Technology-Lab/omnizart).\n","\n","Official documentation page can be found in [omnizart-doc](https://music-and-culture-technology-lab.github.io/omnizart-doc/)"]},{"cell_type":"markdown","metadata":{},"source":["## Environment Setup\n","\n","**IMPORTANT!** Don't restart the session when the pop up window ask you to! Just press cancel and continue."]},{"cell_type":"code","execution_count":null,"metadata":{"cellView":"form","id":"O-YxojSStkE8"},"outputs":[],"source":["#@title Environment Setup\n","\n","!sudo apt-get install python3.8 python3.8-distutils python3.8-dev\n","!sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1\n","!sudo apt-get install python3-pip\n","!pip install numpy Cython\n","!sudo apt-get install libsndfile-dev fluidsynth ffmpeg\n","\n","!pip install git+https://github.com/Music-and-Culture-Technology-Lab/omnizart.git\n","!omnizart download-checkpoints\n","!pip install yt-dlp\n","\n","import sys\n","sys.path.insert(0, '/usr/local/lib/python3.8/dist-packages')"]},{"cell_type":"markdown","metadata":{"id":"bacqEXNO-gYd"},"source":["# Choose an Audio\n","\n","Either upload your own MP3 file, or choose from YouTube.\n"]},{"cell_type":"code","execution_count":null,"metadata":{"cellView":"form","id":"fFxihIG2ZpCM"},"outputs":[],"source":["#@title Upload MP3 File\n","import os\n","from google.colab import files\n","from IPython import display as dsp\n","\n","ff = files.upload()\n","uploaded_audio = list(ff.keys())[0].replace(\".mp3\", \"\")\n","\n","!test -f 
\"$uploaded_audio\".wav && rm \"$uploaded_audio\".wav\n","!ffmpeg -i \"$uploaded_audio\".mp3 \"$uploaded_audio\".wav &>/dev/null\n","\n","dsp.Audio(uploaded_audio + \".mp3\") if os.path.exists(uploaded_audio + \".mp3\") else None"]},{"cell_type":"code","execution_count":null,"metadata":{"cellView":"form","id":"O2EulE2RojKQ"},"outputs":[],"source":["#@title Choose from YouTube\n","import os\n","from google.colab import files\n","from IPython import display as dsp\n","\n","url = input(\"Enter your YouTube link: \")\n","\n","try:\n"," id = url.split(\"watch?v=\")[1].split(\"&\")[0]\n"," vid = dsp.YouTubeVideo(id)\n"," dsp.display(vid)\n","except Exception:\n"," pass\n","\n","print(\"Downloading...\")\n","\n","!yt-dlp -x --audio-format mp3 --no-playlist \"$url\"\n","!yt-dlp --get-filename --no-playlist \"$url\" > tmp\n","\n","uploaded_audio = os.path.splitext(open(\"tmp\").readline().strip())[0]\n","!ffmpeg -i \"$uploaded_audio\".mp3 \"$uploaded_audio\".wav &> /dev/null\n","\n","print(f\"Finished: {uploaded_audio}\")"]},{"cell_type":"markdown","metadata":{"id":"cY6bEuFm_fn9"},"source":["# Transcribe the Audio\n","\n","There are several modes you can choose.\n","* `music-piano`: transcribe piano solo clips.\n","* `music-assemble`: transcribe classical assemble pieces.\n","* `chord`: transcribe chord progressions.\n","* `drum`: transcribe drum percussion in the audio.\n","* `vocal`: transcribe note-level vocal notes.\n","* `vocal-contour`: transcribe frame-level vocal pitch contour.\n","* `beat`: transcribe beat and down beat positions on symbolic domain *(see note 1)*.\n","\n","## Notes\n","1. The beat module only supports MIDI inputs, and thus you have to upload the MIDI file through the **Upload MP3 File** block. 
"]},{"cell_type":"code","execution_count":null,"metadata":{"cellView":"form","id":"kq5amjSovASe"},"outputs":[],"source":["#@title Transcribe\n","\n","mode = \"music-piano-v2\" #@param [\"music-piano\", \"music-piano-v2\", \"music-assemble\", \"chord\", \"drum\", \"vocal\", \"vocal-contour\", \"beat\"]\n","\n","model = \"\"\n","if mode.startswith(\"music\"):\n"," mode_list = mode.split(\"-\")\n"," mode = mode_list[0]\n"," model = \"-\".join(mode_list[1:])\n","\n","model_path = {\n"," \"piano\": \"Piano\",\n"," \"piano-v2\": \"PianoV2\",\n"," \"assemble\": \"Stream\",\n"," \"pop-song\": \"Pop\",\n"," \"\": None\n","}[model]\n","\n","!omnizart \"$mode\" transcribe \"$uploaded_audio\".wav --model-path \"$model_path\"\n","\n","# Synthesize MIDI and play\n","import wave\n","import numpy as np\n","from pretty_midi import PrettyMIDI\n","from omnizart.remote import download_large_file_from_google_drive\n","\n","\n","def write_audio_file(filename, sample_rate, data):\n"," with wave.open(filename, 'w') as wf:\n"," # Set the parameters for the wave file\n"," wf.setnchannels(1) # Mono\n"," wf.setsampwidth(2) # 2 bytes per sample (16-bit)\n"," wf.setframerate(sample_rate)\n","\n"," # Convert the data type if necessary\n"," if data.dtype != np.int16:\n"," data = np.int16(data * 32767) # Scale to 16-bit integer range\n","\n"," # Write the audio frames to the file\n"," wf.writeframes(data.tobytes())\n","\n","\n","SF2_FILE = \"general_soundfont.sf2\"\n","if not os.path.exists(SF2_FILE):\n"," print(\"Downloading soundfont...\")\n"," !curl \"https://ftp.osuosl.org/pub/musescore/soundfont/MuseScore_General/MuseScore_General.sf2\" -o $SF2_FILE\n","\n","synth_name = f\"{uploaded_audio}_synth.wav\"\n","if mode == \"vocal-contour\":\n"," os.rename(f\"{uploaded_audio}_trans.wav\", f\"{uploaded_audio}_synth.wav\")\n","else:\n"," print(\"Synthesizing MIDI...\")\n"," midi = PrettyMIDI(f\"{uploaded_audio}.mid\")\n"," raw_wav = midi.fluidsynth(fs=44100, sf2_path=SF2_FILE)\n"," 
write_audio_file(synth_name, 44100, raw_wav)\n","\n","!ffmpeg -i \"$synth_name\" \"tmp_synth.mp3\" &> /dev/null\n","!mv tmp_synth.mp3 \"$uploaded_audio\"_synth.mp3\n","\n","out_name = synth_name.replace(\".wav\", \".mp3\")\n","print(f\"Finished: {out_name}\")\n","dsp.Audio(out_name)"]},{"cell_type":"markdown","metadata":{"id":"0s43jRukAhUz"},"source":["# Download the Transribed MIDI/MP3"]},{"cell_type":"code","execution_count":null,"metadata":{"cellView":"form","id":"spemscTwpzG-"},"outputs":[],"source":["#@title Download MIDI\n","\n","files.download(f\"{uploaded_audio}.mid\")"]},{"cell_type":"code","execution_count":null,"metadata":{"cellView":"form","id":"buVg5sYZqLoG"},"outputs":[],"source":["#@title Download MP3\n","\n","files.download(out_name)"]}],"metadata":{"accelerator":"GPU","colab":{"collapsed_sections":[],"name":"Omnizart Colab","private_outputs":true,"provenance":[{"file_id":"1welXF4OEJwr_2Ju8JuFAZr-oVsl0aLew","timestamp":1607912446590}]},"kernelspec":{"display_name":"Python 3.9.6 64-bit","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.9.6 (default, Oct 18 2022, 12:41:40) \n[Clang 14.0.0 (clang-1400.0.29.202)]"},"vscode":{"interpreter":{"hash":"31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"}}},"nbformat":4,"nbformat_minor":0} +{"cells":[{"cell_type":"markdown","metadata":{"id":"__mzyUl4-S-B"},"source":["# Omniscient Mozart\n","\n","This is a colab for demonstrating the python package `omnizart` developed by [MCTLab](https://sites.google.com/view/mctl/home).\n","\n","Github repository can be found in [Music-and-Culture-Technology-Lab/omnizart](https://github.com/Music-and-Culture-Technology-Lab/omnizart).\n","\n","Official documentation page can be found in [omnizart-doc](https://music-and-culture-technology-lab.github.io/omnizart-doc/)"]},{"cell_type":"markdown","metadata":{},"source":["## Environment Setup\n","\n","**IMPORTANT!** Don't restart the session when the pop-up window asks you to! 
Just press cancel and continue."]},{"cell_type":"code","execution_count":null,"metadata":{"cellView":"form","id":"O-YxojSStkE8"},"outputs":[],"source":["#@title Environment Setup\n","\n","!sudo apt-get install python3.8 python3.8-distutils python3.8-dev\n","!sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1\n","!sudo apt-get install python3-pip\n","!pip install numpy Cython\n","!sudo apt-get install libsndfile-dev fluidsynth ffmpeg\n","\n","!pip install git+https://github.com/Music-and-Culture-Technology-Lab/omnizart.git\n","!omnizart download-checkpoints\n","!pip install yt-dlp numpy==1.23.5\n","\n","import sys\n","sys.path.insert(0, '/usr/local/lib/python3.8/dist-packages')"]},{"cell_type":"markdown","metadata":{"id":"bacqEXNO-gYd"},"source":["# Choose an Audio\n","\n","Either upload your own MP3 file, or choose from YouTube.\n"]},{"cell_type":"code","execution_count":null,"metadata":{"cellView":"form","id":"fFxihIG2ZpCM"},"outputs":[],"source":["#@title Upload MP3 File\n","import os\n","from google.colab import files\n","from IPython import display as dsp\n","\n","ff = files.upload()\n","uploaded_audio = list(ff.keys())[0].replace(\".mp3\", \"\")\n","\n","!test -f \"$uploaded_audio\".wav && rm \"$uploaded_audio\".wav\n","!ffmpeg -i \"$uploaded_audio\".mp3 \"$uploaded_audio\".wav &>/dev/null\n","\n","dsp.Audio(uploaded_audio + \".mp3\") if os.path.exists(uploaded_audio + \".mp3\") else None"]},{"cell_type":"code","execution_count":null,"metadata":{"cellView":"form","id":"O2EulE2RojKQ"},"outputs":[],"source":["#@title Choose from YouTube\n","import os\n","from google.colab import files\n","from IPython import display as dsp\n","\n","url = input(\"Enter your YouTube link: \")\n","\n","try:\n"," id = url.split(\"watch?v=\")[1].split(\"&\")[0]\n"," vid = dsp.YouTubeVideo(id)\n"," dsp.display(vid)\n","except Exception:\n"," pass\n","\n","print(\"Downloading...\")\n","\n","!yt-dlp -x --audio-format mp3 --no-playlist \"$url\"\n","!yt-dlp 
--get-filename --no-playlist \"$url\" > tmp\n","\n","uploaded_audio = os.path.splitext(open(\"tmp\").readline().strip())[0]\n","!ffmpeg -i \"$uploaded_audio\".mp3 \"$uploaded_audio\".wav &> /dev/null\n","\n","print(f\"Finished: {uploaded_audio}\")"]},{"cell_type":"markdown","metadata":{"id":"cY6bEuFm_fn9"},"source":["# Transcribe the Audio\n","\n","There are several modes you can choose from.\n","* `music-piano`: transcribe piano solo clips.\n","* `music-assemble`: transcribe classical ensemble pieces.\n","* `chord`: transcribe chord progressions.\n","* `drum`: transcribe drum percussion in the audio.\n","* `vocal`: transcribe note-level vocal notes.\n","* `vocal-contour`: transcribe frame-level vocal pitch contour.\n","* `beat`: transcribe beat and down beat positions on symbolic domain *(see note 1)*.\n","\n","## Notes\n","1. The beat module only supports MIDI inputs, and thus you have to upload the MIDI file through the **Upload MP3 File** block. "]},{"cell_type":"code","execution_count":null,"metadata":{"cellView":"form","id":"kq5amjSovASe"},"outputs":[],"source":["#@title Transcribe\n","\n","mode = \"vocal\" #@param [\"music-piano\", \"music-piano-v2\", \"music-assemble\", \"chord\", \"drum\", \"vocal\", \"vocal-contour\", \"beat\"]\n","\n","model = \"\"\n","if mode.startswith(\"music\"):\n"," mode_list = mode.split(\"-\")\n"," mode = mode_list[0]\n"," model = \"-\".join(mode_list[1:])\n","\n","model_path = {\n"," \"piano\": \"Piano\",\n"," \"piano-v2\": \"PianoV2\",\n"," \"assemble\": \"Stream\",\n"," \"pop-song\": \"Pop\",\n"," \"\": None\n","}[model]\n","\n","if model_path:\n"," !omnizart \"$mode\" transcribe \"$uploaded_audio\".wav --model-path \"$model_path\"\n","else:\n"," !omnizart \"$mode\" transcribe \"$uploaded_audio\".wav\n","\n","# Synthesize MIDI and play\n","import wave\n","import numpy as np\n","from pretty_midi import PrettyMIDI\n","from omnizart.remote import download_large_file_from_google_drive\n","\n","\n","def write_audio_file(filename, 
sample_rate, data):\n"," with wave.open(filename, 'w') as wf:\n"," # Set the parameters for the wave file\n"," wf.setnchannels(1) # Mono\n"," wf.setsampwidth(2) # 2 bytes per sample (16-bit)\n"," wf.setframerate(sample_rate)\n","\n"," # Convert the data type if necessary\n"," if data.dtype != np.int16:\n"," data = np.int16(data * 32767) # Scale to 16-bit integer range\n","\n"," # Write the audio frames to the file\n"," wf.writeframes(data.tobytes())\n","\n","\n","SF2_FILE = \"general_soundfont.sf2\"\n","if not os.path.exists(SF2_FILE):\n"," print(\"Downloading soundfont...\")\n"," !curl \"https://ftp.osuosl.org/pub/musescore/soundfont/MuseScore_General/MuseScore_General.sf2\" -o $SF2_FILE\n","\n","synth_name = f\"{uploaded_audio}_synth.wav\"\n","if mode == \"vocal-contour\":\n"," os.rename(f\"{uploaded_audio}_trans.wav\", f\"{uploaded_audio}_synth.wav\")\n","else:\n"," print(\"Synthesizing MIDI...\")\n"," midi = PrettyMIDI(f\"{uploaded_audio}.mid\")\n"," raw_wav = midi.fluidsynth(fs=44100, sf2_path=SF2_FILE)\n"," write_audio_file(synth_name, 44100, raw_wav)\n","\n","!ffmpeg -i \"$synth_name\" \"tmp_synth.mp3\" &> /dev/null\n","!mv tmp_synth.mp3 \"$uploaded_audio\"_synth.mp3\n","\n","out_name = synth_name.replace(\".wav\", \".mp3\")\n","print(f\"Finished: {out_name}\")\n","dsp.Audio(out_name)"]},{"cell_type":"markdown","metadata":{"id":"0s43jRukAhUz"},"source":["# Download the Transcribed MIDI/MP3"]},{"cell_type":"code","execution_count":null,"metadata":{"cellView":"form","id":"spemscTwpzG-"},"outputs":[],"source":["#@title Download MIDI\n","\n","files.download(f\"{uploaded_audio}.mid\")"]},{"cell_type":"code","execution_count":null,"metadata":{"cellView":"form","id":"buVg5sYZqLoG"},"outputs":[],"source":["#@title Download MP3\n","\n","files.download(out_name)"]}],"metadata":{"accelerator":"GPU","colab":{"collapsed_sections":[],"name":"Omnizart 
Colab","private_outputs":true,"provenance":[{"file_id":"1welXF4OEJwr_2Ju8JuFAZr-oVsl0aLew","timestamp":1607912446590}]},"kernelspec":{"display_name":"Python 3.9.6 64-bit","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.9.6 (default, Oct 18 2022, 12:41:40) \n[Clang 14.0.0 (clang-1400.0.29.202)]"},"vscode":{"interpreter":{"hash":"31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"}}},"nbformat":4,"nbformat_minor":0}