From 2a6a4489c6a121d9e2926f058a6a26f6dba9f8ef Mon Sep 17 00:00:00 2001
From: Swapnil Singh
Date: Thu, 24 Oct 2024 22:12:15 +0530
Subject: [PATCH 1/3] Adds script to generate audio

---
 generate_audio.py        |  0
 scripts/law_of_karma.txt | 15 +++++++++++++++
 2 files changed, 15 insertions(+)
 create mode 100644 generate_audio.py
 create mode 100644 scripts/law_of_karma.txt

diff --git a/generate_audio.py b/generate_audio.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/law_of_karma.txt b/scripts/law_of_karma.txt
new file mode 100644
index 0000000..6b9a8f0
--- /dev/null
+++ b/scripts/law_of_karma.txt
@@ -0,0 +1,15 @@
+Once upon a time, in a peaceful village nestled between the mountains, there lived a wise old teacher. She often spoke about a powerful force that guided everything in life—this force was called ‘Karma.’
+
+Karma, she explained, is like a mirror of our actions. Everything we do—whether it’s a kind gesture or an unkind one—comes back to us, like ripples in a pond. If we plant good seeds, we grow beautiful flowers. But if we plant bad seeds, we get weeds.
+
+Imagine you help someone by giving them food when they’re hungry. That kindness is like planting a good seed, and soon, good things start happening to you. Maybe someone helps you when you need it most, or you just feel happier inside.
+
+On the other hand, if you’re mean to someone or hurt them, that’s like planting a bad seed. Over time, those bad actions can grow into problems that come back into your life, causing stress or unhappiness.
+
+The law of Karma teaches us that we are responsible for what we put into the world. Every action has a reaction—like a boomerang, it comes back to us.
+
+But here’s the good news! Karma also gives us a chance to learn and grow. If we’ve made mistakes, we can choose to act with kindness, compassion, and love from today. By planting more good seeds, we can create a life full of joy and peace.
+
+So remember, every small action matters. When we live with good intentions and help others, we make the world—and our own lives—a little brighter.
+
+That is the simple beauty of the Law of Karma.
\ No newline at end of file

From 641d5b84b466fe4381c4cd8f9be345beed2a73f2 Mon Sep 17 00:00:00 2001
From: Swapnil Singh
Date: Thu, 24 Oct 2024 22:43:54 +0530
Subject: [PATCH 2/3] Adds script to generate audio

---
 .gitignore        |  1 +
 generate_audio.py | 53 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/.gitignore b/.gitignore
index cb281b3..934dbc5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -171,3 +171,4 @@ tags
 .ruff_cache
 
 wandb
+myenv
\ No newline at end of file
diff --git a/generate_audio.py b/generate_audio.py
index e69de29..29a9ddb 100644
--- a/generate_audio.py
+++ b/generate_audio.py
@@ -0,0 +1,53 @@
+import torch
+from parler_tts import ParlerTTSForConditionalGeneration
+from transformers import AutoTokenizer
+import soundfile as sf
+import os
+import sys
+
+device = "mps" if torch.backends.mps.is_available() else torch.device("cpu")
+
+# Load the model and tokenizer
+model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-mini-v1").to(device)
+tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-mini-v1")
+
+# Function to load a script from the 'scripts' folder
+def load_script(file_name):
+    script_path = os.path.join("scripts", file_name)
+    with open(script_path, "r") as file:
+        script = file.read().strip()
+    return script
+
+# Get script name from command line argument
+if len(sys.argv) < 2:
+    print("Please provide a script file name.")
+    sys.exit(1)
+
+script_file = sys.argv[1]  # First argument from the command line, e.g., 'your_script.txt'
+script_name = os.path.splitext(script_file)[0]  # Remove .txt extension
+
+# Create a folder with the script name (if it doesn't exist)
+output_folder = os.path.join("output", script_name)
+os.makedirs(output_folder, exist_ok=True)
+
+# Load the script
+prompt = load_script(script_file)
+
+# Define the description for the desired voice
+description = ("A soothing, mesmerizing, and mystical Indian female voice with soft tones, "
+               "gentle delivery, and a captivating presence. The voice should evoke calmness "
+               "and wonder, perfect for spiritual or ethereal stories. High quality and clear.")
+
+# Convert the description and prompt to input IDs
+input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
+prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
+
+# Generate the speech
+generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
+audio_arr = generation.cpu().numpy().squeeze()
+
+# Save the output as a .wav file in the corresponding folder
+output_file = os.path.join(output_folder, f"{script_name}.wav")
+sf.write(output_file, audio_arr, model.config.sampling_rate)
+
+print(f"Audio generated and saved as {output_file}")
\ No newline at end of file

From 6c8679f396b91c20ee5cefb4a6308061346e87af Mon Sep 17 00:00:00 2001
From: Swapnil Singh
Date: Thu, 24 Oct 2024 23:05:36 +0530
Subject: [PATCH 3/3] uses cuda

---
 .gitignore        | 4 +++-
 generate_audio.py | 8 ++++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 934dbc5..32d2cbd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -171,4 +171,6 @@ tags
 .ruff_cache
 
 wandb
-myenv
\ No newline at end of file
+myenv
+scripts/*
+output/*
\ No newline at end of file
diff --git a/generate_audio.py b/generate_audio.py
index 29a9ddb..d25b345 100644
--- a/generate_audio.py
+++ b/generate_audio.py
@@ -5,7 +5,8 @@
 import os
 import sys
 
-device = "mps" if torch.backends.mps.is_available() else torch.device("cpu")
+# device = "mps" if torch.backends.mps.is_available() else torch.device("cpu")
+device = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Load the model and tokenizer
 model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-mini-v1").to(device)
 tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-mini-v1")
@@ -42,8 +43,11 @@ def load_script(file_name):
 input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
 prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
 
+# Create an attention mask
+attention_mask = torch.ones(input_ids.shape, device=device)
+
 # Generate the speech
-generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
+generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids, attention_mask=attention_mask)
 audio_arr = generation.cpu().numpy().squeeze()
 
 # Save the output as a .wav file in the corresponding folder
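
Usage note (not part of the patch series above): generate_audio.py handles one script per invocation, reading the file name from sys.argv[1] and writing output/<script_name>/<script_name>.wav. The lines below are a minimal, hypothetical batch-driver sketch, assuming the layout these patches create (texts under scripts/, results under output/); it simply re-invokes the script once per .txt file.

# batch_generate.py -- hypothetical helper, not added by any of the patches above.
# Calls generate_audio.py once per .txt file in scripts/, relying on the CLI
# contract from PATCH 2/3: a single argument naming a file inside scripts/.
import subprocess
import sys
from pathlib import Path

for script in sorted(Path("scripts").glob("*.txt")):
    print(f"Generating audio for {script.name} ...")
    subprocess.run([sys.executable, "generate_audio.py", script.name], check=True)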