Added support for HD quality via a boolean `full_quality` flag in VoiceProcessor.py.
kristofferv98 · Jun 1, 2024 · 64db00f · 64db00f
1 parent 8919319
commit 64db00f
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 6 deletions.
diff --git a/README.md b/README.md
@@ -40,7 +40,7 @@ from openai_voicestream import VoiceProcessor
 api_key = os.getenv("OPENAI_API_KEY")
 
 # Initialize the VoiceProcessor with the API key and desired voice
-processor = VoiceProcessor(api_key, voice="nova")  # Using the "nova" voice
+processor = VoiceProcessor(api_key, voice="nova", full_quality=True)  # Using the "nova" voice
 
 # Example text with paragraphs to be processed
 text = """This is an example using the nova voice.
@@ -152,7 +152,7 @@ Yes, you can use OpenAI VoiceStream for commercial purposes, subject to the term
 
 ### How can I customize the voice output?
 
-OpenAI VoiceStream provides multiple voice options that you can choose from. You can specify the desired voice by passing the voice name or index to the `VoiceProcessor` constructor. Available voices include: alloy, echo, fable, onyx, nova, and shimmer.
+OpenAI VoiceStream provides multiple voice options that you can choose from. You can specify the desired voice by passing the voice name or index to the `VoiceProcessor` constructor. Available voices include: alloy, echo, fable, onyx, nova, and shimmer. To generate HD-quality audio, pass `full_quality=True` to the `VoiceProcessor` constructor (this may add a small delay, which should be barely noticeable).
 
 ### Can I control the speed or pitch of the generated audio?
 

diff --git a/Voice_generator/VoiceProcessor.py b/Voice_generator/VoiceProcessor.py
@@ -39,7 +39,7 @@ class VoiceProcessor:
         6: "shimmer"
     }
 
-    def __init__(self, api_key, voice: Union[str, int] = "alloy"):
+    def __init__(self, api_key, voice: Union[str, int] = "alloy", full_quality=False):
         """
         Initialize the VoiceProcessor with the provided API key and voice.
 
@@ -49,6 +49,7 @@ def __init__(self, api_key, voice: Union[str, int] = "alloy"):
         """
         self.api_key = api_key
         self.voice = self.validate_voice(voice)
+        self.full_quality = full_quality
         self.format = pyaudio.paInt16
         self.channels = 1
         self.rate = 24000
@@ -100,9 +101,12 @@ def process_audio(self):
 
                 if sentence is None:
                     break
-
+                if self.full_quality:
+                    quality = ""
+                else:
+                    quality = "-hd"
                 data = {
-                    "model": "tts-1",
+                    "model": f"tts-1{quality}",
                     "voice": self.voice,
                     "input": str(sentence),
                     "response_format": "pcm"
@@ -254,7 +258,7 @@ def wait_for_completion(self):
         raise ValueError("Please set the OPENAI_API_KEY environment variable.")
 
     # Initialize the VoiceProcessor with the API key and desired voice
-    processor = VoiceProcessor(api_key, voice=3)  # You can change the voice here
+    processor = VoiceProcessor(api_key, voice=3, full_quality=True)  # You can change the voice here
 
     # Example text with paragraphs to be processed
     text = """This is the first paragraph.