feat(podcast_generator): add new podcast generation tools

- Introduced podcast generator with text-to-speech functionality using OpenAI's API.
- Implemented credential validation for TTS services and API keys.
- Added support for generating podcast audio with alternating host voices.
- Included user-friendly setup with internationalized YAML configuration.
- Added SVG icon to enhance visual identification.
langgenius · Oct 12, 2024 · 2459188 · 2459188
1 parent 2846277
commit 2459188
Show file tree

Hide file tree

Showing 5 changed files with 263 additions and 0 deletions.
diff --git a/api/core/tools/provider/builtin/podcast_generator/_assets/icon.svg b/api/core/tools/provider/builtin/podcast_generator/_assets/icon.svg
diff --git a/api/core/tools/provider/builtin/podcast_generator/podcast_generator.py b/api/core/tools/provider/builtin/podcast_generator/podcast_generator.py
@@ -0,0 +1,33 @@
+from typing import Any
+
+import openai
+
+from core.tools.errors import ToolProviderCredentialValidationError
+from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
+
+
class PodcastGeneratorProvider(BuiltinToolProviderController):
    """Provider for the podcast generator tools.

    Its only job here is to validate the credentials entered for the
    provider: a TTS service name and the API key for that service.
    """

    def _validate_credentials(self, credentials: dict[str, Any]) -> None:
        """Validate the provider credentials.

        Args:
            credentials: Mapping that is read for ``tts_service`` and ``api_key``.

        Raises:
            ToolProviderCredentialValidationError: If either value is missing,
                the service is not ``"openai"``, or the key fails validation.
        """
        tts_service = credentials.get("tts_service")
        api_key = credentials.get("api_key")

        if not tts_service:
            raise ToolProviderCredentialValidationError("TTS service is not specified")

        if not api_key:
            raise ToolProviderCredentialValidationError("API key is missing")

        # OpenAI is the only service handled; reject anything else up front.
        if tts_service != "openai":
            raise ToolProviderCredentialValidationError(f"Unsupported TTS service: {tts_service}")

        self._validate_openai_credentials(api_key)

    def _validate_openai_credentials(self, api_key: str) -> None:
        """Validate an OpenAI API key by issuing a model-list request.

        Args:
            api_key: The key to check.

        Raises:
            ToolProviderCredentialValidationError: If the request is rejected as
                unauthenticated, or fails for any other reason. The original
                exception is attached as ``__cause__``.
        """
        client = openai.OpenAI(api_key=api_key)
        try:
            # A simple API call is enough to find out whether the key is accepted.
            client.models.list()
        except openai.AuthenticationError as e:
            raise ToolProviderCredentialValidationError("Invalid OpenAI API key") from e
        except Exception as e:
            # Anything else (network, rate limit, ...) is still surfaced as a
            # validation failure, with the underlying error kept as the cause.
            raise ToolProviderCredentialValidationError(f"Error validating OpenAI API key: {e}") from e
diff --git a/api/core/tools/provider/builtin/podcast_generator/podcast_generator.yaml b/api/core/tools/provider/builtin/podcast_generator/podcast_generator.yaml
@@ -0,0 +1,34 @@
# Provider manifest for the podcast generator tools.
identity:
  author: Dify
  name: podcast_generator
  label:
    en_US: Podcast Generator
    zh_Hans: 播客生成器
  description:
    en_US: Generate podcast audio using Text-to-Speech services
    zh_Hans: 使用文字转语音服务生成播客音频
  icon: icon.svg
# Credentials read by the provider's credential validation
# ("tts_service" and "api_key").
credentials_for_provider:
  tts_service:
    type: select
    required: true
    label:
      en_US: TTS Service
      zh_Hans: TTS 服务
    placeholder:
      en_US: Select a TTS service
      zh_Hans: 选择一个 TTS 服务
    options:
      # "openai" is the only value the provider's validation accepts.
      - label:
          en_US: OpenAI TTS
          zh_Hans: OpenAI TTS
        value: openai
  api_key:
    type: secret-input
    required: true
    label:
      en_US: API Key
      zh_Hans: API 密钥
    placeholder:
      en_US: Enter your TTS service API key
      zh_Hans: 输入您的 TTS 服务 API 密钥
diff --git a/api/core/tools/provider/builtin/podcast_generator/tools/podcast_audio_generator.py b/api/core/tools/provider/builtin/podcast_generator/tools/podcast_audio_generator.py
@@ -0,0 +1,77 @@
+import random
+import struct
+from typing import Any, Union
+
+import openai
+
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.errors import ToolParameterValidationError, ToolProviderCredentialValidationError
+from core.tools.tool.builtin_tool import BuiltinTool
+
+
class PodcastAudioGeneratorTool(BuiltinTool):
    """Render a two-host podcast script to a single MP3 blob.

    Each non-empty script line is synthesized with OpenAI's ``tts-1`` model,
    alternating between the two host voices, and the resulting segments are
    joined with short silent pauses in between.
    """

    def generate_silence(self, duration: float) -> bytes:
        """Return silent MP3 data lasting at least ``duration`` seconds.

        The result is a run of identical MPEG-1 Layer III frames (128 kbps,
        44.1 kHz, no CRC, no padding) whose payload is all zero bytes. Zeroed
        side information declares no audio data for the frame, so each frame
        is expected to decode as digital silence. Every frame is complete, so
        the stream stays frame-aligned for the segment that follows.

        NOTE(review): the speech segments returned by the API may use a
        different sample rate or channel layout than these frames; plain byte
        concatenation relies on the player coping with that -- confirm.

        Args:
            duration: Requested length in seconds.

        Returns:
            The silent frames, or ``b""`` when ``duration`` is not positive.
            The length is rounded up to a whole number of frames.
        """
        sample_rate = 44100
        samples_per_frame = 1152  # fixed for MPEG-1 Layer III
        frame_header = b"\xff\xfb\x90\x04"
        frame_size = 417  # 144 * 128000 // 44100 bytes, header included

        num_samples = int(duration * sample_rate)
        if num_samples <= 0:
            return b""

        # Ceiling division without importing math.
        num_frames = -(-num_samples // samples_per_frame)
        silent_frame = frame_header + bytes(frame_size - len(frame_header))
        return silent_frame * num_frames

    def _invoke(
        self, user_id: str, tool_parameters: dict[str, Any]
    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """Generate the podcast audio for a script.

        Args:
            user_id: Identifier of the invoking user (not used here).
            tool_parameters: Read for ``script`` (newline-separated lines),
                ``host1_voice`` and ``host2_voice``.

        Returns:
            On success, a text message followed by a blob message carrying the
            combined audio with mime type ``audio/mpeg``. If speech generation
            fails for any line, a single text message describing the error.

        Raises:
            ToolParameterValidationError: If a host voice is missing or the
                script contains no non-empty line.
            ToolProviderCredentialValidationError: If the runtime, its
                credentials, or the API key are missing.
        """
        # ``or ""`` also covers an explicit ``None`` value for the key.
        script = tool_parameters.get("script") or ""
        host1_voice = tool_parameters.get("host1_voice")
        host2_voice = tool_parameters.get("host2_voice")

        # Ensure voices are provided
        if not host1_voice or not host2_voice:
            raise ToolParameterValidationError("Host voices are required")

        # Drop blank lines *before* numbering, so that blank separator lines
        # neither break the host alternation nor leave a pause after the
        # final spoken line.
        spoken_lines = [line.strip() for line in script.split("\n") if line.strip()]
        if not spoken_lines:
            raise ToolParameterValidationError("Podcast script is empty")

        # Get OpenAI API key from credentials
        if not self.runtime or not self.runtime.credentials:
            raise ToolProviderCredentialValidationError("Tool runtime or credentials are missing")
        api_key = self.runtime.credentials.get("api_key")
        if not api_key:
            raise ToolProviderCredentialValidationError("OpenAI API key is missing")

        client = openai.OpenAI(api_key=api_key)

        last_index = len(spoken_lines) - 1
        audio_segments = []
        for index, text in enumerate(spoken_lines):
            # Even lines belong to host 1, odd lines to host 2.
            voice = host1_voice if index % 2 == 0 else host2_voice
            try:
                response = client.audio.speech.create(model="tts-1", voice=voice, input=text)
                audio_segments.append(response.content)
            except Exception as e:
                # Report the failure to the caller as a message instead of raising.
                return self.create_text_message(f"Error generating audio: {str(e)}")

            # Pause between lines (but not after the last one).
            if index < last_index:
                silence_duration = random.uniform(2, 5)  # random pause of 2 to 5 seconds
                audio_segments.append(self.generate_silence(silence_duration))

        # Combine audio segments
        combined_audio = b"".join(audio_segments)

        # Create a blob message with the combined audio
        return [
            self.create_text_message("Audio generated successfully"),
            self.create_blob_message(
                blob=combined_audio,
                meta={"mime_type": "audio/mpeg"},
                save_as=self.VariableKey.AUDIO,
            ),
        ]
diff --git a/api/core/tools/provider/builtin/podcast_generator/tools/podcast_audio_generator.yaml b/api/core/tools/provider/builtin/podcast_generator/tools/podcast_audio_generator.yaml
@@ -0,0 +1,95 @@
# Tool manifest for the podcast audio generator.
identity:
  name: podcast_audio_generator
  author: Dify
  label:
    en_US: Podcast Audio Generator
    zh_Hans: 播客音频生成器
description:
  human:
    en_US: Generate a podcast audio file from a script with two alternating voices using OpenAI's TTS service.
    zh_Hans: 使用 OpenAI 的 TTS 服务，从包含两个交替声音的脚本生成播客音频文件。
  llm: This tool converts a prepared podcast script into an audio file using OpenAI's Text-to-Speech service, with two specified voices for alternating hosts.
parameters:
  # The script text; the tool splits it on newlines.
  - name: script
    type: string
    required: true
    label:
      en_US: Podcast Script
      zh_Hans: 播客脚本
    human_description:
      en_US: A string containing alternating lines for two hosts, separated by newline characters.
      zh_Hans: 包含两位主持人交替台词的字符串，每行用换行符分隔。
    llm_description: A string representing the script, with alternating lines for two hosts separated by newline characters.
    form: llm
  # Voice for the first host.
  - name: host1_voice
    type: select
    required: true
    label:
      en_US: Host 1 Voice
      zh_Hans: 主持人1 音色
    human_description:
      en_US: The voice for the first host.
      zh_Hans: 第一位主持人的音色。
    llm_description: The voice identifier for the first host's voice.
    options:
      - label:
          en_US: Alloy
          zh_Hans: Alloy
        value: alloy
      - label:
          en_US: Echo
          zh_Hans: Echo
        value: echo
      - label:
          en_US: Fable
          zh_Hans: Fable
        value: fable
      - label:
          en_US: Onyx
          zh_Hans: Onyx
        value: onyx
      - label:
          en_US: Nova
          zh_Hans: Nova
        value: nova
      - label:
          en_US: Shimmer
          zh_Hans: Shimmer
        value: shimmer
    form: form
  # Voice for the second host; same option list as host1_voice.
  - name: host2_voice
    type: select
    required: true
    label:
      en_US: Host 2 Voice
      zh_Hans: 主持人2 音色
    human_description:
      en_US: The voice for the second host.
      zh_Hans: 第二位主持人的音色。
    llm_description: The voice identifier for the second host's voice.
    options:
      - label:
          en_US: Alloy
          zh_Hans: Alloy
        value: alloy
      - label:
          en_US: Echo
          zh_Hans: Echo
        value: echo
      - label:
          en_US: Fable
          zh_Hans: Fable
        value: fable
      - label:
          en_US: Onyx
          zh_Hans: Onyx
        value: onyx
      - label:
          en_US: Nova
          zh_Hans: Nova
        value: nova
      - label:
          en_US: Shimmer
          zh_Hans: Shimmer
        value: shimmer
    form: form