diff --git a/locales/en/translation.json b/locales/en/translation.json index cc10132..a237d00 100644 --- a/locales/en/translation.json +++ b/locales/en/translation.json @@ -59,15 +59,18 @@ "VoicevoxIntonation": "Intonation", "UsingGoogleTTS": "Google TTS", "UsingStyleBertVITS2": "Style-Bert-VITS2", - "StyleBertVITS2Info": "Using Style-Bert-VITS2. It supports only Japanese, English, and Chinese. It uses a local API, you need to download and launch the app that suits your environment from the site below.", + "StyleBertVITS2Info": "Using Style-Bert-VITS2. It supports only Japanese, English, and Chinese. If using a local API, you need to download and launch the app that suits your environment from the site below. Please also set up an API key if necessary.", "SpeakerSelection": "Speaker Selection", "GoogleTTSInfo": "Using Google Cloud Text-to-Speech. It supports multiple languages.", "AuthFileInstruction": "Obtain the authentication JSON file below and place it in the root folder of the repository as 'credentials.json'.", "LanguageModelURL": "Select the language model from the URL below.", "LanguageChoice": "Language Choice", - "StyleBeatVITS2LocalServerURL": "Style-Bert-VITS2 Local Server URL", - "StyleBeatVITS2ModelID": "Style-Bert-VITS2 Model ID", - "StyleBeatVITS2Style": "Style-Bert-VITS2 Style", + "StyleBeatVITS2ServerURL": "Server URL", + "StyleBeatVITS2ApiKey": "API Key", + "StyleBeatVITS2ModelID": "Model ID", + "StyleBeatVITS2Style": "Style", + "StyleBeatVITS2SdpRatio": "SDP/DP Mixing Ratio", + "StyleBeatVITS2Length": "Speech Rate", "ConversationHistory": "Conversation History", "ConversationHistoryInfo": "The latest 10 conversation texts are stored as memories.", "ConversationHistoryReset": "Reset Conversation History", diff --git a/locales/ja/translation.json b/locales/ja/translation.json index d3cc726..e332a9d 100644 --- a/locales/ja/translation.json +++ b/locales/ja/translation.json @@ -59,16 +59,19 @@ "VoicevoxIntonation": "抑揚", "UsingGoogleTTS": "Google TTSを使用する", "UsingStyleBertVITS2": "Style-Bert-VITS2を使用する", - "StyleBertVITS2Info": "Style-Bert-VITS2を使用しています。日・英・中のみに対応しています。ローカルAPIを使用するので下記のサイトから環境にあったアプリをダウンロードし、起動しておく必要があります。", + "StyleBertVITS2Info": "Style-Bert-VITS2を使用しています。日・英・中のみに対応しています。ローカルAPIを使用する場合は、下記のサイトから環境にあったアプリをダウンロードし起動しておく必要があります。必要な場合はAPIキーも設定してください。", "SpeakerSelection": "ボイスタイプ選択", "EnglishToJapanese": "英単語を日本語で読み上げる", "GoogleTTSInfo": "Google Cloud Text-to-Speechを使用しています。多言語に対応可能です。", "AuthFileInstruction": "認証用のJSONファイルを下記から取得し、リポジトリのルートフォルダに credentials.json という名称で配置してください。", "LanguageModelURL": "言語モデルは下記のURLから選択してください。", "LanguageChoice": "言語選択", - "StyleBeatVITS2LocalServerURL": "Style-Bert-VITS2 ローカルサーバーURL", - "StyleBeatVITS2ModelID": "Style-Bert-VITS2 モデルID", - "StyleBeatVITS2Style": "Style-Bert-VITS2 スタイル", + "StyleBeatVITS2ServerURL": "サーバーURL", + "StyleBeatVITS2ApiKey": "API キー", + "StyleBeatVITS2ModelID": "モデルID", + "StyleBeatVITS2Style": "スタイル", + "StyleBeatVITS2SdpRatio": "SDP/DP混合比", + "StyleBeatVITS2Length": "話速", "ConversationHistory": "会話履歴", "ConversationHistoryInfo": "直近の10会話文が記憶として保持されます。", "ConversationHistoryReset": "会話履歴リセット", diff --git a/locales/ko/translation.json b/locales/ko/translation.json index 9a9c26d..c7a835c 100644 --- a/locales/ko/translation.json +++ b/locales/ko/translation.json @@ -59,15 +59,18 @@ "VoicevoxIntonation": "억양", "UsingGoogleTTS": "Google TTS 사용", "UsingStyleBertVITS2": "Style-Bert-VITS2 사용", - "StyleBertVITS2Info": "Style-Bert-VITS2를 사용하고 있습니다. 일본어, 영어, 중국어만 지원됩니다. 로컬 API를 사용하기 때문에 아래 사이트에서 환경에 맞는 앱을 다운로드하고 실행해야 합니다.", + "StyleBertVITS2Info": "Style-Bert-VITS2를 사용하고 있습니다. 일본어, 영어, 중국어만 지원됩니다. 로컬 API를 사용하는 경우 아래 사이트에서 환경에 맞는 앱을 다운로드하고 실행해야 합니다. 필요한 경우 API 키도 설정해 주세요.", "SpeakerSelection": "보이스 타입 선택", "GoogleTTSInfo": "Google Cloud Text-to-Speech를 사용합니다. 다국어 지원이 가능합니다.", "AuthFileInstruction": "인증용 JSON 파일을 아래에서 얻어 리포지토리 루트 폴더에 credentials.json이라는 이름으로 배치하십시오.", "LanguageModelURL": "언어 모델은 아래 URL에서 선택하십시오.", "LanguageChoice": "언어 선택", - "StyleBeatVITS2LocalServerURL": "Style-Bert-VITS2 로컬 서버 URL", - "StyleBeatVITS2ModelID": "Style-Bert-VITS2 모델 ID", - "StyleBeatVITS2Style": "Style-Bert-VITS2 스타일", + "StyleBeatVITS2ServerURL": "서버 URL", + "StyleBeatVITS2ApiKey": "API 키", + "StyleBeatVITS2ModelID": "모델 ID", + "StyleBeatVITS2Style": "스타일", + "StyleBeatVITS2SdpRatio": "SDP/DP 혼합 비율", + "StyleBeatVITS2Length": "말하는 속도", "ConversationHistory": "대화 기록", "ConversationHistoryInfo": "최근 10개의 대화 문장이 기억으로 유지됩니다.", "ConversationHistoryReset": "대화 기록 재설정", diff --git a/locales/zh/translation.json b/locales/zh/translation.json index f9b6643..d3ef949 100644 --- a/locales/zh/translation.json +++ b/locales/zh/translation.json @@ -59,15 +59,18 @@ "VoicevoxIntonation": "語調", "UsingGoogleTTS": "使用 Google TTS", "UsingStyleBertVITS2": "使用 Style-Bert-VITS2", - "StyleBertVITS2Info": "使用 Style-Bert-VITS2。僅支援日語、英語和中文。它使用本地 API,您需要從以下網站下載並啟動適合您環境的應用程式。", + "StyleBertVITS2Info": "使用 Style-Bert-VITS2。僅支援日語、英語和中文。如果使用本地 API,您需要從以下網站下載並啟動適合您環境的應用程式。如有需要,請也設定 API 金鑰。", "SpeakerSelection": "選擇語音角色", "GoogleTTSInfo": "使用 Google Cloud 文字轉語音。支援多種語言。", "AuthFileInstruction": "在下方獲取認證 JSON 檔案,並將其放置於儲存庫的根目錄下,命名為 'credentials.json'。", "LanguageModelURL": "從下方 URL 選擇語言模型。", "LanguageChoice": "語言選擇", - "StyleBeatVITS2LocalServerURL": "Style-Bert-VITS2 本地伺服器URL", - "StyleBeatVITS2ModelID": "Style-Bert-VITS2 模型ID", - "StyleBeatVITS2Style": "Style-Bert-VITS2 風格", + "StyleBeatVITS2ServerURL": "伺服器 URL", + "StyleBeatVITS2ApiKey": "API 金鑰", + "StyleBeatVITS2ModelID": "模型 ID", + "StyleBeatVITS2Style": "風格", + "StyleBeatVITS2SdpRatio": "SDP/DP 混合比", + "StyleBeatVITS2Length": "語速", "ConversationHistory": "聊天記錄", "ConversationHistoryInfo": "最新的 10 個對話內容會被儲存為記憶。", "ConversationHistoryReset": "重設聊天記錄", diff --git a/src/components/settings/voice.tsx b/src/components/settings/voice.tsx index 18f33f4..91cc400 100644 --- a/src/components/settings/voice.tsx +++ b/src/components/settings/voice.tsx @@ -27,8 +27,11 @@ const Voice = () => { const stylebertvits2ServerUrl = settingsStore( (s) => s.stylebertvits2ServerUrl ) + const stylebertvits2ApiKey = settingsStore((s) => s.stylebertvits2ApiKey) const stylebertvits2ModelId = settingsStore((s) => s.stylebertvits2ModelId) const stylebertvits2Style = settingsStore((s) => s.stylebertvits2Style) + const stylebertvits2SdpRatio = settingsStore((s) => s.stylebertvits2SdpRatio) + const stylebertvits2Length = settingsStore((s) => s.stylebertvits2Length) const gsviTtsServerUrl = settingsStore((s) => s.gsviTtsServerUrl) const gsviTtsModelId = settingsStore((s) => s.gsviTtsModelId) const gsviTtsBatchSize = settingsStore((s) => s.gsviTtsBatchSize) @@ -309,7 +312,7 @@ const Voice = () => {
- {t('StyleBeatVITS2LocalServerURL')} + {t('StyleBeatVITS2ServerURL')}
{ } />
+
+ {t('StyleBeatVITS2ApiKey')} +
+
+ + settingsStore.setState({ + stylebertvits2ApiKey: e.target.value, + }) + } + /> +
{t('StyleBeatVITS2ModelID')}
@@ -356,6 +375,38 @@ const Voice = () => { } /> +
+ {t('StyleBeatVITS2SdpRatio')}: {stylebertvits2SdpRatio} +
+ { + settingsStore.setState({ + stylebertvits2SdpRatio: Number(e.target.value), + }) + }} + > +
+ {t('StyleBeatVITS2Length')}: {stylebertvits2Length} +
+ { + settingsStore.setState({ + stylebertvits2Length: Number(e.target.value), + }) + }} + > ) } else if (selectVoice === 'gsvitts') { diff --git a/src/features/messages/speakCharacter.ts b/src/features/messages/speakCharacter.ts index 331a068..5d55f8e 100644 --- a/src/features/messages/speakCharacter.ts +++ b/src/features/messages/speakCharacter.ts @@ -68,8 +68,11 @@ const createSpeakCharacter = () => { buffer = await fetchAudioStyleBertVITS2( screenplay.talk, ss.stylebertvits2ServerUrl, + ss.stylebertvits2ApiKey, ss.stylebertvits2ModelId, ss.stylebertvits2Style, + ss.stylebertvits2SdpRatio, + ss.stylebertvits2Length, ss.selectLanguage ).catch(() => null) } else if (ss.selectVoice == 'gsvitts') { @@ -224,15 +227,21 @@ export const fetchAudioGoogle = async ( export const fetchAudioStyleBertVITS2 = async ( talk: Talk, stylebertvits2ServerUrl: string, + stylebertvits2ApiKey: string, stylebertvits2ModelId: string, stylebertvits2Style: string, + stylebertvits2SdpRatio: number, + stylebertvits2Length: number, selectLanguage: Language ): Promise => { const ttsVoice = await synthesizeStyleBertVITS2Api( talk.message, stylebertvits2ServerUrl, + stylebertvits2ApiKey, stylebertvits2ModelId, stylebertvits2Style, + stylebertvits2SdpRatio, + stylebertvits2Length, selectLanguage ) return ttsVoice diff --git a/src/features/messages/synthesizeStyleBertVITS2.ts b/src/features/messages/synthesizeStyleBertVITS2.ts index 5679c62..f96d34c 100644 --- a/src/features/messages/synthesizeStyleBertVITS2.ts +++ b/src/features/messages/synthesizeStyleBertVITS2.ts @@ -3,15 +3,21 @@ import { Language } from '@/features/constants/settings' export async function synthesizeStyleBertVITS2Api( message: string, stylebertvits2ServerUrl: string, + stylebertvits2ApiKey: string, stylebertvits2ModelId: string, stylebertvits2Style: string, + stylebertvits2SdpRatio: number, + stylebertvits2Length: number, selectLanguage: Language ) { const body = { message: message, stylebertvits2ServerUrl: stylebertvits2ServerUrl, + stylebertvits2ApiKey: stylebertvits2ApiKey, stylebertvits2ModelId: stylebertvits2ModelId, stylebertvits2Style: stylebertvits2Style, + stylebertvits2SdpRatio: stylebertvits2SdpRatio, + stylebertvits2Length: stylebertvits2Length, selectLanguage: selectLanguage, type: 'stylebertvits2', } diff --git a/src/features/stores/settings.ts b/src/features/stores/settings.ts index 84fd1e7..9eb1282 100644 --- a/src/features/stores/settings.ts +++ b/src/features/stores/settings.ts @@ -45,8 +45,11 @@ interface ModelProvider { voicevoxPitch: number voicevoxIntonation: number stylebertvits2ServerUrl: string + stylebertvits2ApiKey: string stylebertvits2ModelId: string stylebertvits2Style: string + stylebertvits2SdpRatio: number + stylebertvits2Length: number gsviTtsServerUrl: string gsviTtsModelId: string gsviTtsBatchSize: number @@ -120,8 +123,11 @@ const settingsStore = create()( voicevoxPitch: 0.0, voicevoxIntonation: 1.0, stylebertvits2ServerUrl: 'http://127.0.0.1:5000', + stylebertvits2ApiKey: '', stylebertvits2ModelId: '0', stylebertvits2Style: 'Neutral', + stylebertvits2SdpRatio: 0.2, + stylebertvits2Length: 1.0, gsviTtsServerUrl: process.env.NEXT_PUBLIC_LOCAL_TTS_URL || 'http://127.0.0.1:5000/tts', gsviTtsModelId: '', @@ -181,8 +187,11 @@ const settingsStore = create()( voicevoxPitch: state.voicevoxPitch, voicevoxIntonation: state.voicevoxIntonation, stylebertvits2ServerUrl: state.stylebertvits2ServerUrl, + stylebertvits2ApiKey: state.stylebertvits2ApiKey, stylebertvits2ModelId: state.stylebertvits2ModelId, stylebertvits2Style: state.stylebertvits2Style, + stylebertvits2SdpRatio: state.stylebertvits2SdpRatio, + stylebertvits2Length: state.stylebertvits2Length, gsviTtsServerUrl: state.gsviTtsServerUrl, gsviTtsModelId: state.gsviTtsModelId, gsviTtsBatchSize: state.gsviTtsBatchSize, diff --git a/src/pages/api/stylebertvits2.ts b/src/pages/api/stylebertvits2.ts index 398ff56..38fe628 100644 --- a/src/pages/api/stylebertvits2.ts +++ b/src/pages/api/stylebertvits2.ts @@ -30,40 +30,85 @@ export default async function handler( const message = body.message const stylebertvits2ModelId = body.stylebertvits2ModelId const stylebertvits2ServerUrl = body.stylebertvits2ServerUrl + const stylebertvits2ApiKey = body.stylebertvits2ApiKey const stylebertvits2Style = body.stylebertvits2Style + const stylebertvits2SdpRatio = body.stylebertvits2SdpRatio + const stylebertvits2Length = body.stylebertvits2Length const selectLanguage = getLanguageCode(body.selectLanguage) - const queryParams = new URLSearchParams({ - text: message, - model_id: stylebertvits2ModelId, - style: stylebertvits2Style, - language: selectLanguage, - }) - try { - const voice = await fetch( - `${stylebertvits2ServerUrl.replace(/\/$/, '')}/voice?${queryParams}`, - { - method: 'GET', - headers: { - 'Content-Type': 'audio/wav', - }, + if (!stylebertvits2ServerUrl.includes('https://api.runpod.ai')) { + const queryParams = new URLSearchParams({ + text: message, + model_id: stylebertvits2ModelId, + style: stylebertvits2Style, + sdp_ratio: stylebertvits2SdpRatio, + length: stylebertvits2Length, + language: selectLanguage, + }) + + const voice = await fetch( + `${stylebertvits2ServerUrl.replace(/\/$/, '')}/voice?${queryParams}`, + { + method: 'GET', + headers: { + 'Content-Type': 'audio/wav', + }, + } + ) + + if (!voice.ok) { + throw new Error( + `サーバーからの応答が異常です。ステータスコード: ${voice.status}` + ) } - ) - if (!voice.ok) { - throw new Error( - `サーバーからの応答が異常です。ステータスコード: ${voice.status}` + const arrayBuffer = await voice.arrayBuffer() + const buffer = Buffer.from(arrayBuffer) + res.writeHead(200, { + 'Content-Type': 'audio/wav', + 'Content-Length': buffer.length, + }) + res.end(buffer) + } else { + const voice = await fetch( + `${stylebertvits2ServerUrl.replace(/\/$/, '')}`, + { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${stylebertvits2ApiKey}`, + }, + body: JSON.stringify({ + input: { + action: '/voice', + model_id: stylebertvits2ModelId, + text: message, + style: stylebertvits2Style, + sdp_ratio: stylebertvits2SdpRatio, + length: stylebertvits2Length, + language: selectLanguage, + }, + }), + } ) - } - const arrayBuffer = await voice.arrayBuffer() - const buffer = Buffer.from(arrayBuffer) - res.writeHead(200, { - 'Content-Type': 'audio/wav', - 'Content-Length': buffer.length, - }) - res.end(buffer) + if (!voice.ok) { + throw new Error( + `サーバーからの応答が異常です。ステータスコード: ${voice.status}` + ) + } + + const voiceData = await voice.json() + const base64Audio = voiceData.output.voice + const buffer = Buffer.from(base64Audio, 'base64') + + res.writeHead(200, { + 'Content-Type': 'audio/wav', + 'Content-Length': buffer.length, + }) + res.end(buffer) + } } catch (error: any) { res.status(500).json({ error: error.message }) }