diff --git a/locales/en/translation.json b/locales/en/translation.json
index cc10132..a237d00 100644
--- a/locales/en/translation.json
+++ b/locales/en/translation.json
@@ -59,15 +59,18 @@
"VoicevoxIntonation": "Intonation",
"UsingGoogleTTS": "Google TTS",
"UsingStyleBertVITS2": "Style-Bert-VITS2",
- "StyleBertVITS2Info": "Using Style-Bert-VITS2. It supports only Japanese, English, and Chinese. It uses a local API, you need to download and launch the app that suits your environment from the site below.",
+ "StyleBertVITS2Info": "Using Style-Bert-VITS2. It supports only Japanese, English, and Chinese. If using a local API, you need to download and launch the app that suits your environment from the site below. Please also set up an API key if necessary.",
"SpeakerSelection": "Speaker Selection",
"GoogleTTSInfo": "Using Google Cloud Text-to-Speech. It supports multiple languages.",
"AuthFileInstruction": "Obtain the authentication JSON file below and place it in the root folder of the repository as 'credentials.json'.",
"LanguageModelURL": "Select the language model from the URL below.",
"LanguageChoice": "Language Choice",
- "StyleBeatVITS2LocalServerURL": "Style-Bert-VITS2 Local Server URL",
- "StyleBeatVITS2ModelID": "Style-Bert-VITS2 Model ID",
- "StyleBeatVITS2Style": "Style-Bert-VITS2 Style",
+ "StyleBeatVITS2ServerURL": "Server URL",
+ "StyleBeatVITS2ApiKey": "API Key",
+ "StyleBeatVITS2ModelID": "Model ID",
+ "StyleBeatVITS2Style": "Style",
+ "StyleBeatVITS2SdpRatio": "SDP/DP Mixing Ratio",
+ "StyleBeatVITS2Length": "Speech Rate",
"ConversationHistory": "Conversation History",
"ConversationHistoryInfo": "The latest 10 conversation texts are stored as memories.",
"ConversationHistoryReset": "Reset Conversation History",
diff --git a/locales/ja/translation.json b/locales/ja/translation.json
index d3cc726..e332a9d 100644
--- a/locales/ja/translation.json
+++ b/locales/ja/translation.json
@@ -59,16 +59,19 @@
"VoicevoxIntonation": "抑揚",
"UsingGoogleTTS": "Google TTSを使用する",
"UsingStyleBertVITS2": "Style-Bert-VITS2を使用する",
- "StyleBertVITS2Info": "Style-Bert-VITS2を使用しています。日・英・中のみに対応しています。ローカルAPIを使用するので下記のサイトから環境にあったアプリをダウンロードし、起動しておく必要があります。",
+ "StyleBertVITS2Info": "Style-Bert-VITS2を使用しています。日・英・中のみに対応しています。ローカルAPIを使用する場合は、下記のサイトから環境にあったアプリをダウンロードし起動しておく必要があります。必要な場合はAPIキーも設定してください。",
"SpeakerSelection": "ボイスタイプ選択",
"EnglishToJapanese": "英単語を日本語で読み上げる",
"GoogleTTSInfo": "Google Cloud Text-to-Speechを使用しています。多言語に対応可能です。",
"AuthFileInstruction": "認証用のJSONファイルを下記から取得し、リポジトリのルートフォルダに credentials.json という名称で配置してください。",
"LanguageModelURL": "言語モデルは下記のURLから選択してください。",
"LanguageChoice": "言語選択",
- "StyleBeatVITS2LocalServerURL": "Style-Bert-VITS2 ローカルサーバーURL",
- "StyleBeatVITS2ModelID": "Style-Bert-VITS2 モデルID",
- "StyleBeatVITS2Style": "Style-Bert-VITS2 スタイル",
+ "StyleBeatVITS2ServerURL": "サーバーURL",
+ "StyleBeatVITS2ApiKey": "APIキー",
+ "StyleBeatVITS2ModelID": "モデルID",
+ "StyleBeatVITS2Style": "スタイル",
+ "StyleBeatVITS2SdpRatio": "SDP/DP混合比",
+ "StyleBeatVITS2Length": "話速",
"ConversationHistory": "会話履歴",
"ConversationHistoryInfo": "直近の10会話文が記憶として保持されます。",
"ConversationHistoryReset": "会話履歴リセット",
diff --git a/locales/ko/translation.json b/locales/ko/translation.json
index 9a9c26d..c7a835c 100644
--- a/locales/ko/translation.json
+++ b/locales/ko/translation.json
@@ -59,15 +59,18 @@
"VoicevoxIntonation": "억양",
"UsingGoogleTTS": "Google TTS 사용",
"UsingStyleBertVITS2": "Style-Bert-VITS2 사용",
- "StyleBertVITS2Info": "Style-Bert-VITS2를 사용하고 있습니다. 일본어, 영어, 중국어만 지원됩니다. 로컬 API를 사용하기 때문에 아래 사이트에서 환경에 맞는 앱을 다운로드하고 실행해야 합니다.",
+ "StyleBertVITS2Info": "Style-Bert-VITS2를 사용하고 있습니다. 일본어, 영어, 중국어만 지원됩니다. 로컬 API를 사용하는 경우 아래 사이트에서 환경에 맞는 앱을 다운로드하고 실행해야 합니다. 필요한 경우 API 키도 설정해 주세요.",
"SpeakerSelection": "보이스 타입 선택",
"GoogleTTSInfo": "Google Cloud Text-to-Speech를 사용합니다. 다국어 지원이 가능합니다.",
"AuthFileInstruction": "인증용 JSON 파일을 아래에서 얻어 리포지토리 루트 폴더에 credentials.json이라는 이름으로 배치하십시오.",
"LanguageModelURL": "언어 모델은 아래 URL에서 선택하십시오.",
"LanguageChoice": "언어 선택",
- "StyleBeatVITS2LocalServerURL": "Style-Bert-VITS2 로컬 서버 URL",
- "StyleBeatVITS2ModelID": "Style-Bert-VITS2 모델 ID",
- "StyleBeatVITS2Style": "Style-Bert-VITS2 스타일",
+ "StyleBeatVITS2ServerURL": "서버 URL",
+ "StyleBeatVITS2ApiKey": "API 키",
+ "StyleBeatVITS2ModelID": "모델 ID",
+ "StyleBeatVITS2Style": "스타일",
+ "StyleBeatVITS2SdpRatio": "SDP/DP 혼합 비율",
+ "StyleBeatVITS2Length": "말하는 속도",
"ConversationHistory": "대화 기록",
"ConversationHistoryInfo": "최근 10개의 대화 문장이 기억으로 유지됩니다.",
"ConversationHistoryReset": "대화 기록 재설정",
diff --git a/locales/zh/translation.json b/locales/zh/translation.json
index f9b6643..d3ef949 100644
--- a/locales/zh/translation.json
+++ b/locales/zh/translation.json
@@ -59,15 +59,18 @@
"VoicevoxIntonation": "語調",
"UsingGoogleTTS": "使用 Google TTS",
"UsingStyleBertVITS2": "使用 Style-Bert-VITS2",
- "StyleBertVITS2Info": "使用 Style-Bert-VITS2。僅支援日語、英語和中文。它使用本地 API,您需要從以下網站下載並啟動適合您環境的應用程式。",
+ "StyleBertVITS2Info": "使用 Style-Bert-VITS2。僅支援日語、英語和中文。如果使用本地 API,您需要從以下網站下載並啟動適合您環境的應用程式。如有需要,請也設定 API 金鑰。",
"SpeakerSelection": "選擇語音角色",
"GoogleTTSInfo": "使用 Google Cloud 文字轉語音。支援多種語言。",
"AuthFileInstruction": "在下方獲取認證 JSON 檔案,並將其放置於儲存庫的根目錄下,命名為 'credentials.json'。",
"LanguageModelURL": "從下方 URL 選擇語言模型。",
"LanguageChoice": "語言選擇",
- "StyleBeatVITS2LocalServerURL": "Style-Bert-VITS2 本地伺服器URL",
- "StyleBeatVITS2ModelID": "Style-Bert-VITS2 模型ID",
- "StyleBeatVITS2Style": "Style-Bert-VITS2 風格",
+ "StyleBeatVITS2ServerURL": "伺服器 URL",
+ "StyleBeatVITS2ApiKey": "API 金鑰",
+ "StyleBeatVITS2ModelID": "模型 ID",
+ "StyleBeatVITS2Style": "風格",
+ "StyleBeatVITS2SdpRatio": "SDP/DP 混合比",
+ "StyleBeatVITS2Length": "語速",
"ConversationHistory": "聊天記錄",
"ConversationHistoryInfo": "最新的 10 個對話內容會被儲存為記憶。",
"ConversationHistoryReset": "重設聊天記錄",
diff --git a/src/components/settings/voice.tsx b/src/components/settings/voice.tsx
index 18f33f4..91cc400 100644
--- a/src/components/settings/voice.tsx
+++ b/src/components/settings/voice.tsx
@@ -27,8 +27,11 @@ const Voice = () => {
const stylebertvits2ServerUrl = settingsStore(
(s) => s.stylebertvits2ServerUrl
)
+ const stylebertvits2ApiKey = settingsStore((s) => s.stylebertvits2ApiKey)
const stylebertvits2ModelId = settingsStore((s) => s.stylebertvits2ModelId)
const stylebertvits2Style = settingsStore((s) => s.stylebertvits2Style)
+ const stylebertvits2SdpRatio = settingsStore((s) => s.stylebertvits2SdpRatio)
+ const stylebertvits2Length = settingsStore((s) => s.stylebertvits2Length)
const gsviTtsServerUrl = settingsStore((s) => s.gsviTtsServerUrl)
const gsviTtsModelId = settingsStore((s) => s.gsviTtsModelId)
const gsviTtsBatchSize = settingsStore((s) => s.gsviTtsBatchSize)
@@ -309,7 +312,7 @@ const Voice = () => {
- {t('StyleBeatVITS2LocalServerURL')}
+ {t('StyleBeatVITS2ServerURL')}
{
}
/>
+
+ {t('StyleBeatVITS2ApiKey')}
+
+
+
+ settingsStore.setState({
+ stylebertvits2ApiKey: e.target.value,
+ })
+ }
+ />
+
{t('StyleBeatVITS2ModelID')}
@@ -356,6 +375,38 @@ const Voice = () => {
}
/>
+
+ {t('StyleBeatVITS2SdpRatio')}: {stylebertvits2SdpRatio}
+
+ {
+ settingsStore.setState({
+ stylebertvits2SdpRatio: Number(e.target.value),
+ })
+ }}
+ >
+
+ {t('StyleBeatVITS2Length')}: {stylebertvits2Length}
+
+ {
+ settingsStore.setState({
+ stylebertvits2Length: Number(e.target.value),
+ })
+ }}
+ >
>
)
} else if (selectVoice === 'gsvitts') {
diff --git a/src/features/messages/speakCharacter.ts b/src/features/messages/speakCharacter.ts
index 331a068..5d55f8e 100644
--- a/src/features/messages/speakCharacter.ts
+++ b/src/features/messages/speakCharacter.ts
@@ -68,8 +68,11 @@ const createSpeakCharacter = () => {
buffer = await fetchAudioStyleBertVITS2(
screenplay.talk,
ss.stylebertvits2ServerUrl,
+ ss.stylebertvits2ApiKey,
ss.stylebertvits2ModelId,
ss.stylebertvits2Style,
+ ss.stylebertvits2SdpRatio,
+ ss.stylebertvits2Length,
ss.selectLanguage
).catch(() => null)
} else if (ss.selectVoice == 'gsvitts') {
@@ -224,15 +227,21 @@ export const fetchAudioGoogle = async (
export const fetchAudioStyleBertVITS2 = async (
talk: Talk,
stylebertvits2ServerUrl: string,
+ stylebertvits2ApiKey: string,
stylebertvits2ModelId: string,
stylebertvits2Style: string,
+ stylebertvits2SdpRatio: number,
+ stylebertvits2Length: number,
selectLanguage: Language
): Promise => {
const ttsVoice = await synthesizeStyleBertVITS2Api(
talk.message,
stylebertvits2ServerUrl,
+ stylebertvits2ApiKey,
stylebertvits2ModelId,
stylebertvits2Style,
+ stylebertvits2SdpRatio,
+ stylebertvits2Length,
selectLanguage
)
return ttsVoice
diff --git a/src/features/messages/synthesizeStyleBertVITS2.ts b/src/features/messages/synthesizeStyleBertVITS2.ts
index 5679c62..f96d34c 100644
--- a/src/features/messages/synthesizeStyleBertVITS2.ts
+++ b/src/features/messages/synthesizeStyleBertVITS2.ts
@@ -3,15 +3,21 @@ import { Language } from '@/features/constants/settings'
export async function synthesizeStyleBertVITS2Api(
message: string,
stylebertvits2ServerUrl: string,
+ stylebertvits2ApiKey: string,
stylebertvits2ModelId: string,
stylebertvits2Style: string,
+ stylebertvits2SdpRatio: number,
+ stylebertvits2Length: number,
selectLanguage: Language
) {
const body = {
message: message,
stylebertvits2ServerUrl: stylebertvits2ServerUrl,
+ stylebertvits2ApiKey: stylebertvits2ApiKey,
stylebertvits2ModelId: stylebertvits2ModelId,
stylebertvits2Style: stylebertvits2Style,
+ stylebertvits2SdpRatio: stylebertvits2SdpRatio,
+ stylebertvits2Length: stylebertvits2Length,
selectLanguage: selectLanguage,
type: 'stylebertvits2',
}
diff --git a/src/features/stores/settings.ts b/src/features/stores/settings.ts
index 84fd1e7..9eb1282 100644
--- a/src/features/stores/settings.ts
+++ b/src/features/stores/settings.ts
@@ -45,8 +45,11 @@ interface ModelProvider {
voicevoxPitch: number
voicevoxIntonation: number
stylebertvits2ServerUrl: string
+ stylebertvits2ApiKey: string
stylebertvits2ModelId: string
stylebertvits2Style: string
+ stylebertvits2SdpRatio: number
+ stylebertvits2Length: number
gsviTtsServerUrl: string
gsviTtsModelId: string
gsviTtsBatchSize: number
@@ -120,8 +123,11 @@ const settingsStore = create()(
voicevoxPitch: 0.0,
voicevoxIntonation: 1.0,
stylebertvits2ServerUrl: 'http://127.0.0.1:5000',
+ stylebertvits2ApiKey: '',
stylebertvits2ModelId: '0',
stylebertvits2Style: 'Neutral',
+ stylebertvits2SdpRatio: 0.2,
+ stylebertvits2Length: 1.0,
gsviTtsServerUrl:
process.env.NEXT_PUBLIC_LOCAL_TTS_URL || 'http://127.0.0.1:5000/tts',
gsviTtsModelId: '',
@@ -181,8 +187,11 @@ const settingsStore = create()(
voicevoxPitch: state.voicevoxPitch,
voicevoxIntonation: state.voicevoxIntonation,
stylebertvits2ServerUrl: state.stylebertvits2ServerUrl,
+ stylebertvits2ApiKey: state.stylebertvits2ApiKey,
stylebertvits2ModelId: state.stylebertvits2ModelId,
stylebertvits2Style: state.stylebertvits2Style,
+ stylebertvits2SdpRatio: state.stylebertvits2SdpRatio,
+ stylebertvits2Length: state.stylebertvits2Length,
gsviTtsServerUrl: state.gsviTtsServerUrl,
gsviTtsModelId: state.gsviTtsModelId,
gsviTtsBatchSize: state.gsviTtsBatchSize,
diff --git a/src/pages/api/stylebertvits2.ts b/src/pages/api/stylebertvits2.ts
index 398ff56..38fe628 100644
--- a/src/pages/api/stylebertvits2.ts
+++ b/src/pages/api/stylebertvits2.ts
@@ -30,40 +30,85 @@ export default async function handler(
const message = body.message
const stylebertvits2ModelId = body.stylebertvits2ModelId
const stylebertvits2ServerUrl = body.stylebertvits2ServerUrl
+ const stylebertvits2ApiKey = body.stylebertvits2ApiKey
const stylebertvits2Style = body.stylebertvits2Style
+ const stylebertvits2SdpRatio = body.stylebertvits2SdpRatio
+ const stylebertvits2Length = body.stylebertvits2Length
const selectLanguage = getLanguageCode(body.selectLanguage)
- const queryParams = new URLSearchParams({
- text: message,
- model_id: stylebertvits2ModelId,
- style: stylebertvits2Style,
- language: selectLanguage,
- })
-
try {
- const voice = await fetch(
- `${stylebertvits2ServerUrl.replace(/\/$/, '')}/voice?${queryParams}`,
- {
- method: 'GET',
- headers: {
- 'Content-Type': 'audio/wav',
- },
+ if (!stylebertvits2ServerUrl.includes('https://api.runpod.ai')) {
+ const queryParams = new URLSearchParams({
+ text: message,
+ model_id: stylebertvits2ModelId,
+ style: stylebertvits2Style,
+ sdp_ratio: stylebertvits2SdpRatio,
+ length: stylebertvits2Length,
+ language: selectLanguage,
+ })
+
+ const voice = await fetch(
+ `${stylebertvits2ServerUrl.replace(/\/$/, '')}/voice?${queryParams}`,
+ {
+ method: 'GET',
+ headers: {
+ 'Content-Type': 'audio/wav',
+ },
+ }
+ )
+
+ if (!voice.ok) {
+ throw new Error(
+ `サーバーからの応答が異常です。ステータスコード: ${voice.status}`
+ )
}
- )
- if (!voice.ok) {
- throw new Error(
- `サーバーからの応答が異常です。ステータスコード: ${voice.status}`
+ const arrayBuffer = await voice.arrayBuffer()
+ const buffer = Buffer.from(arrayBuffer)
+ res.writeHead(200, {
+ 'Content-Type': 'audio/wav',
+ 'Content-Length': buffer.length,
+ })
+ res.end(buffer)
+ } else {
+ const voice = await fetch(
+ `${stylebertvits2ServerUrl.replace(/\/$/, '')}`,
+ {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ Authorization: `Bearer ${stylebertvits2ApiKey}`,
+ },
+ body: JSON.stringify({
+ input: {
+ action: '/voice',
+ model_id: stylebertvits2ModelId,
+ text: message,
+ style: stylebertvits2Style,
+ sdp_ratio: stylebertvits2SdpRatio,
+ length: stylebertvits2Length,
+ language: selectLanguage,
+ },
+ }),
+ }
)
- }
- const arrayBuffer = await voice.arrayBuffer()
- const buffer = Buffer.from(arrayBuffer)
- res.writeHead(200, {
- 'Content-Type': 'audio/wav',
- 'Content-Length': buffer.length,
- })
- res.end(buffer)
+ if (!voice.ok) {
+ throw new Error(
+ `サーバーからの応答が異常です。ステータスコード: ${voice.status}`
+ )
+ }
+
+ const voiceData = await voice.json()
+ const base64Audio = voiceData.output.voice
+ const buffer = Buffer.from(base64Audio, 'base64')
+
+ res.writeHead(200, {
+ 'Content-Type': 'audio/wav',
+ 'Content-Length': buffer.length,
+ })
+ res.end(buffer)
+ }
} catch (error: any) {
res.status(500).json({ error: error.message })
}