From 0ec5a9ee303f91f6d66da9e91ab726ca27f28534 Mon Sep 17 00:00:00 2001 From: Daniel Faria Date: Sun, 12 Nov 2023 18:22:08 -0300 Subject: [PATCH] add support to audio/createSpeech API (#392) --- .../openai/audio/CreateSpeechRequest.java | 45 +++++++++++++++++++ .../theokanning/openai/client/OpenAiApi.java | 4 ++ .../openai/service/OpenAiService.java | 5 +++ .../theokanning/openai/service/AudioTest.java | 19 ++++++++ 4 files changed, 73 insertions(+) create mode 100644 api/src/main/java/com/theokanning/openai/audio/CreateSpeechRequest.java diff --git a/api/src/main/java/com/theokanning/openai/audio/CreateSpeechRequest.java b/api/src/main/java/com/theokanning/openai/audio/CreateSpeechRequest.java new file mode 100644 index 00000000..6d2e69ac --- /dev/null +++ b/api/src/main/java/com/theokanning/openai/audio/CreateSpeechRequest.java @@ -0,0 +1,45 @@ +package com.theokanning.openai.audio; + +import com.fasterxml.jackson.annotation.JsonProperty; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.NonNull; + +@Builder +@NoArgsConstructor +@AllArgsConstructor +@Data +public class CreateSpeechRequest { + + /** + * The name of the model to use. + */ + @NonNull + String model; + + /** + * The text to generate audio for. The maximum length is 4096 characters. + */ + @NonNull + String input; + + /** + * The voice to use when generating the audio. + */ + @NonNull + String voice; + + /** + * The format to audio in. Supported formats are mp3, opus, aac, and flac. Defaults to mp3. + */ + @JsonProperty("response_format") + String responseFormat; + + /** + * The speed of the generated audio. Select a value from 0.25 to 4.0. Defaults to 1.0. + */ + Double speed; +} diff --git a/client/src/main/java/com/theokanning/openai/client/OpenAiApi.java b/client/src/main/java/com/theokanning/openai/client/OpenAiApi.java index 497dacd5..f2665ee2 100644 --- a/client/src/main/java/com/theokanning/openai/client/OpenAiApi.java +++ b/client/src/main/java/com/theokanning/openai/client/OpenAiApi.java @@ -2,6 +2,7 @@ import com.theokanning.openai.DeleteResult; import com.theokanning.openai.OpenAiResponse; +import com.theokanning.openai.audio.CreateSpeechRequest; import com.theokanning.openai.audio.TranscriptionResult; import com.theokanning.openai.audio.TranslationResult; import com.theokanning.openai.billing.BillingUsage; @@ -149,6 +150,9 @@ public interface OpenAiApi { @POST("/v1/audio/translations") Single createTranslation(@Body RequestBody requestBody); + @POST("/v1/audio/speech") + Single createSpeech(@Body CreateSpeechRequest requestBody); + @POST("/v1/moderations") Single createModeration(@Body ModerationRequest request); diff --git a/service/src/main/java/com/theokanning/openai/service/OpenAiService.java b/service/src/main/java/com/theokanning/openai/service/OpenAiService.java index cedbd805..ea59417e 100644 --- a/service/src/main/java/com/theokanning/openai/service/OpenAiService.java +++ b/service/src/main/java/com/theokanning/openai/service/OpenAiService.java @@ -8,6 +8,7 @@ import com.theokanning.openai.DeleteResult; import com.theokanning.openai.OpenAiError; import com.theokanning.openai.OpenAiHttpException; +import com.theokanning.openai.audio.CreateSpeechRequest; import com.theokanning.openai.audio.CreateTranscriptionRequest; import com.theokanning.openai.audio.CreateTranslationRequest; import com.theokanning.openai.audio.TranscriptionResult; @@ -347,6 +348,10 @@ public ModerationResult createModeration(ModerationRequest request) { return execute(api.createModeration(request)); } + public ResponseBody createSpeech(CreateSpeechRequest request) { + return execute(api.createSpeech(request)); + } + /** * Calls the Open AI api, returns the response, and parses error messages if the request fails */ diff --git a/service/src/test/java/com/theokanning/openai/service/AudioTest.java b/service/src/test/java/com/theokanning/openai/service/AudioTest.java index d5a54a23..9cb083de 100644 --- a/service/src/test/java/com/theokanning/openai/service/AudioTest.java +++ b/service/src/test/java/com/theokanning/openai/service/AudioTest.java @@ -1,13 +1,18 @@ package com.theokanning.openai.service; +import com.theokanning.openai.audio.CreateSpeechRequest; import com.theokanning.openai.audio.CreateTranscriptionRequest; import com.theokanning.openai.audio.CreateTranslationRequest; import com.theokanning.openai.audio.TranscriptionResult; import com.theokanning.openai.audio.TranslationResult; import org.junit.jupiter.api.Test; +import java.io.IOException; import java.time.Duration; +import okhttp3.MediaType; +import okhttp3.ResponseBody; + import static org.junit.jupiter.api.Assertions.*; @@ -69,4 +74,18 @@ void createTranslationVerbose() { assertTrue(result.getDuration() > 0); assertEquals(1, result.getSegments().size()); } + + @Test + void createSpeech() throws IOException { + CreateSpeechRequest createSpeechRequest = CreateSpeechRequest.builder() + .model("tts-1") + .input("Hello World.") + .voice("alloy") + .build(); + + final ResponseBody speech = service.createSpeech(createSpeechRequest); + assertNotNull(speech); + assertEquals(MediaType.get("audio/mpeg"), speech.contentType()); + assertTrue(speech.bytes().length > 0); + } }