From f1e587d685419765e9ab81c763610847f8fa0317 Mon Sep 17 00:00:00 2001 From: vacuityv Date: Mon, 13 Nov 2023 05:17:36 +0800 Subject: [PATCH 1/2] feat(image): new feature for dalle api. (#393) You can set model, quality and style now. --- .../openai/image/CreateImageEditRequest.java | 5 +++++ .../openai/image/CreateImageRequest.java | 21 ++++++++++++++++--- .../image/CreateImageVariationRequest.java | 5 +++++ .../com/theokanning/openai/image/Image.java | 6 ++++++ .../openai/service/OpenAiService.java | 2 ++ 5 files changed, 36 insertions(+), 3 deletions(-) diff --git a/api/src/main/java/com/theokanning/openai/image/CreateImageEditRequest.java b/api/src/main/java/com/theokanning/openai/image/CreateImageEditRequest.java index 72046953..7d37f689 100644 --- a/api/src/main/java/com/theokanning/openai/image/CreateImageEditRequest.java +++ b/api/src/main/java/com/theokanning/openai/image/CreateImageEditRequest.java @@ -21,6 +21,11 @@ public class CreateImageEditRequest { @NonNull String prompt; + /** + * The model to use for image generation. Only dall-e-2 is supported at this time. Defaults to dall-e-2. + */ + String model; + /** * The number of images to generate. Must be between 1 and 10. Defaults to 1. */ diff --git a/api/src/main/java/com/theokanning/openai/image/CreateImageRequest.java b/api/src/main/java/com/theokanning/openai/image/CreateImageRequest.java index b8a1d05d..13672c24 100644 --- a/api/src/main/java/com/theokanning/openai/image/CreateImageRequest.java +++ b/api/src/main/java/com/theokanning/openai/image/CreateImageRequest.java @@ -17,18 +17,28 @@ public class CreateImageRequest { /** - * A text description of the desired image(s). The maximum length in 1000 characters. + * A text description of the desired image(s). The maximum length is 1000 characters for dall-e-2 and 4000 characters for dall-e-3. */ @NonNull String prompt; /** - * The number of images to generate. Must be between 1 and 10. Defaults to 1. + * The model to use for image generation. 
Defaults to "dall-e-2". + */ + String model; + + /** + * The number of images to generate. Must be between 1 and 10. For dall-e-3, only n=1 is supported. Defaults to 1. */ Integer n; /** - * The size of the generated images. Must be one of "256x256", "512x512", or "1024x1024". Defaults to "1024x1024". + * The quality of the image that will be generated. "hd" creates images with finer details and greater consistency across the image. This param is only supported for dall-e-3. Defaults to "standard". + */ + String quality; + + /** + * The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024 for dall-e-2. Must be one of 1024x1024, 1792x1024, or 1024x1792 for dall-e-3 models. Defaults to 1024x1024. */ String size; @@ -38,6 +48,11 @@ public class CreateImageRequest { @JsonProperty("response_format") String responseFormat; + /** + * The style of the generated images. Must be one of vivid or natural. Vivid causes the model to lean towards generating hyper-real and dramatic images. Natural causes the model to produce more natural, less hyper-real looking images. This param is only supported for dall-e-3. Defaults to vivid. + */ + String style; + /** * A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse. */ diff --git a/api/src/main/java/com/theokanning/openai/image/CreateImageVariationRequest.java b/api/src/main/java/com/theokanning/openai/image/CreateImageVariationRequest.java index 2bc0c5d1..f16f613d 100644 --- a/api/src/main/java/com/theokanning/openai/image/CreateImageVariationRequest.java +++ b/api/src/main/java/com/theokanning/openai/image/CreateImageVariationRequest.java @@ -20,6 +20,11 @@ public class CreateImageVariationRequest { */ Integer n; + /** + * The model to use for image generation. Only dall-e-2 is supported at this time. Defaults to dall-e-2. + */ + String model; + /** * The size of the generated images. Must be one of "256x256", "512x512", or "1024x1024". Defaults to "1024x1024". 
*/ diff --git a/api/src/main/java/com/theokanning/openai/image/Image.java b/api/src/main/java/com/theokanning/openai/image/Image.java index e3214844..6b8391ed 100644 --- a/api/src/main/java/com/theokanning/openai/image/Image.java +++ b/api/src/main/java/com/theokanning/openai/image/Image.java @@ -21,4 +21,10 @@ public class Image { */ @JsonProperty("b64_json") String b64Json; + + /** + * The prompt that was used to generate the image, if there was any revision to the prompt. + */ + @JsonProperty("revised_prompt") + String revisedPrompt; } diff --git a/service/src/main/java/com/theokanning/openai/service/OpenAiService.java b/service/src/main/java/com/theokanning/openai/service/OpenAiService.java index 0296c15c..cedbd805 100644 --- a/service/src/main/java/com/theokanning/openai/service/OpenAiService.java +++ b/service/src/main/java/com/theokanning/openai/service/OpenAiService.java @@ -250,6 +250,7 @@ public ImageResult createImageEdit(CreateImageEditRequest request, java.io.File .setType(MediaType.get("multipart/form-data")) .addFormDataPart("prompt", request.getPrompt()) .addFormDataPart("size", request.getSize()) + .addFormDataPart("model", request.getModel()) .addFormDataPart("response_format", request.getResponseFormat()) .addFormDataPart("image", "image", imageBody); @@ -276,6 +277,7 @@ public ImageResult createImageVariation(CreateImageVariationRequest request, jav MultipartBody.Builder builder = new MultipartBody.Builder() .setType(MediaType.get("multipart/form-data")) .addFormDataPart("size", request.getSize()) + .addFormDataPart("model", request.getModel()) .addFormDataPart("response_format", request.getResponseFormat()) .addFormDataPart("image", "image", imageBody); From 0ec5a9ee303f91f6d66da9e91ab726ca27f28534 Mon Sep 17 00:00:00 2001 From: Daniel Faria Date: Sun, 12 Nov 2023 18:22:08 -0300 Subject: [PATCH 2/2] add support to audio/createSpeech API (#392) --- .../openai/audio/CreateSpeechRequest.java | 45 +++++++++++++++++++ 
.../theokanning/openai/client/OpenAiApi.java | 4 ++ .../openai/service/OpenAiService.java | 5 +++ .../theokanning/openai/service/AudioTest.java | 19 ++++++++ 4 files changed, 73 insertions(+) create mode 100644 api/src/main/java/com/theokanning/openai/audio/CreateSpeechRequest.java diff --git a/api/src/main/java/com/theokanning/openai/audio/CreateSpeechRequest.java b/api/src/main/java/com/theokanning/openai/audio/CreateSpeechRequest.java new file mode 100644 index 00000000..6d2e69ac --- /dev/null +++ b/api/src/main/java/com/theokanning/openai/audio/CreateSpeechRequest.java @@ -0,0 +1,45 @@ +package com.theokanning.openai.audio; + +import com.fasterxml.jackson.annotation.JsonProperty; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.NonNull; + +@Builder +@NoArgsConstructor +@AllArgsConstructor +@Data +public class CreateSpeechRequest { + + /** + * The name of the model to use. + */ + @NonNull + String model; + + /** + * The text to generate audio for. The maximum length is 4096 characters. + */ + @NonNull + String input; + + /** + * The voice to use when generating the audio. + */ + @NonNull + String voice; + + /** + * The format of the generated audio. Supported formats are mp3, opus, aac, and flac. Defaults to mp3. + */ + @JsonProperty("response_format") + String responseFormat; + + /** + * The speed of the generated audio. Select a value from 0.25 to 4.0. Defaults to 1.0. 
+ */ + Double speed; +} diff --git a/client/src/main/java/com/theokanning/openai/client/OpenAiApi.java b/client/src/main/java/com/theokanning/openai/client/OpenAiApi.java index 497dacd5..f2665ee2 100644 --- a/client/src/main/java/com/theokanning/openai/client/OpenAiApi.java +++ b/client/src/main/java/com/theokanning/openai/client/OpenAiApi.java @@ -2,6 +2,7 @@ import com.theokanning.openai.DeleteResult; import com.theokanning.openai.OpenAiResponse; +import com.theokanning.openai.audio.CreateSpeechRequest; import com.theokanning.openai.audio.TranscriptionResult; import com.theokanning.openai.audio.TranslationResult; import com.theokanning.openai.billing.BillingUsage; @@ -149,6 +150,9 @@ public interface OpenAiApi { @POST("/v1/audio/translations") Single createTranslation(@Body RequestBody requestBody); + @POST("/v1/audio/speech") + Single createSpeech(@Body CreateSpeechRequest requestBody); + @POST("/v1/moderations") Single createModeration(@Body ModerationRequest request); diff --git a/service/src/main/java/com/theokanning/openai/service/OpenAiService.java b/service/src/main/java/com/theokanning/openai/service/OpenAiService.java index cedbd805..ea59417e 100644 --- a/service/src/main/java/com/theokanning/openai/service/OpenAiService.java +++ b/service/src/main/java/com/theokanning/openai/service/OpenAiService.java @@ -8,6 +8,7 @@ import com.theokanning.openai.DeleteResult; import com.theokanning.openai.OpenAiError; import com.theokanning.openai.OpenAiHttpException; +import com.theokanning.openai.audio.CreateSpeechRequest; import com.theokanning.openai.audio.CreateTranscriptionRequest; import com.theokanning.openai.audio.CreateTranslationRequest; import com.theokanning.openai.audio.TranscriptionResult; @@ -347,6 +348,10 @@ public ModerationResult createModeration(ModerationRequest request) { return execute(api.createModeration(request)); } + public ResponseBody createSpeech(CreateSpeechRequest request) { + return execute(api.createSpeech(request)); + } + /** * Calls 
the Open AI api, returns the response, and parses error messages if the request fails */ diff --git a/service/src/test/java/com/theokanning/openai/service/AudioTest.java b/service/src/test/java/com/theokanning/openai/service/AudioTest.java index d5a54a23..9cb083de 100644 --- a/service/src/test/java/com/theokanning/openai/service/AudioTest.java +++ b/service/src/test/java/com/theokanning/openai/service/AudioTest.java @@ -1,13 +1,18 @@ package com.theokanning.openai.service; +import com.theokanning.openai.audio.CreateSpeechRequest; import com.theokanning.openai.audio.CreateTranscriptionRequest; import com.theokanning.openai.audio.CreateTranslationRequest; import com.theokanning.openai.audio.TranscriptionResult; import com.theokanning.openai.audio.TranslationResult; import org.junit.jupiter.api.Test; +import java.io.IOException; import java.time.Duration; +import okhttp3.MediaType; +import okhttp3.ResponseBody; + import static org.junit.jupiter.api.Assertions.*; @@ -69,4 +74,18 @@ void createTranslationVerbose() { assertTrue(result.getDuration() > 0); assertEquals(1, result.getSegments().size()); } + + @Test + void createSpeech() throws IOException { + CreateSpeechRequest createSpeechRequest = CreateSpeechRequest.builder() + .model("tts-1") + .input("Hello World.") + .voice("alloy") + .build(); + + final ResponseBody speech = service.createSpeech(createSpeechRequest); + assertNotNull(speech); + assertEquals(MediaType.get("audio/mpeg"), speech.contentType()); + assertTrue(speech.bytes().length > 0); + } }