Merge branch 'main' into manage-assistants

TheoKanning · Nov 12, 2023 · fb3ca19 · fb3ca19
2 parents cd776d0 + 0ec5a9e
commit fb3ca19
Show file tree

Hide file tree

Showing 8 changed files with 108 additions and 3 deletions.
diff --git a/api/src/main/java/com/theokanning/openai/audio/CreateSpeechRequest.java b/api/src/main/java/com/theokanning/openai/audio/CreateSpeechRequest.java
@@ -0,0 +1,45 @@
+package com.theokanning.openai.audio;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import lombok.NonNull;
+
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+@Data
+public class CreateSpeechRequest {
+
+    /**
+     * The name of the model to use. Must not be null.
+     */
+    @NonNull
+    String model;
+
+    /**
+     * The text to generate audio for. Must not be null. The maximum length is 4096 characters.
+     */
+    @NonNull
+    String input;
+
+    /**
+     * The voice to use when generating the audio. Must not be null.
+     */
+    @NonNull
+    String voice;
+
+    /**
+     * The format to return the audio in. Supported formats are mp3, opus, aac, and flac. Defaults to mp3.
+     */
+    @JsonProperty("response_format")
+    String responseFormat;
+
+    /**
+     * The speed of the generated audio. Select a value from 0.25 to 4.0. Defaults to 1.0.
+     */
+    Double speed;
+}
diff --git a/api/src/main/java/com/theokanning/openai/image/CreateImageEditRequest.java b/api/src/main/java/com/theokanning/openai/image/CreateImageEditRequest.java
@@ -21,6 +21,11 @@ public class CreateImageEditRequest {
     @NonNull
     String prompt;
 
+    /**
+     * The model to use for image generation. Only dall-e-2 is supported at this time. Defaults to dall-e-2.
+     */
+    String model;
+
     /**
      * The number of images to generate. Must be between 1 and 10. Defaults to 1.
      */

diff --git a/api/src/main/java/com/theokanning/openai/image/CreateImageRequest.java b/api/src/main/java/com/theokanning/openai/image/CreateImageRequest.java
@@ -17,18 +17,28 @@
 public class CreateImageRequest {
 
     /**
-     * A text description of the desired image(s). The maximum length in 1000 characters.
+     * A text description of the desired image(s). The maximum length is 1000 characters for dall-e-2 and 4000 characters for dall-e-3.
      */
     @NonNull
     String prompt;
 
     /**
-     * The number of images to generate. Must be between 1 and 10. Defaults to 1.
+     * The model to use for image generation. Defaults to "dall-e-2".
+     */
+    String model;
+
+    /**
+     * The number of images to generate. Must be between 1 and 10. For dall-e-3, only n=1 is supported. Defaults to 1.
      */
     Integer n;
 
     /**
-     * The size of the generated images. Must be one of "256x256", "512x512", or "1024x1024". Defaults to "1024x1024".
+     * The quality of the image that will be generated. "hd" creates images with finer details and greater consistency across the image. This param is only supported for dall-e-3. Defaults to "standard".
+     */
+    String quality;
+
+    /**
+     * The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024 for dall-e-2. Must be one of 1024x1024, 1792x1024, or 1024x1792 for dall-e-3 models. Defaults to 1024x1024.
      */
     String size;
 
@@ -38,6 +48,11 @@ public class CreateImageRequest {
     @JsonProperty("response_format")
     String responseFormat;
 
+    /**
+     * The style of the generated images. Must be one of vivid or natural. Vivid causes the model to lean towards generating hyper-real and dramatic images. Natural causes the model to produce more natural, less hyper-real looking images. This param is only supported for dall-e-3. Defaults to vivid.
+     */
+    String style;
+
     /**
      * A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse.
      */

diff --git a/api/src/main/java/com/theokanning/openai/image/CreateImageVariationRequest.java b/api/src/main/java/com/theokanning/openai/image/CreateImageVariationRequest.java
@@ -20,6 +20,11 @@ public class CreateImageVariationRequest {
      */
     Integer n;
 
+    /**
+     * The model to use for image generation. Only dall-e-2 is supported at this time. Defaults to dall-e-2.
+     */
+    String model;
+
     /**
      * The size of the generated images. Must be one of "256x256", "512x512", or "1024x1024". Defaults to "1024x1024".
      */

diff --git a/api/src/main/java/com/theokanning/openai/image/Image.java b/api/src/main/java/com/theokanning/openai/image/Image.java
@@ -21,4 +21,10 @@ public class Image {
      */
     @JsonProperty("b64_json")
     String b64Json;
+
+    /**
+     * The prompt that was used to generate the image, if there was any revision to the prompt.
+     */
+    @JsonProperty("revised_prompt")
+    String revisedPrompt;
 }
diff --git a/client/src/main/java/com/theokanning/openai/client/OpenAiApi.java b/client/src/main/java/com/theokanning/openai/client/OpenAiApi.java
@@ -9,6 +9,7 @@
 import com.theokanning.openai.assistants.DeleteAssistantResult;
 import com.theokanning.openai.assistants.ListAssistant;
 import com.theokanning.openai.assistants.ListAssistantQueryRequest;
+import com.theokanning.openai.audio.CreateSpeechRequest;
 import com.theokanning.openai.audio.TranscriptionResult;
 import com.theokanning.openai.audio.TranslationResult;
 import com.theokanning.openai.billing.BillingUsage;
@@ -157,6 +158,9 @@ public interface OpenAiApi {
     @POST("/v1/audio/translations")
     Single<TranslationResult> createTranslation(@Body RequestBody requestBody);
 
+    @POST("/v1/audio/speech")
+    Single<ResponseBody> createSpeech(@Body CreateSpeechRequest requestBody); // emits the raw response body rather than a parsed result type
+
     @POST("/v1/moderations")
     Single<ModerationResult> createModeration(@Body ModerationRequest request);
 

diff --git a/service/src/main/java/com/theokanning/openai/service/OpenAiService.java b/service/src/main/java/com/theokanning/openai/service/OpenAiService.java
@@ -15,6 +15,7 @@
 import com.theokanning.openai.assistants.AssistantFileRequest;
 import com.theokanning.openai.assistants.ListAssistant;
 import com.theokanning.openai.assistants.ListAssistantQueryRequest;
+import com.theokanning.openai.audio.CreateSpeechRequest;
 import com.theokanning.openai.audio.CreateTranscriptionRequest;
 import com.theokanning.openai.audio.CreateTranslationRequest;
 import com.theokanning.openai.audio.TranscriptionResult;
@@ -258,6 +259,7 @@ public ImageResult createImageEdit(CreateImageEditRequest request, java.io.File
                 .setType(MediaType.get("multipart/form-data"))
                 .addFormDataPart("prompt", request.getPrompt())
                 .addFormDataPart("size", request.getSize())
+                .addFormDataPart("model", request.getModel())
                 .addFormDataPart("response_format", request.getResponseFormat())
                 .addFormDataPart("image", "image", imageBody);
 
@@ -284,6 +286,7 @@ public ImageResult createImageVariation(CreateImageVariationRequest request, jav
         MultipartBody.Builder builder = new MultipartBody.Builder()
                 .setType(MediaType.get("multipart/form-data"))
                 .addFormDataPart("size", request.getSize())
+                .addFormDataPart("model", request.getModel())
                 .addFormDataPart("response_format", request.getResponseFormat())
                 .addFormDataPart("image", "image", imageBody);
 
@@ -391,6 +394,9 @@ public ListAssistant<Assistant> listAssistantFiles(String assistantId, ListAssis
         return execute(api.listAssistantFiles(assistantId, queryParameters));
     }
 
+    public ResponseBody createSpeech(CreateSpeechRequest request) {
+        return execute(api.createSpeech(request)); // POST /v1/audio/speech; hands back the response body as-is
+    }
 
     /**
      * Calls the Open AI api, returns the response, and parses error messages if the request fails

diff --git a/service/src/test/java/com/theokanning/openai/service/AudioTest.java b/service/src/test/java/com/theokanning/openai/service/AudioTest.java
@@ -1,13 +1,18 @@
 package com.theokanning.openai.service;
 
+import com.theokanning.openai.audio.CreateSpeechRequest;
 import com.theokanning.openai.audio.CreateTranscriptionRequest;
 import com.theokanning.openai.audio.CreateTranslationRequest;
 import com.theokanning.openai.audio.TranscriptionResult;
 import com.theokanning.openai.audio.TranslationResult;
 import org.junit.jupiter.api.Test;
 
+import java.io.IOException;
 import java.time.Duration;
 
+import okhttp3.MediaType;
+import okhttp3.ResponseBody;
+
 import static org.junit.jupiter.api.Assertions.*;
 
 
@@ -69,4 +74,18 @@ void createTranslationVerbose() {
         assertTrue(result.getDuration() > 0);
         assertEquals(1, result.getSegments().size());
     }
+
+    @Test
+    void createSpeech() throws IOException {
+        CreateSpeechRequest createSpeechRequest = CreateSpeechRequest.builder()
+                .model("tts-1")
+                .input("Hello World.")
+                .voice("alloy")
+                .build(); // responseFormat and speed are deliberately left unset
+
+        final ResponseBody speech = service.createSpeech(createSpeechRequest);
+        assertNotNull(speech);
+        assertEquals(MediaType.get("audio/mpeg"), speech.contentType()); // no response_format requested; mp3 is the documented default
+        assertTrue(speech.bytes().length > 0); // any non-empty payload passes; the audio itself is not inspected
+    }
 }