Skip to content
This repository has been archived by the owner on Jun 6, 2024. It is now read-only.

Commit

Permalink
Merge branch 'main' into manage-assistants
Browse files Browse the repository at this point in the history
  • Loading branch information
TheoKanning authored Nov 12, 2023
2 parents cd776d0 + 0ec5a9e commit fb3ca19
Show file tree
Hide file tree
Showing 8 changed files with 108 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package com.theokanning.openai.audio;

import com.fasterxml.jackson.annotation.JsonProperty;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.NonNull;

/**
* Request payload for generating spoken audio from text. It is sent as the JSON body of
* {@code POST /v1/audio/speech}.
*
* <p>{@code model}, {@code input} and {@code voice} are annotated {@code @NonNull};
* {@code responseFormat} and {@code speed} carry no such annotation and may be left unset.
*/
@Builder
@NoArgsConstructor
@AllArgsConstructor
@Data
public class CreateSpeechRequest {

/**
* The name of the model to use, for example {@code tts-1}.
*/
@NonNull
String model;

/**
* The text to generate audio for. The maximum length is 4096 characters.
*/
@NonNull
String input;

/**
* The voice to use when generating the audio, for example {@code alloy}.
*/
@NonNull
String voice;

/**
* The format of the generated audio. Supported formats are mp3, opus, aac, and flac. Defaults to mp3.
* Serialized under the JSON property name {@code response_format}.
*/
@JsonProperty("response_format")
String responseFormat;

/**
* The speed of the generated audio. Select a value from 0.25 to 4.0. Defaults to 1.0.
*/
Double speed;
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ public class CreateImageEditRequest {
@NonNull
String prompt;

/**
* The model to use for image generation. Only dall-e-2 is supported at this time. Defaults to dall-e-2.
*/
String model;

/**
* The number of images to generate. Must be between 1 and 10. Defaults to 1.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,28 @@
public class CreateImageRequest {

/**
* A text description of the desired image(s). The maximum length in 1000 characters.
* A text description of the desired image(s). The maximum length is 1000 characters for dall-e-2 and 4000 characters for dall-e-3.
*/
@NonNull
String prompt;

/**
* The number of images to generate. Must be between 1 and 10. Defaults to 1.
* The model to use for image generation. Defaults to "dall-e-2".
*/
String model;

/**
* The number of images to generate. Must be between 1 and 10. For dall-e-3, only n=1 is supported. Defaults to 1.
*/
Integer n;

/**
* The size of the generated images. Must be one of "256x256", "512x512", or "1024x1024". Defaults to "1024x1024".
* The quality of the image that will be generated. "hd" creates images with finer details and greater consistency across the image. This param is only supported for dall-e-3. Defaults to "standard".
*/
String quality;

/**
* The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024 for dall-e-2. Must be one of 1024x1024, 1792x1024, or 1024x1792 for dall-e-3 models. Defaults to 1024x1024.
*/
String size;

Expand All @@ -38,6 +48,11 @@ public class CreateImageRequest {
@JsonProperty("response_format")
String responseFormat;

/**
* The style of the generated images. Must be one of vivid or natural. Vivid causes the model to lean towards generating hyper-real and dramatic images. Natural causes the model to produce more natural, less hyper-real looking images. This param is only supported for dall-e-3. Defaults to vivid.
*/
String style;

/**
* A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ public class CreateImageVariationRequest {
*/
Integer n;

/**
* The model to use for image generation. Only dall-e-2 is supported at this time. Defaults to dall-e-2.
*/
String model;

/**
* The size of the generated images. Must be one of "256x256", "512x512", or "1024x1024". Defaults to "1024x1024".
*/
Expand Down
6 changes: 6 additions & 0 deletions api/src/main/java/com/theokanning/openai/image/Image.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,10 @@ public class Image {
*/
@JsonProperty("b64_json")
String b64Json;

/**
* The prompt that was used to generate the image, if there was any revision to the prompt.
*/
@JsonProperty("revised_prompt")
String revisedPrompt;
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import com.theokanning.openai.assistants.DeleteAssistantResult;
import com.theokanning.openai.assistants.ListAssistant;
import com.theokanning.openai.assistants.ListAssistantQueryRequest;
import com.theokanning.openai.audio.CreateSpeechRequest;
import com.theokanning.openai.audio.TranscriptionResult;
import com.theokanning.openai.audio.TranslationResult;
import com.theokanning.openai.billing.BillingUsage;
Expand Down Expand Up @@ -157,6 +158,9 @@ public interface OpenAiApi {
/**
* Sends the given request body to {@code POST /v1/audio/translations}.
*
* @param requestBody the pre-built body to post
*        (NOTE(review): presumably multipart form data carrying the audio file; confirm against the caller)
* @return a {@code Single} that emits the parsed {@link TranslationResult}
*/
@POST("/v1/audio/translations")
Single<TranslationResult> createTranslation(@Body RequestBody requestBody);

/**
* Sends a speech-generation request to {@code POST /v1/audio/speech}.
*
* <p>Unlike the neighbouring endpoints, the result is exposed as a raw {@code ResponseBody}
* rather than a parsed result type.
* NOTE(review): there is no {@code @Streaming} annotation here, so the whole response is
* presumably buffered in memory before it is emitted; confirm that is acceptable for long inputs.
*
* @param requestBody the speech parameters, sent as the request body
* @return a {@code Single} that emits the raw response body
*/
@POST("/v1/audio/speech")
Single<ResponseBody> createSpeech(@Body CreateSpeechRequest requestBody);

/**
* Sends a moderation request to {@code POST /v1/moderations}.
*
* @param request the moderation parameters, sent as the request body
* @return a {@code Single} that emits the parsed {@link ModerationResult}
*/
@POST("/v1/moderations")
Single<ModerationResult> createModeration(@Body ModerationRequest request);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import com.theokanning.openai.assistants.AssistantFileRequest;
import com.theokanning.openai.assistants.ListAssistant;
import com.theokanning.openai.assistants.ListAssistantQueryRequest;
import com.theokanning.openai.audio.CreateSpeechRequest;
import com.theokanning.openai.audio.CreateTranscriptionRequest;
import com.theokanning.openai.audio.CreateTranslationRequest;
import com.theokanning.openai.audio.TranscriptionResult;
Expand Down Expand Up @@ -258,6 +259,7 @@ public ImageResult createImageEdit(CreateImageEditRequest request, java.io.File
.setType(MediaType.get("multipart/form-data"))
.addFormDataPart("prompt", request.getPrompt())
.addFormDataPart("size", request.getSize())
.addFormDataPart("model", request.getModel())
.addFormDataPart("response_format", request.getResponseFormat())
.addFormDataPart("image", "image", imageBody);

Expand All @@ -284,6 +286,7 @@ public ImageResult createImageVariation(CreateImageVariationRequest request, jav
MultipartBody.Builder builder = new MultipartBody.Builder()
.setType(MediaType.get("multipart/form-data"))
.addFormDataPart("size", request.getSize())
.addFormDataPart("model", request.getModel())
.addFormDataPart("response_format", request.getResponseFormat())
.addFormDataPart("image", "image", imageBody);

Expand Down Expand Up @@ -391,6 +394,9 @@ public ListAssistant<Assistant> listAssistantFiles(String assistantId, ListAssis
return execute(api.listAssistantFiles(assistantId, queryParameters));
}

/**
 * Generates spoken audio for the given request by delegating to the speech
 * endpoint of the underlying API client and running the call through
 * {@code execute}.
 *
 * @param request the speech parameters to send
 * @return the raw response body returned by the API
 */
public ResponseBody createSpeech(final CreateSpeechRequest request) {
    return execute(
            this.api.createSpeech(request)
    );
}

/**
* Calls the Open AI api, returns the response, and parses error messages if the request fails
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
package com.theokanning.openai.service;

import com.theokanning.openai.audio.CreateSpeechRequest;
import com.theokanning.openai.audio.CreateTranscriptionRequest;
import com.theokanning.openai.audio.CreateTranslationRequest;
import com.theokanning.openai.audio.TranscriptionResult;
import com.theokanning.openai.audio.TranslationResult;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.time.Duration;

import okhttp3.MediaType;
import okhttp3.ResponseBody;

import static org.junit.jupiter.api.Assertions.*;


Expand Down Expand Up @@ -69,4 +74,18 @@ void createTranslationVerbose() {
assertTrue(result.getDuration() > 0);
assertEquals(1, result.getSegments().size());
}

/**
 * Requests speech for a short phrase without specifying a response format and
 * verifies that a non-empty {@code audio/mpeg} body comes back.
 */
@Test
void createSpeech() throws IOException {
    final CreateSpeechRequest request = CreateSpeechRequest.builder()
            .voice("alloy")
            .input("Hello World.")
            .model("tts-1")
            .build();

    final ResponseBody audio = service.createSpeech(request);
    assertNotNull(audio);

    final MediaType expectedType = MediaType.get("audio/mpeg");
    assertEquals(expectedType, audio.contentType());

    // bytes() reads the whole body, so a positive length means audio was actually returned.
    final byte[] payload = audio.bytes();
    assertTrue(payload.length > 0);
}
}

0 comments on commit fb3ca19

Please sign in to comment.