From 3467267fb66caa619fe41e7a2f3c201a3d51c5fb Mon Sep 17 00:00:00 2001
From: Siddharth Thevaril
Date: Thu, 14 Mar 2024 14:37:27 +0530
Subject: [PATCH 01/13] feat: add settings for OpenAI TTS provider

---
 includes/Classifai/Features/TextToSpeech.php  |   4 +-
 .../Providers/OpenAI/TextToSpeech.php         | 212 ++++++++++++++++++
 .../Classifai/Services/LanguageProcessing.php |   1 +
 3 files changed, 216 insertions(+), 1 deletion(-)
 create mode 100644 includes/Classifai/Providers/OpenAI/TextToSpeech.php

diff --git a/includes/Classifai/Features/TextToSpeech.php b/includes/Classifai/Features/TextToSpeech.php
index bd530006c..3d3a6edb1 100644
--- a/includes/Classifai/Features/TextToSpeech.php
+++ b/includes/Classifai/Features/TextToSpeech.php
@@ -4,6 +4,7 @@
 
 use Classifai\Services\LanguageProcessing;
 use Classifai\Providers\Azure\Speech;
+use Classifai\Providers\OpenAI\TextToSpeech as OpenAITTS;
 use WP_REST_Server;
 use WP_REST_Request;
 use WP_Error;
@@ -54,7 +55,8 @@ public function __construct() {
 
 		// Contains just the providers this feature supports.
 		$this->supported_providers = [
-			Speech::ID => __( 'Microsoft Azure AI Speech', 'classifai' ),
+			Speech::ID    => __( 'Microsoft Azure AI Speech', 'classifai' ),
+			OpenAITTS::ID => __( 'OpenAI Text to Speech', 'classifai' ),
 		];
 	}
 
diff --git a/includes/Classifai/Providers/OpenAI/TextToSpeech.php b/includes/Classifai/Providers/OpenAI/TextToSpeech.php
new file mode 100644
index 000000000..34b5a969f
--- /dev/null
+++ b/includes/Classifai/Providers/OpenAI/TextToSpeech.php
@@ -0,0 +1,212 @@
+<?php
+/**
+ * OpenAI Text to Speech integration
+ */
+
+namespace Classifai\Providers\OpenAI;
+
+use Classifai\Providers\Provider;
+use Classifai\Features\TextToSpeech as FeatureTextToSpeech;
+
+class TextToSpeech extends Provider {
+
+	use OpenAI;
+
+	const ID = 'openai_text-to-speech';
+
+	/**
+	 * OpenAI Text to Speech URL.
+	 *
+	 * @var string
+	 */
+	protected $api_url = 'https://api.openai.com/v1/audio/speech';
+
+	/**
+	 * OpenAI Text to Speech constructor.
+	 *
+	 * @param \Classifai\Features\Feature $feature_instance The feature instance.
+	 */
+	public function __construct( $feature_instance = null ) {
+		$this->feature_instance = $feature_instance;
+	}
+
+	/**
+	 * Register settings for the provider.
+	 */
+	public function render_provider_fields(): void {
+		$settings = $this->feature_instance->get_settings( static::ID );
+
+		add_settings_field(
+			static::ID . '_api_key',
+			esc_html__( 'API Key', 'classifai' ),
+			[ $this->feature_instance, 'render_input' ],
+			$this->feature_instance->get_option_name(),
+			$this->feature_instance->get_option_name() . '_section',
+			[
+				'option_index'  => static::ID,
+				'label_for'     => 'api_key',
+				'input_type'    => 'password',
+				'default_value' => $settings['api_key'],
+				'class'         => 'classifai-provider-field hidden provider-scope-' . static::ID,
+				'description'   => sprintf(
+					wp_kses(
+						/* translators: %1$s is replaced with the OpenAI sign up URL */
+						__( 'Don\'t have an OpenAI account yet? <a title="Sign up for an OpenAI account" href="%1$s">Sign up for one</a> in order to get your API key.', 'classifai' ),
+						[
+							'a' => [
+								'href'  => [],
+								'title' => [],
+							],
+						]
+					),
+					esc_url( 'https://platform.openai.com/signup' )
+				),
+			]
+		);
+
+		add_settings_field(
+			static::ID . '_tts_model',
+			esc_html__( 'TTS model', 'classifai' ),
+			[ $this->feature_instance, 'render_select' ],
+			$this->feature_instance->get_option_name(),
+			$this->feature_instance->get_option_name() . '_section',
+			[
+				'option_index'  => static::ID,
+				'label_for'     => 'tts_model',
+				'options'       => [
+					'tts-1'    => __( 'Text-to-speech 1 (Optimized for speed)', 'classifai' ),
+					'tts-1-hd' => __( 'Text-to-speech 1 HD (Optimized for quality)', 'classifai' ),
+				],
+				'default_value' => $settings['tts_model'],
+				'description'   => __( 'Select a model depending on your requirement.', 'classifai' ),
+				'class'         => 'classifai-provider-field hidden provider-scope-' . static::ID,
+			]
+		);
+
+		add_settings_field(
+			static::ID . '_voice',
+			esc_html__( 'Voice', 'classifai' ),
+			[ $this->feature_instance, 'render_select' ],
+			$this->feature_instance->get_option_name(),
+			$this->feature_instance->get_option_name() . 
'_section', + [ + 'option_index' => static::ID, + 'label_for' => 'voice', + 'options' => [ + 'alloy' => __( 'Alloy (male)', 'classifai' ), + 'echo' => __( 'Echo (male)', 'classifai' ), + 'fable' => __( 'Fable (male)', 'classifai' ), + 'onyx' => __( 'Onyx (male)', 'classifai' ), + 'nova' => __( 'Nova (female)', 'classifai' ), + 'shimmer' => __( 'Shimmer (female)', 'classifai' ), + ], + 'default_value' => $settings['voice'], + 'description' => __( 'Select the speech voice.', 'classifai' ), + 'class' => 'classifai-provider-field hidden provider-scope-' . static::ID, + ] + ); + + add_settings_field( + static::ID . '_format', + esc_html__( 'Audio format', 'classifai' ), + [ $this->feature_instance, 'render_select' ], + $this->feature_instance->get_option_name(), + $this->feature_instance->get_option_name() . '_section', + [ + 'option_index' => static::ID, + 'label_for' => 'format', + 'options' => [ + 'mp3' => __( '.mp3', 'classifai' ), + 'opus' => __( '.opus', 'classifai' ), + 'aac' => __( '.aac', 'classifai' ), + 'flac' => __( '.flac', 'classifai' ), + 'wav' => __( '.wav', 'classifai' ), + 'pcm' => __( '.pcm', 'classifai' ), + ], + 'default_value' => $settings['format'], + 'description' => __( 'Select the audio format.', 'classifai' ), + 'class' => 'classifai-provider-field hidden provider-scope-' . static::ID, + ] + ); + + add_settings_field( + static::ID . '_speed', + esc_html__( 'Audio speed', 'classifai' ), + [ $this->feature_instance, 'render_input' ], + $this->feature_instance->get_option_name(), + $this->feature_instance->get_option_name() . '_section', + [ + 'option_index' => static::ID, + 'label_for' => 'speed', + 'input_type' => 'number', + 'min' => 0.25, + 'max' => 4, + 'step' => 0.25, + 'default_value' => $settings['speed'], + 'description' => __( 'Select the speed of the generated audio.', 'classifai' ), + 'class' => 'classifai-provider-field hidden provider-scope-' . static::ID, + ] + ); + } + + /** + * Returns the default settings for the provider. + * + * @return array + */ + public function get_default_provider_settings(): array { + $common_settings = [ + 'api_key' => '', + 'authenticated' => false, + ]; + + switch ( $this->feature_instance::ID ) { + case FeatureTextToSpeech::ID: + return array_merge( + $common_settings, + [ + 'tts_model' => 'tts-1', + 'voice' => 'voice', + 'format' => 'mp3', + 'speed' => 1, + ] + ); + } + + return $common_settings; + } + + /** + * Sanitization for the options being saved. + * + * @param array $new_settings Array of settings about to be saved. + * @return array The sanitized settings to be saved. 
+ */ + public function sanitize_settings( array $new_settings ): array { + $settings = $this->feature_instance->get_settings(); + $api_key_settings = $this->sanitize_api_key_settings( $new_settings, $settings ); + $new_settings[ static::ID ]['api_key'] = $api_key_settings[ static::ID ]['api_key']; + $new_settings[ static::ID ]['authenticated'] = $api_key_settings[ static::ID ]['authenticated']; + + if ( $this->feature_instance instanceof FeatureTextToSpeech ) { + if ( in_array( $new_settings[ static::ID ]['tts_model'], [ 'tts-1', 'tts-1-hd' ], true ) ) { + $new_settings[ static::ID ]['tts_model'] = sanitize_text_field( $new_settings[ static::ID ]['tts_model'] ); + } + + if ( in_array( $new_settings[ static::ID ]['voice'], [ 'alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer' ], true ) ) { + $new_settings[ static::ID ]['voice'] = sanitize_text_field( $new_settings[ static::ID ]['voice'] ); + } + + if ( in_array( $new_settings[ static::ID ]['format'], [ 'mp3', 'opus', 'aac', 'flac', 'wav', 'pcm' ], true ) ) { + $new_settings[ static::ID ]['format'] = sanitize_text_field( $new_settings[ static::ID ]['format'] ); + } + + $speed = filter_var( $new_settings[ static::ID ]['speed'] ?? 1.0, FILTER_SANITIZE_NUMBER_FLOAT ); + + if ( 0.25 <= $speed || 4.00 >= $speed ) { + $new_settings[ static::ID ]['speed'] = sanitize_text_field( $new_settings[ static::ID ]['speed'] ); + } + } + + return $new_settings; + } +} diff --git a/includes/Classifai/Services/LanguageProcessing.php b/includes/Classifai/Services/LanguageProcessing.php index 66baadae3..7671656ab 100644 --- a/includes/Classifai/Services/LanguageProcessing.php +++ b/includes/Classifai/Services/LanguageProcessing.php @@ -38,6 +38,7 @@ public static function get_service_providers(): array { 'classifai_language_processing_service_providers', [ 'Classifai\Providers\Azure\Speech', + 'Classifai\Providers\OpenAI\TextToSpeech', 'Classifai\Providers\OpenAI\ChatGPT', 'Classifai\Providers\OpenAI\Embeddings', 'Classifai\Providers\OpenAI\Moderation', From 0a8a7b1aa6cca54afbe8211004e4b38243948ee0 Mon Sep 17 00:00:00 2001 From: Siddharth Thevaril Date: Thu, 14 Mar 2024 18:20:52 +0530 Subject: [PATCH 02/13] feat: add logic to synthesize audio --- includes/Classifai/Features/TextToSpeech.php | 24 ++++ includes/Classifai/Providers/Azure/Speech.php | 17 +-- .../Providers/OpenAI/TextToSpeech.php | 111 +++++++++++++++++- 3 files changed, 137 insertions(+), 15 deletions(-) diff --git a/includes/Classifai/Features/TextToSpeech.php b/includes/Classifai/Features/TextToSpeech.php index 3d3a6edb1..baf5fe053 100644 --- a/includes/Classifai/Features/TextToSpeech.php +++ b/includes/Classifai/Features/TextToSpeech.php @@ -5,6 +5,7 @@ use Classifai\Services\LanguageProcessing; use Classifai\Providers\Azure\Speech; use Classifai\Providers\OpenAI\TextToSpeech as OpenAITTS; +use Classifai\Normalizer; use WP_REST_Server; use WP_REST_Request; use WP_Error; @@ -44,6 +45,14 @@ class TextToSpeech extends Feature { */ const DISPLAY_GENERATED_AUDIO = '_classifai_display_generated_audio'; + /** + * Meta key to get/set the audio hash that helps to indicate if there is any need + * for the audio file to be regenerated or not. + * + * @var string + */ + const AUDIO_HASH_KEY = '_classifai_post_audio_hash'; + /** * Constructor. */ @@ -802,6 +811,21 @@ public function get_audio_generation_subsequent_state( $post = null ): bool { return apply_filters( 'classifai_audio_generation_subsequent_state', false, get_post( $post ) ); } + /** + * Normalizes the post content for text to speech generation. 
+ * + * @param int $post_id The post ID. + * + * @return string The normalized post content. + */ + public function normalize_post_content( int $post_id ): string { + $normalizer = new Normalizer(); + $post = get_post( $post_id ); + $post_content = $normalizer->normalize_content( $post->post_content, $post->post_title, $post_id ); + + return $post_content; + } + /** * Generates feature setting data required for migration from * ClassifAI < 3.0.0 to 3.0.0 diff --git a/includes/Classifai/Providers/Azure/Speech.php b/includes/Classifai/Providers/Azure/Speech.php index 670ec195f..13a13457d 100644 --- a/includes/Classifai/Providers/Azure/Speech.php +++ b/includes/Classifai/Providers/Azure/Speech.php @@ -6,7 +6,6 @@ namespace Classifai\Providers\Azure; use Classifai\Providers\Provider; -use Classifai\Normalizer; use Classifai\Features\TextToSpeech; use stdClass; use WP_Http; @@ -30,14 +29,6 @@ class Speech extends Provider { */ const API_PATH = 'cognitiveservices/v1'; - /** - * Meta key to get/set the audio hash that helps to indicate if there is any need - * for the audio file to be regenerated or not. - * - * @var string - */ - const AUDIO_HASH_KEY = '_classifai_post_audio_hash'; - /** * Azure Text to Speech constructor. * @@ -337,12 +328,10 @@ public function synthesize_speech( int $post_id ) { ); } - $normalizer = new Normalizer(); $feature = new TextToSpeech(); $settings = $feature->get_settings(); - $post = get_post( $post_id ); - $post_content = $normalizer->normalize_content( $post->post_content, $post->post_title, $post_id ); - $content_hash = get_post_meta( $post_id, self::AUDIO_HASH_KEY, true ); + $post_content = $feature->normalize_post_content( $post_id ); + $content_hash = get_post_meta( $post_id, TextToSpeech::AUDIO_HASH_KEY, true ); $saved_attachment_id = (int) get_post_meta( $post_id, $feature::AUDIO_ID_KEY, true ); // Don't regenerate the audio file it it already exists and the content hasn't changed. @@ -415,7 +404,7 @@ public function synthesize_speech( int $post_id ) { ); } - update_post_meta( $post_id, self::AUDIO_HASH_KEY, md5( $post_content ) ); + update_post_meta( $post_id, TextToSpeech::AUDIO_HASH_KEY, md5( $post_content ) ); return $response_body; } diff --git a/includes/Classifai/Providers/OpenAI/TextToSpeech.php b/includes/Classifai/Providers/OpenAI/TextToSpeech.php index 34b5a969f..27b70a89c 100644 --- a/includes/Classifai/Providers/OpenAI/TextToSpeech.php +++ b/includes/Classifai/Providers/OpenAI/TextToSpeech.php @@ -7,11 +7,12 @@ use Classifai\Providers\Provider; use Classifai\Features\TextToSpeech as FeatureTextToSpeech; +use WP_Error; class TextToSpeech extends Provider { use OpenAI; - const ID = 'openai_text-to-speech'; + const ID = 'openai_text_to_speech'; /** * OpenAI Text to Speech URL. @@ -209,4 +210,112 @@ public function sanitize_settings( array $new_settings ): array { return $new_settings; } + + /** + * Common entry point for all REST endpoints for this provider. + * + * @param int $post_id The post ID we're processing. + * @param string $route_to_call The name of the route we're going to be processing. + * @param array $args Optional arguments to pass to the route. + * @return array|string|WP_Error + */ + public function rest_endpoint_callback( $post_id, string $route_to_call = '', array $args = [] ) { + if ( ! $post_id || ! 
get_post( $post_id ) ) { + return new WP_Error( 'post_id_required', esc_html__( 'A valid post ID is required.', 'classifai' ) ); + } + + $route_to_call = strtolower( $route_to_call ); + $return = ''; + + // Handle all of our routes. + switch ( $route_to_call ) { + case 'synthesize': + $return = $this->synthesize_speech( $post_id, $args ); + break; + } + + return $return; + } + + /** + * Synthesizes speech from a post item. + * + * @param int $post_id Post ID. + * @return string|WP_Error + */ + public function synthesize_speech( int $post_id ) { + if ( empty( $post_id ) ) { + return new WP_Error( + 'openai_text_to_speech_post_id_missing', + esc_html__( 'Post ID missing.', 'classifai' ) + ); + } + + // We skip the user cap check if running under WP-CLI. + if ( ! current_user_can( 'edit_post', $post_id ) && ( ! defined( 'WP_CLI' ) || ! WP_CLI ) ) { + return new WP_Error( + 'openai_text_to_speech_user_not_authorized', + esc_html__( 'Unauthorized user.', 'classifai' ) + ); + } + + $feature = new FeatureTextToSpeech(); + $settings = $feature->get_settings(); + $post_content = $feature->normalize_post_content( $post_id ); + $content_hash = get_post_meta( $post_id, FeatureTextToSpeech::AUDIO_HASH_KEY, true ); + $saved_attachment_id = (int) get_post_meta( $post_id, $feature::AUDIO_ID_KEY, true ); + + // Don't regenerate the audio file it it already exists and the content hasn't changed. + if ( $saved_attachment_id ) { + + // Check if the audio file exists. + $audio_attachment_url = wp_get_attachment_url( $saved_attachment_id ); + + if ( $audio_attachment_url && ! empty( $content_hash ) && ( md5( $post_content ) === $content_hash ) ) { + return $saved_attachment_id; + } + } + + // Create the request body to synthesize speech from text. + $request_body = array( + 'model' => $settings[ static::ID ]['tts_model'], + 'voice' => $settings[ static::ID ]['voice'], + 'input' => $post_content, + ); + + // Request parameters. + $request_params = array( + 'method' => 'POST', + 'body' => wp_json_encode( $request_body ), + 'timeout' => 60, // phpcs:ignore WordPressVIPMinimum.Performance.RemoteRequestTimeout.timeout_timeout + 'headers' => array( + 'Authorization' => 'Bearer ' . $settings[ static::ID ]['api_key'], + 'Content-Type' => 'application/json', + ), + ); + + $response = wp_remote_post( $this->api_url, $request_params ); + + if ( is_wp_error( $response ) ) { + return new WP_Error( + 'openai_text_to_speech_http_error', + esc_html( $response->get_error_message() ) + ); + } + + $code = wp_remote_retrieve_response_code( $response ); + $response_body = wp_remote_retrieve_body( $response ); + + // return error if HTTP status code is not 200. 
+ if ( \WP_Http::OK !== $code ) { + return new WP_Error( + 'openai_text_to_speech_unsuccessful_request', + esc_html__( 'HTTP request unsuccessful.', 'classifai' ) + ); + } + + update_post_meta( $post_id, FeatureTextToSpeech::AUDIO_HASH_KEY, md5( $post_content ) ); + + return $response_body; + } } From 276ec686563f05cc8b38c35bac09c51e3c967115 Mon Sep 17 00:00:00 2001 From: Siddharth Thevaril Date: Thu, 14 Mar 2024 18:59:22 +0530 Subject: [PATCH 03/13] add e2e tests --- .../text-to-speech-microsoft-azure.test.js | 8 +- ...xt-to-speech-openai-text-to-sppech.test.js | 214 ++++++++++++++++++ tests/test-plugin/e2e-test-plugin.php | 5 +- 3 files changed, 222 insertions(+), 5 deletions(-) create mode 100644 tests/cypress/integration/language-processing/text-to-speech-openai-text-to-sppech.test.js diff --git a/tests/cypress/integration/language-processing/text-to-speech-microsoft-azure.test.js b/tests/cypress/integration/language-processing/text-to-speech-microsoft-azure.test.js index 17302c216..193b9fa80 100644 --- a/tests/cypress/integration/language-processing/text-to-speech-microsoft-azure.test.js +++ b/tests/cypress/integration/language-processing/text-to-speech-microsoft-azure.test.js @@ -7,13 +7,13 @@ describe( '[Language Processing] Text to Speech (Microsoft Azure) Tests', () => cy.get( '#classifai_feature_text_to_speech_generation_post_types_post' ).check( 'post' ); - cy.get( '#endpoint_url' ).clear(); - cy.get( '#endpoint_url' ).type( 'https://service.com' ); - cy.get( '#api_key' ).type( 'password' ); + cy.get( '[name="classifai_feature_text_to_speech_generation[ms_azure_text_to_speech][api_key]"]' ).clear(); + cy.get( '[name="classifai_feature_text_to_speech_generation[ms_azure_text_to_speech][api_key]"]' ).type( 'https://service.com' ); + cy.get( '[name="classifai_feature_text_to_speech_generation[ms_azure_text_to_speech][api_key]"]' ).type( 'password' ); cy.get( '#status' ).check(); cy.get( '#submit' ).click(); - cy.get( '#voice' ).select( 'en-AU-AnnetteNeural|Female' ); + cy.get( '[name="classifai_feature_text_to_speech_generation[ms_azure_text_to_speech][voice]"]' ).select( 'en-AU-AnnetteNeural|Female' ); cy.get( '#submit' ).click(); cy.optInAllFeatures(); cy.disableClassicEditor(); diff --git a/tests/cypress/integration/language-processing/text-to-speech-openai-text-to-sppech.test.js b/tests/cypress/integration/language-processing/text-to-speech-openai-text-to-sppech.test.js new file mode 100644 index 000000000..71cc9c29d --- /dev/null +++ b/tests/cypress/integration/language-processing/text-to-speech-openai-text-to-sppech.test.js @@ -0,0 +1,214 @@ +describe( '[Language Processing] Text to Speech (OpenAI) Tests', () => { + before( () => { + cy.login(); + cy.visit( + '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_text_to_speech_generation' + ); + cy.get( + '#classifai_feature_text_to_speech_generation_post_types_post' + ).check( 'post' ); + cy.get( '#provider' ).select( 'openai_text_to_speech' ); + cy.get( '#tts_model' ).select( 'tts-1' ); + cy.get( '[name="classifai_feature_text_to_speech_generation[openai_text_to_speech][api_key]"]' ).type( 'password' ); + cy.get( '#status' ).check(); + cy.get( '#submit' ).click(); + + cy.get( '[name="classifai_feature_text_to_speech_generation[openai_text_to_speech][voice]"]' ).select( 'alloy' ); + cy.get( '#submit' ).click(); + cy.optInAllFeatures(); + cy.disableClassicEditor(); + } ); + + beforeEach( () => { + cy.login(); + } ); + + it( 'Generates audio from text', () => { + cy.createPost( { + title: 'Text to Speech 
test', + content: + "This feature uses OpenAI's Text to Speech capabilities.", + } ); + + cy.get( 'button[aria-label="Close panel"]' ).click(); + cy.get( 'button[data-label="Post"]' ).click(); + cy.get( '.classifai-panel' ).click(); + cy.get( '#classifai-audio-controls__preview-btn' ).should( 'exist' ); + } ); + + it( 'Audio controls are visible if supported by post type', () => { + cy.visit( '/text-to-speech-test/' ); + cy.get( '.class-post-audio-controls' ).should( 'be.visible' ); + } ); + + it( 'a11y - aria-labels', () => { + cy.visit( '/text-to-speech-test/' ); + cy.get( '.dashicons-controls-play' ).should( 'be.visible' ); + cy.get( '.class-post-audio-controls' ).should( + 'have.attr', + 'aria-label', + 'Play audio' + ); + + cy.get( '.class-post-audio-controls' ).click(); + + cy.get( '.dashicons-controls-play' ).should( 'not.be.visible' ); + cy.get( '.class-post-audio-controls' ).should( + 'have.attr', + 'aria-label', + 'Pause audio' + ); + + cy.get( '.class-post-audio-controls' ).click(); + cy.get( '.dashicons-controls-play' ).should( 'be.visible' ); + cy.get( '.class-post-audio-controls' ).should( + 'have.attr', + 'aria-label', + 'Play audio' + ); + } ); + + it( 'a11y - keyboard accessibility', () => { + cy.visit( '/text-to-speech-test/' ); + cy.get( '.class-post-audio-controls' ) + .tab( { shift: true } ) + .tab() + .type( '{enter}' ); + cy.get( '.dashicons-controls-pause' ).should( 'be.visible' ); + cy.get( '.class-post-audio-controls' ).should( + 'have.attr', + 'aria-label', + 'Pause audio' + ); + + cy.get( '.class-post-audio-controls' ).type( '{enter}' ); + cy.get( '.dashicons-controls-play' ).should( 'be.visible' ); + cy.get( '.class-post-audio-controls' ).should( + 'have.attr', + 'aria-label', + 'Play audio' + ); + } ); + + it( 'Can see the enable button in a post (Classic Editor)', () => { + cy.enableClassicEditor(); + + cy.createClassicPost( { + title: 'Text to Speech test classic', + content: + "This feature uses OpenAI's Text to Speech capabilities.", + postType: 'post', + } ); + + cy.get( '#classifai-text-to-speech-meta-box' ).should( 'exist' ); + cy.get( '#classifai_synthesize_speech' ).check(); + cy.get( '#classifai-audio-preview' ).should( 'exist' ); + + cy.visit( '/text-to-speech-test/' ); + cy.get( '.class-post-audio-controls' ).should( 'be.visible' ); + + cy.disableClassicEditor(); + } ); + + it( 'Disable support for post type Post', () => { + cy.disableClassicEditor(); + + cy.visit( + '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_text_to_speech_generation' + ); + cy.get( + '#classifai_feature_text_to_speech_generation_post_types_post' + ).uncheck( 'post' ); + cy.get( '#submit' ).click(); + + cy.visit( '/text-to-speech-test/' ); + cy.get( '.class-post-audio-controls' ).should( 'not.exist' ); + } ); + + it( 'Can enable/disable text to speech feature', () => { + // Disable feature. + cy.visit( + '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_text_to_speech_generation' + ); + cy.get( '#status' ).uncheck(); + cy.get( '#submit' ).click(); + + // Verify that the feature is not available. + cy.verifyTextToSpeechEnabled( false ); + + // Enable feature. + cy.visit( + '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_text_to_speech_generation' + ); + cy.get( '#status' ).check(); + cy.get( + '#classifai_feature_text_to_speech_generation_post_types_post' + ).check( 'post' ); + cy.get( '#submit' ).click(); + + // Verify that the feature is available. 
+ cy.verifyTextToSpeechEnabled( true ); + } ); + + it( 'Can enable/disable text to speech feature by role', () => { + // Enable feature. + cy.visit( + '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_text_to_speech_generation' + ); + cy.get( + '#classifai_feature_text_to_speech_generation_post_types_post' + ).check( 'post' ); + cy.get( '#submit' ).click(); + + // Disable admin role. + cy.disableFeatureForRoles( 'feature_text_to_speech_generation', [ + 'administrator', + ] ); + + // Verify that the feature is not available. + cy.verifyTextToSpeechEnabled( false ); + + // Enable admin role. + cy.enableFeatureForRoles( 'feature_text_to_speech_generation', [ + 'administrator', + ] ); + + // Verify that the feature is available. + cy.verifyTextToSpeechEnabled( true ); + } ); + + it( 'Can enable/disable text to speech feature by user', () => { + // Disable admin role. + cy.disableFeatureForRoles( 'feature_text_to_speech_generation', [ + 'administrator', + ] ); + + // Verify that the feature is not available. + cy.verifyTextToSpeechEnabled( false ); + + // Enable feature for admin user. + cy.enableFeatureForUsers( 'feature_text_to_speech_generation', [ + 'admin', + ] ); + + // Verify that the feature is available. + cy.verifyTextToSpeechEnabled( true ); + } ); + + it( 'User can opt-out text to speech feature', () => { + // Enable user based opt-out. + cy.enableFeatureOptOut( 'feature_text_to_speech_generation' ); + + // opt-out + cy.optOutFeature( 'feature_text_to_speech_generation' ); + + // Verify that the feature is not available. + cy.verifyTextToSpeechEnabled( false ); + + // opt-in + cy.optInFeature( 'feature_text_to_speech_generation' ); + + // Verify that the feature is available. + cy.verifyTextToSpeechEnabled( true ); + } ); +} ); diff --git a/tests/test-plugin/e2e-test-plugin.php b/tests/test-plugin/e2e-test-plugin.php index c6ce7a891..ac813b92e 100644 --- a/tests/test-plugin/e2e-test-plugin.php +++ b/tests/test-plugin/e2e-test-plugin.php @@ -58,7 +58,10 @@ function classifai_test_mock_http_requests( $preempt, $parsed_args, $url ) { ), 'body' => file_get_contents( __DIR__ . '/text-to-speech-voices.json' ), ); - } elseif ( strpos( $url, 'https://service.com/cognitiveservices/v1' ) !== false ) { + } elseif ( + strpos( $url, 'https://service.com/cognitiveservices/v1' ) !== false + || strpos( $url, 'https://api.openai.com/v1/audio/speech' ) !== false + ) { return array( 'response' => array( 'code' => 200, From 9542f6ea2522e288bc4caf90aeda050c02315529 Mon Sep 17 00:00:00 2001 From: Siddharth Thevaril Date: Thu, 14 Mar 2024 19:08:07 +0530 Subject: [PATCH 04/13] add speed and response format support --- includes/Classifai/Providers/OpenAI/TextToSpeech.php | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/includes/Classifai/Providers/OpenAI/TextToSpeech.php b/includes/Classifai/Providers/OpenAI/TextToSpeech.php index 27b70a89c..e9b2e8773 100644 --- a/includes/Classifai/Providers/OpenAI/TextToSpeech.php +++ b/includes/Classifai/Providers/OpenAI/TextToSpeech.php @@ -278,9 +278,11 @@ public function synthesize_speech( int $post_id ) { // Create the request body to synthesize speech from text. 
$request_body = array( - 'model' => $settings[ static::ID ]['tts_model'], - 'voice' => $settings[ static::ID ]['voice'], - 'input' => $post_content, + 'model' => $settings[ static::ID ]['tts_model'], + 'voice' => $settings[ static::ID ]['voice'], + 'response_format' => $settings[ static::ID ]['format'], + 'speed' => (float) $settings[ static::ID ]['speed'], + 'input' => $post_content, ); // Request parameters. From 77060065a6feacfcb3ca2c5d996a4350cef7f089 Mon Sep 17 00:00:00 2001 From: Siddharth Thevaril Date: Thu, 14 Mar 2024 19:13:04 +0530 Subject: [PATCH 05/13] fix tests --- .../language-processing/text-to-speech-microsoft-azure.test.js | 3 ++- ...ch.test.js => text-to-speech-openai-text-to-speech.test.js} | 0 2 files changed, 2 insertions(+), 1 deletion(-) rename tests/cypress/integration/language-processing/{text-to-speech-openai-text-to-sppech.test.js => text-to-speech-openai-text-to-speech.test.js} (100%) diff --git a/tests/cypress/integration/language-processing/text-to-speech-microsoft-azure.test.js b/tests/cypress/integration/language-processing/text-to-speech-microsoft-azure.test.js index 193b9fa80..74ce13d4c 100644 --- a/tests/cypress/integration/language-processing/text-to-speech-microsoft-azure.test.js +++ b/tests/cypress/integration/language-processing/text-to-speech-microsoft-azure.test.js @@ -4,11 +4,12 @@ describe( '[Language Processing] Text to Speech (Microsoft Azure) Tests', () => cy.visit( '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_text_to_speech_generation' ); + cy.get( '#provider' ).select( 'ms_azure_text_to_speech' ); cy.get( '#classifai_feature_text_to_speech_generation_post_types_post' ).check( 'post' ); cy.get( '[name="classifai_feature_text_to_speech_generation[ms_azure_text_to_speech][api_key]"]' ).clear(); - cy.get( '[name="classifai_feature_text_to_speech_generation[ms_azure_text_to_speech][api_key]"]' ).type( 'https://service.com' ); + cy.get( '[name="classifai_feature_text_to_speech_generation[ms_azure_text_to_speech][endpoint_url]"]' ).type( 'https://service.com' ); cy.get( '[name="classifai_feature_text_to_speech_generation[ms_azure_text_to_speech][api_key]"]' ).type( 'password' ); cy.get( '#status' ).check(); cy.get( '#submit' ).click(); diff --git a/tests/cypress/integration/language-processing/text-to-speech-openai-text-to-sppech.test.js b/tests/cypress/integration/language-processing/text-to-speech-openai-text-to-speech.test.js similarity index 100% rename from tests/cypress/integration/language-processing/text-to-speech-openai-text-to-sppech.test.js rename to tests/cypress/integration/language-processing/text-to-speech-openai-text-to-speech.test.js From 8009d3a1eab2e1587d60a7085630c507fe2299a7 Mon Sep 17 00:00:00 2001 From: Siddharth Thevaril Date: Thu, 14 Mar 2024 20:40:17 +0530 Subject: [PATCH 06/13] update README.md, readme.txt --- README.md | 2 +- readme.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index be24b38a5..5b67a97d2 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ Tap into leading cloud-based services like [OpenAI](https://openai.com/), [Micro * Generate new images on demand to use in-content or as a featured image using [OpenAI's DALL·E 3 API](https://platform.openai.com/docs/guides/images) * Generate transcripts of audio files using [OpenAI's Whisper API](https://platform.openai.com/docs/guides/speech-to-text) * Moderate incoming comments for sensitive content using [OpenAI's Moderation API](https://platform.openai.com/docs/guides/moderation) -* 
Convert text content into audio and output a "read-to-me" feature on the front-end to play this audio using [Microsoft Azure's Text to Speech API](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/text-to-speech) +* Convert text content into audio and output a "read-to-me" feature on the front-end to play this audio using [Microsoft Azure's Text to Speech API](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/text-to-speech) or [OpenAI's Text to Speech API](https://platform.openai.com/docs/guides/text-to-speech) * Classify post content using [IBM Watson's Natural Language Understanding API](https://www.ibm.com/watson/services/natural-language-understanding/) and [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) * BETA: Recommend content based on overall site traffic via [Microsoft Azure's AI Personalizer API](https://azure.microsoft.com/en-us/services/cognitive-services/personalizer/) *(note that this service has been [deprecated by Microsoft](https://learn.microsoft.com/en-us/azure/ai-services/personalizer/) and as such, will no longer work. We are looking to replace this with a new provider to maintain the same functionality (see [issue#392](https://github.com/10up/classifai/issues/392))* * Generate image alt text, image tags, and smartly crop images using [Microsoft Azure's AI Vision API](https://azure.microsoft.com/en-us/services/cognitive-services/computer-vision/) diff --git a/readme.txt b/readme.txt index f8de8436d..0a5e62ae3 100644 --- a/readme.txt +++ b/readme.txt @@ -23,7 +23,7 @@ Tap into leading cloud-based services like [OpenAI](https://openai.com/), [Micro * Expand or condense text content using [OpenAI's ChatGPT API](https://platform.openai.com/docs/guides/chat), [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) or [Google's Gemini API](https://ai.google.dev/docs/gemini_api_overview) * Generate new images on demand to use in-content or as a featured image using [OpenAI's DALL·E 3 API](https://platform.openai.com/docs/guides/images) * Generate transcripts of audio files using [OpenAI's Whisper API](https://platform.openai.com/docs/guides/speech-to-text) -* Convert text content into audio and output a "read-to-me" feature on the front-end to play this audio using [Microsoft Azure's Text to Speech API](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/text-to-speech) +* Convert text content into audio and output a "read-to-me" feature on the front-end to play this audio using [Microsoft Azure's Text to Speech API](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/text-to-speech) or [OpenAI's Text to Speech API](https://platform.openai.com/docs/guides/text-to-speech) * Classify post content using [IBM Watson's Natural Language Understanding API](https://www.ibm.com/watson/services/natural-language-understanding/) and [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) * BETA: Recommend content based on overall site traffic via [Microsoft Azure's AI Personalizer API](https://azure.microsoft.com/en-us/services/cognitive-services/personalizer/) _(note that this service has been deprecated by Microsoft and as such, will no longer work. 
We are looking to replace this with a new provider to maintain the same functionality)_ * Generate image alt text, image tags, and smartly crop images using [Microsoft Azure's AI Vision API](https://azure.microsoft.com/en-us/services/cognitive-services/computer-vision/) From 2cb2ee73a29abca5d516cc36298d295b9e30fe7a Mon Sep 17 00:00:00 2001 From: Darin Kotter Date: Tue, 19 Mar 2024 14:54:54 -0600 Subject: [PATCH 07/13] Minor text updates --- README.md | 27 +++++++++++++++++-- .../Providers/OpenAI/TextToSpeech.php | 8 +++--- readme.txt | 2 +- 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 5b67a97d2..7c0a008f9 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ * [Set Up OpenAI Embeddings Language Processing](#set-up-classification-via-openai-embeddings) * [Set Up OpenAI Whisper Language Processing](#set-up-audio-transcripts-generation-via-openai-whisper) * [Set Up Azure AI Language Processing](#set-up-text-to-speech-via-microsoft-azure) +* [Set Up OpenAI Text to Speech Processing](#set-up-text-to-speech-via-openai) * [Set Up Azure AI Vision Image Processing](#set-up-image-processing-features-via-microsoft-azure) * [Set Up OpenAI DALL·E Image Processing](#set-up-image-generation-via-openai) * [Set Up OpenAI Moderation Language Processing](#set-up-comment-moderation-via-openai-moderation) @@ -73,7 +74,7 @@ Tap into leading cloud-based services like [OpenAI](https://openai.com/), [Micro * PHP 7.4+ * [WordPress](http://wordpress.org) 6.1+ * To utilize the NLU Language Processing functionality, you will need an active [IBM Watson](https://cloud.ibm.com/registration) account. -* To utilize the ChatGPT, Embeddings, or Whisper Language Processing functionality or DALL·E Image Processing functionality, you will need an active [OpenAI](https://platform.openai.com/signup) account. +* To utilize the ChatGPT, Embeddings, Text to Speech or Whisper Language Processing functionality or DALL·E Image Processing functionality, you will need an active [OpenAI](https://platform.openai.com/signup) account. * To utilize the Azure AI Vision Image Processing functionality or Text to Speech Language Processing functionality, you will need an active [Microsoft Azure](https://signup.azure.com/signup) account. * To utilize the Azure OpenAI Language Processing functionality, you will need an active [Microsoft Azure](https://signup.azure.com/signup) account and you will need to [apply](https://aka.ms/oai/access) for OpenAI access. * To utilize the Google Gemini Language Processing functionality, you will need an active [Google Gemini](https://ai.google.dev/tutorials/setup) account. @@ -84,7 +85,7 @@ Note that there is no cost to using ClassifAI itself. Both IBM Watson and Micros IBM Watson's Natural Language Understanding ("NLU"), which is one of the providers that powers the classification feature, has a ["lite" pricing tier](https://www.ibm.com/cloud/watson-natural-language-understanding/pricing) that offers 30,000 free NLU items per month. -OpenAI, which is one of the providers that powers the classification, title generation, excerpt generation, content resizing, audio transcripts generation, moderation and image generation features, has a limited free trial and then requires a [pay per usage](https://openai.com/pricing) plan. 
+OpenAI, which is one of the providers that powers the classification, title generation, excerpt generation, content resizing, audio transcripts generation, text to speech, moderation and image generation features, has a limited free trial and then requires a [pay per usage](https://openai.com/pricing) plan. Microsoft Azure AI Vision, which is one of the providers that powers the descriptive text generator, image tags generator, image cropping, image text extraction and PDF text extraction features, has a ["free" pricing tier](https://azure.microsoft.com/en-us/pricing/details/cognitive-services/computer-vision/) that offers 20 transactions per minute and 5,000 transactions per month. @@ -347,6 +348,7 @@ IBM Watson's [Categories](https://cloud.ibm.com/docs/natural-language-understand ## Set Up Audio Transcripts Generation (via OpenAI Whisper) Note that [OpenAI](https://platform.openai.com/docs/guides/speech-to-text) can create a transcript for audio files that meet the following requirements: + * The file must be presented in mp3, mp4, mpeg, mpga, m4a, wav, or webm format * The file size must be less than 25 megabytes (MB) @@ -399,6 +401,27 @@ Note that [OpenAI](https://platform.openai.com/docs/guides/speech-to-text) can c * Click the button to preview the generated speech audio for the post. * View the post on the front-end and see a read-to-me feature has been added +## Set Up Text to Speech (via OpenAI) + +### 1. Sign up for OpenAI + +* [Sign up for an OpenAI account](https://platform.openai.com/signup) or sign into your existing one. +* If creating a new account, complete the verification process (requires confirming your email and phone number). +* Log into your account and go to the [API key page](https://platform.openai.com/account/api-keys). +* Click `Create new secret key` and copy the key that is shown. + +### 2. Configure OpenAI API Keys under Tools > ClassifAI > Language Processing > Text to Speech + +* Select **OpenAI Text to Speech** in the provider dropdown. +* Enter your API Key copied from the above step into the `API Key` field. + +### 3. Using the Text to Speech service + +* Assuming the post type selected is "post", create a new post and publish it. +* After a few seconds, a "Preview" button will appear under the ClassifAI settings panel. +* Click the button to preview the generated speech audio for the post. +* View the post on the front-end and see a read-to-me feature has been added + ## Set Up Image Processing features (via Microsoft Azure) Note that [Azure AI Vision](https://docs.microsoft.com/en-us/azure/cognitive-services/computer-vision/home#image-requirements) can analyze and crop images that meet the following requirements: diff --git a/includes/Classifai/Providers/OpenAI/TextToSpeech.php b/includes/Classifai/Providers/OpenAI/TextToSpeech.php index e9b2e8773..379ade717 100644 --- a/includes/Classifai/Providers/OpenAI/TextToSpeech.php +++ b/includes/Classifai/Providers/OpenAI/TextToSpeech.php @@ -78,7 +78,7 @@ public function render_provider_fields(): void { 'tts-1-hd' => __( 'Text-to-speech 1 HD (Optimized for quality)', 'classifai' ), ], 'default_value' => $settings['tts_model'], - 'description' => __( 'Select a model depending on your requirement.', 'classifai' ), + 'description' => __( 'Select a model depending on your requirements.', 'classifai' ), 'class' => 'classifai-provider-field hidden provider-scope-' . 
static::ID, ] ); @@ -101,7 +101,7 @@ public function render_provider_fields(): void { 'shimmer' => __( 'Shimmer (female)', 'classifai' ), ], 'default_value' => $settings['voice'], - 'description' => __( 'Select the speech voice.', 'classifai' ), + 'description' => __( 'Select the desired speech voice.', 'classifai' ), 'class' => 'classifai-provider-field hidden provider-scope-' . static::ID, ] ); @@ -124,7 +124,7 @@ public function render_provider_fields(): void { 'pcm' => __( '.pcm', 'classifai' ), ], 'default_value' => $settings['format'], - 'description' => __( 'Select the audio format.', 'classifai' ), + 'description' => __( 'Select the desired audio format.', 'classifai' ), 'class' => 'classifai-provider-field hidden provider-scope-' . static::ID, ] ); @@ -143,7 +143,7 @@ public function render_provider_fields(): void { 'max' => 4, 'step' => 0.25, 'default_value' => $settings['speed'], - 'description' => __( 'Select the speed of the generated audio.', 'classifai' ), + 'description' => __( 'Select the desired speed of the generated audio.', 'classifai' ), 'class' => 'classifai-provider-field hidden provider-scope-' . static::ID, ] ); diff --git a/readme.txt b/readme.txt index 0a5e62ae3..148f8a050 100644 --- a/readme.txt +++ b/readme.txt @@ -33,7 +33,7 @@ Tap into leading cloud-based services like [OpenAI](https://openai.com/), [Micro **Requirements** * To utilize the NLU Language Processing functionality, you will need an active [IBM Watson](https://cloud.ibm.com/registration) account. -* To utilize the ChatGPT, Embeddings, or Whisper Language Processing functionality or DALL·E Image Processing functionality, you will need an active [OpenAI](https://platform.openai.com/signup) account. +* To utilize the ChatGPT, Embeddings, Text to Speech or Whisper Language Processing functionality or DALL·E Image Processing functionality, you will need an active [OpenAI](https://platform.openai.com/signup) account. * To utilize the Azure AI Vision Image Processing functionality or Text to Speech Language Processing functionality, you will need an active [Microsoft Azure](https://signup.azure.com/signup) account. * To utilize the Azure OpenAI Language Processing functionality, you will need an active [Microsoft Azure](https://signup.azure.com/signup) account and you will need to [apply](https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUNTZBNzRKNlVQSFhZMU9aV09EVzYxWFdORCQlQCN0PWcu) for OpenAI access. * To utilize the Google Gemini Language Processing functionality, you will need an active [Google Gemini](https://ai.google.dev/tutorials/setup) account. 
From b6e6658d308f12fdb4c6c4ab8b8d51e2ccfbf370 Mon Sep 17 00:00:00 2001 From: Siddharth Thevaril Date: Fri, 29 Mar 2024 20:42:13 +0530 Subject: [PATCH 08/13] limit audio format options & add link to doc --- .../Providers/OpenAI/TextToSpeech.php | 29 +++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/includes/Classifai/Providers/OpenAI/TextToSpeech.php b/includes/Classifai/Providers/OpenAI/TextToSpeech.php index e9b2e8773..f0624c702 100644 --- a/includes/Classifai/Providers/OpenAI/TextToSpeech.php +++ b/includes/Classifai/Providers/OpenAI/TextToSpeech.php @@ -78,7 +78,18 @@ public function render_provider_fields(): void { 'tts-1-hd' => __( 'Text-to-speech 1 HD (Optimized for quality)', 'classifai' ), ], 'default_value' => $settings['tts_model'], - 'description' => __( 'Select a model depending on your requirement.', 'classifai' ), + 'description' => sprintf( + wp_kses( + __( 'Select a model depending on your requirement.', 'classifai' ), + [ + 'a' => [ + 'href' => [], + 'title' => [], + ], + ], + ), + esc_url( 'https://platform.openai.com/docs/models/tts' ) + ), 'class' => 'classifai-provider-field hidden provider-scope-' . static::ID, ] ); @@ -102,6 +113,18 @@ public function render_provider_fields(): void { ], 'default_value' => $settings['voice'], 'description' => __( 'Select the speech voice.', 'classifai' ), + 'description' => sprintf( + wp_kses( + __( 'Select the speech voice.', 'classifai' ), + [ + 'a' => [ + 'href' => [], + 'title' => [], + ], + ], + ), + esc_url( 'https://platform.openai.com/docs/guides/text-to-speech/voice-options' ) + ), 'class' => 'classifai-provider-field hidden provider-scope-' . static::ID, ] ); @@ -117,11 +140,7 @@ public function render_provider_fields(): void { 'label_for' => 'format', 'options' => [ 'mp3' => __( '.mp3', 'classifai' ), - 'opus' => __( '.opus', 'classifai' ), - 'aac' => __( '.aac', 'classifai' ), - 'flac' => __( '.flac', 'classifai' ), 'wav' => __( '.wav', 'classifai' ), - 'pcm' => __( '.pcm', 'classifai' ), ], 'default_value' => $settings['format'], 'description' => __( 'Select the audio format.', 'classifai' ), From f6376844d93d2c3f03aae904828cbee070c74a69 Mon Sep 17 00:00:00 2001 From: Siddharth Thevaril Date: Fri, 29 Mar 2024 21:17:55 +0530 Subject: [PATCH 09/13] use APIRequest --- .../Classifai/Providers/OpenAI/APIRequest.php | 28 ++++++++++++++----- .../Providers/OpenAI/TextToSpeech.php | 17 +++++------ 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/includes/Classifai/Providers/OpenAI/APIRequest.php b/includes/Classifai/Providers/OpenAI/APIRequest.php index 4f6f9db76..015c380a3 100644 --- a/includes/Classifai/Providers/OpenAI/APIRequest.php +++ b/includes/Classifai/Providers/OpenAI/APIRequest.php @@ -270,19 +270,33 @@ public function post_form( string $url = '', array $body = [] ) { */ public function get_result( $response ) { if ( ! is_wp_error( $response ) ) { + $headers = wp_remote_retrieve_headers( $response ); + $content_type = false; + + if ( ! is_wp_error( $headers ) ) { + $content_type = isset( $headers['content-type'] ) ? 
$headers['content-type'] : false; + } + $body = wp_remote_retrieve_body( $response ); $code = wp_remote_retrieve_response_code( $response ); - $json = json_decode( $body, true ); - if ( json_last_error() === JSON_ERROR_NONE ) { - if ( empty( $json['error'] ) ) { - return $json; + if ( $content_type && false !== strpos( $content_type, 'application/json' ) ) { + $json = json_decode( $body, true ); + + if ( json_last_error() === JSON_ERROR_NONE ) { + if ( empty( $json['error'] ) ) { + return $json; + } else { + $message = $json['error']['message'] ?? esc_html__( 'An error occured', 'classifai' ); + return new WP_Error( $code, $message ); + } } else { - $message = $json['error']['message'] ?? esc_html__( 'An error occured', 'classifai' ); - return new WP_Error( $code, $message ); + return new WP_Error( 'Invalid JSON: ' . json_last_error_msg(), $body ); } + } elseif ( $content_type && false !== strpos( $content_type, 'audio/mpeg' ) ) { + return $response; } else { - return new WP_Error( 'Invalid JSON: ' . json_last_error_msg(), $body ); + return new WP_Error( 'Invalid content type', $response ); } } else { return $response; diff --git a/includes/Classifai/Providers/OpenAI/TextToSpeech.php b/includes/Classifai/Providers/OpenAI/TextToSpeech.php index 05c3415a4..77a7080f6 100644 --- a/includes/Classifai/Providers/OpenAI/TextToSpeech.php +++ b/includes/Classifai/Providers/OpenAI/TextToSpeech.php @@ -282,6 +282,7 @@ public function synthesize_speech( int $post_id ) { $post_content = $feature->normalize_post_content( $post_id ); $content_hash = get_post_meta( $post_id, FeatureTextToSpeech::AUDIO_HASH_KEY, true ); $saved_attachment_id = (int) get_post_meta( $post_id, $feature::AUDIO_ID_KEY, true ); + $request = new APIRequest( $settings[ static::ID ]['api_key'] ?? '', $feature->get_option_name() ); // Don't regenerate the audio file it it already exists and the content hasn't changed. if ( $saved_attachment_id ) { @@ -314,7 +315,12 @@ public function synthesize_speech( int $post_id ) { ), ); - $response = wp_remote_post( $this->api_url, $request_params ); + $response = $request->post( + $this->api_url, + [ + 'body' => wp_json_encode( $request_body ), + ] + ); if ( is_wp_error( $response ) ) { return new WP_Error( @@ -323,17 +329,8 @@ public function synthesize_speech( int $post_id ) { ); } - $code = wp_remote_retrieve_response_code( $response ); $response_body = wp_remote_retrieve_body( $response ); - // return error if HTTP status code is not 200. - if ( \WP_Http::OK !== $code ) { - return new WP_Error( - 'openai_text_to_speech_unsuccessful_request', - esc_html__( 'HTTP request unsuccessful.', 'classifai' ) - ); - } - update_post_meta( $post_id, FeatureTextToSpeech::AUDIO_HASH_KEY, md5( $post_content ) ); return $response_body; From 416c03defdb295152603269de208304ebcb44d02 Mon Sep 17 00:00:00 2001 From: Siddharth Thevaril Date: Fri, 29 Mar 2024 21:31:05 +0530 Subject: [PATCH 10/13] add the get_debug_information method --- .../Providers/OpenAI/TextToSpeech.php | 37 +++++++++++++------ 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/includes/Classifai/Providers/OpenAI/TextToSpeech.php b/includes/Classifai/Providers/OpenAI/TextToSpeech.php index 77a7080f6..4b4d19e99 100644 --- a/includes/Classifai/Providers/OpenAI/TextToSpeech.php +++ b/includes/Classifai/Providers/OpenAI/TextToSpeech.php @@ -304,17 +304,6 @@ public function synthesize_speech( int $post_id ) { 'input' => $post_content, ); - // Request parameters. 
- $request_params = array( - 'method' => 'POST', - 'body' => wp_json_encode( $request_body ), - 'timeout' => 60, // phpcs:ignore WordPressVIPMinimum.Performance.RemoteRequestTimeout.timeout_timeout - 'headers' => array( - 'Authorization' => 'Bearer ' . $settings[ static::ID ]['api_key'], - 'Content-Type' => 'application/json', - ), - ); - $response = $request->post( $this->api_url, [ @@ -335,4 +324,30 @@ public function synthesize_speech( int $post_id ) { return $response_body; } + + /** + * Returns the debug information for the provider settings. + * + * @return array + */ + public function get_debug_information(): array { + $settings = $this->feature_instance->get_settings(); + $provider_settings = $settings[ static::ID ]; + $debug_info = []; + + if ( $this->feature_instance instanceof FeatureTextToSpeech ) { + $debug_info[ __( 'Model', 'classifai' ) ] = $provider_settings['tts_model'] ?? ''; + $debug_info[ __( 'Voice', 'classifai' ) ] = $provider_settings['voice'] ?? ''; + $debug_info[ __( 'Audio format', 'classifai' ) ] = $provider_settings['format'] ?? ''; + + // We don't save the response transient because WP does not support serialized binary data to be inserted to the options. + } + + return apply_filters( + 'classifai_' . self::ID . '_debug_information', + $debug_info, + $settings, + $this->feature_instance + ); + } } From 45a2b26040470dfaf084770fbbd2598d8ba4396f Mon Sep 17 00:00:00 2001 From: Siddharth Thevaril Date: Fri, 29 Mar 2024 21:47:06 +0530 Subject: [PATCH 11/13] add check for content length > 4096 --- includes/Classifai/Providers/OpenAI/TextToSpeech.php | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/includes/Classifai/Providers/OpenAI/TextToSpeech.php b/includes/Classifai/Providers/OpenAI/TextToSpeech.php index 4b4d19e99..873f1d4ef 100644 --- a/includes/Classifai/Providers/OpenAI/TextToSpeech.php +++ b/includes/Classifai/Providers/OpenAI/TextToSpeech.php @@ -284,6 +284,13 @@ public function synthesize_speech( int $post_id ) { $saved_attachment_id = (int) get_post_meta( $post_id, $feature::AUDIO_ID_KEY, true ); $request = new APIRequest( $settings[ static::ID ]['api_key'] ?? '', $feature->get_option_name() ); + if ( mb_strlen( $post_content ) > 4096 ) { + return new WP_Error( + 'openai_text_to_speech_content_too_long', + esc_html__( 'Character length should not exceed beyond 4096 characters.', 'classifai' ) + ); + } + // Don't regenerate the audio file it it already exists and the content hasn't changed. 
if ( $saved_attachment_id ) { From 78bc9f31bf075f4639fa24e1cea04ec2fff2a4f5 Mon Sep 17 00:00:00 2001 From: Siddharth Thevaril Date: Sat, 30 Mar 2024 00:21:07 +0530 Subject: [PATCH 12/13] fix failing tests --- includes/Classifai/Providers/OpenAI/APIRequest.php | 2 +- .../image-processing/image-generation-openai-dalle.test.js | 1 + tests/test-plugin/e2e-test-plugin.php | 7 +++++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/includes/Classifai/Providers/OpenAI/APIRequest.php b/includes/Classifai/Providers/OpenAI/APIRequest.php index 015c380a3..bcc9015a4 100644 --- a/includes/Classifai/Providers/OpenAI/APIRequest.php +++ b/includes/Classifai/Providers/OpenAI/APIRequest.php @@ -280,7 +280,7 @@ public function get_result( $response ) { $body = wp_remote_retrieve_body( $response ); $code = wp_remote_retrieve_response_code( $response ); - if ( $content_type && false !== strpos( $content_type, 'application/json' ) ) { + if ( false === $content_type || false !== strpos( $content_type, 'application/json' ) ) { $json = json_decode( $body, true ); if ( json_last_error() === JSON_ERROR_NONE ) { diff --git a/tests/cypress/integration/image-processing/image-generation-openai-dalle.test.js b/tests/cypress/integration/image-processing/image-generation-openai-dalle.test.js index 3f676e1ee..f55bd7680 100644 --- a/tests/cypress/integration/image-processing/image-generation-openai-dalle.test.js +++ b/tests/cypress/integration/image-processing/image-generation-openai-dalle.test.js @@ -38,6 +38,7 @@ describe( 'Image Generation (OpenAI DALL·E) Tests', () => { ).check(); cy.get( '#submit' ).click(); + cy.get( '.notice' ).contains( 'Settings saved.' ); } ); it( 'Can generate images in the media modal', () => { diff --git a/tests/test-plugin/e2e-test-plugin.php b/tests/test-plugin/e2e-test-plugin.php index ac813b92e..3f1660a17 100644 --- a/tests/test-plugin/e2e-test-plugin.php +++ b/tests/test-plugin/e2e-test-plugin.php @@ -63,10 +63,13 @@ function classifai_test_mock_http_requests( $preempt, $parsed_args, $url ) { || strpos( $url, 'https://api.openai.com/v1/audio/speech' ) !== false ) { return array( - 'response' => array( + 'response' => array( 'code' => 200, ), - 'body' => file_get_contents( __DIR__ . '/text-to-speech.txt' ), + 'headers' => array( + 'content-type' => 'audio/mpeg', + ), + 'body' => file_get_contents( __DIR__ . '/text-to-speech.txt' ), ); } elseif ( strpos( $url, 'https://api.openai.com/v1/embeddings' ) !== false ) { $response = file_get_contents( __DIR__ . 
'/embeddings.json' ); From 76d83b774c5fa883b1d18d31d6518808fa5c2df3 Mon Sep 17 00:00:00 2001 From: Darin Kotter Date: Mon, 1 Apr 2024 14:19:29 -0600 Subject: [PATCH 13/13] Change a few things in the AWS Polly integration to take advantage of the changes made in the TTS feature, standardizing on how all these providers work --- .../Classifai/Providers/AWS/AmazonPolly.php | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/includes/Classifai/Providers/AWS/AmazonPolly.php b/includes/Classifai/Providers/AWS/AmazonPolly.php index fe6365022..d2206b411 100644 --- a/includes/Classifai/Providers/AWS/AmazonPolly.php +++ b/includes/Classifai/Providers/AWS/AmazonPolly.php @@ -9,7 +9,6 @@ namespace Classifai\Providers\AWS; use Classifai\Providers\Provider; -use Classifai\Normalizer; use Classifai\Features\TextToSpeech; use WP_Error; use Aws\Sdk; @@ -18,14 +17,6 @@ class AmazonPolly extends Provider { const ID = 'aws_polly'; - /** - * Meta key to get/set the audio hash that helps to indicate if there is any need - * for the audio file to be regenerated or not. - * - * @var string - */ - const AUDIO_HASH_KEY = '_classifai_post_audio_hash'; - /** * AmazonPolly Text to Speech constructor. * @@ -374,12 +365,10 @@ public function synthesize_speech( int $post_id ) { ); } - $normalizer = new Normalizer(); $feature = new TextToSpeech(); $settings = $feature->get_settings(); - $post = get_post( $post_id ); - $post_content = $normalizer->normalize_content( $post->post_content, $post->post_title, $post_id ); - $content_hash = get_post_meta( $post_id, self::AUDIO_HASH_KEY, true ); + $post_content = $feature->normalize_post_content( $post_id ); + $content_hash = get_post_meta( $post_id, TextToSpeech::AUDIO_HASH_KEY, true ); $saved_attachment_id = (int) get_post_meta( $post_id, $feature::AUDIO_ID_KEY, true ); // Don't regenerate the audio file it it already exists and the content hasn't changed. @@ -453,7 +442,7 @@ public function synthesize_speech( int $post_id ) { $polly_client = $this->get_polly_client(); $result = $polly_client->synthesizeSpeech( $synthesize_data ); - update_post_meta( $post_id, self::AUDIO_HASH_KEY, md5( $post_content ) ); + update_post_meta( $post_id, TextToSpeech::AUDIO_HASH_KEY, md5( $post_content ) ); $contents = $result['AudioStream']->getContents(); return $contents; } catch ( \Exception $e ) {
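
---

Usage sketch (not part of the patch series above): the commits route speech synthesis through Classifai\Providers\OpenAI\TextToSpeech::rest_endpoint_callback(), which maps the 'synthesize' route to synthesize_speech(). The snippet below is one way a site plugin might call the new provider directly when a post is published; the hook choice and logging are assumptions for illustration only, and saving the returned audio as an attachment (normally handled by the Text to Speech feature layer) is omitted.

<?php
/**
 * Illustrative sketch only — assumes ClassifAI with the OpenAI Text to Speech
 * provider from the patches above is installed, active and configured.
 */

use Classifai\Features\TextToSpeech as TextToSpeechFeature;
use Classifai\Providers\OpenAI\TextToSpeech as OpenAITTSProvider;

add_action(
	'publish_post',
	function ( int $post_id ) {
		// Skip autosaves and revisions.
		if ( wp_is_post_autosave( $post_id ) || wp_is_post_revision( $post_id ) ) {
			return;
		}

		$feature  = new TextToSpeechFeature();
		$provider = new OpenAITTSProvider( $feature );

		// 'synthesize' is the route handled by rest_endpoint_callback() in the new provider.
		// On success it returns the raw audio body; on failure a WP_Error.
		$result = $provider->rest_endpoint_callback( $post_id, 'synthesize' );

		if ( is_wp_error( $result ) ) {
			// Hypothetical error handling for the example.
			error_log( 'ClassifAI OpenAI TTS failed: ' . $result->get_error_message() );
		}
	}
);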