diff --git a/.wp-env.json b/.wp-env.json
index d3be216b9..1fb420e93 100644
--- a/.wp-env.json
+++ b/.wp-env.json
@@ -1,5 +1,5 @@
{
- "plugins": [".", "./tests/test-plugin", "https://downloads.wordpress.org/plugin/classic-editor.zip"],
+ "plugins": [".", "./tests/test-plugin", "https://downloads.wordpress.org/plugin/classic-editor.zip", "https://downloads.wordpress.org/plugin/elasticpress.zip"],
"env": {
"tests": {
"mappings": {
diff --git a/README.md b/README.md
index aaab050ce..be7c6cf55 100644
--- a/README.md
+++ b/README.md
@@ -24,6 +24,7 @@ Tap into leading cloud-based services like [OpenAI](https://openai.com/), [Micro
* Moderate incoming comments for sensitive content using [OpenAI's Moderation API](https://platform.openai.com/docs/guides/moderation)
* Convert text content into audio and output a "read-to-me" feature on the front-end to play this audio using [Microsoft Azure's Text to Speech API](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/text-to-speech), [Amazon Polly](https://aws.amazon.com/polly/) or [OpenAI's Text to Speech API](https://platform.openai.com/docs/guides/text-to-speech)
* Classify post content using [IBM Watson's Natural Language Understanding API](https://www.ibm.com/watson/services/natural-language-understanding/), [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service)
+* Create a smart 404 page that has a recommended results section that suggests relevant content to the user based on the page URL they were trying to access using either [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) in combination with [ElasticPress](https://github.com/10up/ElasticPress)
* BETA: Recommend content based on overall site traffic via [Microsoft Azure's AI Personalizer API](https://azure.microsoft.com/en-us/services/cognitive-services/personalizer/) *(note that this service has been [deprecated by Microsoft](https://learn.microsoft.com/en-us/azure/ai-services/personalizer/) and as such, will no longer work. We are looking to replace this with a new provider to maintain the same functionality (see [issue#392](https://github.com/10up/classifai/issues/392))*
* Generate image alt text, image tags, and smartly crop images using [Microsoft Azure's AI Vision API](https://azure.microsoft.com/en-us/services/cognitive-services/computer-vision/)
* Scan images and PDF files for embedded text and save for use in post meta using [Microsoft Azure's AI Vision API](https://azure.microsoft.com/en-us/services/cognitive-services/computer-vision/)
@@ -55,6 +56,7 @@ Tap into leading cloud-based services like [OpenAI](https://openai.com/), [Micro
* To utilize the Azure OpenAI Language Processing functionality, you will need an active [Microsoft Azure](https://signup.azure.com/signup) account and you will need to [apply](https://aka.ms/oai/access) for OpenAI access.
* To utilize the Google Gemini Language Processing functionality, you will need an active [Google Gemini](https://ai.google.dev/tutorials/setup) account.
* To utilize the AWS Language Processing functionality, you will need an active [AWS](https://console.aws.amazon.com/) account.
+* To utilize the Smart 404 feature, you will need to use [ElasticPress](https://github.com/10up/ElasticPress) 5.0.0+ and [Elasticsearch](https://www.elastic.co/elasticsearch) 7.0+.
## Pricing
@@ -111,10 +113,10 @@ Add this repository to composer.json, specifying a release version, as shown bel
"type": "package",
"package": {
"name": "10up/classifai",
- "version": "2.0.0",
+ "version": "3.1.1",
"type": "wordpress-plugin",
"dist": {
- "url": "https://github.com/10up/classifai/archive/refs/tags/2.0.0.zip",
+ "url": "https://github.com/10up/classifai/archive/refs/tags/3.1.1.zip",
"type": "zip"
}
}
@@ -126,7 +128,7 @@ Finally, require the plugin, using the version number you specified in the previ
```json
"require": {
- "10up/classifai": "3.0.0"
+ "10up/classifai": "3.1.1"
}
```
@@ -440,6 +442,125 @@ Note that [OpenAI](https://platform.openai.com/docs/guides/speech-to-text) can c
* Click the button to preview the generated speech audio for the post.
* View the post on the front-end and see a read-to-me feature has been added
+## Set Up the Smart 404 Feature
+
+### 1. Decide on Provider
+
+* This Feature is powered by either OpenAI or Azure OpenAI.
+* Once you've chosen a Provider, you'll need to create an account and get authentication details.
+ * When setting things up on the Azure side, ensure you choose either the `text-embedding-3-small` or `text-embedding-3-large` model. The Feature will not work with other models.
+
+### 2. Configure Settings under Tools > ClassifAI > Language Processing > Smart 404
+
+* Select the proper Provider in the provider dropdown.
+* Enter your authentication details.
+* Configure any other settings as desired.
+
+### 3. ElasticPress configuration
+
+Once the Smart 404 Feature is configured, you can then proceed to get ElasticPress set up to index the data.
+
+If on a standard WordPress installation:
+
+* Install and activate the [ElasticPress](https://github.com/10up/elasticpress) plugin.
+* Set your Elasticsearch URL in the ElasticPress settings (`ElasticPress > Settings`).
+* Go to the `ElasticPress > Sync` settings page and trigger a sync, ensuring this is set to run a sync from scratch. This will send over the new schema to Elasticsearch and index all content, including creating vector embeddings for each post.
+
+If on a WordPress VIP hosted environment:
+
+* [Enable Enterprise Search](https://docs.wpvip.com/enterprise-search/enable/)
+* [Run the VIP-CLI `index` command](https://docs.wpvip.com/enterprise-search/index/). This sends the new schema to Elasticsearch and indexes all content, including creating vector embeddings for each post. Note you may need to use the `--setup` flag to ensure the schema is created correctly.
+
+At this point all of your content should be indexed, along with the embeddings data. You'll then need to update your 404 template to display the recommended results.
+
+### 4. Display the recommended results
+
+The Smart 404 Feature comes with a few helper functions that can be used to display the recommended results on your 404 page:
+
+* Directly display the results using the `Classifai\render_smart_404_results()` function.
+* Get the data and then display it in your own way using the `Classifai\get_smart_404_results()` function.
+
+You will need to directly integrate these functions into your 404 template where desired. The plugin does not automatically display the results on the 404 page for you.
+
+Both functions support the following arguments. If any argument is not provided, the default value set on the settings page will be used:
+
+* `$index` (string) - The ElasticPress index to search in. Default is `post`.
+* `$num` (int) - Maximum number of results to display. Default is `3`.
+* `$num_candidates` (int) - Maximum number of results to search over. Default is `5000`.
+* `$rescore` (bool) - Whether to run a rescore query or not. Can give better results but often is slower. Default is `false`.
+* `$score_function` (string) - The [vector scoring function](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-script-score-query.html#vector-functions) to use. Default is `cosine`. Options are `cosine`, `dot_product`, `l1_norm` and `l2_norm`.
+
+The `Classifai\render_smart_404_results()` function also supports the following additional arguments:
+
+* `$fallback` (bool) - Whether to run a fallback WordPress query if no results are found in Elasticsearch. These results will then be rendered. Default is `true`.
+
+Examples:
+
+```php
+// Render the results.
+Classifai\render_smart_404_results(
+ [
+ 'index' => 'post',
+ 'num' => 3,
+ 'num_candidates' => 1000,
+ 'rescore' => true,
+ 'fallback' => true,
+ 'score_function' => 'dot_product',
+ ]
+);
+```
+
+```php
+// Get the results.
+$results = Classifai\get_smart_404_results(
+ [
+ 'index' => 'post',
+ 'num' => 10,
+ 'num_candidates' => 8000,
+ 'rescore' => false,
+ 'score_function' => 'cosine',
+ ]
+);
+
+ob_start();
+
+// Render the results.
+foreach ( $results as $result ) {
+?>
+
+
diff --git a/includes/Classifai/Features/Smart404.php b/includes/Classifai/Features/Smart404.php
new file mode 100644
index 000000000..08fe1797d
--- /dev/null
+++ b/includes/Classifai/Features/Smart404.php
@@ -0,0 +1,362 @@
+label = __( 'Smart 404', 'classifai' );
+
+ // Contains all providers that are registered to the service.
+ $this->provider_instances = $this->get_provider_instances( LanguageProcessing::get_service_providers() );
+
+ // Contains just the providers this feature supports.
+ $this->supported_providers = [
+ OpenAIEmbeddings::ID => __( 'OpenAI Embeddings', 'classifai' ),
+ AzureEmbeddings::ID => __( 'Azure OpenAI Embeddings', 'classifai' ),
+ ];
+ }
+
+ /**
+ * Setup any needed integrations.
+ *
+ * This will always fire even if the Feature is not enabled
+ * so we add our own check.
+ */
+ public function setup() {
+ // Ensure ElasticPress is installed before we proceed.
+ if ( ! is_elasticpress_installed() ) {
+ $warning_notice_func = function ( $current_feature ) {
+ if ( self::ID !== $current_feature ) {
+ return;
+ }
+
+ echo '';
+ ?>
+
+
+
+ is_configured() && $this->is_enabled() ) {
+ $integration = new Smart404EPIntegration( $this->get_feature_provider_instance() );
+ $integration->init();
+ }
+ }
+
+	/**
+	 * Return the description text used for the enable field.
+	 *
+	 * @return string Escaped, translated description.
+	 */
+	public function get_enable_description(): string {
+		$description = esc_html__( 'Enable Smart 404 functionality.', 'classifai' );
+
+		return $description;
+	}
+
+	/**
+	 * Register the custom settings fields for this Feature.
+	 *
+	 * Every field is described once in a list keyed by its field ID and then
+	 * registered in that order. The field ID doubles as the `label_for` value.
+	 */
+	public function add_custom_settings_fields() {
+		$settings    = $this->get_settings();
+		$option_name = $this->get_option_name();
+
+		$fields = [
+			'num'            => [
+				'title'    => esc_html__( 'Number of posts to show', 'classifai' ),
+				'renderer' => 'render_input',
+				'args'     => [
+					'input_type'    => 'number',
+					'min'           => 1,
+					'step'          => 1,
+					'default_value' => $settings['num'],
+					'description'   => __( 'Determines the maximum number of posts that will show on a 404 page. This can be overridden in the display functions.', 'classifai' ),
+				],
+			],
+			'num_search'     => [
+				'title'    => esc_html__( 'Number of posts to search', 'classifai' ),
+				'renderer' => 'render_input',
+				'args'     => [
+					'input_type'    => 'number',
+					'min'           => 1,
+					'step'          => 1,
+					'default_value' => $settings['num_search'],
+					'description'   => __( 'Determines the maximum number of posts Elasticsearch will use for the vector search. A higher number can give more accurate results but will be slower. This can be overridden in the display functions.', 'classifai' ),
+				],
+			],
+			'threshold'      => [
+				'title'    => esc_html__( 'Threshold', 'classifai' ),
+				'renderer' => 'render_input',
+				'args'     => [
+					'input_type'    => 'number',
+					'min'           => 0,
+					'step'          => 0.01,
+					'default_value' => $settings['threshold'],
+					'description'   => __( 'Set the minimum threshold we want for our results. Any result that falls below this number will be automatically removed.', 'classifai' ),
+				],
+			],
+			'rescore'        => [
+				'title'    => esc_html__( 'Use rescore query', 'classifai' ),
+				'renderer' => 'render_input',
+				'args'     => [
+					'input_type'    => 'checkbox',
+					'default_value' => $settings['rescore'],
+					'description'   => __( 'Will run a normal Elasticsearch query and then rescore those results using a vector query. Can give better results but often results in worse performance. This can be overridden in the display functions', 'classifai' ),
+				],
+			],
+			'fallback'       => [
+				'title'    => esc_html__( 'Use fallback results', 'classifai' ),
+				'renderer' => 'render_input',
+				'args'     => [
+					'input_type'    => 'checkbox',
+					'default_value' => $settings['fallback'],
+					'description'   => __( 'If no results are found in Elasticsearch, will fallback to displaying most recent results from WordPress. This can be overridden in the display functions', 'classifai' ),
+				],
+			],
+			'score_function' => [
+				'title'    => esc_html__( 'Score function', 'classifai' ),
+				'renderer' => 'render_select',
+				'args'     => [
+					'options'       => [
+						'cosine'      => __( 'Cosine', 'classifai' ),
+						'dot_product' => __( 'Dot Product', 'classifai' ),
+						'l1_norm'     => __( 'L1 Norm', 'classifai' ),
+						'l2_norm'     => __( 'L2 Norm', 'classifai' ),
+					],
+					'default_value' => $settings['score_function'],
+					'description'   => __( 'Choose which vector scoring function you want to use. You may need to adjust the threshold if you change this. This can be overridden in the display functions', 'classifai' ),
+				],
+			],
+		];
+
+		foreach ( $fields as $field_id => $field ) {
+			add_settings_field(
+				$field_id,
+				$field['title'],
+				[ $this, $field['renderer'] ],
+				$option_name,
+				$option_name . '_section',
+				// Array union keeps `label_for` as the first key of the arguments.
+				[ 'label_for' => $field_id ] + $field['args']
+			);
+		}
+	}
+
+	/**
+	 * Build the default settings for the Feature.
+	 *
+	 * @return array Default values keyed by setting name.
+	 */
+	public function get_feature_default_settings(): array {
+		$defaults = [];
+
+		$defaults['provider']       = OpenAIEmbeddings::ID;
+		$defaults['num']            = 3;
+		$defaults['num_search']     = 5000;
+		$defaults['threshold']      = 2.35;
+		$defaults['rescore']        = 0;
+		$defaults['fallback']       = 1;
+		$defaults['score_function'] = 'cosine';
+
+		return $defaults;
+	}
+
+	/**
+	 * Sanitize the Feature settings that are about to be saved.
+	 *
+	 * Numeric fields fall back to the currently stored value when they are not
+	 * submitted. Checkboxes are stored as '1' (checked) or 'no' (unchecked).
+	 * The score function falls back to 'cosine' when it is missing or unknown.
+	 *
+	 * @param array $new_settings Settings being saved.
+	 * @return array Sanitized settings.
+	 */
+	public function sanitize_default_feature_settings( array $new_settings ): array {
+		$current = $this->get_settings();
+
+		$new_settings['num']        = absint( $new_settings['num'] ?? $current['num'] );
+		$new_settings['num_search'] = absint( $new_settings['num_search'] ?? $current['num_search'] );
+		$new_settings['threshold']  = floatval( $new_settings['threshold'] ?? $current['threshold'] );
+
+		foreach ( [ 'rescore', 'fallback' ] as $checkbox ) {
+			$is_checked = ! empty( $new_settings[ $checkbox ] ) && 1 === (int) $new_settings[ $checkbox ];
+
+			$new_settings[ $checkbox ] = $is_checked ? '1' : 'no';
+		}
+
+		$allowed_functions = [ 'cosine', 'dot_product', 'l1_norm', 'l2_norm' ];
+		$chosen_function   = $new_settings['score_function'] ?? null;
+
+		$new_settings['score_function'] = in_array( $chosen_function, $allowed_functions, true )
+			? sanitize_text_field( $chosen_function )
+			: 'cosine';
+
+		return $new_settings;
+	}
+
+	/**
+	 * Run an exact k-NN search.
+	 *
+	 * Depending on the `rescore` argument this either runs a pure vector
+	 * search or a normal search rescored by a vector query. Results scoring
+	 * below the configured threshold are removed. When `fallback` is enabled,
+	 * the most recent posts are returned if the search errors or nothing
+	 * passes the threshold.
+	 *
+	 * The `rescore` and `fallback` flags are treated as disabled when they are
+	 * 'no', false, 0 or '0'; any other value enables them.
+	 *
+	 * @param string $query Query to search for.
+	 * @param array  $args  Arguments to pass to the search.
+	 * @return array|WP_Error
+	 */
+	public function exact_knn_search( string $query, array $args = [] ) {
+		// Ensure the Feature is enabled and configured before trying to use it.
+		if ( ! is_elasticpress_installed() || ! $this->is_configured() || ! $this->is_enabled() ) {
+			return new WP_Error( 'not_enabled', __( 'Feature is not enabled.', 'classifai' ) );
+		}
+
+		// Ensure we have a query.
+		if ( empty( $query ) ) {
+			return new WP_Error( 'no_query', __( 'No query provided.', 'classifai' ) );
+		}
+
+		$settings = $this->get_settings();
+
+		// Parse the arguments, setting our defaults.
+		$args = wp_parse_args(
+			$args,
+			[
+				'index'          => 'post',
+				'post_type'      => [ 'post' ],
+				'num'            => $settings['num'] ?? 5,
+				'num_candidates' => $settings['num_search'] ?? 5000,
+				// Rescoring is off by default, matching the Feature's default settings.
+				'rescore'        => $settings['rescore'] ?? 'no',
+				'fallback'       => $settings['fallback'] ?? '1',
+				'score_function' => $settings['score_function'] ?? 'cosine',
+			]
+		);
+
+		/**
+		 * Filter the arguments before running the search.
+		 *
+		 * @hook classifai_smart_404_exact_knn_search_args
+		 *
+		 * @param array  $args  Arguments to pass to the search.
+		 * @param string $query Query to search for.
+		 */
+		$args = apply_filters( 'classifai_smart_404_exact_knn_search_args', $args, $query );
+
+		// Ensure our post types are set as an array.
+		if ( ! is_array( $args['post_type'] ) ) {
+			$args['post_type'] = [ $args['post_type'] ];
+		}
+
+		// Flags can arrive as the saved 'no'/'1' strings, the 0/1 defaults or real booleans.
+		$is_disabled = static function ( $flag ): bool {
+			return 'no' === $flag || false === $flag || 0 === $flag || '0' === $flag;
+		};
+
+		$use_rescore  = ! $is_disabled( $args['rescore'] );
+		$use_fallback = ! $is_disabled( $args['fallback'] );
+
+		$integration = new Smart404EPIntegration( $this->get_feature_provider_instance() );
+
+		// Run our search. Note that this will take our query and generate embeddings for it.
+		if ( $use_rescore ) {
+			$results = $integration->search_rescored_by_exact_knn( $query, $args );
+		} else {
+			$results = $integration->exact_knn_search( $query, $args );
+		}
+
+		// Ensure we have a good response.
+		if ( is_wp_error( $results ) ) {
+			// If we have fallback enabled, return those results.
+			if ( $use_fallback ) {
+				return $this->fallback_results( $args );
+			}
+
+			// translators: %s is the error message.
+			return new WP_Error( 'error', sprintf( __( 'Error making request: %s.', 'classifai' ), $results->get_error_message() ) );
+		}
+
+		// Resolve the threshold once; the fallback must apply to the setting, not to the comparison result.
+		$threshold = (float) ( $settings['threshold'] ?? 2.35 );
+
+		// Filter out any results that are below a certain score.
+		$results = array_filter(
+			$results,
+			function ( $result ) use ( $threshold ) {
+				return (float) ( $result['score'] ?? 0 ) >= $threshold;
+			}
+		);
+
+		// If we have no results after filtering and fallback is enabled, return those results.
+		if ( empty( $results ) && $use_fallback ) {
+			return $this->fallback_results( $args );
+		}
+
+		return $results;
+	}
+
+	/**
+	 * Query WordPress for the most recent published posts as fallback results.
+	 *
+	 * @param array $args Arguments to pass to the search. Only `num` is used.
+	 * @return array|WP_Error Array of posts, or an error if the Feature is
+	 *                        not usable or no posts were found.
+	 */
+	public function fallback_results( array $args = [] ) {
+		// The Feature must be both configured and enabled.
+		if ( ! ( $this->is_configured() && $this->is_enabled() ) ) {
+			return new WP_Error( 'not_enabled', __( 'Feature is not enabled.', 'classifai' ) );
+		}
+
+		$settings = $this->get_settings();
+		$defaults = [
+			'num' => $settings['num'] ?? 5,
+		];
+		$args     = wp_parse_args( $args, $defaults );
+
+		$query_args = [
+			'post_type'      => 'post',
+			'posts_per_page' => $args['num'],
+			'post_status'    => 'publish',
+			'orderby'        => 'date',
+			'order'          => 'DESC',
+		];
+		$recent     = new WP_Query( $query_args );
+
+		if ( $recent->have_posts() ) {
+			return $recent->posts;
+		}
+
+		return new WP_Error( 'no_results', __( 'No results found.', 'classifai' ) );
+	}
+}
diff --git a/includes/Classifai/Features/Smart404EPIntegration.php b/includes/Classifai/Features/Smart404EPIntegration.php
new file mode 100644
index 000000000..d1748fbcf
--- /dev/null
+++ b/includes/Classifai/Features/Smart404EPIntegration.php
@@ -0,0 +1,681 @@
+embeddings_handler = $provider;
+ $this->es_version = ! $provider ? '7.0' : Elasticsearch::factory()->get_elasticsearch_version();
+ $this->tokenizer = ! $this->embeddings_handler ? new Tokenizer( 8191 ) : new Tokenizer( (int) $this->embeddings_handler->get_max_tokens() );
+
+ if ( $provider ) {
+ if ( 'openai_embeddings' === $provider::ID ) {
+ $this->embeddings_meta_key = 'classifai_openai_embeddings';
+ } elseif ( 'azure_openai_embeddings' === $provider::ID ) {
+ $this->embeddings_meta_key = 'classifai_azure_openai_embeddings';
+ }
+ }
+ }
+
+	/**
+	 * Initialize the class and register the needed hooks.
+	 *
+	 * Nothing is registered when there is no embeddings provider, when the
+	 * Elasticsearch version is unknown, or when it is older than 7.0.
+	 */
+	public function init() {
+		// The registered callbacks call methods on the embeddings handler, so
+		// they cannot work without one.
+		if ( ! $this->embeddings_handler ) {
+			return;
+		}
+
+		// Vector support was added in Elasticsearch 7.0, so only older versions are skipped.
+		if ( ! $this->es_version || version_compare( $this->es_version, '7.0', '<' ) ) {
+			return;
+		}
+
+		add_filter( 'ep_post_mapping', [ $this, 'add_post_vector_field_mapping' ] );
+		add_filter( 'ep_prepare_meta_excluded_public_keys', [ $this, 'exclude_vector_meta' ] );
+		add_filter( 'ep_post_sync_args_post_prepare_meta', [ $this, 'add_vector_field_to_post_sync' ], 10, 2 );
+		add_filter( 'ep_retrieve_the_post', [ $this, 'add_score_field_to_document' ], 10, 2 );
+	}
+
+	/**
+	 * Add our vector field mapping to the Elasticsearch post index.
+	 *
+	 * Adds a nested `chunks` property holding a `dense_vector` field. Extra
+	 * vector options are added depending on the Elasticsearch version:
+	 * `index` and `similarity` from 8.0, `element_type` from 8.6 and, when
+	 * quantization is requested, the `int8_hnsw` index type from 8.12.
+	 *
+	 * @param array $mapping      Current mapping.
+	 * @param bool  $quantization Whether to use quantization for the vector field. Default true.
+	 * @return array
+	 */
+	public function add_post_vector_field_mapping( array $mapping, bool $quantization = true ): array {
+		// Don't add the field if it already exists.
+		if ( isset( $mapping['mappings']['properties']['chunks'] ) ) {
+			return $mapping;
+		}
+
+		// The default vector field definition.
+		$vector = [
+			'type' => 'dense_vector',
+			'dims' => (int) $this->embeddings_handler->get_dimensions(),
+		];
+
+		// Add extra vector fields for newer versions of Elasticsearch.
+		if ( version_compare( $this->es_version, '8.0', '>=' ) ) {
+			// The index (true or false, default true) and similarity (l2_norm, dot_product or cosine) fields
+			// were added in 8.0. The similarity field must be set if index is true.
+			$vector['index']      = true;
+			$vector['similarity'] = 'cosine';
+
+			// The element_type field was added in 8.6. This can be either float (default) or byte.
+			if ( version_compare( $this->es_version, '8.6', '>=' ) ) {
+				$vector['element_type'] = 'float';
+			}
+
+			// The int8_hnsw type was added in 8.12.
+			if ( $quantization && version_compare( $this->es_version, '8.12', '>=' ) ) {
+				// This is supposed to result in better performance but slightly less accurate results.
+				// See https://www.elastic.co/guide/en/elasticsearch/reference/8.13/knn-search.html#knn-search-quantized-example.
+				// Can test with this on and off and compare results to see what works best.
+				$vector['index_options']['type'] = 'int8_hnsw';
+			}
+		}
+
+		$mapping['mappings']['properties']['chunks'] = [
+			'type'       => 'nested',
+			'properties' => [
+				'vector' => $vector,
+			],
+		];
+
+		return $mapping;
+	}
+
+	/**
+	 * Keep the embeddings and content-hash meta keys out of the sync.
+	 *
+	 * @param array $excluded_keys Current excluded keys.
+	 * @return array Excluded keys with our two meta keys appended.
+	 */
+	public function exclude_vector_meta( array $excluded_keys ): array {
+		array_push( $excluded_keys, $this->embeddings_meta_key, $this->content_hash_meta_key );
+
+		return $excluded_keys;
+	}
+
+ /**
+ * Add the embedding data to the post vector sync args.
+ *
+ * Stored embeddings are reused when the MD5 hash of the content (post slug
+ * plus normalized content) matches the stored hash. Otherwise the content
+ * is chunked and new embeddings are requested, either one chunk at a time
+ * or all chunks in a single request. Each embedding is added as one entry
+ * of `$args['chunks']`. If no embeddings are available the args are
+ * returned unchanged.
+ *
+ * @param array $args Current sync args.
+ * @param int $post_id Post ID being synced.
+ * @return array
+ */
+ public function add_vector_field_to_post_sync( array $args, int $post_id ): array {
+ // No need to add vector data if no content exists.
+ $post = get_post( $post_id );
+ if ( empty( $post->post_content ) ) {
+ return $args;
+ }
+
+ // Try to use the stored embeddings first if content hasn't changed.
+ $embeddings = get_post_meta( $post_id, $this->embeddings_meta_key, true );
+ $content_hash = get_post_meta( $post_id, $this->content_hash_meta_key, true );
+
+ // This will include the post title and post content combined.
+ $content = $this->embeddings_handler->get_normalized_content( $post_id, 'post' );
+
+ // Add the post slug to our content as well.
+ $content = $post->post_name . ".\n\n" . $content;
+
+ // If they don't exist or content has changed, make API requests to generate them.
+ if ( ! $embeddings || md5( $content ) !== $content_hash ) {
+ $embeddings = [];
+
+ // Chunk the content into smaller pieces.
+ $content_chunks = $this->embeddings_handler->chunk_content( $content );
+
+ // Get the embeddings for each chunk.
+ if ( ! empty( $content_chunks ) ) {
+ $total_tokens = $this->tokenizer->tokens_in_content( $content );
+
+ // If we have a lot of tokens, we need to get embeddings for each chunk individually.
+ // The same path is used when the handler has no generate_embeddings method.
+ if ( (int) $this->embeddings_handler->get_max_tokens() < $total_tokens || ! method_exists( $this->embeddings_handler, 'generate_embeddings' ) ) {
+ foreach ( $content_chunks as $chunk ) {
+ $embedding = $this->get_embedding( $chunk );
+
+ // Chunks that error are skipped, so the stored set can be partial.
+ if ( $embedding && ! is_wp_error( $embedding ) ) {
+ $embeddings[] = $embedding;
+ }
+
+ // Show an error message if something went wrong.
+ if ( is_wp_error( $embedding ) ) {
+ if ( is_indexing_wpcli() ) {
+ WP_CLI::warning(
+ sprintf(
+ /* translators: %d is the post ID; %s is the error message */
+ esc_html__( 'Error generating embedding for ID #%1$d: %2$s', 'classifai' ),
+ $post_id,
+ $embedding->get_error_message()
+ )
+ );
+ }
+ }
+ }
+ } else {
+ // Otherwise let's get all embeddings in a single request.
+ $embeddings = $this->get_embeddings( $content_chunks );
+
+ // Show an error message if something went wrong.
+ if ( is_wp_error( $embeddings ) ) {
+ if ( is_indexing_wpcli() ) {
+ WP_CLI::warning(
+ sprintf(
+ /* translators: %d is the post ID; %s is the error message */
+ esc_html__( 'Error generating embedding for ID #%1$d: %2$s', 'classifai' ),
+ $post_id,
+ $embeddings->get_error_message()
+ )
+ );
+ }
+
+ // Reset so the empty check below returns the args unchanged.
+ $embeddings = [];
+ }
+ }
+ }
+
+ // Store the embeddings for future use.
+ if ( ! empty( $embeddings ) ) {
+ update_post_meta( $post_id, $this->embeddings_meta_key, $embeddings );
+ update_post_meta( $post_id, $this->content_hash_meta_key, md5( $content ) );
+ }
+ }
+
+ // If we still don't have embeddings, return early.
+ if ( ! $embeddings || empty( $embeddings ) ) {
+ if ( is_indexing_wpcli() ) {
+ WP_CLI::warning(
+ sprintf(
+ /* translators: %d is the post ID */
+ esc_html__( 'No embeddings generated for ID #%d', 'classifai' ),
+ $post_id
+ )
+ );
+ }
+
+ return $args;
+ }
+
+ // Add the embeddings data to the sync args.
+ $args['chunks'] = [];
+
+ // One `chunks` entry per embedding, with every value cast to float.
+ foreach ( $embeddings as $embedding ) {
+ $args['chunks'][] = [
+ 'vector' => array_map( 'floatval', $embedding ),
+ ];
+ }
+
+ return $args;
+ }
+
+	/**
+	 * Copy the Elasticsearch score onto a retrieved document.
+	 *
+	 * The document is only changed when it contains our `chunks` field and
+	 * the raw hit carries a `_score`.
+	 *
+	 * @param array $document Document retrieved from Elasticsearch.
+	 * @param array $hit      Raw Elasticsearch hit.
+	 * @return array
+	 */
+	public function add_score_field_to_document( array $document, array $hit ): array {
+		$has_chunks = isset( $document['chunks'] );
+		$has_score  = isset( $hit['_score'] );
+
+		if ( $has_chunks && $has_score ) {
+			$document['score'] = $hit['_score'];
+		}
+
+		return $document;
+	}
+
+	/**
+	 * Get an embedding from a given text.
+	 *
+	 * When caching is requested, the embedding is looked up in and stored to
+	 * the object cache under a key derived from an MD5 hash of the text. The
+	 * hash keeps the key short and gives texts that differ only in case or
+	 * punctuation their own entries.
+	 *
+	 * @param string $text  Text to get the embedding for.
+	 * @param bool   $cache Whether to cache the result. Default false.
+	 * @return array|WP_Error
+	 */
+	public function get_embedding( string $text, bool $cache = false ) {
+		$key = 'classifai_ep_embedding_' . md5( $text );
+
+		// Check to see if we have a stored embedding.
+		if ( $cache ) {
+			$query_embedding = wp_cache_get( $key );
+
+			if ( $query_embedding ) {
+				return $query_embedding;
+			}
+		}
+
+		// Generate the embedding.
+		$embedding = $this->embeddings_handler->generate_embedding( $text, new Smart404() );
+
+		if ( is_wp_error( $embedding ) ) {
+			return $embedding;
+		}
+
+		// Store the embedding for future use if desired.
+		if ( $cache ) {
+			wp_cache_set( $key, $embedding );
+		}
+
+		return $embedding;
+	}
+
+	/**
+	 * Request embeddings for several strings in one call.
+	 *
+	 * @param array $strings Array of text to get embeddings for.
+	 * @return array|WP_Error
+	 */
+	public function get_embeddings( array $strings ) {
+		return $this->embeddings_handler->generate_embeddings( $strings, new Smart404() );
+	}
+
+	/**
+	 * Run an exact k-nearest neighbor (kNN) search.
+	 *
+	 * An embedding is generated (and cached) for the query, then published
+	 * documents of the requested post types are scored against it using a
+	 * script score over the nested `chunks` field.
+	 *
+	 * @param string $query Query to search for.
+	 * @param array  $args {
+	 *     Optional. Arguments to pass to the search.
+	 *
+	 *     @type string $index          Indexable to run the query against. Default post.
+	 *     @type array  $post_type      Post types to return results of. Defaults to just post.
+	 *     @type int    $num            Number of items to return.
+	 *     @type string $score_function Function to use for scoring. Default cosine.
+	 * }
+	 * @return array|WP_Error
+	 */
+	public function exact_knn_search( string $query, array $args = [] ) {
+		$query_embedding = $this->get_embedding( $query, true );
+
+		if ( is_wp_error( $query_embedding ) ) {
+			return $query_embedding;
+		}
+
+		// Merge the caller's arguments over our defaults.
+		$defaults = [
+			'index'          => 'post',
+			'post_type'      => [ 'post' ],
+			'num'            => 5,
+			'score_function' => 'cosine',
+		];
+		$args     = wp_parse_args( $args, $defaults );
+
+		// Look up the ElasticPress indexable to query against.
+		$indexable = Indexables::factory()->get( $args['index'] );
+
+		if ( ! $indexable ) {
+			return new WP_Error( 'invalid_index', esc_html__( 'Invalid indexable provided.', 'classifai' ) );
+		}
+
+		// Restrict results to the requested post types.
+		$post_type_clause = [
+			'terms' => [
+				'post_type.raw' => $args['post_type'],
+			],
+		];
+
+		// Restrict results to published content.
+		$status_clause = [
+			'terms' => [
+				'post_status' => [
+					'publish',
+				],
+			],
+		];
+
+		// Score every nested chunk against the query vector.
+		$vector_clause = [
+			'nested' => [
+				'path'  => 'chunks',
+				'query' => [
+					'script_score' => [
+						'query'  => [
+							'match_all' => (object) [],
+						],
+						'script' => [
+							'source' => $this->get_script_source( $args['score_function'] ),
+							'params' => [
+								'query_vector' => array_map( 'floatval', $query_embedding ),
+							],
+						],
+					],
+				],
+			],
+		];
+
+		$knn_query = [
+			'from'  => 0,
+			'size'  => (int) $args['num'],
+			'query' => [
+				'bool' => [
+					'must' => [
+						$post_type_clause,
+						$status_clause,
+						$vector_clause,
+					],
+				],
+			],
+		];
+
+		// Send the query through the ElasticPress indexable.
+		$response = $indexable->query_es( $knn_query, [] );
+
+		if ( false === $response || ! isset( $response['documents'] ) ) {
+			return new WP_Error( 'es_error', esc_html__( 'Unable to query Elasticsearch', 'classifai' ) );
+		}
+
+		return $response['documents'];
+	}
+
+ /**
+ * Runs a normal ES search query then rescores results with an exact kNN search.
+ *
+ * This issues two Elasticsearch requests. The first fetches up to
+ * `num_candidates` documents; the second scores only those documents
+ * against the query embedding and returns up to `num` of them.
+ *
+ * @param string $query Query to search for.
+ * @param array $args {
+ * Optional. Arguments to pass to the search.
+ *
+ * @type string $index Indexable to run the query against. Default post.
+ * @type array $post_type Post types to return results of. Defaults to just post.
+ * @type int $num Number of items to return.
+ * @type int $num_candidates Number of candidates to search. Larger numbers give better results but are slower.
+ * @type string $score_function Function to use for scoring. Default cosine.
+ * }
+ * @return array|WP_Error
+ */
+ public function search_rescored_by_exact_knn( string $query, array $args = [] ) {
+ $query_embedding = $this->get_embedding( $query, true );
+
+ if ( is_wp_error( $query_embedding ) ) {
+ return $query_embedding;
+ }
+
+ // Parse the arguments, setting our defaults.
+ $args = wp_parse_args(
+ $args,
+ [
+ 'index' => 'post',
+ 'post_type' => [ 'post' ],
+ 'num' => 5,
+ 'num_candidates' => 50,
+ 'score_function' => 'cosine',
+ ]
+ );
+
+ // Get the ElasticPress indexable.
+ $indexable = Indexables::factory()->get( $args['index'] );
+
+ if ( ! $indexable ) {
+ return new WP_Error( 'invalid_index', esc_html__( 'Invalid indexable provided.', 'classifai' ) );
+ }
+
+ // Build our default search query.
+ $default_es_query = [
+ 'from' => 0,
+ 'size' => (int) $args['num_candidates'],
+ ];
+
+ // Expand our default search query depending on the indexable type.
+ // For any index other than 'post' the query keeps only from/size.
+ switch ( $args['index'] ) {
+ case 'post':
+ $default_query = $this->default_search_post_query( $query, $args['post_type'], (int) $args['num_candidates'], $indexable );
+
+ if ( isset( $default_query['query'] ) ) {
+ $default_es_query['query'] = $default_query['query'];
+
+ // Add the post_name field to the multi_match fields.
+ // Only the first three `should` clauses are checked.
+ for ( $key = 0; $key < 3; $key++ ) {
+ if ( isset( $default_es_query['query']['function_score']['query']['bool']['should'][ $key ]['multi_match']['fields'] ) ) {
+ $default_es_query['query']['function_score']['query']['bool']['should'][ $key ]['multi_match']['fields'] = array_merge( $default_es_query['query']['function_score']['query']['bool']['should'][ $key ]['multi_match']['fields'], [ 'post_name' ] );
+ }
+ }
+
+ if ( isset( $default_query['post_filter'] ) ) {
+ $default_es_query['post_filter'] = $default_query['post_filter'];
+ }
+ }
+
+ break;
+ }
+
+ // Run the query using the ElasticPress indexable.
+ $default_res = $indexable->query_es( $default_es_query, [] );
+
+ if ( false === $default_res || ! isset( $default_res['documents'] ) ) {
+ return new WP_Error( 'es_error', esc_html__( 'Unable to query Elasticsearch', 'classifai' ) );
+ }
+
+ // Get the post IDs from the default search.
+ $post_ids = array_column( $default_res['documents'], 'post_id' );
+
+ // With no candidates there is nothing to rescore, so report an error.
+ if ( empty( $post_ids ) ) {
+ return new WP_Error( 'es_error', esc_html__( 'No post IDs found', 'classifai' ) );
+ }
+
+ // Build our exact kNN query.
+ // It is limited to the candidate post IDs found above.
+ $knn_query = [
+ 'from' => 0,
+ 'size' => (int) $args['num'],
+ 'query' => [
+ 'bool' => [
+ 'must' => [
+ [
+ 'bool' => [
+ 'must' => [
+ 'terms' => [
+ 'post_id' => $post_ids,
+ ],
+ ],
+ ],
+ ],
+ [
+ 'nested' => [
+ 'path' => 'chunks',
+ 'query' => [
+ 'script_score' => [
+ 'query' => [
+ 'match_all' => (object) [],
+ ],
+ 'script' => [
+ 'source' => $this->get_script_source( $args['score_function'] ),
+ 'params' => [
+ 'query_vector' => array_map( 'floatval', $query_embedding ),
+ ],
+ ],
+ ],
+ ],
+ ],
+ ],
+ ],
+ ],
+ ],
+ ];
+
+ // Run the query using the ElasticPress indexable.
+ $res = $indexable->query_es( $knn_query, [] );
+
+ if ( false === $res || ! isset( $res['documents'] ) ) {
+ return new WP_Error( 'es_error', esc_html__( 'Unable to query Elasticsearch', 'classifai' ) );
+ }
+
+ return $res['documents'];
+ }
+
+	/**
+	 * Translate a plain search string into an ElasticPress-formatted query.
+	 *
+	 * A bare WP_Query object is initialised, populated with the search
+	 * arguments and handed to the indexable's format_args() method.
+	 *
+	 * @param string    $query     Query to search for.
+	 * @param array     $post_type Post types to return results of. 'any' is used when empty.
+	 * @param int       $num       Number of items to return.
+	 * @param Indexable $indexable Indexable to run the query against.
+	 * @return array
+	 */
+	private function default_search_post_query( string $query, array $post_type, int $num, Indexable $indexable ): array {
+		$wp_query = new \WP_Query();
+		$wp_query->init();
+
+		// An empty list of post types means "search everything".
+		$post_types = empty( $post_type ) ? 'any' : $post_type;
+
+		$wp_query->query = wp_parse_args(
+			[
+				's'              => $query,
+				'post_type'      => $post_types,
+				'posts_per_page' => $num,
+			]
+		);
+
+		// The query vars start out as a copy of the raw query arguments.
+		$wp_query->query_vars = $wp_query->query;
+
+		return $indexable->format_args( $wp_query->query_vars, $wp_query );
+	}
+
+	/**
+	 * Get the Elasticsearch script source for the desired score function.
+	 *
+	 * Each script compares `params.query_vector` against the `chunks.vector` field.
+	 *
+	 * @param string $type Type of score function to use. One of 'cosine',
+	 *                     'cosine_similarity', 'dot', 'dot_product', 'l1_norm',
+	 *                     'l1norm', 'l2_norm' or 'l2norm'. Default 'cosine'.
+	 *                     Any other value falls back to the default.
+	 * @return string
+	 */
+	private function get_script_source( string $type = 'cosine' ): string {
+		switch ( $type ) {
+			case 'dot':
+			case 'dot_product':
+				return 'double value = dotProduct(params.query_vector, "chunks.vector"); return sigmoid(1, Math.E, -value);';
+
+			case 'l1_norm':
+			case 'l1norm':
+				return '1 / (1 + l1norm(params.query_vector, "chunks.vector"))';
+
+			case 'l2_norm':
+			case 'l2norm':
+				return '1 / (1 + l2norm(params.query_vector, "chunks.vector"))';
+
+			case 'cosine':
+			case 'cosine_similarity':
+			default:
+				// An unrecognized type must not yield an empty script source,
+				// so it is treated like the default score function.
+				return 'cosineSimilarity(params.query_vector, "chunks.vector") + 1.0';
+		}
+	}
+
+	/**
+	 * Convert Elasticsearch results to post-like objects.
+	 *
+	 * Items that already are WP_Post instances are passed through untouched.
+	 * Every other usable hit becomes a stdClass object that carries the post
+	 * fields found in the hit and is flagged with `elasticsearch = true`.
+	 * Hits that are not arrays or that lack a `post_id` are skipped.
+	 *
+	 * Adapted from ElasticPress\Indexable\Post\QueryIntegration::format_hits_as_posts.
+	 *
+	 * @param array $results Document results from Elasticsearch.
+	 * @return array
+	 */
+	public function convert_es_results_to_post_objects( array $results ): array {
+		// Fields copied from the hit onto the post object, in this order.
+		$post_return_args = [
+			'post_type',
+			'post_author',
+			'post_name',
+			'post_status',
+			'post_title',
+			'post_content',
+			'post_excerpt',
+			'post_date',
+			'post_date_gmt',
+			'permalink',
+		];
+
+		$new_posts = [];
+
+		foreach ( $results as $post_array ) {
+			// Don't convert if not needed.
+			if ( is_a( $post_array, 'WP_Post' ) ) {
+				$new_posts[] = $post_array;
+				continue;
+			}
+
+			// A hit without a post ID cannot be turned into a usable post object.
+			if ( ! is_array( $post_array ) || ! isset( $post_array['post_id'] ) ) {
+				continue;
+			}
+
+			$post = new \stdClass();
+
+			$post->ID      = $post_array['post_id'];
+			$post->site_id = get_current_blog_id();
+
+			// Prefer the site ID stored on the hit over the current site.
+			if ( ! empty( $post_array['site_id'] ) ) {
+				$post->site_id = $post_array['site_id'];
+			}
+
+			foreach ( $post_return_args as $key ) {
+				if ( 'post_author' === $key ) {
+					// The author is read from a nested array; only its ID is kept.
+					if ( isset( $post_array[ $key ]['id'] ) ) {
+						$post->$key = $post_array[ $key ]['id'];
+					}
+				} elseif ( isset( $post_array[ $key ] ) ) {
+					$post->$key = $post_array[ $key ];
+				}
+			}
+
+			$post->elasticsearch = true;
+
+			$new_posts[] = $post;
+		}
+
+		return $new_posts;
+	}
+}
diff --git a/includes/Classifai/Helpers.php b/includes/Classifai/Helpers.php
index 5968fe69b..494d64b96 100644
--- a/includes/Classifai/Helpers.php
+++ b/includes/Classifai/Helpers.php
@@ -3,6 +3,8 @@
namespace Classifai;
use Classifai\Features\Classification;
+use Classifai\Features\Smart404;
+use Classifai\Features\Smart404EPIntegration;
use Classifai\Providers\Provider;
use Classifai\Admin\UserProfile;
use Classifai\Providers\Watson\NLU;
@@ -661,3 +663,94 @@ function get_classification_mode(): string {
return $value;
}
+
+/**
+ * Get all parts from the current URL.
+ *
+ * For instance, if the URL is `https://example.com/this/is/a/test/`,
+ * this function will return: `[ 'this', 'is', 'a', 'test' ]`.
+ *
+ * Empty segments are removed and the result is always a zero-indexed list.
+ *
+ * @return array
+ */
+function get_url_slugs(): array {
+	global $wp;
+
+	// Treat a request that has not been set as an empty path.
+	$request = isset( $wp->request ) ? (string) $wp->request : '';
+	$parts   = explode( '/', $request );
+
+	// Drop only empty segments: array_filter() without a callback would also
+	// drop a literal "0" slug. Re-index so the keys have no gaps.
+	return array_values(
+		array_filter(
+			$parts,
+			static function ( $part ) {
+				return '' !== $part;
+			}
+		)
+	);
+}
+
+/**
+ * Get the last part from the current URL.
+ *
+ * For instance, if the URL is `https://example.com/this/is/a/test`,
+ * this function will return: 'test'.
+ *
+ * An empty string is returned when the URL has no parts.
+ *
+ * @return string
+ */
+function get_last_url_slug(): string {
+	$parts = get_url_slugs();
+
+	// end() returns false for an empty array; return an empty string
+	// explicitly instead of relying on trim() to coerce that value.
+	if ( empty( $parts ) ) {
+		return '';
+	}
+
+	return trim( (string) end( $parts ) );
+}
+
+/**
+ * Check if ElasticPress is installed.
+ *
+ * ElasticPress counts as installed when its `\ElasticPress\Feature` class
+ * can be found.
+ *
+ * @return bool
+ */
+function is_elasticpress_installed(): bool {
+	$feature_class = '\\ElasticPress\\Feature';
+
+	return class_exists( $feature_class );
+}
+
+/**
+ * Get the Smart 404 results.
+ *
+ * Searches for content related to the last slug of the current URL and
+ * returns the matches as post objects. An empty array is returned when
+ * the search reports an error.
+ *
+ * @param array $args Arguments to pass to the search.
+ * @return array
+ */
+function get_smart_404_results( array $args = [] ): array {
+	$smart_404 = new Smart404();
+	$documents = $smart_404->exact_knn_search( get_last_url_slug(), $args );
+
+	// A failed search yields no results.
+	if ( is_wp_error( $documents ) ) {
+		return [];
+	}
+
+	// Turn the raw documents into post objects.
+	$integration = new Smart404EPIntegration();
+
+	return $integration->convert_es_results_to_post_objects( $documents );
+}
+
+/**
+ * Render the Smart 404 results.
+ *
+ * Fetches the results with get_smart_404_results() and outputs markup for
+ * each one, using the result's post_title and post_excerpt. Nothing is
+ * output when no results are found.
+ *
+ * @param array $args Arguments to pass to the search.
+ * @return void
+ */
+function render_smart_404_results( array $args = [] ) {
+ // Get the results.
+ $results = get_smart_404_results( $args );
+
+ // Handle situation where we don't have results.
+ if ( empty( $results ) ) {
+ return;
+ }
+
+ // Iterate through each result and render it.
+ echo '';
+ foreach ( $results as $result ) {
+ ?>
+ -
+
+ post_title ); ?>
+
+
+ post_excerpt ); ?>
+
+
+ ';
+}
diff --git a/includes/Classifai/Providers/Azure/Embeddings.php b/includes/Classifai/Providers/Azure/Embeddings.php
index 51de3d252..7e8245ecb 100644
--- a/includes/Classifai/Providers/Azure/Embeddings.php
+++ b/includes/Classifai/Providers/Azure/Embeddings.php
@@ -907,11 +907,11 @@ public function generate_embeddings_for_term( int $term_id, bool $force = false,
/**
* Generate an embedding for a particular piece of text.
*
- * @param string $text Text to generate the embedding for.
- * @param Feature $feature Feature instance.
+ * @param string $text Text to generate the embedding for.
+ * @param Feature|null $feature Feature instance.
* @return array|boolean|WP_Error
*/
- public function generate_embedding( string $text = '', Feature $feature = null ) {
+ public function generate_embedding( string $text = '', $feature = null ) {
if ( ! $feature ) {
$feature = new Classification();
}
diff --git a/includes/Classifai/Providers/OpenAI/Embeddings.php b/includes/Classifai/Providers/OpenAI/Embeddings.php
index 51f058ec2..002f380e3 100644
--- a/includes/Classifai/Providers/OpenAI/Embeddings.php
+++ b/includes/Classifai/Providers/OpenAI/Embeddings.php
@@ -522,7 +522,7 @@ public function generate_embeddings_for_post( int $post_id, bool $force = false
// Chunk the post content down.
$embeddings = [];
- $content = $this->get_content( $post_id, 'post' );
+ $content = $this->get_normalized_content( $post_id, 'post' );
$content_chunks = $this->chunk_content( $content );
// Get the embeddings for each chunk.
@@ -996,7 +996,7 @@ public function generate_embeddings_for_term( int $term_id, bool $force = false,
// Chunk the term content down.
$embeddings = [];
- $content = $this->get_content( $term_id, 'term' );
+ $content = $this->get_normalized_content( $term_id, 'term' );
$content_chunks = $this->chunk_content( $content );
// Get the embeddings for each chunk.
@@ -1208,7 +1208,7 @@ public function chunk_content( string $content = '', int $chunk_size = 150, $ove
* @param string $type Type of content. Default 'post'.
* @return string
*/
- public function get_content( int $id = 0, string $type = 'post' ): string {
+ public function get_normalized_content( int $id = 0, string $type = 'post' ): string {
$normalizer = new Normalizer();
// Get the content depending on the type.
diff --git a/includes/Classifai/Services/ServicesManager.php b/includes/Classifai/Services/ServicesManager.php
index f15a796ac..564c7d0e2 100644
--- a/includes/Classifai/Services/ServicesManager.php
+++ b/includes/Classifai/Services/ServicesManager.php
@@ -76,6 +76,7 @@ public function register_language_processing_features( array $features ): array
'\Classifai\Features\TextToSpeech',
'\Classifai\Features\AudioTranscriptsGeneration',
'\Classifai\Features\Moderation',
+ '\Classifai\Features\Smart404',
];
foreach ( $core_features as $feature ) {
diff --git a/readme.txt b/readme.txt
index d760d77a2..cc92fd113 100644
--- a/readme.txt
+++ b/readme.txt
@@ -25,6 +25,7 @@ Tap into leading cloud-based services like [OpenAI](https://openai.com/), [Micro
* Generate transcripts of audio files using [OpenAI's Whisper API](https://platform.openai.com/docs/guides/speech-to-text)
* Convert text content into audio and output a "read-to-me" feature on the front-end to play this audio using [Microsoft Azure's Text to Speech API](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/text-to-speech), [Amazon Polly](https://aws.amazon.com/polly/) or [OpenAI's Text to Speech API](https://platform.openai.com/docs/guides/text-to-speech)
* Classify post content using [IBM Watson's Natural Language Understanding API](https://www.ibm.com/watson/services/natural-language-understanding/), [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service)
+* Create a smart 404 page that has a recommended results section that suggests relevant content to the user based on the page URL they were trying to access using either [OpenAI's Embedding API](https://platform.openai.com/docs/guides/embeddings) or [Microsoft Azure's OpenAI service](https://azure.microsoft.com/en-us/products/ai-services/openai-service) in combination with [ElasticPress](https://github.com/10up/ElasticPress)
* BETA: Recommend content based on overall site traffic via [Microsoft Azure's AI Personalizer API](https://azure.microsoft.com/en-us/services/cognitive-services/personalizer/) _(note that this service has been deprecated by Microsoft and as such, will no longer work. We are looking to replace this with a new provider to maintain the same functionality)_
* Generate image alt text, image tags, and smartly crop images using [Microsoft Azure's AI Vision API](https://azure.microsoft.com/en-us/services/cognitive-services/computer-vision/)
* Scan images and PDF files for embedded text and save for use in post meta using [Microsoft Azure's AI Vision API](https://azure.microsoft.com/en-us/services/cognitive-services/computer-vision/)
@@ -38,6 +39,7 @@ Tap into leading cloud-based services like [OpenAI](https://openai.com/), [Micro
* To utilize the Azure OpenAI Language Processing functionality, you will need an active [Microsoft Azure](https://signup.azure.com/signup) account and you will need to [apply](https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUNTZBNzRKNlVQSFhZMU9aV09EVzYxWFdORCQlQCN0PWcu) for OpenAI access.
* To utilize the Google Gemini Language Processing functionality, you will need an active [Google Gemini](https://ai.google.dev/tutorials/setup) account.
* To utilize the AWS Language Processing functionality, you will need an active [AWS](https://console.aws.amazon.com/) account.
+* To utilize the Smart 404 feature, you will need to use [ElasticPress](https://github.com/10up/ElasticPress) 5.0.0+ and [Elasticsearch](https://www.elastic.co/elasticsearch) 7.0+.
== Upgrade Notice ==
diff --git a/tests/Classifai/HelpersTest.php b/tests/Classifai/HelpersTest.php
index e8927c3be..c4d932999 100644
--- a/tests/Classifai/HelpersTest.php
+++ b/tests/Classifai/HelpersTest.php
@@ -8,6 +8,8 @@
use function Classifai\Providers\Watson\get_password;
use function Classifai\Providers\Watson\get_feature_threshold;
use function Classifai\get_classification_feature_taxonomy;
+use function Classifai\get_url_slugs;
+use function Classifai\get_last_url_slug;
/**
* @group helpers
@@ -284,4 +286,30 @@ public function test_get_post_statuses() {
$this->assertEquals( 0, count( $statuses_diff ) );
$this->assertArrayHasKey( 'unread', $all_statuses );
}
+
+	/**
+	 * Tests for the get_url_slugs function.
+	 */
+	public function test_get_url_slugs() {
+		global $wp;
+
+		// For the URL https://www.example.com/this/is/a/test/
+		// $wp->request holds 'this/is/a/test'.
+		$wp->request = 'this/is/a/test';
+
+		$expected = [ 'this', 'is', 'a', 'test' ];
+
+		$this->assertEquals( $expected, get_url_slugs() );
+	}
+
+	/**
+	 * Tests for the get_last_url_slug function.
+	 */
+	public function test_get_last_url_slug() {
+		global $wp;
+
+		// A value with a scheme, host and trailing slash should still
+		// resolve to its final non-empty segment.
+		$wp->request = 'https://example.com/this/is/a/test/';
+
+		$this->assertEquals( 'test', get_last_url_slug() );
+	}
}
diff --git a/tests/cypress/integration/language-processing/smart-404-azure-openai.test.js b/tests/cypress/integration/language-processing/smart-404-azure-openai.test.js
new file mode 100644
index 000000000..6ce812272
--- /dev/null
+++ b/tests/cypress/integration/language-processing/smart-404-azure-openai.test.js
@@ -0,0 +1,63 @@
+describe( '[Language processing] Smart 404 - Azure OpenAI Tests', () => {
+ before( () => {
+ cy.login();
+ cy.optInAllFeatures();
+ } );
+
+ beforeEach( () => {
+ cy.login();
+ } );
+
+ it( "See error message if ElasticPress isn't activate", () => {
+ cy.disableElasticPress();
+
+ cy.visit(
+ '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_smart_404'
+ );
+
+ cy.get( '.classifai-nlu-sections .notice-error' ).should( 'exist' );
+ } );
+
+ it( 'Can save Smart 404 settings', () => {
+ cy.enableElasticPress();
+
+ cy.visit(
+ '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_smart_404'
+ );
+
+ // Enabled Feature.
+ cy.get( '#status' ).check();
+
+ // Setup Provider.
+ cy.get( '#provider' ).select( 'azure_openai_embeddings' );
+ cy.get(
+ 'input[name="classifai_feature_smart_404[azure_openai_embeddings][endpoint_url]"]'
+ )
+ .clear()
+ .type( 'https://e2e-test-azure-openai.test/' );
+ cy.get(
+ 'input[name="classifai_feature_smart_404[azure_openai_embeddings][api_key]"]'
+ )
+ .clear()
+ .type( 'password' );
+ cy.get(
+ 'input[name="classifai_feature_smart_404[azure_openai_embeddings][deployment]"]'
+ )
+ .clear()
+ .type( 'test' );
+
+ // Change all settings.
+ cy.get( '#num' ).clear().type( 4 );
+ cy.get( '#num_search' ).clear().type( 7000 );
+ cy.get( '#threshold' ).clear().type( 3.25 );
+ cy.get( '#rescore' ).uncheck();
+ cy.get( '#fallback' ).check();
+ cy.get( '#score_function' ).select( 'l1_norm' );
+ cy.get( '#classifai_feature_smart_404_roles_administrator' ).check();
+
+ // Save settings.
+ cy.get( '#submit' ).click();
+
+ cy.disableElasticPress();
+ } );
+} );
diff --git a/tests/cypress/integration/language-processing/smart-404-openai.test.js b/tests/cypress/integration/language-processing/smart-404-openai.test.js
new file mode 100644
index 000000000..635265efb
--- /dev/null
+++ b/tests/cypress/integration/language-processing/smart-404-openai.test.js
@@ -0,0 +1,49 @@
+describe( '[Language processing] Smart 404 - OpenAI Tests', () => {
+ before( () => {
+ cy.login();
+ cy.optInAllFeatures();
+ } );
+
+ beforeEach( () => {
+ cy.login();
+ } );
+
+ it( "See error message if ElasticPress isn't activate", () => {
+ cy.disableElasticPress();
+
+ cy.visit(
+ '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_smart_404'
+ );
+
+ cy.get( '.classifai-nlu-sections .notice-error' ).should( 'exist' );
+ } );
+
+ it( 'Can save Smart 404 settings', () => {
+ cy.enableElasticPress();
+
+ cy.visit(
+ '/wp-admin/tools.php?page=classifai&tab=language_processing&feature=feature_smart_404'
+ );
+
+ // Enabled Feature.
+ cy.get( '#status' ).check();
+
+ // Setup Provider.
+ cy.get( '#provider' ).select( 'openai_embeddings' );
+ cy.get( '#api_key' ).clear().type( 'password' );
+
+ // Change all settings.
+ cy.get( '#num' ).clear().type( 5 );
+ cy.get( '#num_search' ).clear().type( 8000 );
+ cy.get( '#threshold' ).clear().type( 2.55 );
+ cy.get( '#rescore' ).check();
+ cy.get( '#fallback' ).uncheck();
+ cy.get( '#score_function' ).select( 'dot_product' );
+ cy.get( '#classifai_feature_smart_404_roles_administrator' ).check();
+
+ // Save settings.
+ cy.get( '#submit' ).click();
+
+ cy.disableElasticPress();
+ } );
+} );
diff --git a/tests/cypress/support/commands.js b/tests/cypress/support/commands.js
index d0b243583..e9d22b623 100644
--- a/tests/cypress/support/commands.js
+++ b/tests/cypress/support/commands.js
@@ -574,3 +574,27 @@ Cypress.Commands.add( 'enableClassicEditor', () => {
}
} );
} );
+
+/**
+ * Activate the ElasticPress plugin.
+ */
+Cypress.Commands.add( 'enableElasticPress', () => {
+ cy.visit( '/wp-admin/plugins.php' );
+ cy.get( 'body' ).then( ( $body ) => {
+ if ( $body.find( '#activate-elasticpress' ).length > 0 ) {
+ cy.get( '#activate-elasticpress' ).click();
+ }
+ } );
+} );
+
+/**
+ * Deactivate the Classic Editor plugin.
+ */
+Cypress.Commands.add( 'disableElasticPress', () => {
+ cy.visit( '/wp-admin/plugins.php' );
+ cy.get( 'body' ).then( ( $body ) => {
+ if ( $body.find( '#deactivate-elasticpress' ).length > 0 ) {
+ cy.get( '#deactivate-elasticpress' ).click();
+ }
+ } );
+} );