diff --git a/serverless/images/semantic-options.svg b/serverless/images/semantic-options.svg
new file mode 100644
index 00000000..3bedf530
--- /dev/null
+++ b/serverless/images/semantic-options.svg
@@ -0,0 +1,62 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg viewBox="0 0 800 500" xmlns="http://www.w3.org/2000/svg">
+  <!-- Title -->
+  <text x="400" y="40" text-anchor="middle" font-size="24" fill="#000" font-family="Inter, Arial, sans-serif">Elasticsearch semantic search workflows</text>
+  
+  <!-- Workflow boxes -->
+  <g transform="translate(0, 80)">
+    <!-- semantic_text workflow (recommended) -->
+    <rect x="50" y="50" width="200" height="300" rx="10" fill="#c2e0ff" stroke="#3f8dd6" stroke-width="2"/>
+    <text x="150" y="80" text-anchor="middle" font-size="16" font-weight="bold" fill="#2c5282" font-family="Inter, Arial, sans-serif">semantic_text</text>
+    <text x="150" y="100" text-anchor="middle" font-size="12" fill="#2c5282" font-family="Inter, Arial, sans-serif">(Recommended)</text>
+    
+    <!-- Inference API workflow -->
+    <rect x="300" y="50" width="200" height="300" rx="10" fill="#d9f1e3" stroke="#38a169" stroke-width="2"/>
+    <text x="400" y="80" text-anchor="middle" font-size="16" font-weight="bold" fill="#276749" font-family="Inter, Arial, sans-serif">Inference API</text>
+    
+    <!-- Model deployment workflow -->
+    <rect x="550" y="50" width="200" height="300" rx="10" fill="#feebc8" stroke="#dd6b20" stroke-width="2"/>
+    <text x="650" y="80" text-anchor="middle" font-size="16" font-weight="bold" fill="#9c4221" font-family="Inter, Arial, sans-serif">Model Deployment</text>
+    
+    <!-- Complexity indicators -->
+    <text x="150" y="130" text-anchor="middle" font-size="12" fill="#2c5282" font-family="Inter, Arial, sans-serif">Complexity: Low</text>
+    <text x="400" y="130" text-anchor="middle" font-size="12" fill="#276749" font-family="Inter, Arial, sans-serif">Complexity: Medium</text>
+    <text x="650" y="130" text-anchor="middle" font-size="12" fill="#9c4221" font-family="Inter, Arial, sans-serif">Complexity: High</text>
+    
+    <!-- Components in each workflow -->
+    <g transform="translate(60, 150)">
+      <!-- semantic_text components -->
+      <rect x="10" y="0" width="170" height="30" rx="5" fill="#fff" stroke="#3f8dd6"/>
+      <text x="95" y="20" text-anchor="middle" font-size="12" font-family="Inter, Arial, sans-serif">Create Inference Endpoint</text>
+      
+      <rect x="10" y="40" width="170" height="30" rx="5" fill="#fff" stroke="#3f8dd6"/>
+      <text x="95" y="60" text-anchor="middle" font-size="12" font-family="Inter, Arial, sans-serif">Define Index Mapping</text>
+      
+      <!-- Inference API components -->
+      <rect x="260" y="0" width="170" height="30" rx="5" fill="#fff" stroke="#38a169"/>
+      <text x="345" y="20" text-anchor="middle" font-size="12" font-family="Inter, Arial, sans-serif">Create Inference Endpoint</text>
+      
+      <rect x="260" y="40" width="170" height="30" rx="5" fill="#fff" stroke="#38a169"/>
+      <text x="345" y="60" text-anchor="middle" font-size="12" font-family="Inter, Arial, sans-serif">Configure Model Settings</text>
+      
+      <rect x="260" y="80" width="170" height="30" rx="5" fill="#fff" stroke="#38a169"/>
+      <text x="345" y="100" text-anchor="middle" font-size="12" font-family="Inter, Arial, sans-serif">Define Index Mapping</text>
+      
+      <rect x="260" y="120" width="170" height="30" rx="5" fill="#fff" stroke="#38a169"/>
+      <text x="345" y="140" text-anchor="middle" font-size="12" font-family="Inter, Arial, sans-serif">Setup Ingest Pipeline</text>
+      
+      <!-- Model deployment components -->
+      <rect x="510" y="0" width="170" height="30" rx="5" fill="#fff" stroke="#dd6b20"/>
+      <text x="595" y="20" text-anchor="middle" font-size="12" font-family="Inter, Arial, sans-serif">Select NLP Model</text>
+      
+      <rect x="510" y="40" width="170" height="30" rx="5" fill="#fff" stroke="#dd6b20"/>
+      <text x="595" y="60" text-anchor="middle" font-size="12" font-family="Inter, Arial, sans-serif">Deploy with Eland Client</text>
+      
+      <rect x="510" y="80" width="170" height="30" rx="5" fill="#fff" stroke="#dd6b20"/>
+      <text x="595" y="100" text-anchor="middle" font-size="12" font-family="Inter, Arial, sans-serif">Define Index Mapping</text>
+      
+      <rect x="510" y="120" width="170" height="30" rx="5" fill="#fff" stroke="#dd6b20"/>
+      <text x="595" y="140" text-anchor="middle" font-size="12" font-family="Inter, Arial, sans-serif">Setup Ingest Pipeline</text>
+    </g>
+  </g>
+</svg>
diff --git a/serverless/index-serverless-elasticsearch.asciidoc b/serverless/index-serverless-elasticsearch.asciidoc
index b7493298..64d53024 100644
--- a/serverless/index-serverless-elasticsearch.asciidoc
+++ b/serverless/index-serverless-elasticsearch.asciidoc
@@ -40,7 +40,6 @@ include::./pages/search-your-data-the-search-api.asciidoc[leveloffset=+3]
 include::./pages/search-with-synonyms.asciidoc[leveloffset=+3]
 include::./pages/knn-search.asciidoc[leveloffset=+3]
 include::./pages/search-your-data-semantic-search.asciidoc[leveloffset=+3]
-include::./pages/search-your-data-semantic-search-elser.asciidoc[leveloffset=+4]
 
 include::./pages/explore-your-data.asciidoc[leveloffset=+2]
 
diff --git a/serverless/pages/search-your-data-semantic-search-elser.asciidoc b/serverless/pages/search-your-data-semantic-search-elser.asciidoc
deleted file mode 100644
index 5e9d4907..00000000
--- a/serverless/pages/search-your-data-semantic-search-elser.asciidoc
+++ /dev/null
@@ -1,390 +0,0 @@
-[[elasticsearch-reference-semantic-search-elser]]
-= Tutorial: Semantic search with ELSER
-
-// :description: Perform semantic search using ELSER, an NLP model trained by Elastic.
-// :keywords: elasticsearch, elser, semantic search
-
-Elastic Learned Sparse EncodeR - or ELSER - is an NLP model trained by Elastic
-that enables you to perform semantic search by using sparse vector
-representation. Instead of literal matching on search terms, semantic search
-retrieves results based on the intent and the contextual meaning of a search
-query.
-
-The instructions in this tutorial show you how to use ELSER to perform semantic
-search on your data.
-
-[NOTE]
-====
-Only the first 512 extracted tokens per field are considered during
-semantic search with ELSER. Refer to
-{ml-docs}/ml-nlp-limitations.html#ml-nlp-elser-v1-limit-512[this page] for more
-information.
-====
-
-[discrete]
-[[requirements]]
-== Requirements
-
-To perform semantic search by using ELSER, you must have the NLP model deployed
-in your cluster. Refer to the
-{ml-docs}/ml-nlp-elser.html[ELSER documentation] to learn how to download and
-deploy the model.
-
-[NOTE]
-====
-The minimum dedicated ML node size for deploying and using the ELSER model
-is 4 GB in {es} Service if
-{cloud}/ec-autoscaling.html[deployment autoscaling] is turned off. Turning on
-autoscaling is recommended because it allows your deployment to dynamically
-adjust resources based on demand. Better performance can be achieved by using
-more allocations or more threads per allocation, which requires bigger ML nodes.
-Autoscaling provides bigger nodes when required. If autoscaling is turned off,
-you must provide suitably sized nodes yourself.
-====
-
-[discrete]
-[[elser-mappings]]
-== Create the index mapping
-
-First, create the mapping of the destination index - the index that contains
-the tokens that the model created based on your text. The destination
-index must have a field with the
-{ref}/sparse-vector.html[`sparse_vector`] or {ref}/rank-features.html[`rank_features`] field
-type to index the ELSER output.
-
-[NOTE]
-====
-ELSER output must be ingested into a field with the `sparse_vector` or
-`rank_features` field type. Otherwise, {es} interprets the token-weight pairs as
-a massive number of fields in a document. If you get an error similar to
-`"Limit of total fields [1000] has been exceeded while adding new fields"`,
-the ELSER output field is not mapped properly and has a field type other than
-`sparse_vector` or `rank_features`.
-====
-
-[source,bash]
-----
-curl -X PUT "${ES_URL}/my-index" \
--H "Authorization: ApiKey ${API_KEY}" \
--H "Content-Type: application/json" \
--d'
-{
-  "mappings": {
-    "properties": {
-      "content_embedding": {   <1>
-        "type": "sparse_vector"   <2>
-      },
-      "content": {   <3>
-        "type": "text"   <4>
-      }
-    }
-  }
-}
-'
-----
-
-<1> The name of the field to contain the generated tokens. It must be referenced
-in the {infer} pipeline configuration in the next step.
-
-<2> The field to contain the tokens is a `sparse_vector` field.
-
-<3> The name of the field from which to create the sparse vector representation.
-In this example, the name of the field is `content`. It must be referenced in the
-{infer} pipeline configuration in the next step.
-
-<4> The field type, which is `text` in this example.
-
-To learn how to optimize space, refer to the <<save-space,Saving disk space by excluding the ELSER tokens from document source>> section.
-
-[discrete]
-[[inference-ingest-pipeline]]
-== Create an ingest pipeline with an inference processor
-
-Create an {ref}/ingest.html[ingest pipeline] with an
-{ref}/inference-processor.html[inference processor] to use ELSER to infer against the data
-that is being ingested in the pipeline.
-
-[source,bash]
-----
-curl -X PUT "${ES_URL}/_ingest/pipeline/elser-v2-test" \
--H "Authorization: ApiKey ${API_KEY}" \
--H "Content-Type: application/json" \
--d'
-{
-  "processors": [
-    {
-      "inference": {
-        "model_id": ".elser_model_2",
-        "input_output": [ <1>
-          {
-            "input_field": "content",
-            "output_field": "content_embedding"
-          }
-        ]
-      }
-    }
-  ]
-}
-'
-----
-
-[discrete]
-[[load-data]]
-== Load data
-
-In this step, you load the data that you later use in the {infer} ingest
-pipeline to extract tokens from it.
-
-Use the `msmarco-passagetest2019-top1000` data set, which is a subset of the MS
-MARCO Passage Ranking data set. It consists of 200 queries, each accompanied by
-a list of relevant text passages. All unique passages, along with their IDs,
-have been extracted from that data set and compiled into a
-https://github.com/elastic/stack-docs/blob/main/docs/en/stack/ml/nlp/data/msmarco-passagetest2019-unique.tsv[tsv file].
-
-Download the file and upload it to your cluster using the
-{kibana-ref}/connect-to-elasticsearch.html#upload-data-kibana[Data Visualizer]
-in the {ml-app} UI. Assign the name `id` to the first column and `content` to
-the second column. The index name is `test-data`. Once the upload is complete,
-you can see an index named `test-data` with 182469 documents.
-
-[discrete]
-[[reindexing-data-elser]]
-== Ingest the data through the {infer} ingest pipeline
-
-Create the tokens from the text by reindexing the data through the {infer}
-pipeline that uses ELSER as the inference model.
-
-[source,bash]
-----
-curl -X POST "${ES_URL}/_reindex?wait_for_completion=false" \
--H "Authorization: ApiKey ${API_KEY}" \
--H "Content-Type: application/json" \
--d'
-{
-  "source": {
-    "index": "test-data",
-    "size": 50   <1>
-  },
-  "dest": {
-    "index": "my-index",
-    "pipeline": "elser-v2-test"
-  }
-}
-'
-----
-
-<1> The default batch size for reindexing is 1000. Reducing `size` to a smaller
-number makes the updates of the reindexing process quicker, which enables you to
-follow the progress closely and detect errors early.
-
-The call returns a task ID to monitor the progress:
-
-[source,bash]
-----
-curl -X GET "${ES_URL}/_tasks/<task_id>" \
--H "Authorization: ApiKey ${API_KEY}" \
-----
-
-You can also open the Trained Models UI and select the Pipelines tab under
-ELSER to follow the progress.
-
-[discrete]
-[[sparse-vector-query]]
-== Semantic search by using the `sparse_vector` query
-
-To perform semantic search, use the `sparse_vector` query and provide the
-query text and the inference ID associated with the ELSER model service. The
-example below uses the query text "How to avoid muscle soreness after
-running?"; the `content_embedding` field contains the generated ELSER output:
-
-[source,bash]
-----
-curl -X GET "${ES_URL}/my-index/_search" \
--H "Authorization: ApiKey ${API_KEY}" \
--H "Content-Type: application/json" \
--d'
-{
-   "query":{
-      "sparse_vector":{
-         "field": "content_embedding",
-         "inference_id": "my-elser-endpoint",
-         "query": "How to avoid muscle soreness after running?"
-      }
-   }
-}
-'
-----
-
-The result is the top 10 documents from the `my-index` index that are closest
-in meaning to your query text, sorted by relevance. The result also
-contains the extracted tokens for each of the relevant search results with their
-weights.
-
-[source,console-result]
-----
-"hits": {
-  "total": {
-    "value": 10000,
-    "relation": "gte"
-  },
-  "max_score": 26.199875,
-  "hits": [
-    {
-      "_index": "my-index",
-      "_id": "FPr9HYsBag9jXmT8lEpI",
-      "_score": 26.199875,
-      "_source": {
-        "content_embedding": {
-          "muscular": 0.2821541,
-          "bleeding": 0.37929374,
-          "foods": 1.1718726,
-          "delayed": 1.2112266,
-          "cure": 0.6848574,
-          "during": 0.5886185,
-          "fighting": 0.35022718,
-          "rid": 0.2752442,
-          "soon": 0.2967024,
-          "leg": 0.37649947,
-          "preparation": 0.32974035,
-          "advance": 0.09652356,
-          (...)
-        },
-        "id": 1713868,
-        "model_id": ".elser_model_2",
-        "content": "For example, if you go for a run, you will mostly use the muscles in your lower body. Give yourself 2 days to rest those muscles so they have a chance to heal before you exercise them again. Not giving your muscles enough time to rest can cause muscle damage, rather than muscle development."
-      }
-    },
-    (...)
-  ]
-}
-----
-
-[discrete]
-[[sparse-vector-compound-query]]
-== Combining semantic search with other queries
-
-You can combine `sparse_vector` with other queries in a
-{ref}/compound-queries.html[compound query]. For example, use a filter clause in a
-{ref}/query-dsl-bool-query.html[Boolean query], or a full-text query that may or may not use the same
-query text as the `sparse_vector` query. This enables you to combine the search
-results from both queries.
-
-The search hits from the `sparse_vector` query tend to score higher than those
-from other {es} queries. You can regularize those scores by increasing or
-decreasing the relevance scores of each query with the `boost` parameter. Recall
-on the `sparse_vector` query can be high when there is a long tail of less
-relevant results. Use the `min_score` parameter to prune those less relevant documents.
-
-[source,bash]
-----
-curl -X GET "${ES_URL}/my-index/_search" \
--H "Authorization: ApiKey ${API_KEY}" \
--H "Content-Type: application/json" \
--d'
-{
-  "query": {
-    "bool": {   <1>
-      "should": [
-        {
-          "sparse_vector": {
-            "field": "content_embedding",
-            "query": "How to avoid muscle soreness after running?",
-            "inference_id": "my-elser-endpoint",
-            "boost": 1   <2>
-          }
-        },
-        {
-          "query_string": {
-            "query": "toxins",
-            "boost": 4   <3>
-          }
-        }
-      ]
-    }
-  },
-  "min_score": 10   <4>
-}
-'
-----
-
-<1> Both the `sparse_vector` and the `query_string` queries are in a `should`
-clause of a `bool` query.
-
-<2> The `boost` value is `1` for the `sparse_vector` query, which is the default
-value. This means that the relevance scores of the results of this query are not
-boosted.
-
-<3> The `boost` value is `4` for the `query_string` query. The relevance scores
-of the results of this query are increased, causing them to rank higher in the
-search results.
-
-<4> Only the results with a score equal to or higher than `10` are displayed.
-
-[discrete]
-[[optimization]]
-== Optimizing performance
-
-[discrete]
-[[save-space]]
-=== Saving disk space by excluding the ELSER tokens from document source
-
-The tokens generated by ELSER must be indexed for use in the
-{ref}/query-dsl-sparse-vector-query.html[sparse_vector query]. However, it is not
-necessary to retain those terms in the document source. You can save disk space
-by using the {ref}/mapping-source-field.html#include-exclude[source exclude] mapping to remove the ELSER
-terms from the document source.
-
-[WARNING]
-====
-Reindex uses the document source to populate the destination index.
-Once the ELSER terms have been excluded from the source, they cannot be
-recovered through reindexing. Excluding the tokens from the source is a
-space-saving optimization that should only be applied if you are certain that
-reindexing will not be required in the future! It's important to carefully
-consider this trade-off and make sure that excluding the ELSER terms from the
-source aligns with your specific requirements and use case.
-====
-
-The mapping that excludes `content_embedding` from the `_source` field can be
-created by the following API call:
-
-[source,bash]
-----
-curl -X PUT "${ES_URL}/my-index" \
--H "Authorization: ApiKey ${API_KEY}" \
--H "Content-Type: application/json" \
--d'
-{
-  "mappings": {
-    "_source": {
-      "excludes": [
-        "content_embedding"
-      ]
-    },
-    "properties": {
-      "content_embedding": {
-        "type": "sparse_vector"
-      },
-      "content": {
-        "type": "text"
-      }
-    }
-  }
-}
-'
-----
-
-[discrete]
-[[further-reading]]
-== Further reading
-
-* {ml-docs}/ml-nlp-elser.html[How to download and deploy ELSER]
-* {ml-docs}/ml-nlp-limitations.html#ml-nlp-elser-v1-limit-512[ELSER limitation]
-* https://www.elastic.co/blog/may-2023-launch-information-retrieval-elasticsearch-ai-model[Improving information retrieval in the Elastic Stack: Introducing Elastic Learned Sparse Encoder, our new retrieval model]
-
-[discrete]
-[[interactive-example]]
-== Interactive example
-
-* The `elasticsearch-labs` repo has an interactive example of running https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/03-ELSER.ipynb[ELSER-powered semantic search] using the {es} Python client.
diff --git a/serverless/pages/search-your-data-semantic-search-elser.asciidoc b/serverless/pages/search-your-data-semantic-search-elser.asciidoc
new file mode 100644
index 00000000..02712bc1
--- /dev/null
+++ b/serverless/pages/search-your-data-semantic-search-elser.asciidoc
@@ -0,0 +1,9 @@
+[[elasticsearch-reference-semantic-search-elser]]
+= Tutorial: Semantic search with ELSER
+
+// :description: Perform semantic search using ELSER, an NLP model trained by Elastic.
+// :keywords: elasticsearch, elser, semantic search
+
+// This page is not included in the index file, so it is not visible in the navigation menu anymore. HTTP redirects will be set up.
+
+ℹ️ Refer to <<elasticsearch-reference-semantic-search>> for an overview of semantic search in {es-serverless}.
\ No newline at end of file
diff --git a/serverless/pages/search-your-data-semantic-search.asciidoc b/serverless/pages/search-your-data-semantic-search.asciidoc
index efe8670f..de30fe82 100644
--- a/serverless/pages/search-your-data-semantic-search.asciidoc
+++ b/serverless/pages/search-your-data-semantic-search.asciidoc
@@ -8,136 +8,42 @@ Semantic search is a search method that helps you find data based on the intent
 and contextual meaning of a search query, instead of a match on query terms
 (lexical search).
 
-{es} provides semantic search capabilities using {ml-docs}/ml-nlp.html[natural
-language processing (NLP)] and vector search. Deploying an NLP model to {es}
-enables it to extract text embeddings out of text. Embeddings are vectors that
-provide a numeric representation of a text. Pieces of content with similar
-meaning have similar representations.
+{es} provides various semantic search capabilities using natural language processing (NLP) and vector search. Using an NLP model enables you to extract text embeddings from text. Embeddings are vectors that provide a numeric representation of a text. Pieces of content with similar meaning have similar representations.
 
-image::images/vector-search-oversimplification.png[A simplified representation of encoding textual concepts as vectors]
+There are three main workflows for implementing semantic search with {es}, arranged in order of increasing complexity:
 
-_A simplified representation of encoding textual concepts as vectors_
+- <<elasticsearch-reference-semantic-search-semantic-text,The `semantic_text` workflow>>
+- <<elasticsearch-reference-semantic-search-inference-api,The inference API workflow>>
+- <<elasticsearch-reference-semantic-search-model-deployment,The model deployment workflow>>
 
-At query time, {es} can use the same NLP model to convert a query into
-embeddings, enabling you to find documents with similar text embeddings.
+image::images/semantic-options.svg[Overview of semantic search workflows in {es}]
 
-This guide shows you how to implement semantic search with {es}, from selecting
-an NLP model, to writing queries.
+[NOTE]
+====
+Semantic search is available on all Elastic deployment types: self-managed clusters, Elastic Cloud Hosted deployments, and {es-serverless} projects. The links on this page will take you to the {ref}/semantic-search.html[{es} core documentation].
+====
 
 [discrete]
-[[semantic-search-select-nlp-model]]
-== Select an NLP model
-
-{es} offers the usage of a
-{ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-embedding[wide range of NLP models],
-including both dense and sparse vector models. Your choice of the language model
-is critical for implementing semantic search successfully.
-
-While it is possible to bring your own text embedding model, achieving good
-search results through model tuning is challenging. Selecting an appropriate
-model from our third-party model list is the first step. Training the model on
-your own data is essential to ensure better search results than using only BM25.
-However, the model training process requires a team of data scientists and ML
-experts, making it expensive and time-consuming.
-
-To address this issue, Elastic provides a pre-trained representational model
-called {ml-docs}/ml-nlp-elser.html[Elastic Learned Sparse EncodeR (ELSER)].
-ELSER, currently available only for English, is an out-of-domain sparse vector
-model that does not require fine-tuning. This adaptability makes it suitable for
-various NLP use cases out of the box. Unless you have a team of ML specialists,
-it is highly recommended to use the ELSER model.
-
-In the case of sparse vector representation, the vectors mostly consist of zero
-values, with only a small subset containing non-zero values. This representation
-is commonly used for textual data. In the case of ELSER, each document in an
-index and the query text itself are represented by high-dimensional sparse
-vectors. Each non-zero element of the vector corresponds to a term in the model
-vocabulary. The ELSER vocabulary contains around 30000 terms, so the sparse
-vectors created by ELSER contain about 30000 values, the majority of which are
-zero. Effectively the ELSER model is replacing the terms in the original query
-with other terms that have been learnt to exist in the documents that best match
-the original search terms in a training dataset, and weights to control how
-important each is.
+[[elasticsearch-reference-semantic-search-semantic-text]]
+== Semantic search with `semantic_text`
 
-[discrete]
-[[semantic-search-deploy-nlp-model]]
-== Deploy the model
-
-After you decide which model you want to use for implementing semantic search,
-you need to deploy the model in {es}.
+The `semantic_text` field simplifies semantic search by providing inference at ingestion time with sensible default values, eliminating the need for complex configurations. 
 
-include::../partials/deploy-nlp-model-widget.asciidoc[]
+Learn how to implement semantic search with `semantic_text` in the {ref}/semantic-search-semantic-text.html[Elasticsearch docs →].
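+
+As a minimal sketch, the entire setup can be a single mapping request. The index and field names (`my-index`, `content`) are illustrative; because no `inference_id` is specified, the field uses the default managed ELSER endpoint:
+
+[source,bash]
+----
+curl -X PUT "${ES_URL}/my-index" \
+-H "Authorization: ApiKey ${API_KEY}" \
+-H "Content-Type: application/json" \
+-d'
+{
+  "mappings": {
+    "properties": {
+      "content": {
+        "type": "semantic_text"   <1>
+      }
+    }
+  }
+}
+'
+----
+
+<1> Text indexed into this field is chunked and embedded automatically at ingest time, so no ingest pipeline is required.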
 
 [discrete]
-[[semantic-search-field-mappings]]
-== Map a field for the text embeddings
+[[elasticsearch-reference-semantic-search-inference-api]]
+== Semantic search with the inference API
 
-Before you start using the deployed model to generate embeddings based on your
-input text, you need to prepare your index mapping first. The mapping of the
-index depends on the type of model.
+The inference API workflow enables you to perform semantic search using models from a variety of services, such as Cohere, OpenAI, Hugging Face, Azure AI Studio, and more.
 
-include::../partials/field-mappings-widget.asciidoc[]
-
-[discrete]
-[[semantic-search-generate-embeddings]]
-== Generate text embeddings
+Learn how to implement semantic search with the inference API in the {ref}/semantic-search-inference.html[Elasticsearch docs →].
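+
+As an illustrative sketch, creating an inference endpoint for a third-party service looks roughly like the following; the endpoint name `my-cohere-endpoint`, the model choice, and the API key placeholder are assumptions for this example:
+
+[source,bash]
+----
+curl -X PUT "${ES_URL}/_inference/text_embedding/my-cohere-endpoint" \
+-H "Authorization: ApiKey ${API_KEY}" \
+-H "Content-Type: application/json" \
+-d'
+{
+  "service": "cohere",   <1>
+  "service_settings": {
+    "api_key": "<cohere-api-key>",   <2>
+    "model_id": "embed-english-v3.0"
+  }
+}
+'
+----
+
+<1> Other services, such as `openai` or `hugging_face`, follow the same pattern with their own service settings.
+<2> Replace with a valid API key for the chosen service.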
 
-Once you have created the mappings for the index, you can generate text
-embeddings from your input text. This can be done by using an
-{ref}/ingest.html[ingest pipeline] with an {ref}/inference-processor.html[inference processor].
-The ingest pipeline processes the input data and indexes it into the destination
-index. At index time, the inference ingest processor uses the trained model to
-infer against the data ingested through the pipeline. After you created the
-ingest pipeline with the inference processor, you can ingest your data through
-it to generate the model output.
-
-include::../partials/generate-embeddings-widget.asciidoc[]
-
-Now it is time to perform semantic search!
 
 [discrete]
-[[semantic-search-search]]
-== Search the data
-
-Depending on the type of model you have deployed, you can query sparse vectors
-with a sparse vector query, or dense vectors with a kNN search.
+[[elasticsearch-reference-semantic-search-model-deployment]]
+== Semantic search with the model deployment workflow
 
-include::../partials/search-widget.asciidoc[]
+The model deployment workflow enables you to deploy custom NLP models in {es}, giving you full control over text embedding generation and vector search. While this workflow offers the most flexibility, it also requires expertise in NLP and machine learning.
 
-[discrete]
-[[semantic-search-hybrid-search]]
-== Beyond semantic search with hybrid search
-
-In some situations, lexical search may perform better than semantic search. For
-example, when searching for single words or IDs, like product numbers.
-
-Combining semantic and lexical search into one hybrid search request using
-{ref}/rrf.html[reciprocal rank fusion] provides the best of both worlds. Not only that,
-but hybrid search using reciprocal rank fusion {blog-ref}improving-information-retrieval-elastic-stack-hybrid[has been shown to perform better
-in general].
-
-include::../partials/hybrid-search-widget.asciidoc[]
-
-[discrete]
-[[semantic-search-read-more]]
-== Read more
-
-* Tutorials:
-+
-** <<elasticsearch-reference-semantic-search-elser,Semantic search with ELSER>>
-** {ml-docs}/ml-nlp-text-emb-vector-search-example.html[Semantic search with the msmarco-MiniLM-L-12-v3 sentence-transformer model]
-* Blogs:
-+
-** {blog-ref}may-2023-launch-sparse-encoder-ai-model[Introducing Elastic Learned Sparse Encoder: Elastic's AI model for semantic search]
-** {blog-ref}lexical-ai-powered-search-elastic-vector-database[How to get the best of lexical and AI-powered search with Elastic's vector database]
-** Information retrieval blog series:
-+
-*** {blog-ref}improving-information-retrieval-elastic-stack-search-relevance[Part 1: Steps to improve search relevance]
-*** {blog-ref}improving-information-retrieval-elastic-stack-benchmarking-passage-retrieval[Part 2: Benchmarking passage retrieval]
-*** {blog-ref}may-2023-launch-information-retrieval-elasticsearch-ai-model[Part 3: Introducing Elastic Learned Sparse Encoder, our new retrieval model]
-*** {blog-ref}improving-information-retrieval-elastic-stack-hybrid[Part 4: Hybrid retrieval]
-* Interactive examples:
-+
-** The https://github.com/elastic/elasticsearch-labs[`elasticsearch-labs`] repo contains a number of interactive semantic search examples in the form of executable Python notebooks, using the {es} Python client.
-
-// The include that was here is another page
+Learn how to implement semantic search with the model deployment workflow in the {ref}/semantic-search-deployed-nlp-model.html[Elasticsearch docs →].
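+
+As a rough sketch, deploying a third-party model with the Eland client looks like the following; the Hugging Face model ID below is an illustrative choice, not a requirement:
+
+[source,bash]
+----
+# Install the Eland client with PyTorch support
+python -m pip install 'eland[pytorch]'
+
+# Import a text embedding model from Hugging Face and start a deployment
+eland_import_hub_model \
+  --url "${ES_URL}" \
+  --es-api-key "${API_KEY}" \
+  --hub-model-id sentence-transformers/msmarco-MiniLM-L-12-v3 \
+  --task-type text_embedding \
+  --start
+----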