[DOCS] Add warning to create inference API (#3311)

Co-authored-by: Karen Metts <[email protected]> (cherry picked from commit 679ae2b)
elastic · Dec 17, 2024 · f645183 · f645183
1 parent f7a774d
commit f645183
Show file tree

Hide file tree

Showing 5 changed files with 24 additions and 5 deletions.
diff --git a/compiler/src/model/utils.ts b/compiler/src/model/utils.ts
@@ -667,7 +667,7 @@ export function hoistRequestAnnotations (
     } else if (tag === 'cluster_privileges') {
       const privileges = [
         'all', 'cancel_task', 'create_snapshot', 'grant_api_key', 'manage', 'manage_api_key', 'manage_ccr',
-        'manage_enrich', 'manage_ilm', 'manage_index_templates', 'manage_ingest_pipelines', 'manage_logstash_pipelines',
+        'manage_enrich', 'manage_ilm', 'manage_index_templates', 'manage_inference', 'manage_ingest_pipelines', 'manage_logstash_pipelines',
         'manage_ml', 'manage_oidc', 'manage_own_api_key', 'manage_pipeline', 'manage_rollup', 'manage_saml',
         'manage_security', 'manage_service_account', 'manage_slm', 'manage_token', 'manage_transform', 'manage_user_profile',
         'manage_watcher', 'monitor', 'monitor_ml', 'monitor_rollup', 'monitor_snapshot', 'monitor_text_structure',

diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json
diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json
diff --git a/output/schema/schema.json b/output/schema/schema.json
diff --git a/specification/inference/put/PutRequest.ts b/specification/inference/put/PutRequest.ts
@@ -23,10 +23,20 @@ import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
 
 /**
- * Create an inference endpoint
+ * Create an inference endpoint.
+ * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
+ * After creating the endpoint, wait for the model deployment to complete before using it.
+ * To verify the deployment status, use the get trained model statistics API.
+ * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
+ * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
+ *
+ * IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.
+ * For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.
+ * However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.
  * @rest_spec_name inference.put
  * @availability stack since=8.11.0 stability=stable visibility=public
  * @availability serverless stability=stable visibility=public
+ * @cluster_privileges manage_inference
  */
 export interface Request extends RequestBase {
   path_parts: {