From 970435e51b2c18c90ef3059914c1809b328af75f Mon Sep 17 00:00:00 2001 From: Lucie Milan <32450552+lmilan@users.noreply.github.com> Date: Mon, 9 Dec 2024 09:21:25 +0100 Subject: [PATCH] Feat: Add Hugging Face as LLM provider (#8182) * add hugging face provider * Update _huggingface.md * add prerequisite * Update app/_hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/_huggingface.md Co-authored-by: lena-larionova <54370747+lena-larionova@users.noreply.github.com> * update tables --------- Co-authored-by: lena-larionova <54370747+lena-larionova@users.noreply.github.com> --- .../_huggingface.md | 68 +++++++++++++++ .../_openai.md | 10 +-- .../ai-proxy-advanced/overview/_index.md | 10 +++ .../_huggingface.md | 87 +++++++++++++++++++ app/_hub/kong-inc/ai-proxy/overview/_index.md | 10 +++ .../ai-providers-prereqs-advanced.md | 13 ++- .../md/plugins-hub/ai-providers-prereqs.md | 11 ++- 7 files changed, 191 insertions(+), 18 deletions(-) create mode 100644 app/_hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/_huggingface.md create mode 100644 app/_hub/kong-inc/ai-proxy/how-to/llm-provider-integration-guides/_huggingface.md diff --git a/app/_hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/_huggingface.md b/app/_hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/_huggingface.md new file mode 100644 index 000000000000..ae005a0562d3 --- /dev/null +++ b/app/_hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/_huggingface.md @@ -0,0 +1,68 @@ +--- +nav_title: Hugging Face +title: Set up AI Proxy Advanced with Hugging Face +minimum_version: 3.9.x +--- + +This guide walks you through setting up the AI Proxy Advanced plugin with [Hugging Face](https://huggingface.co/). 
+ +{% include_cached /md/plugins-hub/ai-providers-prereqs-advanced.md snippet='intro' %} + +## Prerequisites + +{% include_cached /md/plugins-hub/ai-providers-prereqs-advanced.md snippet='service' provider='Hugging Face' %} +* Hugging Face access token with permissions to make calls to the Inference API +* [Text-generation model](https://huggingface.co/models?pipeline_tag=text-generation&sort=trending) from Hugging Face + +## Provider configuration + +### Set up route and plugin + +Create the route: + +```bash +curl -X POST http://localhost:8001/services/ai-proxy-advanced/routes \ + --data "name=huggingface-chat" \ + --data "paths[]=~/huggingface-chat$" +``` + +Enable and configure the AI Proxy Advanced plugin for Hugging Face, replacing `<huggingface_token>` with your own access token and `<huggingface_model>` with the name of the model to use. + + +{% plugin_example %} +plugin: kong-inc/ai-proxy-advanced +name: ai-proxy-advanced +config: + targets: + - route_type: "llm/v1/chat" + auth: + header_name: Authorization + header_value: "Bearer <huggingface_token>" + model: + provider: huggingface + name: <huggingface_model> + options: + max_tokens: 512 + temperature: 1.0 + top_p: 0.5 + top_k: 256 +targets: + - route +formats: + - curl + - konnect + - yaml + - kubernetes + - terraform +{% endplugin_example %} + + +### Test the configuration + +Make an `llm/v1/chat` type request to test your new endpoint: + +```bash +curl -X POST http://localhost:8000/huggingface-chat \ + -H 'Content-Type: application/json' \ + --data-raw '{ "messages": [ { "role": "system", "content": "You are a mathematician" }, { "role": "user", "content": "What is 1+1?"} ] }' +``` diff --git a/app/_hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/_openai.md b/app/_hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/_openai.md index 43356bbfa2a0..5da83c920d34 100644 --- a/app/_hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/_openai.md +++ 
b/app/_hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/_openai.md @@ -1,22 +1,22 @@ --- nav_title: OpenAI -title: Set up AI Proxy with OpenAI +title: Set up AI Proxy Advanced with OpenAI --- -This guide walks you through setting up the AI Proxy plugin with [OpenAI](https://openai.com/). +This guide walks you through setting up the AI Proxy Advanced plugin with [OpenAI](https://openai.com/). -{% include_cached /md/plugins-hub/ai-providers-prereqs.md snippet='intro' %} +{% include_cached /md/plugins-hub/ai-providers-prereqs-advanced.md snippet='intro' %} ## Prerequisites -{% include_cached /md/plugins-hub/ai-providers-prereqs.md snippet='service' provider='OpenAI' %} +{% include_cached /md/plugins-hub/ai-providers-prereqs-advanced.md snippet='service' provider='OpenAI' %} ## Provider configuration ### Set up route and plugin After creating an OpenAI account, and purchasing a subscription, you can then create an -AI Proxy route and plugin configuration. +AI Proxy Advanced route and plugin configuration. 
Create a route: diff --git a/app/_hub/kong-inc/ai-proxy-advanced/overview/_index.md b/app/_hub/kong-inc/ai-proxy-advanced/overview/_index.md index eac9e674b1e5..d7f766fec38e 100644 --- a/app/_hub/kong-inc/ai-proxy-advanced/overview/_index.md +++ b/app/_hub/kong-inc/ai-proxy-advanced/overview/_index.md @@ -21,6 +21,9 @@ The following table describes which providers and requests the AI Proxy Advanced | Llama3 (OLLAMA and OpenAI formats) | ✅ | ✅ | ✅ | | Amazon Bedrock | ✅ | ✅ | ✅ | | Gemini | ✅ | ✅ | ✅ | +{% if_version gte:3.9.x %} +| Hugging Face | ✅ | ✅ | ✅ | +{% endif_version %} ## How it works @@ -94,6 +97,10 @@ The plugin's [`config.route_type`](/hub/kong-inc/ai-proxy-advanced/configuration | Amazon Bedrock | Use the LLM `completions` upstream path | `llm/v1/completions` | [Use the model name for the specific LLM provider](https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html) | | Gemini | `llm/v1/chat` | `llm/v1/chat` | `gemini-1.5-flash` or `gemini-1.5-pro` | | Gemini | `llm/v1/completions` | `llm/v1/completions` | `gemini-1.5-flash` or `gemini-1.5-pro` | +{% if_version gte:3.9.x %} +| Hugging Face | `/models/{model_provider}/{model_name}` | `llm/v1/chat` | [Use the model name for the specific LLM provider](https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending) | +| Hugging Face | `/models/{model_provider}/{model_name}` | `llm/v1/completions` | [Use the model name for the specific LLM provider](https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending) | +{% endif_version %} The following upstream URL patterns are used: @@ -108,6 +115,9 @@ The following upstream URL patterns are used: | Mistral | As defined in `config.model.options.upstream_url` | | Amazon Bedrock | `https://bedrock-runtime.{region}.amazonaws.com` | | Gemini | `https://generativelanguage.googleapis.com` | +{% if_version gte:3.9.x %} +| Hugging Face | `https://api-inference.huggingface.co` | +{% endif_version %} 
{:.important} > While only the **Llama2** and **Mistral** models are classed as self-hosted, the target URL can be overridden for any of the supported providers. diff --git a/app/_hub/kong-inc/ai-proxy/how-to/llm-provider-integration-guides/_huggingface.md b/app/_hub/kong-inc/ai-proxy/how-to/llm-provider-integration-guides/_huggingface.md new file mode 100644 index 000000000000..e4a92228ca28 --- /dev/null +++ b/app/_hub/kong-inc/ai-proxy/how-to/llm-provider-integration-guides/_huggingface.md @@ -0,0 +1,87 @@ +--- +nav_title: Hugging Face +title: Set up AI Proxy with Hugging Face +minimum_version: 3.9.x +--- + +This guide walks you through setting up the AI Proxy plugin with [Hugging Face](https://huggingface.co/). + +{% include_cached /md/plugins-hub/ai-providers-prereqs.md snippet='intro' %} + +## Prerequisites + +{% include_cached /md/plugins-hub/ai-providers-prereqs.md snippet='service' provider='Hugging Face' %} +* Hugging Face access token with permissions to make calls to the Inference API +* [Text-generation model](https://huggingface.co/models?pipeline_tag=text-generation&sort=trending) from Hugging Face + +## Provider configuration + +### Set up route and plugin + +{% navtabs %} +{% navtab Kong Admin API %} + +Create the route: + +```bash +curl -X POST http://localhost:8001/services/ai-proxy/routes \ + --data "name=huggingface-chat" \ + --data "paths[]=~/huggingface-chat$" +``` + +Enable and configure the AI Proxy plugin for Hugging Face, replacing `<huggingface_token>` with your access token and `<huggingface_model>` with the name of the model to use: + +```bash +curl -X POST http://localhost:8001/routes/huggingface-chat/plugins \ + --data "name=ai-proxy" \ + --data "config.route_type=llm/v1/chat" \ + --data "config.auth.header_name=Authorization" \ + --data "config.auth.header_value=Bearer <huggingface_token>" \ + --data "config.model.provider=huggingface" \ + --data "config.model.name=<huggingface_model>" \ + --data "config.model.options.max_tokens=512" \ + --data "config.model.options.temperature=1.0" \ + --data
"config.model.options.top_p=0.5" \ + --data "config.model.options.top_k=256" +``` + +{% endnavtab %} +{% navtab YAML %} +```yaml +routes: +- name: huggingface-chat + service: + name: ai-proxy + paths: + - "~/huggingface-chat$" + methods: + - POST +plugins: + - name: ai-proxy + config: + route_type: "llm/v1/chat" + auth: + header_name: "Authorization" + header_value: "Bearer <huggingface_token>" # add your Hugging Face access token + model: + provider: "huggingface" + name: "<huggingface_model>" # add the Hugging Face model to use + options: + max_tokens: 512 + temperature: 1.0 + top_p: 0.5 + top_k: 256 +``` + +{% endnavtab %} +{% endnavtabs %} + +### Test the configuration + +Make an `llm/v1/chat` type request to test your new endpoint: + +```bash +curl -X POST http://localhost:8000/huggingface-chat \ + -H 'Content-Type: application/json' \ + --data-raw '{ "messages": [ { "role": "system", "content": "You are a mathematician" }, { "role": "user", "content": "What is 1+1?"} ] }' +``` diff --git a/app/_hub/kong-inc/ai-proxy/overview/_index.md b/app/_hub/kong-inc/ai-proxy/overview/_index.md index 129c3ffc9f95..15ce5189132a 100644 --- a/app/_hub/kong-inc/ai-proxy/overview/_index.md +++ b/app/_hub/kong-inc/ai-proxy/overview/_index.md @@ -33,6 +33,9 @@ The following table describes which providers and requests the AI Proxy plugin s | Amazon Bedrock | ✅ | ✅ | ✅ | | Gemini | ✅ | ✅ | ✅ | {% endif_version %} +{% if_version gte:3.9.x %} +| Hugging Face | ✅ | ✅ | ✅ | +{% endif_version %} ## How it works @@ -87,6 +90,10 @@ The plugin's [`config.route_type`](/hub/kong-inc/ai-proxy/configuration/#config- | Gemini | `llm/v1/chat` | `llm/v1/chat` | `gemini-1.5-flash` or `gemini-1.5-pro` | | Gemini | `llm/v1/completions` | `llm/v1/completions` | `gemini-1.5-flash` or `gemini-1.5-pro` | {% endif_version %} +{% if_version gte:3.9.x %} +| Hugging Face | `/models/{model_provider}/{model_name}` | `llm/v1/chat` | [Use the model name for the specific LLM 
provider](https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending) | +| Hugging Face | `/models/{model_provider}/{model_name}` | `llm/v1/completions` | [Use the model name for the specific LLM provider](https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending) | +{% endif_version %} The following upstream URL patterns are used: @@ -103,6 +110,9 @@ The following upstream URL patterns are used: | Amazon Bedrock | `https://bedrock-runtime.{region}.amazonaws.com` | | Gemini | `https://generativelanguage.googleapis.com` | {% endif_version %} +{% if_version gte:3.9.x %} +| Hugging Face | `https://api-inference.huggingface.co` | +{% endif_version %} {:.important} > While only the **Llama2** and **Mistral** models are classed as self-hosted, the target URL can be overridden for any of the supported providers. diff --git a/app/_includes/md/plugins-hub/ai-providers-prereqs-advanced.md b/app/_includes/md/plugins-hub/ai-providers-prereqs-advanced.md index 47953563c796..7b3961e996db 100644 --- a/app/_includes/md/plugins-hub/ai-providers-prereqs-advanced.md +++ b/app/_includes/md/plugins-hub/ai-providers-prereqs-advanced.md @@ -18,11 +18,10 @@ it can point somewhere empty (for example, `http://localhost:32000`), because th {% if include.provider %} * {{include.provider}} account and subscription * {% endif %}You need a service to contain the route for the LLM provider. Create a service **first**: - -```bash -curl -X POST http://localhost:8001/services \ - --data "name=ai-proxy-advanced" \ - --data "url=http://localhost:32000" -``` -Remember that the upstream URL can point anywhere empty, as it won't be used by the plugin. + ```bash + curl -X POST http://localhost:8001/services \ + --data "name=ai-proxy-advanced" \ + --data "url=http://localhost:32000" + ``` + Remember that the upstream URL can point anywhere empty, as it won't be used by the plugin. 
{% endif %} \ No newline at end of file diff --git a/app/_includes/md/plugins-hub/ai-providers-prereqs.md b/app/_includes/md/plugins-hub/ai-providers-prereqs.md index 7be07654e989..7d462ceed3ef 100644 --- a/app/_includes/md/plugins-hub/ai-providers-prereqs.md +++ b/app/_includes/md/plugins-hub/ai-providers-prereqs.md @@ -18,11 +18,10 @@ it can point somewhere empty (for example, `http://localhost:32000`), because th {% if include.provider %} * {{include.provider}} account and subscription * {% endif %}You need a service to contain the route for the LLM provider. Create a service **first**: - -```bash -curl -X POST http://localhost:8001/services \ - --data "name=ai-proxy" \ - --data "url=http://localhost:32000" -``` + ```bash + curl -X POST http://localhost:8001/services \ + --data "name=ai-proxy" \ + --data "url=http://localhost:32000" + ``` Remember that the upstream URL can point anywhere empty, as it won't be used by the plugin. {% endif %} \ No newline at end of file