diff --git a/.env.example b/.env.example
index 02a571b8391..0b56317ff31 100644
--- a/.env.example
+++ b/.env.example
@@ -65,6 +65,7 @@ PROXY=
 # ANYSCALE_API_KEY=
 # APIPIE_API_KEY=
 # COHERE_API_KEY=
+# DEEPSEEK_API_KEY=
 # DATABRICKS_API_KEY=
 # FIREWORKS_API_KEY=
 # GROQ_API_KEY=
@@ -74,6 +75,7 @@ PROXY=
 # PERPLEXITY_API_KEY=
 # SHUTTLEAI_API_KEY=
 # TOGETHERAI_API_KEY=
+# UNIFY_API_KEY=
 #============#
 # Anthropic  #
 #============#
@@ -109,6 +111,26 @@ ANTHROPIC_API_KEY=user_provided
 BINGAI_TOKEN=user_provided
 # BINGAI_HOST=https://cn.bing.com
 
+#=================#
+# AWS Bedrock     #
+#=================#
+
+# BEDROCK_AWS_DEFAULT_REGION=us-east-1 # A default region must be provided
+# BEDROCK_AWS_ACCESS_KEY_ID=someAccessKey
+# BEDROCK_AWS_SECRET_ACCESS_KEY=someSecretAccessKey
+
+# Note: This example list is not meant to be exhaustive. If omitted, all known, supported model IDs will be included for you.
+# BEDROCK_AWS_MODELS=anthropic.claude-3-5-sonnet-20240620-v1:0,meta.llama3-1-8b-instruct-v1:0
+
+# See all Bedrock model IDs here: https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns
+
+# Notes on specific models:
+# The following models are not supported because they do not support streaming:
+# ai21.j2-mid-v1
+
+# The following models are not supported because they do not support conversation history:
+# ai21.j2-ultra-v1, cohere.command-text-v14, cohere.command-light-text-v14
+
 #============#
 # Google     #
 #============#
@@ -147,7 +169,7 @@ GOOGLE_KEY=user_provided
 #============#
 
 OPENAI_API_KEY=user_provided
-# OPENAI_MODELS=gpt-4o,gpt-4o-mini,gpt-3.5-turbo-0125,gpt-3.5-turbo-0301,gpt-3.5-turbo,gpt-4,gpt-4-0613,gpt-4-vision-preview,gpt-3.5-turbo-0613,gpt-3.5-turbo-16k-0613,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview,gpt-3.5-turbo-1106,gpt-3.5-turbo-instruct,gpt-3.5-turbo-instruct-0914,gpt-3.5-turbo-16k
+# OPENAI_MODELS=gpt-4o,chatgpt-4o-latest,gpt-4o-mini,gpt-3.5-turbo-0125,gpt-3.5-turbo-0301,gpt-3.5-turbo,gpt-4,gpt-4-0613,gpt-4-vision-preview,gpt-3.5-turbo-0613,gpt-3.5-turbo-16k-0613,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview,gpt-3.5-turbo-1106,gpt-3.5-turbo-instruct,gpt-3.5-turbo-instruct-0914,gpt-3.5-turbo-16k
 
 DEBUG_OPENAI=false
 
@@ -390,6 +412,7 @@ LDAP_CA_CERT_PATH=
 # LDAP_LOGIN_USES_USERNAME=true
 # LDAP_ID=
 # LDAP_USERNAME=
+# LDAP_EMAIL=
 # LDAP_FULL_NAME=
 
 #========================#
@@ -429,10 +452,10 @@ ALLOW_SHARED_LINKS_PUBLIC=true
 # Static File Cache Control   #
 #==============================#
 
-# Leave commented out to use default of 1 month for max-age and 1 week for s-maxage
+# Leave commented out to use defaults: 1 day (86400 seconds) for s-maxage and 2 days (172800 seconds) for max-age
 # NODE_ENV must be set to production for these to take effect
-# STATIC_CACHE_MAX_AGE=604800
-# STATIC_CACHE_S_MAX_AGE=259200
+# STATIC_CACHE_MAX_AGE=172800
+# STATIC_CACHE_S_MAX_AGE=86400
 
 # If you have another service in front of your LibreChat doing compression, disable express based compression here
 # DISABLE_COMPRESSION=true
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
deleted file mode 100644
index ccdc68d81b3..00000000000
--- a/.github/dependabot.yml
+++ /dev/null
@@ -1,47 +0,0 @@
-# To get started with Dependabot version updates, you'll need to specify which
-# package ecosystems to update and where the package manifests are located.
-# Please see the documentation for all configuration options:
-# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
-
-version: 2
-updates:
-  - package-ecosystem: "npm" # See documentation for possible values
-    directory: "/api" # Location of package manifests
-    target-branch: "dev"
-    versioning-strategy: increase-if-necessary
-    schedule:
-      interval: "weekly"
-    allow:
-      # Allow both direct and indirect updates for all packages
-      - dependency-type: "all"
-    commit-message:
-      prefix: "npm api prod"
-      prefix-development: "npm api dev"
-      include: "scope"
-  - package-ecosystem: "npm" # See documentation for possible values
-    directory: "/client" # Location of package manifests
-    target-branch: "dev"
-    versioning-strategy: increase-if-necessary
-    schedule:
-      interval: "weekly"
-    allow:
-      # Allow both direct and indirect updates for all packages
-      - dependency-type: "all"
-    commit-message:
-      prefix: "npm client prod"
-      prefix-development: "npm client dev"
-      include: "scope"
-  - package-ecosystem: "npm" # See documentation for possible values
-    directory: "/" # Location of package manifests
-    target-branch: "dev"
-    versioning-strategy: increase-if-necessary
-    schedule:
-      interval: "weekly"
-    allow:
-      # Allow both direct and indirect updates for all packages
-      - dependency-type: "all"
-    commit-message:
-      prefix: "npm all prod"
-      prefix-development: "npm all dev"
-      include: "scope"
-
diff --git a/.github/workflows/a11y.yml b/.github/workflows/a11y.yml
index e5a3be108dd..a7cfd08169b 100644
--- a/.github/workflows/a11y.yml
+++ b/.github/workflows/a11y.yml
@@ -4,14 +4,23 @@ on:
   pull_request:
     paths:
       - 'client/src/**'
+  workflow_dispatch:
+    inputs:
+      run_workflow:
+        description: 'Set to true to run this workflow'
+        required: true
+        default: 'false'
 
 jobs:
   axe-linter:
     runs-on: ubuntu-latest
+    if: >
+      (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == 'danny-avila/LibreChat') ||
+      (github.event_name == 'workflow_dispatch' && github.event.inputs.run_workflow == 'true')
     steps:
       - uses: actions/checkout@v4
       - uses: dequelabs/axe-linter-action@v1
         with:
           api_key: ${{ secrets.AXE_LINTER_API_KEY }}
-          github_token: ${{ secrets.GITHUB_TOKEN }}
\ No newline at end of file
+          github_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/deploy-dev.yml b/.github/workflows/deploy-dev.yml
new file mode 100644
index 00000000000..fc1c02db69f
--- /dev/null
+++ b/.github/workflows/deploy-dev.yml
@@ -0,0 +1,41 @@
+name: Update Test Server
+
+on:
+  workflow_run:
+    workflows: ["Docker Dev Images Build"]
+    types:
+      - completed
+  workflow_dispatch:
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    if: |
+      github.repository == 'danny-avila/LibreChat' &&
+      (github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success')
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Install SSH Key
+        uses: shimataro/ssh-key-action@v2
+        with:
+          key: ${{ secrets.DO_SSH_PRIVATE_KEY }}
+          known_hosts: ${{ secrets.DO_KNOWN_HOSTS }}
+
+      - name: Run update script on DigitalOcean Droplet
+        env:
+          DO_HOST: ${{ secrets.DO_HOST }}
+          DO_USER: ${{ secrets.DO_USER }}
+        run: |
+          ssh -o StrictHostKeyChecking=no ${DO_USER}@${DO_HOST} << EOF
+            sudo -i -u danny bash << EEOF
+              cd ~/LibreChat && \
+              git fetch origin main && \
+              npm run update:deployed && \
+              git checkout do-deploy && \
+              git rebase main && \
+              npm run start:deployed && \
+              echo "Update completed. Application should be running now."
+ EEOF + EOF diff --git a/.github/workflows/frontend-review.yml b/.github/workflows/frontend-review.yml index c8ba609a72c..0756c6773c7 100644 --- a/.github/workflows/frontend-review.yml +++ b/.github/workflows/frontend-review.yml @@ -53,4 +53,4 @@ jobs: - name: Run unit tests run: npm run test:ci --verbose - working-directory: client + working-directory: client \ No newline at end of file diff --git a/.github/workflows/helmcharts.yml b/.github/workflows/helmcharts.yml index fcd8bc7df64..bc715557e4a 100644 --- a/.github/workflows/helmcharts.yml +++ b/.github/workflows/helmcharts.yml @@ -25,11 +25,9 @@ jobs: - name: Install Helm uses: azure/setup-helm@v4 env: - GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" + GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" - name: Run chart-releaser uses: helm/chart-releaser-action@v1.6.0 - with: - charts_dir: helmchart env: - CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}" \ No newline at end of file + CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000000..16b4104980a --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,16 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "type": "node", + "request": "launch", + "name": "Launch LibreChat (debug)", + "skipFiles": ["/**"], + "program": "${workspaceFolder}/api/server/index.js", + "env": { + "NODE_ENV": "production" + }, + "console": "integratedTerminal" + } + ] +} diff --git a/Dockerfile b/Dockerfile index 1ace3200d51..0793f0de11d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# v0.7.3 +# v0.7.5-rc2 # Base node image FROM node:20-alpine AS node diff --git a/Dockerfile.multi b/Dockerfile.multi index d8fe9d951f8..a32183d82f3 100644 --- a/Dockerfile.multi +++ b/Dockerfile.multi @@ -1,43 +1,44 @@ -# v0.7.3 +# Dockerfile.multi +# v0.7.5-rc2 -# Build API, Client and Data Provider +# Base for all builds FROM node:20-alpine AS base +WORKDIR /app +RUN apk --no-cache add curl +RUN npm config set fetch-retry-maxtimeout 600000 && \ + npm config set fetch-retries 5 && \ + npm config set fetch-retry-mintimeout 15000 +COPY package*.json ./ +COPY packages/data-provider/package*.json ./packages/data-provider/ +COPY client/package*.json ./client/ +COPY api/package*.json ./api/ +RUN npm ci # Build data-provider FROM base AS data-provider-build WORKDIR /app/packages/data-provider -COPY ./packages/data-provider ./ -RUN npm install; npm cache clean --force +COPY packages/data-provider ./ RUN npm run build RUN npm prune --production -# React client build +# Client build FROM base AS client-build WORKDIR /app/client -COPY ./client/package*.json ./ -# Copy data-provider to client's node_modules -COPY --from=data-provider-build /app/packages/data-provider/ /app/client/node_modules/librechat-data-provider/ -RUN npm install; npm cache clean --force -COPY ./client/ ./ +COPY client ./ +COPY --from=data-provider-build /app/packages/data-provider/dist /app/packages/data-provider/dist ENV NODE_OPTIONS="--max-old-space-size=2048" RUN npm run build +RUN npm prune --production -# Node API setup +# API setup (including client dist) FROM base AS api-build +WORKDIR /app +COPY api ./api +COPY config ./config +COPY --from=data-provider-build /app/packages/data-provider/dist ./packages/data-provider/dist +COPY --from=client-build /app/client/dist ./client/dist WORKDIR /app/api -COPY api/package*.json ./ -COPY api/ ./ -# Copy helper scripts -COPY config/ ./ -# Copy data-provider to API's node_modules -COPY --from=data-provider-build /app/packages/data-provider/ 
/app/api/node_modules/librechat-data-provider/ -RUN npm install --include prod; npm cache clean --force -COPY --from=client-build /app/client/dist /app/client/dist +RUN npm prune --production EXPOSE 3080 ENV HOST=0.0.0.0 CMD ["node", "server/index.js"] - -# Nginx setup -FROM nginx:1.27.0-alpine AS prod-stage -COPY ./client/nginx.conf /etc/nginx/conf.d/default.conf -CMD ["nginx", "-g", "daemon off;"] diff --git a/README.md b/README.md index 93f80444ae1..50ccd252b9c 100644 --- a/README.md +++ b/README.md @@ -42,9 +42,11 @@ - 🖥️ UI matching ChatGPT, including Dark mode, Streaming, and latest updates - 🤖 AI model selection: - - OpenAI, Azure OpenAI, BingAI, ChatGPT, Google Vertex AI, Anthropic (Claude), Plugins, Assistants API (including Azure Assistants) + - Anthropic (Claude), AWS Bedrock, OpenAI, Azure OpenAI, BingAI, ChatGPT, Google Vertex AI, Plugins, Assistants API (including Azure Assistants) - ✅ Compatible across both **[Remote & Local AI services](https://www.librechat.ai/docs/configuration/librechat_yaml/ai_endpoints):** - groq, Ollama, Cohere, Mistral AI, Apple MLX, koboldcpp, OpenRouter, together.ai, Perplexity, ShuttleAI, and more +- 🪄 Generative UI with **[Code Artifacts](https://youtu.be/GfTj7O4gmd0?si=WJbdnemZpJzBrJo3)** + - Create React, HTML code, and Mermaid diagrams right in chat - 💾 Create, Save, & Share Custom Presets - 🔀 Switch between AI Endpoints and Presets, mid-chat - 🔄 Edit, Resubmit, and Continue Messages with Conversation branching @@ -81,7 +83,7 @@ LibreChat brings together the future of assistant AIs with the revolutionary tec With LibreChat, you no longer need to opt for ChatGPT Plus and can instead use free or pay-per-call APIs. We welcome contributions, cloning, and forking to enhance the capabilities of this advanced chatbot platform. -[![Watch the video](https://img.youtube.com/vi/bSVHEbVPNl4/maxresdefault.jpg)](https://www.youtube.com/watch?v=bSVHEbVPNl4) +[![Watch the video](https://raw.githubusercontent.com/LibreChat-AI/librechat.ai/main/public/images/changelog/v0.7.4.png)](https://www.youtube.com/watch?v=cvosUxogdpI) Click on the thumbnail to open the video☝️ --- diff --git a/api/app/clients/AnthropicClient.js b/api/app/clients/AnthropicClient.js index 3bc33af3981..486af95c3f3 100644 --- a/api/app/clients/AnthropicClient.js +++ b/api/app/clients/AnthropicClient.js @@ -12,12 +12,13 @@ const { encodeAndFormat } = require('~/server/services/Files/images/encode'); const { truncateText, formatMessage, + addCacheControl, titleFunctionPrompt, parseParamFromPrompt, createContextHandlers, } = require('./prompts'); -const spendTokens = require('~/models/spendTokens'); -const { getModelMaxTokens } = require('~/utils'); +const { getModelMaxTokens, getModelMaxOutputTokens, matchModelName } = require('~/utils'); +const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens'); const { sleep } = require('~/server/utils'); const BaseClient = require('./BaseClient'); const { logger } = require('~/config'); @@ -32,6 +33,7 @@ function delayBeforeRetry(attempts, baseDelay = 1000) { return new Promise((resolve) => setTimeout(resolve, baseDelay * attempts)); } +const tokenEventTypes = new Set(['message_start', 'message_delta']); const { legacy } = anthropicSettings; class AnthropicClient extends BaseClient { @@ -44,6 +46,30 @@ class AnthropicClient extends BaseClient { ? 
options.contextStrategy.toLowerCase() : 'discard'; this.setOptions(options); + /** @type {string | undefined} */ + this.systemMessage; + /** @type {AnthropicMessageStartEvent| undefined} */ + this.message_start; + /** @type {AnthropicMessageDeltaEvent| undefined} */ + this.message_delta; + /** Whether the model is part of the Claude 3 Family + * @type {boolean} */ + this.isClaude3; + /** Whether to use Messages API or Completions API + * @type {boolean} */ + this.useMessages; + /** Whether or not the model is limited to the legacy amount of output tokens + * @type {boolean} */ + this.isLegacyOutput; + /** Whether or not the model supports Prompt Caching + * @type {boolean} */ + this.supportsCacheControl; + /** The key for the usage object's input tokens + * @type {string} */ + this.inputTokensKey = 'input_tokens'; + /** The key for the usage object's output tokens + * @type {string} */ + this.outputTokensKey = 'output_tokens'; } setOptions(options) { @@ -63,14 +89,19 @@ class AnthropicClient extends BaseClient { this.options = options; } - const modelOptions = this.options.modelOptions || {}; - this.modelOptions = { - ...modelOptions, - model: modelOptions.model || anthropicSettings.model.default, - }; + this.modelOptions = Object.assign( + { + model: anthropicSettings.model.default, + }, + this.modelOptions, + this.options.modelOptions, + ); - this.isClaude3 = this.modelOptions.model.includes('claude-3'); - this.isLegacyOutput = !this.modelOptions.model.includes('claude-3-5-sonnet'); + const modelMatch = matchModelName(this.modelOptions.model, EModelEndpoint.anthropic); + this.isClaude3 = modelMatch.startsWith('claude-3'); + this.isLegacyOutput = !modelMatch.startsWith('claude-3-5-sonnet'); + this.supportsCacheControl = + this.options.promptCache && this.checkPromptCacheSupport(modelMatch); if ( this.isLegacyOutput && @@ -89,7 +120,14 @@ class AnthropicClient extends BaseClient { this.options.maxContextTokens ?? getModelMaxTokens(this.modelOptions.model, EModelEndpoint.anthropic) ?? 100000; - this.maxResponseTokens = this.modelOptions.maxOutputTokens || 1500; + this.maxResponseTokens = + this.modelOptions.maxOutputTokens ?? + getModelMaxOutputTokens( + this.modelOptions.model, + this.options.endpointType ?? this.options.endpoint, + this.options.endpointTokenConfig, + ) ?? 
+ 1500; this.maxPromptTokens = this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens; @@ -113,17 +151,6 @@ class AnthropicClient extends BaseClient { this.endToken = ''; this.gptEncoder = this.constructor.getTokenizer('cl100k_base'); - if (!this.modelOptions.stop) { - const stopTokens = [this.startToken]; - if (this.endToken && this.endToken !== this.startToken) { - stopTokens.push(this.endToken); - } - stopTokens.push(`${this.userLabel}`); - stopTokens.push('<|diff_marker|>'); - - this.modelOptions.stop = stopTokens; - } - return this; } @@ -147,19 +174,74 @@ class AnthropicClient extends BaseClient { options.baseURL = this.options.reverseProxyUrl; } - if (requestOptions?.model && requestOptions.model.includes('claude-3-5-sonnet')) { + if ( + this.supportsCacheControl && + requestOptions?.model && + requestOptions.model.includes('claude-3-5-sonnet') + ) { + options.defaultHeaders = { + 'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31', + }; + } else if (this.supportsCacheControl) { options.defaultHeaders = { - 'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15', + 'anthropic-beta': 'prompt-caching-2024-07-31', }; } return new Anthropic(options); } - getTokenCountForResponse(response) { + /** + * Get stream usage as returned by this client's API response. + * @returns {AnthropicStreamUsage} The stream usage object. + */ + getStreamUsage() { + const inputUsage = this.message_start?.message?.usage ?? {}; + const outputUsage = this.message_delta?.usage ?? {}; + return Object.assign({}, inputUsage, outputUsage); + } + + /** + * Calculates the correct token count for the current user message based on the token count map and API usage. + * Edge case: If the calculation results in a negative value, it returns the original estimate. + * If revisiting a conversation with a chat history entirely composed of token estimates, + * the cumulative token count going forward should become more accurate as the conversation progresses. + * @param {Object} params - The parameters for the calculation. + * @param {Record} params.tokenCountMap - A map of message IDs to their token counts. + * @param {string} params.currentMessageId - The ID of the current message to calculate. + * @param {AnthropicStreamUsage} params.usage - The usage object returned by the API. + * @returns {number} The correct token count for the current user message. + */ + calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage }) { + const originalEstimate = tokenCountMap[currentMessageId] || 0; + + if (!usage || typeof usage.input_tokens !== 'number') { + return originalEstimate; + } + + tokenCountMap[currentMessageId] = 0; + const totalTokensFromMap = Object.values(tokenCountMap).reduce((sum, count) => { + const numCount = Number(count); + return sum + (isNaN(numCount) ? 0 : numCount); + }, 0); + const totalInputTokens = + (usage.input_tokens ?? 0) + + (usage.cache_creation_input_tokens ?? 0) + + (usage.cache_read_input_tokens ?? 0); + + const currentMessageTokens = totalInputTokens - totalTokensFromMap; + return currentMessageTokens > 0 ? 
currentMessageTokens : originalEstimate; + } + + /** + * Get Token Count for LibreChat Message + * @param {TMessage} responseMessage + * @returns {number} + */ + getTokenCountForResponse(responseMessage) { return this.getTokenCountForMessage({ role: 'assistant', - content: response.text, + content: responseMessage.text, }); } @@ -212,7 +294,38 @@ class AnthropicClient extends BaseClient { return files; } - async recordTokenUsage({ promptTokens, completionTokens, model, context = 'message' }) { + /** + * @param {object} params + * @param {number} params.promptTokens + * @param {number} params.completionTokens + * @param {AnthropicStreamUsage} [params.usage] + * @param {string} [params.model] + * @param {string} [params.context='message'] + * @returns {Promise} + */ + async recordTokenUsage({ promptTokens, completionTokens, usage, model, context = 'message' }) { + if (usage != null && usage?.input_tokens != null) { + const input = usage.input_tokens ?? 0; + const write = usage.cache_creation_input_tokens ?? 0; + const read = usage.cache_read_input_tokens ?? 0; + + await spendStructuredTokens( + { + context, + user: this.user, + conversationId: this.conversationId, + model: model ?? this.modelOptions.model, + endpointTokenConfig: this.options.endpointTokenConfig, + }, + { + promptTokens: { input, write, read }, + completionTokens, + }, + ); + + return; + } + await spendTokens( { context, @@ -381,7 +494,10 @@ class AnthropicClient extends BaseClient { identityPrefix = `${identityPrefix}\nYou are ${this.options.modelLabel}`; } - let promptPrefix = (this.options.promptPrefix || '').trim(); + let promptPrefix = (this.options.promptPrefix ?? '').trim(); + if (typeof this.options.artifactsPrompt === 'string' && this.options.artifactsPrompt) { + promptPrefix = `${promptPrefix ?? ''}\n${this.options.artifactsPrompt}`.trim(); + } if (promptPrefix) { // If the prompt prefix doesn't end with the end token, add it. if (!promptPrefix.endsWith(`${this.endToken}`)) { @@ -560,6 +676,22 @@ class AnthropicClient extends BaseClient { : await client.completions.create(options); } + /** + * @param {string} modelName + * @returns {boolean} + */ + checkPromptCacheSupport(modelName) { + const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic); + if ( + modelMatch === 'claude-3-5-sonnet' || + modelMatch === 'claude-3-haiku' || + modelMatch === 'claude-3-opus' + ) { + return true; + } + return false; + } + async sendCompletion(payload, { onProgress, abortController }) { if (!abortController) { abortController = new AbortController(); @@ -606,10 +738,22 @@ class AnthropicClient extends BaseClient { requestOptions.max_tokens_to_sample = maxOutputTokens || 1500; } - if (this.systemMessage) { + if (this.systemMessage && this.supportsCacheControl === true) { + requestOptions.system = [ + { + type: 'text', + text: this.systemMessage, + cache_control: { type: 'ephemeral' }, + }, + ]; + } else if (this.systemMessage) { requestOptions.system = this.systemMessage; } + if (this.supportsCacheControl === true && this.useMessages) { + requestOptions.messages = addCacheControl(requestOptions.messages); + } + logger.debug('[AnthropicClient]', { ...requestOptions }); const handleChunk = (currentChunk) => { @@ -639,6 +783,11 @@ class AnthropicClient extends BaseClient { for await (const completion of response) { // Handle each completion as before + const type = completion?.type ?? 
''; + if (tokenEventTypes.has(type)) { + logger.debug(`[AnthropicClient] ${type}`, completion); + this[type] = completion; + } if (completion?.delta?.text) { handleChunk(completion.delta.text); } else if (completion.completion) { @@ -680,8 +829,10 @@ class AnthropicClient extends BaseClient { getSaveOptions() { return { maxContextTokens: this.options.maxContextTokens, + artifacts: this.options.artifacts, promptPrefix: this.options.promptPrefix, modelLabel: this.options.modelLabel, + promptCache: this.options.promptCache, resendFiles: this.options.resendFiles, iconURL: this.options.iconURL, greeting: this.options.greeting, @@ -727,6 +878,8 @@ class AnthropicClient extends BaseClient { */ async titleConvo({ text, responseText = '' }) { let title = 'New Chat'; + this.message_delta = undefined; + this.message_start = undefined; const convo = ` ${truncateText(text)} diff --git a/api/app/clients/BaseClient.js b/api/app/clients/BaseClient.js index 1a2c6aadc22..33e3df3ac6e 100644 --- a/api/app/clients/BaseClient.js +++ b/api/app/clients/BaseClient.js @@ -1,6 +1,14 @@ const crypto = require('crypto'); const fetch = require('node-fetch'); -const { supportsBalanceCheck, Constants, CacheKeys, Time } = require('librechat-data-provider'); +const { + supportsBalanceCheck, + isAgentsEndpoint, + isParamEndpoint, + ErrorTypes, + Constants, + CacheKeys, + Time, +} = require('librechat-data-provider'); const { getMessages, saveMessage, updateMessage, saveConvo } = require('~/models'); const { addSpaceIfNeeded, isEnabled } = require('~/server/utils'); const checkBalance = require('~/models/checkBalance'); @@ -28,6 +36,20 @@ class BaseClient { this.userMessagePromise; /** @type {ClientDatabaseSavePromise} */ this.responsePromise; + /** @type {string} */ + this.user; + /** @type {string} */ + this.conversationId; + /** @type {string} */ + this.responseMessageId; + /** @type {TAttachment[]} */ + this.attachments; + /** The key for the usage object's input tokens + * @type {string} */ + this.inputTokensKey = 'prompt_tokens'; + /** The key for the usage object's output tokens + * @type {string} */ + this.outputTokensKey = 'completion_tokens'; } setOptions() { @@ -54,10 +76,33 @@ class BaseClient { throw new Error('Subclasses attempted to call summarizeMessages without implementing it'); } - async getTokenCountForResponse(response) { - logger.debug('`[BaseClient] recordTokenUsage` not implemented.', response); + /** + * @returns {string} + */ + getResponseModel() { + if (isAgentsEndpoint(this.options.endpoint) && this.options.agent && this.options.agent.id) { + return this.options.agent.id; + } + + return this.modelOptions.model; + } + + /** + * Abstract method to get the token count for a message. Subclasses must implement this method. + * @param {TMessage} responseMessage + * @returns {number} + */ + getTokenCountForResponse(responseMessage) { + logger.debug('`[BaseClient] recordTokenUsage` not implemented.', responseMessage); } + /** + * Abstract method to record token usage. Subclasses must implement this method. + * If a correction to the token usage is needed, the method should return an object with the corrected token counts. 
+ * @param {number} promptTokens + * @param {number} completionTokens + * @returns {Promise} + */ async recordTokenUsage({ promptTokens, completionTokens }) { logger.debug('`[BaseClient] recordTokenUsage` not implemented.', { promptTokens, @@ -143,6 +188,8 @@ class BaseClient { this.currentMessages[this.currentMessages.length - 1].messageId = head; } + this.responseMessageId = responseMessageId; + return { ...opts, user, @@ -191,6 +238,7 @@ class BaseClient { userMessage, conversationId, responseMessageId, + sender: this.sender, }); } @@ -329,7 +377,12 @@ class BaseClient { }; } - async handleContextStrategy({ instructions, orderedMessages, formattedMessages }) { + async handleContextStrategy({ + instructions, + orderedMessages, + formattedMessages, + buildTokenMap = true, + }) { let _instructions; let tokenCount; @@ -371,9 +424,10 @@ class BaseClient { const latestMessage = orderedWithInstructions[orderedWithInstructions.length - 1]; if (payload.length === 0 && !shouldSummarize && latestMessage) { - throw new Error( - `Prompt token count of ${latestMessage.tokenCount} exceeds max token count of ${this.maxContextTokens}.`, - ); + const info = `${latestMessage.tokenCount} / ${this.maxContextTokens}`; + const errorMessage = `{ "type": "${ErrorTypes.INPUT_LENGTH}", "info": "${info}" }`; + logger.warn(`Prompt token count exceeds max token count (${info}).`); + throw new Error(errorMessage); } if (usePrevSummary) { @@ -398,19 +452,23 @@ class BaseClient { maxContextTokens: this.maxContextTokens, }); - let tokenCountMap = orderedWithInstructions.reduce((map, message, index) => { - const { messageId } = message; - if (!messageId) { - return map; - } + /** @type {Record | undefined} */ + let tokenCountMap; + if (buildTokenMap) { + tokenCountMap = orderedWithInstructions.reduce((map, message, index) => { + const { messageId } = message; + if (!messageId) { + return map; + } - if (shouldSummarize && index === summaryIndex && !usePrevSummary) { - map.summaryMessage = { ...summaryMessage, messageId, tokenCount: summaryTokenCount }; - } + if (shouldSummarize && index === summaryIndex && !usePrevSummary) { + map.summaryMessage = { ...summaryMessage, messageId, tokenCount: summaryTokenCount }; + } - map[messageId] = orderedWithInstructions[index].tokenCount; - return map; - }, {}); + map[messageId] = orderedWithInstructions[index].tokenCount; + return map; + }, {}); + } const promptTokens = this.maxContextTokens - remainingContextTokens; @@ -512,6 +570,7 @@ class BaseClient { }); } + /** @type {string|string[]|undefined} */ const completion = await this.sendCompletion(payload, opts); this.abortController.requestCompleted = true; @@ -521,28 +580,61 @@ class BaseClient { parentMessageId: userMessage.messageId, isCreatedByUser: false, isEdited, - model: this.modelOptions.model, + model: this.getResponseModel(), sender: this.sender, - text: addSpaceIfNeeded(generation) + completion, promptTokens, iconURL: this.options.iconURL, endpoint: this.options.endpoint, ...(this.metadata ?? 
{}), }; + if (typeof completion === 'string') { + responseMessage.text = addSpaceIfNeeded(generation) + completion; + } else if ( + Array.isArray(completion) && + isParamEndpoint(this.options.endpoint, this.options.endpointType) + ) { + responseMessage.text = ''; + responseMessage.content = completion; + } else if (Array.isArray(completion)) { + responseMessage.text = addSpaceIfNeeded(generation) + completion.join(''); + } + if ( tokenCountMap && this.recordTokenUsage && this.getTokenCountForResponse && this.getTokenCount ) { - responseMessage.tokenCount = this.getTokenCountForResponse(responseMessage); - const completionTokens = this.getTokenCount(completion); - await this.recordTokenUsage({ promptTokens, completionTokens }); + let completionTokens; + + /** + * Metadata about input/output costs for the current message. The client + * should provide a function to get the current stream usage metadata; if not, + * use the legacy token estimations. + * @type {StreamUsage | null} */ + const usage = this.getStreamUsage != null ? this.getStreamUsage() : null; + + if (usage != null && Number(usage[this.outputTokensKey]) > 0) { + responseMessage.tokenCount = usage[this.outputTokensKey]; + completionTokens = responseMessage.tokenCount; + await this.updateUserMessageTokenCount({ usage, tokenCountMap, userMessage, opts }); + } else { + responseMessage.tokenCount = this.getTokenCountForResponse(responseMessage); + completionTokens = this.getTokenCount(completion); + } + + await this.recordTokenUsage({ promptTokens, completionTokens, usage }); } + if (this.userMessagePromise) { await this.userMessagePromise; } + + if (this.artifactPromises) { + responseMessage.attachments = (await Promise.all(this.artifactPromises)).filter((a) => a); + } + this.responsePromise = this.saveMessageToDatabase(responseMessage, saveOptions, user); const messageCache = getLogStores(CacheKeys.MESSAGES); messageCache.set( @@ -557,6 +649,66 @@ class BaseClient { return responseMessage; } + /** + * Stream usage should only be used for user message token count re-calculation if: + * - The stream usage is available, with input tokens greater than 0, + * - the client provides a function to calculate the current token count, + * - files are being resent with every message (default behavior; or if `false`, with no attachments), + * - the `promptPrefix` (custom instructions) is not set. + * + * In these cases, the legacy token estimations would be more accurate. + * + * TODO: included system messages in the `orderedMessages` accounting, potentially as a + * separate message in the UI. ChatGPT does this through "hidden" system messages. 
+ * @param {object} params + * @param {StreamUsage} params.usage + * @param {Record} params.tokenCountMap + * @param {TMessage} params.userMessage + * @param {object} params.opts + */ + async updateUserMessageTokenCount({ usage, tokenCountMap, userMessage, opts }) { + /** @type {boolean} */ + const shouldUpdateCount = + this.calculateCurrentTokenCount != null && + Number(usage[this.inputTokensKey]) > 0 && + (this.options.resendFiles || + (!this.options.resendFiles && !this.options.attachments?.length)) && + !this.options.promptPrefix; + + if (!shouldUpdateCount) { + return; + } + + const userMessageTokenCount = this.calculateCurrentTokenCount({ + currentMessageId: userMessage.messageId, + tokenCountMap, + usage, + }); + + if (userMessageTokenCount === userMessage.tokenCount) { + return; + } + + userMessage.tokenCount = userMessageTokenCount; + /* + Note: `AskController` saves the user message, so we update the count of its `userMessage` reference + */ + if (typeof opts?.getReqData === 'function') { + opts.getReqData({ + userMessage, + }); + } + /* + Note: we update the user message to be sure it gets the calculated token count; + though `AskController` saves the user message, EditController does not + */ + await this.userMessagePromise; + await this.updateMessageInDatabase({ + messageId: userMessage.messageId, + tokenCount: userMessageTokenCount, + }); + } + async loadHistory(conversationId, parentMessageId = null) { logger.debug('[BaseClient] Loading history:', { conversationId, parentMessageId }); @@ -644,6 +796,10 @@ class BaseClient { return { message: savedMessage, conversation }; } + /** + * Update a message in the database. + * @param {Partial} message + */ async updateMessageInDatabase(message) { await updateMessage(this.options.req, message); } @@ -767,8 +923,12 @@ class BaseClient { processValue(nestedValue); } - } else { + } else if (typeof value === 'string') { numTokens += this.getTokenCount(value); + } else if (typeof value === 'number') { + numTokens += this.getTokenCount(value.toString()); + } else if (typeof value === 'boolean') { + numTokens += this.getTokenCount(value.toString()); } }; diff --git a/api/app/clients/ChatGPTClient.js b/api/app/clients/ChatGPTClient.js index 0a7f6fc7d88..104e9e5ac3f 100644 --- a/api/app/clients/ChatGPTClient.js +++ b/api/app/clients/ChatGPTClient.js @@ -1,19 +1,21 @@ const Keyv = require('keyv'); const crypto = require('crypto'); +const { CohereClient } = require('cohere-ai'); +const { fetchEventSource } = require('@waylaidwanderer/fetch-event-source'); +const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken'); const { + ImageDetail, EModelEndpoint, resolveHeaders, CohereConstants, mapModelToAzureConfig, } = require('librechat-data-provider'); -const { CohereClient } = require('cohere-ai'); -const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken'); -const { fetchEventSource } = require('@waylaidwanderer/fetch-event-source'); +const { extractBaseURL, constructAzureURL, genAzureChatCompletion } = require('~/utils'); +const { createContextHandlers } = require('./prompts'); const { createCoherePayload } = require('./llm'); const { Agent, ProxyAgent } = require('undici'); const BaseClient = require('./BaseClient'); const { logger } = require('~/config'); -const { extractBaseURL, constructAzureURL, genAzureChatCompletion } = require('~/utils'); const CHATGPT_MODEL = 'gpt-3.5-turbo'; const tokenizersCache = {}; @@ -612,21 +614,66 @@ ${botMessage.message} async 
buildPrompt(messages, { isChatGptModel = false, promptPrefix = null }) { promptPrefix = (promptPrefix || this.options.promptPrefix || '').trim(); + + // Handle attachments and create augmentedPrompt + if (this.options.attachments) { + const attachments = await this.options.attachments; + const lastMessage = messages[messages.length - 1]; + + if (this.message_file_map) { + this.message_file_map[lastMessage.messageId] = attachments; + } else { + this.message_file_map = { + [lastMessage.messageId]: attachments, + }; + } + + const files = await this.addImageURLs(lastMessage, attachments); + this.options.attachments = files; + + this.contextHandlers = createContextHandlers(this.options.req, lastMessage.text); + } + + if (this.message_file_map) { + this.contextHandlers = createContextHandlers( + this.options.req, + messages[messages.length - 1].text, + ); + } + + // Calculate image token cost and process embedded files + messages.forEach((message, i) => { + if (this.message_file_map && this.message_file_map[message.messageId]) { + const attachments = this.message_file_map[message.messageId]; + for (const file of attachments) { + if (file.embedded) { + this.contextHandlers?.processFile(file); + continue; + } + + messages[i].tokenCount = + (messages[i].tokenCount || 0) + + this.calculateImageTokenCost({ + width: file.width, + height: file.height, + detail: this.options.imageDetail ?? ImageDetail.auto, + }); + } + } + }); + + if (this.contextHandlers) { + this.augmentedPrompt = await this.contextHandlers.createContext(); + promptPrefix = this.augmentedPrompt + promptPrefix; + } + if (promptPrefix) { // If the prompt prefix doesn't end with the end token, add it. if (!promptPrefix.endsWith(`${this.endToken}`)) { promptPrefix = `${promptPrefix.trim()}${this.endToken}\n\n`; } promptPrefix = `${this.startToken}Instructions:\n${promptPrefix}`; - } else { - const currentDateString = new Date().toLocaleDateString('en-us', { - year: 'numeric', - month: 'long', - day: 'numeric', - }); - promptPrefix = `${this.startToken}Instructions:\nYou are ChatGPT, a large language model trained by OpenAI. Respond conversationally.\nCurrent date: ${currentDateString}${this.endToken}\n\n`; } - const promptSuffix = `${this.startToken}${this.chatGptLabel}:\n`; // Prompt ChatGPT to respond. const instructionsPayload = { @@ -714,10 +761,6 @@ ${botMessage.message} this.maxResponseTokens, ); - if (this.options.debug) { - console.debug(`Prompt : ${prompt}`); - } - if (isChatGptModel) { return { prompt: [instructionsPayload, messagePayload], context }; } diff --git a/api/app/clients/GoogleClient.js b/api/app/clients/GoogleClient.js index 2dd921c3cec..92dd54be8d5 100644 --- a/api/app/clients/GoogleClient.js +++ b/api/app/clients/GoogleClient.js @@ -120,19 +120,7 @@ class GoogleClient extends BaseClient { .filter((ex) => ex) .filter((obj) => obj.input.content !== '' && obj.output.content !== ''); - const modelOptions = this.options.modelOptions || {}; - this.modelOptions = { - ...modelOptions, - // set some good defaults (check for undefined in some cases because they may be 0) - model: modelOptions.model || settings.model.default, - temperature: - typeof modelOptions.temperature === 'undefined' - ? settings.temperature.default - : modelOptions.temperature, - topP: typeof modelOptions.topP === 'undefined' ? settings.topP.default : modelOptions.topP, - topK: typeof modelOptions.topK === 'undefined' ? 
settings.topK.default : modelOptions.topK, - // stop: modelOptions.stop // no stop method for now - }; + this.modelOptions = this.options.modelOptions || {}; this.options.attachments?.then((attachments) => this.checkVisionRequest(attachments)); @@ -402,8 +390,13 @@ class GoogleClient extends BaseClient { parameters: this.modelOptions, }; - if (this.options.promptPrefix) { - payload.instances[0].context = this.options.promptPrefix; + let promptPrefix = (this.options.promptPrefix ?? '').trim(); + if (typeof this.options.artifactsPrompt === 'string' && this.options.artifactsPrompt) { + promptPrefix = `${promptPrefix ?? ''}\n${this.options.artifactsPrompt}`.trim(); + } + + if (promptPrefix) { + payload.instances[0].context = promptPrefix; } if (this.options.examples.length > 0) { @@ -457,7 +450,10 @@ class GoogleClient extends BaseClient { identityPrefix = `${identityPrefix}\nYou are ${this.options.modelLabel}`; } - let promptPrefix = (this.options.promptPrefix || '').trim(); + let promptPrefix = (this.options.promptPrefix ?? '').trim(); + if (typeof this.options.artifactsPrompt === 'string' && this.options.artifactsPrompt) { + promptPrefix = `${promptPrefix ?? ''}\n${this.options.artifactsPrompt}`.trim(); + } if (promptPrefix) { // If the prompt prefix doesn't end with the end token, add it. if (!promptPrefix.endsWith(`${this.endToken}`)) { @@ -682,11 +678,16 @@ class GoogleClient extends BaseClient { contents: _payload, }; + let promptPrefix = (this.options.promptPrefix ?? '').trim(); + if (typeof this.options.artifactsPrompt === 'string' && this.options.artifactsPrompt) { + promptPrefix = `${promptPrefix ?? ''}\n${this.options.artifactsPrompt}`.trim(); + } + if (this.options?.promptPrefix?.length) { requestOptions.systemInstruction = { parts: [ { - text: this.options.promptPrefix, + text: promptPrefix, }, ], }; @@ -779,11 +780,16 @@ class GoogleClient extends BaseClient { contents: _payload, }; + let promptPrefix = (this.options.promptPrefix ?? '').trim(); + if (typeof this.options.artifactsPrompt === 'string' && this.options.artifactsPrompt) { + promptPrefix = `${promptPrefix ?? 
''}\n${this.options.artifactsPrompt}`.trim(); + } + if (this.options?.promptPrefix?.length) { requestOptions.systemInstruction = { parts: [ { - text: this.options.promptPrefix, + text: promptPrefix, }, ], }; @@ -808,7 +814,7 @@ class GoogleClient extends BaseClient { }); reply = titleResponse.content; - + // TODO: RECORD TOKEN USAGE return reply; } } @@ -854,6 +860,7 @@ class GoogleClient extends BaseClient { getSaveOptions() { return { + artifacts: this.options.artifacts, promptPrefix: this.options.promptPrefix, modelLabel: this.options.modelLabel, iconURL: this.options.iconURL, diff --git a/api/app/clients/OpenAIClient.js b/api/app/clients/OpenAIClient.js index ccc5165fc71..e4c12ee5877 100644 --- a/api/app/clients/OpenAIClient.js +++ b/api/app/clients/OpenAIClient.js @@ -6,6 +6,7 @@ const { ImageDetail, EModelEndpoint, resolveHeaders, + openAISettings, ImageDetailCost, CohereConstants, getResponseSender, @@ -18,6 +19,7 @@ const { constructAzureURL, getModelMaxTokens, genAzureChatCompletion, + getModelMaxOutputTokens, } = require('~/utils'); const { truncateText, @@ -27,9 +29,9 @@ const { createContextHandlers, } = require('./prompts'); const { encodeAndFormat } = require('~/server/services/Files/images/encode'); +const { spendTokens } = require('~/models/spendTokens'); const { isEnabled, sleep } = require('~/server/utils'); const { handleOpenAIErrors } = require('./tools/util'); -const spendTokens = require('~/models/spendTokens'); const { createLLM, RunManager } = require('./llm'); const ChatGPTClient = require('./ChatGPTClient'); const { summaryBuffer } = require('./memory'); @@ -63,6 +65,11 @@ class OpenAIClient extends BaseClient { /** @type {string | undefined} - The API Completions URL */ this.completionsUrl; + + /** @type {OpenAIUsageMetadata | undefined} */ + this.usage; + /** @type {boolean|undefined} */ + this.isO1Model; } // TODO: PluginsClient calls this 3x, unneeded @@ -85,26 +92,15 @@ class OpenAIClient extends BaseClient { this.apiKey = this.options.openaiApiKey; } - const modelOptions = this.options.modelOptions || {}; + this.modelOptions = Object.assign( + { + model: openAISettings.model.default, + }, + this.modelOptions, + this.options.modelOptions, + ); - if (!this.modelOptions) { - this.modelOptions = { - ...modelOptions, - model: modelOptions.model || 'gpt-3.5-turbo', - temperature: - typeof modelOptions.temperature === 'undefined' ? 0.8 : modelOptions.temperature, - top_p: typeof modelOptions.top_p === 'undefined' ? 1 : modelOptions.top_p, - presence_penalty: - typeof modelOptions.presence_penalty === 'undefined' ? 1 : modelOptions.presence_penalty, - stop: modelOptions.stop, - }; - } else { - // Update the modelOptions if it already exists - this.modelOptions = { - ...this.modelOptions, - ...modelOptions, - }; - } + this.isO1Model = /\bo1\b/i.test(this.modelOptions.model); this.defaultVisionModel = this.options.visionModel ?? 
'gpt-4-vision-preview'; if (typeof this.options.attachments?.then === 'function') { @@ -150,7 +146,8 @@ class OpenAIClient extends BaseClient { const { model } = this.modelOptions; - this.isChatCompletion = this.useOpenRouter || !!reverseProxy || model.includes('gpt'); + this.isChatCompletion = + /\bo1\b/i.test(model) || model.includes('gpt') || this.useOpenRouter || !!reverseProxy; this.isChatGptModel = this.isChatCompletion; if ( model.includes('text-davinci') || @@ -181,7 +178,14 @@ class OpenAIClient extends BaseClient { logger.debug('[OpenAIClient] maxContextTokens', this.maxContextTokens); } - this.maxResponseTokens = this.modelOptions.max_tokens || 1024; + this.maxResponseTokens = + this.modelOptions.max_tokens ?? + getModelMaxOutputTokens( + model, + this.options.endpointType ?? this.options.endpoint, + this.options.endpointTokenConfig, + ) ?? + 1024; this.maxPromptTokens = this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens; @@ -199,8 +203,8 @@ class OpenAIClient extends BaseClient { model: this.modelOptions.model, endpoint: this.options.endpoint, endpointType: this.options.endpointType, - chatGptLabel: this.options.chatGptLabel, modelDisplayLabel: this.options.modelDisplayLabel, + chatGptLabel: this.options.chatGptLabel || this.options.modelLabel, }); this.userLabel = this.options.userLabel || 'User'; @@ -413,6 +417,7 @@ class OpenAIClient extends BaseClient { getSaveOptions() { return { + artifacts: this.options.artifacts, maxContextTokens: this.options.maxContextTokens, chatGptLabel: this.options.chatGptLabel, promptPrefix: this.options.promptPrefix, @@ -475,6 +480,9 @@ class OpenAIClient extends BaseClient { let promptTokens; promptPrefix = (promptPrefix || this.options.promptPrefix || '').trim(); + if (typeof this.options.artifactsPrompt === 'string' && this.options.artifactsPrompt) { + promptPrefix = `${promptPrefix ?? ''}\n${this.options.artifactsPrompt}`.trim(); + } if (this.options.attachments) { const attachments = await this.options.attachments; @@ -541,7 +549,7 @@ class OpenAIClient extends BaseClient { promptPrefix = this.augmentedPrompt + promptPrefix; } - if (promptPrefix) { + if (promptPrefix && this.isO1Model !== true) { promptPrefix = `Instructions:\n${promptPrefix.trim()}`; instructions = { role: 'system', @@ -569,6 +577,16 @@ class OpenAIClient extends BaseClient { messages, }; + /** EXPERIMENTAL */ + if (promptPrefix && this.isO1Model === true) { + const lastUserMessageIndex = payload.findLastIndex((message) => message.role === 'user'); + if (lastUserMessageIndex !== -1) { + payload[ + lastUserMessageIndex + ].content = `${promptPrefix}\n${payload[lastUserMessageIndex].content}`; + } + } + if (tokenCountMap) { tokenCountMap.instructions = instructions?.tokenCount; result.tokenCountMap = tokenCountMap; @@ -629,6 +647,12 @@ class OpenAIClient extends BaseClient { if (completionResult && typeof completionResult === 'string') { reply = completionResult; + } else if ( + completionResult && + typeof completionResult === 'object' && + Array.isArray(completionResult.choices) + ) { + reply = completionResult.choices[0]?.text?.replace(this.endToken, ''); } } else if (typeof opts.onProgress === 'function' || this.options.useChatCompletion) { reply = await this.chatCompletion({ @@ -827,7 +851,7 @@ class OpenAIClient extends BaseClient { const instructionsPayload = [ { - role: this.options.titleMessageRole ?? 'system', + role: this.options.titleMessageRole ?? (this.isOllama ? 
'user' : 'system'), content: `Please generate ${titleInstruction} ${convo} @@ -893,6 +917,60 @@ ${convo} return title; } + /** + * Get stream usage as returned by this client's API response. + * @returns {OpenAIUsageMetadata} The stream usage object. + */ + getStreamUsage() { + if ( + this.usage && + typeof this.usage === 'object' && + 'completion_tokens_details' in this.usage && + this.usage.completion_tokens_details && + typeof this.usage.completion_tokens_details === 'object' && + 'reasoning_tokens' in this.usage.completion_tokens_details + ) { + const outputTokens = Math.abs( + this.usage.completion_tokens_details.reasoning_tokens - this.usage[this.outputTokensKey], + ); + return { + ...this.usage.completion_tokens_details, + [this.inputTokensKey]: this.usage[this.inputTokensKey], + [this.outputTokensKey]: outputTokens, + }; + } + return this.usage; + } + + /** + * Calculates the correct token count for the current user message based on the token count map and API usage. + * Edge case: If the calculation results in a negative value, it returns the original estimate. + * If revisiting a conversation with a chat history entirely composed of token estimates, + * the cumulative token count going forward should become more accurate as the conversation progresses. + * @param {Object} params - The parameters for the calculation. + * @param {Record} params.tokenCountMap - A map of message IDs to their token counts. + * @param {string} params.currentMessageId - The ID of the current message to calculate. + * @param {OpenAIUsageMetadata} params.usage - The usage object returned by the API. + * @returns {number} The correct token count for the current user message. + */ + calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage }) { + const originalEstimate = tokenCountMap[currentMessageId] || 0; + + if (!usage || typeof usage[this.inputTokensKey] !== 'number') { + return originalEstimate; + } + + tokenCountMap[currentMessageId] = 0; + const totalTokensFromMap = Object.values(tokenCountMap).reduce((sum, count) => { + const numCount = Number(count); + return sum + (isNaN(numCount) ? 0 : numCount); + }, 0); + const totalInputTokens = usage[this.inputTokensKey] ?? 0; + + const currentMessageTokens = totalInputTokens - totalTokensFromMap; + return currentMessageTokens > 0 ? currentMessageTokens : originalEstimate; + } + async summarizeMessages({ messagesToRefine, remainingContextTokens }) { logger.debug('[OpenAIClient] Summarizing messages...'); let context = messagesToRefine; @@ -1008,7 +1086,16 @@ ${convo} } } - async recordTokenUsage({ promptTokens, completionTokens, context = 'message' }) { + /** + * @param {object} params + * @param {number} params.promptTokens + * @param {number} params.completionTokens + * @param {OpenAIUsageMetadata} [params.usage] + * @param {string} [params.model] + * @param {string} [params.context='message'] + * @returns {Promise} + */ + async recordTokenUsage({ promptTokens, completionTokens, usage, context = 'message' }) { await spendTokens( { context, @@ -1019,6 +1106,24 @@ ${convo} }, { promptTokens, completionTokens }, ); + + if ( + usage && + typeof usage === 'object' && + 'reasoning_tokens' in usage && + typeof usage.reasoning_tokens === 'number' + ) { + await spendTokens( + { + context: 'reasoning', + model: this.modelOptions.model, + conversationId: this.conversationId, + user: this.user ?? 
this.options.req.user?.id, + endpointTokenConfig: this.options.endpointTokenConfig, + }, + { completionTokens: usage.reasoning_tokens }, + ); + } } getTokenCountForResponse(response) { @@ -1031,7 +1136,7 @@ ${convo} async chatCompletion({ payload, onProgress, abortController = null }) { let error = null; const errorCallback = (err) => (error = err); - let intermediateReply = ''; + const intermediateReply = []; try { if (!abortController) { abortController = new AbortController(); @@ -1125,6 +1230,11 @@ ${convo} opts.defaultHeaders = { ...opts.defaultHeaders, 'api-key': this.apiKey }; } + if (this.isO1Model === true && modelOptions.max_tokens != null) { + modelOptions.max_completion_tokens = modelOptions.max_tokens; + delete modelOptions.max_tokens; + } + if (process.env.OPENAI_ORGANIZATION) { opts.organization = process.env.OPENAI_ORGANIZATION; } @@ -1194,7 +1304,20 @@ ${convo} } let UnexpectedRoleError = false; + /** @type {Promise} */ + let streamPromise; + /** @type {(value: void | PromiseLike) => void} */ + let streamResolve; + + if (modelOptions.stream && this.isO1Model) { + delete modelOptions.stream; + delete modelOptions.stop; + } + if (modelOptions.stream) { + streamPromise = new Promise((resolve) => { + streamResolve = resolve; + }); const stream = await openai.beta.chat.completions .stream({ ...modelOptions, @@ -1206,26 +1329,30 @@ ${convo} .on('error', (err) => { handleOpenAIErrors(err, errorCallback, 'stream'); }) - .on('finalChatCompletion', (finalChatCompletion) => { + .on('finalChatCompletion', async (finalChatCompletion) => { const finalMessage = finalChatCompletion?.choices?.[0]?.message; - if (finalMessage && finalMessage?.role !== 'assistant') { + if (!finalMessage) { + return; + } + await streamPromise; + if (finalMessage?.role !== 'assistant') { finalChatCompletion.choices[0].message.role = 'assistant'; } - if (finalMessage && !finalMessage?.content?.trim()) { - finalChatCompletion.choices[0].message.content = intermediateReply; + if (typeof finalMessage.content !== 'string' || finalMessage.content.trim() === '') { + finalChatCompletion.choices[0].message.content = intermediateReply.join(''); } }) .on('finalMessage', (message) => { if (message?.role !== 'assistant') { - stream.messages.push({ role: 'assistant', content: intermediateReply }); + stream.messages.push({ role: 'assistant', content: intermediateReply.join('') }); UnexpectedRoleError = true; } }); for await (const chunk of stream) { const token = chunk.choices[0]?.delta?.content || ''; - intermediateReply += token; + intermediateReply.push(token); onProgress(token); if (abortController.signal.aborted) { stream.controller.abort(); @@ -1235,6 +1362,8 @@ ${convo} await sleep(streamRate); } + streamResolve(); + if (!UnexpectedRoleError) { chatCompletion = await stream.finalChatCompletion().catch((err) => { handleOpenAIErrors(err, errorCallback, 'finalChatCompletion'); @@ -1262,19 +1391,31 @@ ${convo} throw new Error('Chat completion failed'); } - const { message, finish_reason } = chatCompletion.choices[0]; - if (chatCompletion) { - this.metadata = { finish_reason }; + const { choices } = chatCompletion; + this.usage = chatCompletion.usage; + + if (!Array.isArray(choices) || choices.length === 0) { + logger.warn('[OpenAIClient] Chat completion response has no choices'); + return intermediateReply.join(''); } + const { message, finish_reason } = choices[0] ?? 
{}; + this.metadata = { finish_reason }; + logger.debug('[OpenAIClient] chatCompletion response', chatCompletion); - if (!message?.content?.trim() && intermediateReply.length) { + if (!message) { + logger.warn('[OpenAIClient] Message is undefined in chatCompletion response'); + return intermediateReply.join(''); + } + + if (typeof message.content !== 'string' || message.content.trim() === '') { + const reply = intermediateReply.join(''); logger.debug( '[OpenAIClient] chatCompletion: using intermediateReply due to empty message.content', - { intermediateReply }, + { intermediateReply: reply }, ); - return intermediateReply; + return reply; } return message.content; @@ -1283,7 +1424,7 @@ ${convo} err?.message?.includes('abort') || (err instanceof OpenAI.APIError && err?.message?.includes('abort')) ) { - return intermediateReply; + return intermediateReply.join(''); } if ( err?.message?.includes( @@ -1298,10 +1439,10 @@ ${convo} (err instanceof OpenAI.OpenAIError && err?.message?.includes('missing finish_reason')) ) { logger.error('[OpenAIClient] Known OpenAI error:', err); - return intermediateReply; + return intermediateReply.join(''); } else if (err instanceof OpenAI.APIError) { - if (intermediateReply) { - return intermediateReply; + if (intermediateReply.length > 0) { + return intermediateReply.join(''); } else { throw err; } diff --git a/api/app/clients/PluginsClient.js b/api/app/clients/PluginsClient.js index a23fb019ba2..da7988acaf6 100644 --- a/api/app/clients/PluginsClient.js +++ b/api/app/clients/PluginsClient.js @@ -42,6 +42,7 @@ class PluginsClient extends OpenAIClient { getSaveOptions() { return { + artifacts: this.options.artifacts, chatGptLabel: this.options.chatGptLabel, promptPrefix: this.options.promptPrefix, tools: this.options.tools, @@ -145,16 +146,22 @@ class PluginsClient extends OpenAIClient { // initialize agent const initializer = this.functionsAgent ? initializeFunctionsAgent : initializeCustomAgent; + + let customInstructions = (this.options.promptPrefix ?? '').trim(); + if (typeof this.options.artifactsPrompt === 'string' && this.options.artifactsPrompt) { + customInstructions = `${customInstructions ?? ''}\n${this.options.artifactsPrompt}`.trim(); + } + this.executor = await initializer({ model, signal, pastMessages, tools: this.tools, + customInstructions, verbose: this.options.debug, returnIntermediateSteps: true, customName: this.options.chatGptLabel, currentDateString: this.currentDateString, - customInstructions: this.options.promptPrefix, callbackManager: CallbackManager.fromHandlers({ async handleAgentAction(action, runId) { handleAction(action, runId, onAgentAction); diff --git a/api/app/clients/llm/RunManager.js b/api/app/clients/llm/RunManager.js index 7ab0b06b520..51abe480a91 100644 --- a/api/app/clients/llm/RunManager.js +++ b/api/app/clients/llm/RunManager.js @@ -1,5 +1,5 @@ const { createStartHandler } = require('~/app/clients/callbacks'); -const spendTokens = require('~/models/spendTokens'); +const { spendTokens } = require('~/models/spendTokens'); const { logger } = require('~/config'); class RunManager { diff --git a/api/app/clients/llm/createLLM.js b/api/app/clients/llm/createLLM.js index 09b29cca8e9..3344ced4ba3 100644 --- a/api/app/clients/llm/createLLM.js +++ b/api/app/clients/llm/createLLM.js @@ -8,7 +8,7 @@ const { isEnabled } = require('~/server/utils'); * @param {Object} options - The options for creating the LLM. 
* @param {ModelOptions} options.modelOptions - The options specific to the model, including modelName, temperature, presence_penalty, frequency_penalty, and other model-related settings. * @param {ConfigOptions} options.configOptions - Configuration options for the API requests, including proxy settings and custom headers. - * @param {Callbacks} options.callbacks - Callback functions for managing the lifecycle of the LLM, including token buffers, context, and initial message count. + * @param {Callbacks} [options.callbacks] - Callback functions for managing the lifecycle of the LLM, including token buffers, context, and initial message count. * @param {boolean} [options.streaming=false] - Determines if the LLM should operate in streaming mode. * @param {string} options.openAIApiKey - The API key for OpenAI, used for authentication. * @param {AzureOptions} [options.azure={}] - Optional Azure-specific configurations. If provided, Azure configurations take precedence over OpenAI configurations. diff --git a/api/app/clients/prompts/addCacheControl.js b/api/app/clients/prompts/addCacheControl.js new file mode 100644 index 00000000000..eed5910dc94 --- /dev/null +++ b/api/app/clients/prompts/addCacheControl.js @@ -0,0 +1,43 @@ +/** + * Anthropic API: Adds cache control to the appropriate user messages in the payload. + * @param {Array} messages - The array of message objects. + * @returns {Array} - The updated array of message objects with cache control added. + */ +function addCacheControl(messages) { + if (!Array.isArray(messages) || messages.length < 2) { + return messages; + } + + const updatedMessages = [...messages]; + let userMessagesModified = 0; + + for (let i = updatedMessages.length - 1; i >= 0 && userMessagesModified < 2; i--) { + const message = updatedMessages[i]; + if (message.role !== 'user') { + continue; + } + + if (typeof message.content === 'string') { + message.content = [ + { + type: 'text', + text: message.content, + cache_control: { type: 'ephemeral' }, + }, + ]; + userMessagesModified++; + } else if (Array.isArray(message.content)) { + for (let j = message.content.length - 1; j >= 0; j--) { + if (message.content[j].type === 'text') { + message.content[j].cache_control = { type: 'ephemeral' }; + userMessagesModified++; + break; + } + } + } + } + + return updatedMessages; +} + +module.exports = addCacheControl; diff --git a/api/app/clients/prompts/addCacheControl.spec.js b/api/app/clients/prompts/addCacheControl.spec.js new file mode 100644 index 00000000000..c46ffd95e31 --- /dev/null +++ b/api/app/clients/prompts/addCacheControl.spec.js @@ -0,0 +1,227 @@ +const addCacheControl = require('./addCacheControl'); + +describe('addCacheControl', () => { + test('should add cache control to the last two user messages with array content', () => { + const messages = [ + { role: 'user', content: [{ type: 'text', text: 'Hello' }] }, + { role: 'assistant', content: [{ type: 'text', text: 'Hi there' }] }, + { role: 'user', content: [{ type: 'text', text: 'How are you?' }] }, + { role: 'assistant', content: [{ type: 'text', text: 'I\'m doing well, thanks!' }] }, + { role: 'user', content: [{ type: 'text', text: 'Great!' 
}] }, + ]; + + const result = addCacheControl(messages); + + expect(result[0].content[0]).not.toHaveProperty('cache_control'); + expect(result[2].content[0].cache_control).toEqual({ type: 'ephemeral' }); + expect(result[4].content[0].cache_control).toEqual({ type: 'ephemeral' }); + }); + + test('should add cache control to the last two user messages with string content', () => { + const messages = [ + { role: 'user', content: 'Hello' }, + { role: 'assistant', content: 'Hi there' }, + { role: 'user', content: 'How are you?' }, + { role: 'assistant', content: 'I\'m doing well, thanks!' }, + { role: 'user', content: 'Great!' }, + ]; + + const result = addCacheControl(messages); + + expect(result[0].content).toBe('Hello'); + expect(result[2].content[0]).toEqual({ + type: 'text', + text: 'How are you?', + cache_control: { type: 'ephemeral' }, + }); + expect(result[4].content[0]).toEqual({ + type: 'text', + text: 'Great!', + cache_control: { type: 'ephemeral' }, + }); + }); + + test('should handle mixed string and array content', () => { + const messages = [ + { role: 'user', content: 'Hello' }, + { role: 'assistant', content: 'Hi there' }, + { role: 'user', content: [{ type: 'text', text: 'How are you?' }] }, + ]; + + const result = addCacheControl(messages); + + expect(result[0].content[0]).toEqual({ + type: 'text', + text: 'Hello', + cache_control: { type: 'ephemeral' }, + }); + expect(result[2].content[0].cache_control).toEqual({ type: 'ephemeral' }); + }); + + test('should handle less than two user messages', () => { + const messages = [ + { role: 'user', content: 'Hello' }, + { role: 'assistant', content: 'Hi there' }, + ]; + + const result = addCacheControl(messages); + + expect(result[0].content[0]).toEqual({ + type: 'text', + text: 'Hello', + cache_control: { type: 'ephemeral' }, + }); + expect(result[1].content).toBe('Hi there'); + }); + + test('should return original array if no user messages', () => { + const messages = [ + { role: 'assistant', content: 'Hi there' }, + { role: 'assistant', content: 'How can I help?' }, + ]; + + const result = addCacheControl(messages); + + expect(result).toEqual(messages); + }); + + test('should handle empty array', () => { + const messages = []; + const result = addCacheControl(messages); + expect(result).toEqual([]); + }); + + test('should handle non-array input', () => { + const messages = 'not an array'; + const result = addCacheControl(messages); + expect(result).toBe('not an array'); + }); + + test('should not modify assistant messages', () => { + const messages = [ + { role: 'user', content: 'Hello' }, + { role: 'assistant', content: 'Hi there' }, + { role: 'user', content: 'How are you?' }, + ]; + + const result = addCacheControl(messages); + + expect(result[1].content).toBe('Hi there'); + }); + + test('should handle multiple content items in user messages', () => { + const messages = [ + { + role: 'user', + content: [ + { type: 'text', text: 'Hello' }, + { type: 'image', url: 'http://example.com/image.jpg' }, + { type: 'text', text: 'This is an image' }, + ], + }, + { role: 'assistant', content: 'Hi there' }, + { role: 'user', content: 'How are you?' 
}, + ]; + + const result = addCacheControl(messages); + + expect(result[0].content[0]).not.toHaveProperty('cache_control'); + expect(result[0].content[1]).not.toHaveProperty('cache_control'); + expect(result[0].content[2].cache_control).toEqual({ type: 'ephemeral' }); + expect(result[2].content[0]).toEqual({ + type: 'text', + text: 'How are you?', + cache_control: { type: 'ephemeral' }, + }); + }); + + test('should handle an array with mixed content types', () => { + const messages = [ + { role: 'user', content: 'Hello' }, + { role: 'assistant', content: 'Hi there' }, + { role: 'user', content: [{ type: 'text', text: 'How are you?' }] }, + { role: 'assistant', content: 'I\'m doing well, thanks!' }, + { role: 'user', content: 'Great!' }, + ]; + + const result = addCacheControl(messages); + + expect(result[0].content).toEqual('Hello'); + expect(result[2].content[0]).toEqual({ + type: 'text', + text: 'How are you?', + cache_control: { type: 'ephemeral' }, + }); + expect(result[4].content).toEqual([ + { + type: 'text', + text: 'Great!', + cache_control: { type: 'ephemeral' }, + }, + ]); + expect(result[1].content).toBe('Hi there'); + expect(result[3].content).toBe('I\'m doing well, thanks!'); + }); + + test('should handle edge case with multiple content types', () => { + const messages = [ + { + role: 'user', + content: [ + { + type: 'image', + source: { type: 'base64', media_type: 'image/png', data: 'some_base64_string' }, + }, + { + type: 'image', + source: { type: 'base64', media_type: 'image/png', data: 'another_base64_string' }, + }, + { type: 'text', text: 'what do all these images have in common' }, + ], + }, + { role: 'assistant', content: 'I see multiple images.' }, + { role: 'user', content: 'Correct!' }, + ]; + + const result = addCacheControl(messages); + + expect(result[0].content[0]).not.toHaveProperty('cache_control'); + expect(result[0].content[1]).not.toHaveProperty('cache_control'); + expect(result[0].content[2].cache_control).toEqual({ type: 'ephemeral' }); + expect(result[2].content[0]).toEqual({ + type: 'text', + text: 'Correct!', + cache_control: { type: 'ephemeral' }, + }); + }); + + test('should handle user message with no text block', () => { + const messages = [ + { + role: 'user', + content: [ + { + type: 'image', + source: { type: 'base64', media_type: 'image/png', data: 'some_base64_string' }, + }, + { + type: 'image', + source: { type: 'base64', media_type: 'image/png', data: 'another_base64_string' }, + }, + ], + }, + { role: 'assistant', content: 'I see two images.' }, + { role: 'user', content: 'Correct!' 
}, + ]; + + const result = addCacheControl(messages); + + expect(result[0].content[0]).not.toHaveProperty('cache_control'); + expect(result[0].content[1]).not.toHaveProperty('cache_control'); + expect(result[2].content[0]).toEqual({ + type: 'text', + text: 'Correct!', + cache_control: { type: 'ephemeral' }, + }); + }); +}); diff --git a/api/app/clients/prompts/artifacts.js b/api/app/clients/prompts/artifacts.js new file mode 100644 index 00000000000..b907a16b56c --- /dev/null +++ b/api/app/clients/prompts/artifacts.js @@ -0,0 +1,527 @@ +const dedent = require('dedent'); +const { EModelEndpoint, ArtifactModes } = require('librechat-data-provider'); +const { generateShadcnPrompt } = require('~/app/clients/prompts/shadcn-docs/generate'); +const { components } = require('~/app/clients/prompts/shadcn-docs/components'); + +// eslint-disable-next-line no-unused-vars +const artifactsPromptV1 = dedent`The assistant can create and reference artifacts during conversations. + +Artifacts are for substantial, self-contained content that users might modify or reuse, displayed in a separate UI window for clarity. + +# Good artifacts are... +- Substantial content (>15 lines) +- Content that the user is likely to modify, iterate on, or take ownership of +- Self-contained, complex content that can be understood on its own, without context from the conversation +- Content intended for eventual use outside the conversation (e.g., reports, emails, presentations) +- Content likely to be referenced or reused multiple times + +# Don't use artifacts for... +- Simple, informational, or short content, such as brief code snippets, mathematical equations, or small examples +- Primarily explanatory, instructional, or illustrative content, such as examples provided to clarify a concept +- Suggestions, commentary, or feedback on existing artifacts +- Conversational or explanatory content that doesn't represent a standalone piece of work +- Content that is dependent on the current conversational context to be useful +- Content that is unlikely to be modified or iterated upon by the user +- Request from users that appears to be a one-off question + +# Usage notes +- One artifact per message unless specifically requested +- Prefer in-line content (don't use artifacts) when possible. Unnecessary use of artifacts can be jarring for users. +- If a user asks the assistant to "draw an SVG" or "make a website," the assistant does not need to explain that it doesn't have these capabilities. Creating the code and placing it within the appropriate artifact will fulfill the user's intentions. +- If asked to generate an image, the assistant can offer an SVG instead. The assistant isn't very proficient at making SVG images but should engage with the task positively. Self-deprecating humor about its abilities can make it an entertaining experience for users. +- The assistant errs on the side of simplicity and avoids overusing artifacts for content that can be effectively presented within the conversation. +- Always provide complete, specific, and fully functional content without any placeholders, ellipses, or 'remains the same' comments. + + + When collaborating with the user on creating content that falls into compatible categories, the assistant should follow these steps: + + 1. Create the artifact using the following format: + + :::artifact{identifier="unique-identifier" type="mime-type" title="Artifact Title"} + \`\`\` + Your artifact content here + \`\`\` + ::: + + 2. Assign an identifier to the \`identifier\` attribute. 
For updates, reuse the prior identifier. For new artifacts, the identifier should be descriptive and relevant to the content, using kebab-case (e.g., "example-code-snippet"). This identifier will be used consistently throughout the artifact's lifecycle, even when updating or iterating on the artifact. + 3. Include a \`title\` attribute to provide a brief title or description of the content. + 4. Add a \`type\` attribute to specify the type of content the artifact represents. Assign one of the following values to the \`type\` attribute: + - HTML: "text/html" + - The user interface can render single file HTML pages placed within the artifact tags. HTML, JS, and CSS should be in a single file when using the \`text/html\` type. + - Images from the web are not allowed, but you can use placeholder images by specifying the width and height like so \`placeholder\` + - The only place external scripts can be imported from is https://cdnjs.cloudflare.com + - Mermaid Diagrams: "application/vnd.mermaid" + - The user interface will render Mermaid diagrams placed within the artifact tags. + - React Components: "application/vnd.react" + - Use this for displaying either: React elements, e.g. \`Hello World!\`, React pure functional components, e.g. \`() => Hello World!\`, React functional components with Hooks, or React component classes + - When creating a React component, ensure it has no required props (or provide default values for all props) and use a default export. + - Use Tailwind classes for styling. DO NOT USE ARBITRARY VALUES (e.g. \`h-[600px]\`). + - Base React is available to be imported. To use hooks, first import it at the top of the artifact, e.g. \`import { useState } from "react"\` + - The lucide-react@0.263.1 library is available to be imported. e.g. \`import { Camera } from "lucide-react"\` & \`\` + - The recharts charting library is available to be imported, e.g. \`import { LineChart, XAxis, ... } from "recharts"\` & \` ...\` + - The assistant can use prebuilt components from the \`shadcn/ui\` library after it is imported: \`import { Alert, AlertDescription, AlertTitle, AlertDialog, AlertDialogAction } from '/components/ui/alert';\`. If using components from the shadcn/ui library, the assistant mentions this to the user and offers to help them install the components if necessary. + - Components MUST be imported from \`/components/ui/name\` and NOT from \`/components/name\` or \`@/components/ui/name\`. + - NO OTHER LIBRARIES (e.g. zod, hookform) ARE INSTALLED OR ABLE TO BE IMPORTED. + - Images from the web are not allowed, but you can use placeholder images by specifying the width and height like so \`placeholder\` + - If you are unable to follow the above requirements for any reason, don't use artifacts and use regular code blocks instead, which will not attempt to render the component. + 5. Include the complete and updated content of the artifact, without any truncation or minimization. Don't use "// rest of the code remains the same...". + 6. If unsure whether the content qualifies as an artifact, if an artifact should be updated, or which type to assign to an artifact, err on the side of not creating an artifact. + 7. Always use triple backticks (\`\`\`) to enclose the content within the artifact, regardless of the content type. + + +Here are some examples of correct usage of artifacts: + + + + This example demonstrates how to create a Mermaid artifact for a simple flow chart. + + + + Can you create a simple flow chart showing the process of making tea using Mermaid? + + + Sure! 
Here's a simple flow chart depicting the process of making tea using Mermaid syntax: + + :::artifact{identifier="tea-making-flowchart" type="application/vnd.mermaid" title="Flow chart: Making Tea"} + \`\`\`mermaid + graph TD + A[Start] --> B{Water boiled?} + B -->|Yes| C[Add tea leaves to cup] + B -->|No| D[Boil water] + D --> B + C --> E[Pour boiling water into cup] + E --> F[Steep tea for desired time] + F --> G[Remove tea leaves] + G --> H[Add milk or sugar, if desired] + H --> I[Enjoy your tea!] + I --> J[End] + \`\`\` + ::: + + This flow chart uses Mermaid syntax to visualize the steps involved in making a cup of tea. Here's a brief explanation of the process: + + 1. Start + 2. Check if water is boiled + 3. If not boiled, boil the water + 4. Once water is boiled, add tea leaves to the cup + 5. Pour boiling water into the cup + 6. Steep the tea for the desired time + 7. Remove the tea leaves + 8. Optionally add milk or sugar + 9. Enjoy your tea! + 10. End + + This chart provides a clear visual representation of the tea-making process. You can easily modify or expand this chart if you want to add more details or steps to the process. Let me know if you'd like any changes or have any questions! + + +`; +const artifactsPrompt = dedent`The assistant can create and reference artifacts during conversations. + +Artifacts are for substantial, self-contained content that users might modify or reuse, displayed in a separate UI window for clarity. + +# Good artifacts are... +- Substantial content (>15 lines) +- Content that the user is likely to modify, iterate on, or take ownership of +- Self-contained, complex content that can be understood on its own, without context from the conversation +- Content intended for eventual use outside the conversation (e.g., reports, emails, presentations) +- Content likely to be referenced or reused multiple times + +# Don't use artifacts for... +- Simple, informational, or short content, such as brief code snippets, mathematical equations, or small examples +- Primarily explanatory, instructional, or illustrative content, such as examples provided to clarify a concept +- Suggestions, commentary, or feedback on existing artifacts +- Conversational or explanatory content that doesn't represent a standalone piece of work +- Content that is dependent on the current conversational context to be useful +- Content that is unlikely to be modified or iterated upon by the user +- Request from users that appears to be a one-off question + +# Usage notes +- One artifact per message unless specifically requested +- Prefer in-line content (don't use artifacts) when possible. Unnecessary use of artifacts can be jarring for users. +- If a user asks the assistant to "draw an SVG" or "make a website," the assistant does not need to explain that it doesn't have these capabilities. Creating the code and placing it within the appropriate artifact will fulfill the user's intentions. +- If asked to generate an image, the assistant can offer an SVG instead. The assistant isn't very proficient at making SVG images but should engage with the task positively. Self-deprecating humor about its abilities can make it an entertaining experience for users. +- The assistant errs on the side of simplicity and avoids overusing artifacts for content that can be effectively presented within the conversation. +- Always provide complete, specific, and fully functional content for artifacts without any snippets, placeholders, ellipses, or 'remains the same' comments. 
+- If an artifact is not necessary or requested, the assistant should not mention artifacts at all, and respond to the user accordingly. + + + When collaborating with the user on creating content that falls into compatible categories, the assistant should follow these steps: + + 1. Create the artifact using the following format: + + :::artifact{identifier="unique-identifier" type="mime-type" title="Artifact Title"} + \`\`\` + Your artifact content here + \`\`\` + ::: + + 2. Assign an identifier to the \`identifier\` attribute. For updates, reuse the prior identifier. For new artifacts, the identifier should be descriptive and relevant to the content, using kebab-case (e.g., "example-code-snippet"). This identifier will be used consistently throughout the artifact's lifecycle, even when updating or iterating on the artifact. + 3. Include a \`title\` attribute to provide a brief title or description of the content. + 4. Add a \`type\` attribute to specify the type of content the artifact represents. Assign one of the following values to the \`type\` attribute: + - HTML: "text/html" + - The user interface can render single file HTML pages placed within the artifact tags. HTML, JS, and CSS should be in a single file when using the \`text/html\` type. + - Images from the web are not allowed, but you can use placeholder images by specifying the width and height like so \`placeholder\` + - The only place external scripts can be imported from is https://cdnjs.cloudflare.com + - SVG: "image/svg+xml" + - The user interface will render the Scalable Vector Graphics (SVG) image within the artifact tags. + - The assistant should specify the viewbox of the SVG rather than defining a width/height + - Mermaid Diagrams: "application/vnd.mermaid" + - The user interface will render Mermaid diagrams placed within the artifact tags. + - React Components: "application/vnd.react" + - Use this for displaying either: React elements, e.g. \`Hello World!\`, React pure functional components, e.g. \`() => Hello World!\`, React functional components with Hooks, or React component classes + - When creating a React component, ensure it has no required props (or provide default values for all props) and use a default export. + - Use Tailwind classes for styling. DO NOT USE ARBITRARY VALUES (e.g. \`h-[600px]\`). + - Base React is available to be imported. To use hooks, first import it at the top of the artifact, e.g. \`import { useState } from "react"\` + - The lucide-react@0.394.0 library is available to be imported. e.g. \`import { Camera } from "lucide-react"\` & \`\` + - The recharts charting library is available to be imported, e.g. \`import { LineChart, XAxis, ... } from "recharts"\` & \` ...\` + - The three.js library is available to be imported, e.g. \`import * as THREE from "three";\` + - The date-fns library is available to be imported, e.g. \`import { compareAsc, format } from "date-fns";\` + - The react-day-picker library is available to be imported, e.g. \`import { DayPicker } from "react-day-picker";\` + - The assistant can use prebuilt components from the \`shadcn/ui\` library after it is imported: \`import { Alert, AlertDescription, AlertTitle, AlertDialog, AlertDialogAction } from '/components/ui/alert';\`. If using components from the shadcn/ui library, the assistant mentions this to the user and offers to help them install the components if necessary. + - Components MUST be imported from \`/components/ui/name\` and NOT from \`/components/name\` or \`@/components/ui/name\`. + - NO OTHER LIBRARIES (e.g. 
zod, hookform) ARE INSTALLED OR ABLE TO BE IMPORTED. + - Images from the web are not allowed, but you can use placeholder images by specifying the width and height like so \`placeholder\` + - When iterating on code, ensure that the code is complete and functional without any snippets, placeholders, or ellipses. + - If you are unable to follow the above requirements for any reason, don't use artifacts and use regular code blocks instead, which will not attempt to render the component. + 5. Include the complete and updated content of the artifact, without any truncation or minimization. Don't use "// rest of the code remains the same...". + 6. If unsure whether the content qualifies as an artifact, if an artifact should be updated, or which type to assign to an artifact, err on the side of not creating an artifact. + 7. Always use triple backticks (\`\`\`) to enclose the content within the artifact, regardless of the content type. + + +Here are some examples of correct usage of artifacts: + + + + This example demonstrates how to create a Mermaid artifact for a simple flow chart. + + + + Can you create a simple flow chart showing the process of making tea using Mermaid? + + + Sure! Here's a simple flow chart depicting the process of making tea using Mermaid syntax: + + :::artifact{identifier="tea-making-flowchart" type="application/vnd.mermaid" title="Flow chart: Making Tea"} + \`\`\`mermaid + graph TD + A[Start] --> B{Water boiled?} + B -->|Yes| C[Add tea leaves to cup] + B -->|No| D[Boil water] + D --> B + C --> E[Pour boiling water into cup] + E --> F[Steep tea for desired time] + F --> G[Remove tea leaves] + G --> H[Add milk or sugar, if desired] + H --> I[Enjoy your tea!] + I --> J[End] + \`\`\` + ::: + + This flow chart uses Mermaid syntax to visualize the steps involved in making a cup of tea. Here's a brief explanation of the process: + + 1. Start + 2. Check if water is boiled + 3. If not boiled, boil the water + 4. Once water is boiled, add tea leaves to the cup + 5. Pour boiling water into the cup + 6. Steep the tea for the desired time + 7. Remove the tea leaves + 8. Optionally add milk or sugar + 9. Enjoy your tea! + 10. End + + This chart provides a clear visual representation of the tea-making process. You can easily modify or expand this chart if you want to add more details or steps to the process. Let me know if you'd like any changes or have any questions! + + + + + Create a simple React counter component + + Here's a simple React counter component: + + :::artifact{identifier="react-counter" type="application/vnd.react" title="React Counter"} + \`\`\` + import { useState } from 'react'; + + export default function Counter() { + const [count, setCount] = useState(0); + return ( +
+            <div className="flex flex-col items-center p-4">
+              <p className="mb-2">Count: {count}</p>
+              <button onClick={() => setCount(count + 1)}>Increment</button>
+            </div>
+        );
+      }
+      \`\`\`
+      :::
+
+      This component creates a simple counter with an increment button.
+
+
+    Create a basic HTML structure for a blog post
+
+      Here's a basic HTML structure for a blog post:
+
+      :::artifact{identifier="blog-post-html" type="text/html" title="Blog Post HTML"}
+      \`\`\`
+      <!DOCTYPE html>
+      <html lang="en">
+      <head>
+          <meta charset="UTF-8">
+          <meta name="viewport" content="width=device-width, initial-scale=1.0">
+          <title>My Blog Post</title>
+      </head>
+      <body>
+          <header>
+              <h1>My First Blog Post</h1>
+          </header>
+          <main>
+              <article>
+                  <p>This is the content of my blog post. It's short and sweet!</p>
+              </article>
+          </main>
+          <footer>
+              <p>© 2023 My Blog</p>
+          </footer>
+      </body>
+      </html>
+      \`\`\`
+      :::
+
+      This HTML structure provides a simple layout for a blog post.
+
`; + +const artifactsOpenAIPrompt = dedent`The assistant can create and reference artifacts during conversations. + +Artifacts are for substantial, self-contained content that users might modify or reuse, displayed in a separate UI window for clarity. + +# Good artifacts are... +- Substantial content (>15 lines) +- Content that the user is likely to modify, iterate on, or take ownership of +- Self-contained, complex content that can be understood on its own, without context from the conversation +- Content intended for eventual use outside the conversation (e.g., reports, emails, presentations) +- Content likely to be referenced or reused multiple times + +# Don't use artifacts for... +- Simple, informational, or short content, such as brief code snippets, mathematical equations, or small examples +- Primarily explanatory, instructional, or illustrative content, such as examples provided to clarify a concept +- Suggestions, commentary, or feedback on existing artifacts +- Conversational or explanatory content that doesn't represent a standalone piece of work +- Content that is dependent on the current conversational context to be useful +- Content that is unlikely to be modified or iterated upon by the user +- Request from users that appears to be a one-off question + +# Usage notes +- One artifact per message unless specifically requested +- Prefer in-line content (don't use artifacts) when possible. Unnecessary use of artifacts can be jarring for users. +- If a user asks the assistant to "draw an SVG" or "make a website," the assistant does not need to explain that it doesn't have these capabilities. Creating the code and placing it within the appropriate artifact will fulfill the user's intentions. +- If asked to generate an image, the assistant can offer an SVG instead. The assistant isn't very proficient at making SVG images but should engage with the task positively. Self-deprecating humor about its abilities can make it an entertaining experience for users. +- The assistant errs on the side of simplicity and avoids overusing artifacts for content that can be effectively presented within the conversation. +- Always provide complete, specific, and fully functional content for artifacts without any snippets, placeholders, ellipses, or 'remains the same' comments. +- If an artifact is not necessary or requested, the assistant should not mention artifacts at all, and respond to the user accordingly. + +## Artifact Instructions + When collaborating with the user on creating content that falls into compatible categories, the assistant should follow these steps: + + 1. Create the artifact using the following remark-directive markdown format: + + :::artifact{identifier="unique-identifier" type="mime-type" title="Artifact Title"} + \`\`\` + Your artifact content here + \`\`\` + ::: + + a. Example of correct format: + + :::artifact{identifier="example-artifact" type="text/plain" title="Example Artifact"} + \`\`\` + This is the content of the artifact. + It can span multiple lines. + \`\`\` + ::: + + b. Common mistakes to avoid: + - Don't split the opening ::: line + - Don't add extra backticks outside the artifact structure + - Don't omit the closing ::: + + 2. Assign an identifier to the \`identifier\` attribute. For updates, reuse the prior identifier. For new artifacts, the identifier should be descriptive and relevant to the content, using kebab-case (e.g., "example-code-snippet"). 
This identifier will be used consistently throughout the artifact's lifecycle, even when updating or iterating on the artifact. + 3. Include a \`title\` attribute to provide a brief title or description of the content. + 4. Add a \`type\` attribute to specify the type of content the artifact represents. Assign one of the following values to the \`type\` attribute: + - HTML: "text/html" + - The user interface can render single file HTML pages placed within the artifact tags. HTML, JS, and CSS should be in a single file when using the \`text/html\` type. + - Images from the web are not allowed, but you can use placeholder images by specifying the width and height like so \`placeholder\` + - The only place external scripts can be imported from is https://cdnjs.cloudflare.com + - SVG: "image/svg+xml" + - The user interface will render the Scalable Vector Graphics (SVG) image within the artifact tags. + - The assistant should specify the viewbox of the SVG rather than defining a width/height + - Mermaid Diagrams: "application/vnd.mermaid" + - The user interface will render Mermaid diagrams placed within the artifact tags. + - React Components: "application/vnd.react" + - Use this for displaying either: React elements, e.g. \`Hello World!\`, React pure functional components, e.g. \`() => Hello World!\`, React functional components with Hooks, or React component classes + - When creating a React component, ensure it has no required props (or provide default values for all props) and use a default export. + - Use Tailwind classes for styling. DO NOT USE ARBITRARY VALUES (e.g. \`h-[600px]\`). + - Base React is available to be imported. To use hooks, first import it at the top of the artifact, e.g. \`import { useState } from "react"\` + - The lucide-react@0.394.0 library is available to be imported. e.g. \`import { Camera } from "lucide-react"\` & \`\` + - The recharts charting library is available to be imported, e.g. \`import { LineChart, XAxis, ... } from "recharts"\` & \` ...\` + - The three.js library is available to be imported, e.g. \`import * as THREE from "three";\` + - The date-fns library is available to be imported, e.g. \`import { compareAsc, format } from "date-fns";\` + - The react-day-picker library is available to be imported, e.g. \`import { DayPicker } from "react-day-picker";\` + - The assistant can use prebuilt components from the \`shadcn/ui\` library after it is imported: \`import { Alert, AlertDescription, AlertTitle, AlertDialog, AlertDialogAction } from '/components/ui/alert';\`. If using components from the shadcn/ui library, the assistant mentions this to the user and offers to help them install the components if necessary. + - Components MUST be imported from \`/components/ui/name\` and NOT from \`/components/name\` or \`@/components/ui/name\`. + - NO OTHER LIBRARIES (e.g. zod, hookform) ARE INSTALLED OR ABLE TO BE IMPORTED. + - Images from the web are not allowed, but you can use placeholder images by specifying the width and height like so \`placeholder\` + - When iterating on code, ensure that the code is complete and functional without any snippets, placeholders, or ellipses. + - If you are unable to follow the above requirements for any reason, don't use artifacts and use regular code blocks instead, which will not attempt to render the component. + 5. Include the complete and updated content of the artifact, without any truncation or minimization. Don't use "// rest of the code remains the same...". + 6. 
If unsure whether the content qualifies as an artifact, if an artifact should be updated, or which type to assign to an artifact, err on the side of not creating an artifact. + 7. NEVER use triple backticks to enclose the artifact, ONLY the content within the artifact. + +Here are some examples of correct usage of artifacts: + +## Examples + +### Example 1 + + This example demonstrates how to create a Mermaid artifact for a simple flow chart. + + User: Can you create a simple flow chart showing the process of making tea using Mermaid? + + Assistant: Sure! Here's a simple flow chart depicting the process of making tea using Mermaid syntax: + + :::artifact{identifier="tea-making-flowchart" type="application/vnd.mermaid" title="Flow chart: Making Tea"} + \`\`\`mermaid + graph TD + A[Start] --> B{Water boiled?} + B -->|Yes| C[Add tea leaves to cup] + B -->|No| D[Boil water] + D --> B + C --> E[Pour boiling water into cup] + E --> F[Steep tea for desired time] + F --> G[Remove tea leaves] + G --> H[Add milk or sugar, if desired] + H --> I[Enjoy your tea!] + I --> J[End] + \`\`\` + ::: + + This flow chart uses Mermaid syntax to visualize the steps involved in making a cup of tea. Here's a brief explanation of the process: + + 1. Start + 2. Check if water is boiled + 3. If not boiled, boil the water + 4. Once water is boiled, add tea leaves to the cup + 5. Pour boiling water into the cup + 6. Steep the tea for the desired time + 7. Remove the tea leaves + 8. Optionally add milk or sugar + 9. Enjoy your tea! + 10. End + + This chart provides a clear visual representation of the tea-making process. You can easily modify or expand this chart if you want to add more details or steps to the process. Let me know if you'd like any changes or have any questions! + +--- + +### Example 2 + + User: Create a simple React counter component + + Assistant: Here's a simple React counter component: + + :::artifact{identifier="react-counter" type="application/vnd.react" title="React Counter"} + \`\`\` + import { useState } from 'react'; + + export default function Counter() { + const [count, setCount] = useState(0); + return ( +
+        <div className="flex flex-col items-center p-4">
+          <p className="mb-2">Count: {count}</p>
+          <button onClick={() => setCount(count + 1)}>Increment</button>
+        </div>
+      );
+    }
+    \`\`\`
+    :::
+
+    This component creates a simple counter with an increment button.
+
+---
+
+### Example 3
+  User: Create a basic HTML structure for a blog post
+  Assistant: Here's a basic HTML structure for a blog post:
+
+  :::artifact{identifier="blog-post-html" type="text/html" title="Blog Post HTML"}
+  \`\`\`
+  <!DOCTYPE html>
+  <html lang="en">
+  <head>
+      <meta charset="UTF-8">
+      <meta name="viewport" content="width=device-width, initial-scale=1.0">
+      <title>My Blog Post</title>
+  </head>
+  <body>
+      <header>
+          <h1>My First Blog Post</h1>
+      </header>
+      <main>
+          <article>
+              <p>This is the content of my blog post. It's short and sweet!</p>
+          </article>
+      </main>
+      <footer>
+          <p>© 2023 My Blog</p>
+      </footer>
+  </body>
+  </html>
+ + + \`\`\` + ::: + + This HTML structure provides a simple layout for a blog post. + +---`; + +/** + * + * @param {Object} params + * @param {EModelEndpoint | string} params.endpoint - The current endpoint + * @param {ArtifactModes} params.artifacts - The current artifact mode + * @returns + */ +const generateArtifactsPrompt = ({ endpoint, artifacts }) => { + if (artifacts === ArtifactModes.CUSTOM) { + return null; + } + + let prompt = artifactsPrompt; + if (endpoint !== EModelEndpoint.anthropic) { + prompt = artifactsOpenAIPrompt; + } + + if (artifacts === ArtifactModes.SHADCNUI) { + prompt += generateShadcnPrompt({ components, useXML: endpoint === EModelEndpoint.anthropic }); + } + + return prompt; +}; + +module.exports = generateArtifactsPrompt; diff --git a/api/app/clients/prompts/formatAgentMessages.spec.js b/api/app/clients/prompts/formatAgentMessages.spec.js new file mode 100644 index 00000000000..fe0f1e02281 --- /dev/null +++ b/api/app/clients/prompts/formatAgentMessages.spec.js @@ -0,0 +1,285 @@ +const { ToolMessage } = require('@langchain/core/messages'); +const { ContentTypes } = require('librechat-data-provider'); +const { HumanMessage, AIMessage, SystemMessage } = require('langchain/schema'); +const { formatAgentMessages } = require('./formatMessages'); + +describe('formatAgentMessages', () => { + it('should format simple user and AI messages', () => { + const payload = [ + { role: 'user', content: 'Hello' }, + { role: 'assistant', content: 'Hi there!' }, + ]; + const result = formatAgentMessages(payload); + expect(result).toHaveLength(2); + expect(result[0]).toBeInstanceOf(HumanMessage); + expect(result[1]).toBeInstanceOf(AIMessage); + }); + + it('should handle system messages', () => { + const payload = [{ role: 'system', content: 'You are a helpful assistant.' 
}]; + const result = formatAgentMessages(payload); + expect(result).toHaveLength(1); + expect(result[0]).toBeInstanceOf(SystemMessage); + }); + + it('should format messages with content arrays', () => { + const payload = [ + { + role: 'user', + content: [{ type: ContentTypes.TEXT, [ContentTypes.TEXT]: 'Hello' }], + }, + ]; + const result = formatAgentMessages(payload); + expect(result).toHaveLength(1); + expect(result[0]).toBeInstanceOf(HumanMessage); + }); + + it('should handle tool calls and create ToolMessages', () => { + const payload = [ + { + role: 'assistant', + content: [ + { + type: ContentTypes.TEXT, + [ContentTypes.TEXT]: 'Let me check that for you.', + tool_call_ids: ['123'], + }, + { + type: ContentTypes.TOOL_CALL, + tool_call: { + id: '123', + name: 'search', + args: '{"query":"weather"}', + output: 'The weather is sunny.', + }, + }, + ], + }, + ]; + const result = formatAgentMessages(payload); + expect(result).toHaveLength(2); + expect(result[0]).toBeInstanceOf(AIMessage); + expect(result[1]).toBeInstanceOf(ToolMessage); + expect(result[0].tool_calls).toHaveLength(1); + expect(result[1].tool_call_id).toBe('123'); + }); + + it('should handle multiple content parts in assistant messages', () => { + const payload = [ + { + role: 'assistant', + content: [ + { type: ContentTypes.TEXT, [ContentTypes.TEXT]: 'Part 1' }, + { type: ContentTypes.TEXT, [ContentTypes.TEXT]: 'Part 2' }, + ], + }, + ]; + const result = formatAgentMessages(payload); + expect(result).toHaveLength(1); + expect(result[0]).toBeInstanceOf(AIMessage); + expect(result[0].content).toHaveLength(2); + }); + + it('should throw an error for invalid tool call structure', () => { + const payload = [ + { + role: 'assistant', + content: [ + { + type: ContentTypes.TOOL_CALL, + tool_call: { + id: '123', + name: 'search', + args: '{"query":"weather"}', + output: 'The weather is sunny.', + }, + }, + ], + }, + ]; + expect(() => formatAgentMessages(payload)).toThrow('Invalid tool call structure'); + }); + + it('should handle tool calls with non-JSON args', () => { + const payload = [ + { + role: 'assistant', + content: [ + { type: ContentTypes.TEXT, [ContentTypes.TEXT]: 'Checking...', tool_call_ids: ['123'] }, + { + type: ContentTypes.TOOL_CALL, + tool_call: { + id: '123', + name: 'search', + args: 'non-json-string', + output: 'Result', + }, + }, + ], + }, + ]; + const result = formatAgentMessages(payload); + expect(result).toHaveLength(2); + expect(result[0].tool_calls[0].args).toBe('non-json-string'); + }); + + it('should handle complex tool calls with multiple steps', () => { + const payload = [ + { + role: 'assistant', + content: [ + { + type: ContentTypes.TEXT, + [ContentTypes.TEXT]: 'I\'ll search for that information.', + tool_call_ids: ['search_1'], + }, + { + type: ContentTypes.TOOL_CALL, + tool_call: { + id: 'search_1', + name: 'search', + args: '{"query":"weather in New York"}', + output: 'The weather in New York is currently sunny with a temperature of 75°F.', + }, + }, + { + type: ContentTypes.TEXT, + [ContentTypes.TEXT]: 'Now, I\'ll convert the temperature.', + tool_call_ids: ['convert_1'], + }, + { + type: ContentTypes.TOOL_CALL, + tool_call: { + id: 'convert_1', + name: 'convert_temperature', + args: '{"temperature": 75, "from": "F", "to": "C"}', + output: '23.89°C', + }, + }, + { type: ContentTypes.TEXT, [ContentTypes.TEXT]: 'Here\'s your answer.' 
}, + ], + }, + ]; + + const result = formatAgentMessages(payload); + + expect(result).toHaveLength(5); + expect(result[0]).toBeInstanceOf(AIMessage); + expect(result[1]).toBeInstanceOf(ToolMessage); + expect(result[2]).toBeInstanceOf(AIMessage); + expect(result[3]).toBeInstanceOf(ToolMessage); + expect(result[4]).toBeInstanceOf(AIMessage); + + // Check first AIMessage + expect(result[0].content).toBe('I\'ll search for that information.'); + expect(result[0].tool_calls).toHaveLength(1); + expect(result[0].tool_calls[0]).toEqual({ + id: 'search_1', + name: 'search', + args: { query: 'weather in New York' }, + }); + + // Check first ToolMessage + expect(result[1].tool_call_id).toBe('search_1'); + expect(result[1].name).toBe('search'); + expect(result[1].content).toBe( + 'The weather in New York is currently sunny with a temperature of 75°F.', + ); + + // Check second AIMessage + expect(result[2].content).toBe('Now, I\'ll convert the temperature.'); + expect(result[2].tool_calls).toHaveLength(1); + expect(result[2].tool_calls[0]).toEqual({ + id: 'convert_1', + name: 'convert_temperature', + args: { temperature: 75, from: 'F', to: 'C' }, + }); + + // Check second ToolMessage + expect(result[3].tool_call_id).toBe('convert_1'); + expect(result[3].name).toBe('convert_temperature'); + expect(result[3].content).toBe('23.89°C'); + + // Check final AIMessage + expect(result[4].content).toStrictEqual([ + { [ContentTypes.TEXT]: 'Here\'s your answer.', type: ContentTypes.TEXT }, + ]); + }); + + it.skip('should not produce two consecutive assistant messages and format content correctly', () => { + const payload = [ + { role: 'user', content: 'Hello' }, + { + role: 'assistant', + content: [{ type: ContentTypes.TEXT, [ContentTypes.TEXT]: 'Hi there!' }], + }, + { + role: 'assistant', + content: [{ type: ContentTypes.TEXT, [ContentTypes.TEXT]: 'How can I help you?' }], + }, + { role: 'user', content: 'What\'s the weather?' }, + { + role: 'assistant', + content: [ + { + type: ContentTypes.TEXT, + [ContentTypes.TEXT]: 'Let me check that for you.', + tool_call_ids: ['weather_1'], + }, + { + type: ContentTypes.TOOL_CALL, + tool_call: { + id: 'weather_1', + name: 'check_weather', + args: '{"location":"New York"}', + output: 'Sunny, 75°F', + }, + }, + ], + }, + { + role: 'assistant', + content: [ + { type: ContentTypes.TEXT, [ContentTypes.TEXT]: 'Here\'s the weather information.' 
}, + ], + }, + ]; + + const result = formatAgentMessages(payload); + + // Check correct message count and types + expect(result).toHaveLength(6); + expect(result[0]).toBeInstanceOf(HumanMessage); + expect(result[1]).toBeInstanceOf(AIMessage); + expect(result[2]).toBeInstanceOf(HumanMessage); + expect(result[3]).toBeInstanceOf(AIMessage); + expect(result[4]).toBeInstanceOf(ToolMessage); + expect(result[5]).toBeInstanceOf(AIMessage); + + // Check content of messages + expect(result[0].content).toStrictEqual([ + { [ContentTypes.TEXT]: 'Hello', type: ContentTypes.TEXT }, + ]); + expect(result[1].content).toStrictEqual([ + { [ContentTypes.TEXT]: 'Hi there!', type: ContentTypes.TEXT }, + { [ContentTypes.TEXT]: 'How can I help you?', type: ContentTypes.TEXT }, + ]); + expect(result[2].content).toStrictEqual([ + { [ContentTypes.TEXT]: 'What\'s the weather?', type: ContentTypes.TEXT }, + ]); + expect(result[3].content).toBe('Let me check that for you.'); + expect(result[4].content).toBe('Sunny, 75°F'); + expect(result[5].content).toStrictEqual([ + { [ContentTypes.TEXT]: 'Here\'s the weather information.', type: ContentTypes.TEXT }, + ]); + + // Check that there are no consecutive AIMessages + const messageTypes = result.map((message) => message.constructor); + for (let i = 0; i < messageTypes.length - 1; i++) { + expect(messageTypes[i] === AIMessage && messageTypes[i + 1] === AIMessage).toBe(false); + } + + // Additional check to ensure the consecutive assistant messages were combined + expect(result[1].content).toHaveLength(2); + }); +}); diff --git a/api/app/clients/prompts/formatMessages.js b/api/app/clients/prompts/formatMessages.js index c19eee260af..29784d65319 100644 --- a/api/app/clients/prompts/formatMessages.js +++ b/api/app/clients/prompts/formatMessages.js @@ -1,4 +1,5 @@ -const { EModelEndpoint } = require('librechat-data-provider'); +const { ToolMessage } = require('@langchain/core/messages'); +const { EModelEndpoint, ContentTypes } = require('librechat-data-provider'); const { HumanMessage, AIMessage, SystemMessage } = require('langchain/schema'); /** @@ -14,11 +15,11 @@ const { HumanMessage, AIMessage, SystemMessage } = require('langchain/schema'); */ const formatVisionMessage = ({ message, image_urls, endpoint }) => { if (endpoint === EModelEndpoint.anthropic) { - message.content = [...image_urls, { type: 'text', text: message.content }]; + message.content = [...image_urls, { type: ContentTypes.TEXT, text: message.content }]; return message; } - message.content = [{ type: 'text', text: message.content }, ...image_urls]; + message.content = [{ type: ContentTypes.TEXT, text: message.content }, ...image_urls]; return message; }; @@ -51,7 +52,7 @@ const formatMessage = ({ message, userName, assistantName, endpoint, langChain = _role = roleMapping[lc_id[2]]; } const role = _role ?? (sender && sender?.toLowerCase() === 'user' ? 'user' : 'assistant'); - const content = text ?? _content ?? ''; + const content = _content ?? text ?? ''; const formattedMessage = { role, content, @@ -131,4 +132,94 @@ const formatFromLangChain = (message) => { }; }; -module.exports = { formatMessage, formatLangChainMessages, formatFromLangChain }; +/** + * Formats an array of messages for LangChain, handling tool calls and creating ToolMessage instances. + * + * @param {Array>} payload - The array of messages to format. + * @returns {Array<(HumanMessage|AIMessage|SystemMessage|ToolMessage)>} - The array of formatted LangChain messages, including ToolMessages for tool calls. 
+ */ +const formatAgentMessages = (payload) => { + const messages = []; + + for (const message of payload) { + if (typeof message.content === 'string') { + message.content = [{ type: ContentTypes.TEXT, [ContentTypes.TEXT]: message.content }]; + } + if (message.role !== 'assistant') { + messages.push(formatMessage({ message, langChain: true })); + continue; + } + + let currentContent = []; + let lastAIMessage = null; + + for (const part of message.content) { + if (part.type === ContentTypes.TEXT && part.tool_call_ids) { + /* + If there's pending content, it needs to be aggregated as a single string to prepare for tool calls. + For Anthropic models, the "tool_calls" field on a message is only respected if content is a string. + */ + if (currentContent.length > 0) { + let content = currentContent.reduce((acc, curr) => { + if (curr.type === ContentTypes.TEXT) { + return `${acc}${curr[ContentTypes.TEXT]}\n`; + } + return acc; + }, ''); + content = `${content}\n${part[ContentTypes.TEXT] ?? ''}`.trim(); + lastAIMessage = new AIMessage({ content }); + messages.push(lastAIMessage); + currentContent = []; + continue; + } + + // Create a new AIMessage with this text and prepare for tool calls + lastAIMessage = new AIMessage({ + content: part.text || '', + }); + + messages.push(lastAIMessage); + } else if (part.type === ContentTypes.TOOL_CALL) { + if (!lastAIMessage) { + throw new Error('Invalid tool call structure: No preceding AIMessage with tool_call_ids'); + } + + // Note: `tool_calls` list is defined when constructed by `AIMessage` class, and outputs should be excluded from it + const { output, args: _args, ...tool_call } = part.tool_call; + // TODO: investigate; args as dictionary may need to be provider-or-tool-specific + let args = _args; + try { + args = JSON.parse(args); + } catch (e) { + // failed to parse, leave as is + } + tool_call.args = args; + lastAIMessage.tool_calls.push(tool_call); + + // Add the corresponding ToolMessage + messages.push( + new ToolMessage({ + tool_call_id: tool_call.id, + name: tool_call.name, + content: output, + }), + ); + } else { + currentContent.push(part); + } + } + + if (currentContent.length > 0) { + messages.push(new AIMessage({ content: currentContent })); + } + } + + return messages; +}; + +module.exports = { + formatMessage, + formatFromLangChain, + formatAgentMessages, + formatLangChainMessages, +}; diff --git a/api/app/clients/prompts/index.js b/api/app/clients/prompts/index.js index 9477fb30ca6..364ad34b5eb 100644 --- a/api/app/clients/prompts/index.js +++ b/api/app/clients/prompts/index.js @@ -1,3 +1,4 @@ +const addCacheControl = require('./addCacheControl'); const formatMessages = require('./formatMessages'); const summaryPrompts = require('./summaryPrompts'); const handleInputs = require('./handleInputs'); @@ -8,6 +9,7 @@ const createVisionPrompt = require('./createVisionPrompt'); const createContextHandlers = require('./createContextHandlers'); module.exports = { + addCacheControl, ...formatMessages, ...summaryPrompts, ...handleInputs, diff --git a/api/app/clients/prompts/shadcn-docs/components.js b/api/app/clients/prompts/shadcn-docs/components.js new file mode 100644 index 00000000000..b67c47d50fe --- /dev/null +++ b/api/app/clients/prompts/shadcn-docs/components.js @@ -0,0 +1,495 @@ +// Essential Components +const essentialComponents = { + avatar: { + componentName: 'Avatar', + importDocs: 'import { Avatar, AvatarFallback, AvatarImage } from "/components/ui/avatar"', + usageDocs: ` + + + CN +`, + }, + button: { + componentName: 'Button', + 
importDocs: 'import { Button } from "/components/ui/button"', + usageDocs: ` +`, + }, + card: { + componentName: 'Card', + importDocs: ` +import { + Card, + CardContent, + CardDescription, + CardFooter, + CardHeader, + CardTitle, +} from "/components/ui/card"`, + usageDocs: ` + + + Card Title + Card Description + + +

+    <p>Card Content</p>
+  </CardContent>
+  <CardFooter>
+    <p>Card Footer</p>
+  </CardFooter>
+</Card>
`, + }, + checkbox: { + componentName: 'Checkbox', + importDocs: 'import { Checkbox } from "/components/ui/checkbox"', + usageDocs: '', + }, + input: { + componentName: 'Input', + importDocs: 'import { Input } from "/components/ui/input"', + usageDocs: '', + }, + label: { + componentName: 'Label', + importDocs: 'import { Label } from "/components/ui/label"', + usageDocs: '', + }, + radioGroup: { + componentName: 'RadioGroup', + importDocs: ` +import { Label } from "/components/ui/label" +import { RadioGroup, RadioGroupItem } from "/components/ui/radio-group"`, + usageDocs: ` + +
+  <div className="flex items-center space-x-2">
+    <RadioGroupItem value="default" id="r1" />
+    <Label htmlFor="r1">Default</Label>
+  </div>
+  <div className="flex items-center space-x-2">
+    <RadioGroupItem value="comfortable" id="r2" />
+    <Label htmlFor="r2">Comfortable</Label>
+  </div>
+</RadioGroup>
`, + }, + select: { + componentName: 'Select', + importDocs: ` +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "/components/ui/select"`, + usageDocs: ` +`, + }, + textarea: { + componentName: 'Textarea', + importDocs: 'import { Textarea } from "/components/ui/textarea"', + usageDocs: '