From 8f4c7daa979521c6d602accf0a37217b5fcfc176 Mon Sep 17 00:00:00 2001 From: Mayo Date: Tue, 21 Mar 2023 22:47:49 +0100 Subject: [PATCH] add chunk loop to prevent pinecone upsert errors --- README.md | 22 ++++++++++++++++++++++ config/pinecone.ts | 2 +- package.json | 2 +- pnpm-lock.yaml | 16 ++-------------- scripts/ingest-data.ts | 22 +++++++++++++++------- 5 files changed, 41 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index a7cadbb1f..e4d796977 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,8 @@ Tech stack used includes LangChain, Pinecone, Typescript, Openai, and Next.js. L The visual guide of this repo and tutorial is in the `visual guide` folder. +**If you run into errors, please review the troubleshooting section further down this page.** + ## Development 1. Clone the repo @@ -58,6 +60,26 @@ PINECONE_ENVIRONMENT= Once you've verified that the embeddings and content have been successfully added to your Pinecone, you can run the app `npm run dev` to launch the local dev environment and then type a question in the chat interface. +## Troubleshooting + +In general, keep an eye out in the `issues` and `discussions` sections of this repo for solutions. + +**General errors** + +- Make sure you're running the latest Node version. Run `node -v` to check. +- Make sure you're using the same versions of LangChain and Pinecone as this repo. +- Check that you've created a `.env` file that contains your valid (and working) API keys. +- If you change `modelName` in `OpenAIChat`, note that the correct name of the alternative model is `gpt-3.5-turbo`. +- Pinecone indexes of users on the Starter (free) plan are deleted after 7 days of inactivity. To prevent this, send an API request to Pinecone to reset the counter. + +**Pinecone errors** + +- Make sure the `environment` and `index` in your Pinecone dashboard match the ones in your `config` folder. +- Check that you've set the vector dimensions to `1536`. 
+- Switch your environment in Pinecone to `us-east1-gcp` if your current environment is causing issues. + +If you're stuck after trying all these steps, delete `node_modules`, restart your computer, then `pnpm install` again. + ## Credit Frontend of this repo is inspired by [langchain-chat-nextjs](https://github.com/zahidkhawaja/langchain-chat-nextjs) diff --git a/config/pinecone.ts b/config/pinecone.ts index 2de6181d4..f1851c8da 100644 --- a/config/pinecone.ts +++ b/config/pinecone.ts @@ -4,6 +4,6 @@ const PINECONE_INDEX_NAME = 'langchainjsfundamentals'; -const PINECONE_NAME_SPACE = 'demo'; //namespace is optional for your vectors +const PINECONE_NAME_SPACE = 'pdf-test'; //namespace is optional for your vectors export { PINECONE_INDEX_NAME, PINECONE_NAME_SPACE }; diff --git a/package.json b/package.json index db4c411e8..8d10f96c9 100644 --- a/package.json +++ b/package.json @@ -20,7 +20,7 @@ "@radix-ui/react-accordion": "^1.1.1", "clsx": "^1.2.1", "dotenv": "^16.0.3", - "langchain": "^0.0.33", + "langchain": "0.0.33", "lucide-react": "^0.125.0", "next": "13.2.3", "pdf-parse": "1.1.1", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4e92e69e6..24cdbcfa6 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -13,7 +13,7 @@ specifiers: dotenv: ^16.0.3 eslint: 8.35.0 eslint-config-next: 13.2.3 - langchain: ^0.0.33 + langchain: 0.0.33 lucide-react: ^0.125.0 next: 13.2.3 pdf-parse: 1.1.1 @@ -337,7 +337,7 @@ packages: detect-libc: 2.0.1 https-proxy-agent: 5.0.1 make-dir: 3.1.0 - node-fetch: 2.6.9 + node-fetch: 2.6.7 nopt: 5.0.0 npmlog: 5.0.1 rimraf: 3.0.2 @@ -3230,18 +3230,6 @@ packages: whatwg-url: 5.0.0 dev: false - /node-fetch/2.6.9: - resolution: {integrity: sha512-DJm/CJkZkRjKKj4Zi4BsKVZh3ValV5IR5s7LVZnW+6YMh0W1BfNA8XSs6DLMGYlId5F3KnA70uu2qepcR08Qqg==} - engines: {node: 4.x || >=6.0.0} - peerDependencies: - encoding: ^0.1.0 - peerDependenciesMeta: - encoding: - optional: true - dependencies: - whatwg-url: 5.0.0 - dev: false - /node-gyp/8.4.1: resolution: {integrity: 
sha512-olTJRgUtAb/hOXG0E93wZDs5YiJlgbXxTwQAFHyNlRsXQnYzUaF2aGgujZbw+hR8aF4ZG/rST57bWMWD16jr9w==} engines: {node: '>= 10.12.0'} diff --git a/scripts/ingest-data.ts b/scripts/ingest-data.ts index 4a7d16ddf..392cfd5a9 100644 --- a/scripts/ingest-data.ts +++ b/scripts/ingest-data.ts @@ -30,14 +30,22 @@ export const run = async () => { /*create and store the embeddings in the vectorStore*/ const embeddings = new OpenAIEmbeddings(); const index = pinecone.Index(PINECONE_INDEX_NAME); //change to your own index name + //embed the PDF documents - await PineconeStore.fromDocuments( - index, - docs, - embeddings, - 'text', - PINECONE_NAME_SPACE, - ); + + /* Pinecone recommends a limit of 100 vectors per upsert request to avoid errors*/ + const chunkSize = 50; + for (let i = 0; i < docs.length; i += chunkSize) { + const chunk = docs.slice(i, i + chunkSize); + console.log('chunk', i, chunk); + await PineconeStore.fromDocuments( + index, + chunk, + embeddings, + 'text', + PINECONE_NAME_SPACE, + ); + } } catch (error) { console.log('error', error); throw new Error('Failed to ingest your data');