From fd1020b0a2d04a091c3d939a202015d19c5b960e Mon Sep 17 00:00:00 2001 From: Mayo Date: Fri, 17 Mar 2023 02:23:33 +0100 Subject: [PATCH] first commit --- .env.example | 6 + .eslintrc.json | 3 + .gitignore | 40 + .prettierrc | 6 + README.md | 63 + components/layout.tsx | 24 + components/ui/LoadingDots.tsx | 23 + components/ui/TextArea.tsx | 23 + components/ui/accordion.tsx | 61 + config/pinecone.ts | 9 + docs/MorseVsFrederick.pdf | Bin 0 -> 291943 bytes next.config.js | 11 + package.json | 58 + pages/_app.tsx | 20 + pages/_document.tsx | 13 + pages/api/chat.ts | 62 + pages/index.tsx | 309 ++ pnpm-lock.yaml | 4606 ++++++++++++++++++++++++++++ postcss.config.cjs | 6 + public/bot-image.png | Bin 0 -> 9184 bytes public/favicon.ico | Bin 0 -> 25931 bytes public/usericon.png | Bin 0 -> 16770 bytes scripts/ingest-data.ts | 50 + styles/Home.module.css | 262 ++ styles/base.css | 3 + styles/chrome-bug.css | 12 + styles/loading-dots.module.css | 69 + tailwind.config.cjs | 11 + tsconfig.json | 29 + types/chat.ts | 8 + utils/cn.ts | 6 + utils/makechain.ts | 60 + utils/openai-client.ts | 9 + utils/pinecone-client.ts | 24 + visual-guide/gpt-langchain-pdf.png | Bin 0 -> 1522633 bytes 35 files changed, 5886 insertions(+) create mode 100644 .env.example create mode 100644 .eslintrc.json create mode 100644 .gitignore create mode 100644 .prettierrc create mode 100644 README.md create mode 100644 components/layout.tsx create mode 100644 components/ui/LoadingDots.tsx create mode 100644 components/ui/TextArea.tsx create mode 100644 components/ui/accordion.tsx create mode 100644 config/pinecone.ts create mode 100644 docs/MorseVsFrederick.pdf create mode 100644 next.config.js create mode 100644 package.json create mode 100644 pages/_app.tsx create mode 100644 pages/_document.tsx create mode 100644 pages/api/chat.ts create mode 100644 pages/index.tsx create mode 100644 pnpm-lock.yaml create mode 100644 postcss.config.cjs create mode 100644 public/bot-image.png create mode 100644 
public/favicon.ico create mode 100644 public/usericon.png create mode 100644 scripts/ingest-data.ts create mode 100644 styles/Home.module.css create mode 100644 styles/base.css create mode 100644 styles/chrome-bug.css create mode 100644 styles/loading-dots.module.css create mode 100644 tailwind.config.cjs create mode 100644 tsconfig.json create mode 100644 types/chat.ts create mode 100644 utils/cn.ts create mode 100644 utils/makechain.ts create mode 100644 utils/openai-client.ts create mode 100644 utils/pinecone-client.ts create mode 100644 visual-guide/gpt-langchain-pdf.png diff --git a/.env.example b/.env.example new file mode 100644 index 000000000..77bb5630e --- /dev/null +++ b/.env.example @@ -0,0 +1,6 @@ +OPENAI_API_KEY= + +# Update these with your Pinecone API key and environment from your Pinecone dashboard +PINECONE_API_KEY= +PINECONE_ENVIRONMENT= + diff --git a/.eslintrc.json b/.eslintrc.json new file mode 100644 index 000000000..bffb357a7 --- /dev/null +++ b/.eslintrc.json @@ -0,0 +1,3 @@ +{ + "extends": "next/core-web-vitals" +} diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..1759b24fb --- /dev/null +++ b/.gitignore @@ -0,0 +1,40 @@ +# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
+ +# dependencies +/node_modules +/.pnp +.pnp.js + +# testing +/coverage + +# next.js +/.next/ +/out/ + +# production +/build + +# misc +.DS_Store +*.pem + +# debug +npm-debug.log* +yarn-debug.log* +yarn-error.log* +.pnpm-debug.log* + +# local env files +.env*.local +.env + +# vercel +.vercel + +# typescript +*.tsbuildinfo +next-env.d.ts + +#Notion_db +/Notion_DB diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 000000000..0238bf0f0 --- /dev/null +++ b/.prettierrc @@ -0,0 +1,6 @@ +{ + "trailingComma": "all", + "singleQuote": true, + "printWidth": 80, + "tabWidth": 2 +} diff --git a/README.md b/README.md new file mode 100644 index 000000000..ce318e9c5 --- /dev/null +++ b/README.md @@ -0,0 +1,63 @@ +# GPT-4 & LangChain - Create a ChatGPT Chatbot for Your PDF Docs + +Use the new GPT-4 API to build a ChatGPT chatbot for large PDF docs (56 pages used in this example). + +Tech stack used includes LangChain, Pinecone, TypeScript, OpenAI, and Next.js. LangChain is a framework that makes it easier to build scalable AI/LLM apps and chatbots. Pinecone is a vector store that holds the embeddings and text of your PDF so that similar docs can be retrieved later. + +[Tutorial video](https://www.youtube.com/watch?v=ih9PBGVVOO4) + +[Get in touch via twitter if you have questions](https://twitter.com/mayowaoshin) + +The visual guide for this repo and tutorial is in the `visual-guide` folder. + +## Development + +1. Clone the repo + +``` +git clone [github https url] +``` + +2. Install packages + +``` +pnpm install +``` + +3. Set up your `.env` file + +- Copy `.env.example` into `.env` + Your `.env` file should look like this: + +``` +OPENAI_API_KEY= + +PINECONE_API_KEY= +PINECONE_ENVIRONMENT= + +``` + +- Visit [openai](https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key) to retrieve API keys and insert into your `.env` file. +- Visit [pinecone](https://pinecone.io/) to create and retrieve your API keys. + +4. 
In the `config` folder, replace the `PINECONE_INDEX_NAME` and `PINECONE_NAME_SPACE` with your own details from your pinecone dashboard. + +5. In `utils/makechain.ts`, change the `QA_PROMPT` for your own use case. Change `modelName` in `new OpenAIChat` to a different api model if you don't have access to `gpt-4`. + +## Convert your PDF to embeddings + +1. In `docs` folder replace the pdf with your own pdf doc. + +2. In `scripts/ingest-data.ts` replace `filePath` with `docs/{yourdocname}.pdf` + +3. Run the script `npm run ingest` to 'ingest' and embed your docs + +4. Check Pinecone dashboard to verify your namespace and vectors have been added. + +## Run the app + +Once you've verified that the embeddings and content have been successfully added to your Pinecone index, you can run the app `npm run dev` to launch the local dev environment and then type a question in the chat interface. + +## Credit + +Frontend of this repo is inspired by [langchain-chat-nextjs](https://github.com/zahidkhawaja/langchain-chat-nextjs) diff --git a/components/layout.tsx b/components/layout.tsx new file mode 100644 index 000000000..4481b4dcb --- /dev/null +++ b/components/layout.tsx @@ -0,0 +1,24 @@ +interface LayoutProps { + children?: React.ReactNode; +} + +export default function Layout({ children }: LayoutProps) { + return ( +
+
+
+ +
+
+
+
+ {children} +
+
+
+ ); +} diff --git a/components/ui/LoadingDots.tsx b/components/ui/LoadingDots.tsx new file mode 100644 index 000000000..46f2b9167 --- /dev/null +++ b/components/ui/LoadingDots.tsx @@ -0,0 +1,23 @@ +import styles from '@/styles/loading-dots.module.css'; + +const LoadingDots = ({ + color = '#000', + style = 'small', +}: { + color: string; + style: string; +}) => { + return ( + + + + + + ); +}; + +export default LoadingDots; + +LoadingDots.defaultProps = { + style: 'small', +}; diff --git a/components/ui/TextArea.tsx b/components/ui/TextArea.tsx new file mode 100644 index 000000000..aa1f87a9c --- /dev/null +++ b/components/ui/TextArea.tsx @@ -0,0 +1,23 @@ +import * as React from 'react'; +import { cn } from '@/utils/cn'; + +export interface TextareaProps + extends React.TextareaHTMLAttributes {} + +const Textarea = React.forwardRef( + ({ className, ...props }, ref) => { + return ( +