forked from mayooear/gpt4-pdf-chatbot-langchain
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit fd1020b
Showing
35 changed files
with
5,886 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
OPENAI_API_KEY= | ||
|
||
# Update these with your Pinecone API key and environment from your Pinecone dashboard | ||
PINECONE_API_KEY= | ||
PINECONE_ENVIRONMENT= | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"extends": "next/core-web-vitals" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. | ||
|
||
# dependencies | ||
/node_modules | ||
/.pnp | ||
.pnp.js | ||
|
||
# testing | ||
/coverage | ||
|
||
# next.js | ||
/.next/ | ||
/out/ | ||
|
||
# production | ||
/build | ||
|
||
# misc | ||
.DS_Store | ||
*.pem | ||
|
||
# debug | ||
npm-debug.log* | ||
yarn-debug.log* | ||
yarn-error.log* | ||
.pnpm-debug.log* | ||
|
||
# local env files | ||
.env*.local | ||
.env | ||
|
||
# vercel | ||
.vercel | ||
|
||
# typescript | ||
*.tsbuildinfo | ||
next-env.d.ts | ||
|
||
#Notion_db | ||
/Notion_DB |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
{ | ||
"trailingComma": "all", | ||
"singleQuote": true, | ||
"printWidth": 80, | ||
"tabWidth": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# GPT-4 & LangChain - Create a ChatGPT Chatbot for Your PDF Docs | ||
|
||
Use the new GPT-4 API to build a ChatGPT chatbot for large PDF docs (56 pages used in this example). | ||
|
||
The tech stack includes LangChain, Pinecone, TypeScript, OpenAI, and Next.js. LangChain is a framework that makes it easier to build scalable AI/LLM apps and chatbots. Pinecone is a vector store that holds the embeddings and the text of your PDF so that similar docs can be retrieved later. | ||
|
||
[Tutorial video](https://www.youtube.com/watch?v=ih9PBGVVOO4) | ||
|
||
[Get in touch via Twitter if you have questions](https://twitter.com/mayowaoshin) | ||
|
||
The visual guide of this repo and tutorial is in the `visual guide` folder. | ||
|
||
## Development | ||
|
||
1. Clone the repo | ||
|
||
``` | ||
git clone [github https url] | ||
``` | ||
|
||
2. Install packages | ||
|
||
``` | ||
pnpm install | ||
``` | ||
|
||
3. Set up your `.env` file | ||
|
||
- Copy `.env.example` into `.env` | ||
Your `.env` file should look like this: | ||
|
||
``` | ||
OPENAI_API_KEY= | ||
PINECONE_API_KEY= | ||
PINECONE_ENVIRONMENT= | ||
``` | ||
|
||
- Visit [openai](https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key) to retrieve API keys and insert into your `.env` file. | ||
- Visit [pinecone](https://pinecone.io/) to create and retrieve your API keys. | ||
|
||
4. In the `config` folder, replace the `PINECONE_INDEX_NAME` and `PINECONE_NAME_SPACE` with your own details from your pinecone dashboard. | ||
|
||
5. In `utils/makechain.ts`, change the `QA_PROMPT` for your own use case. Change `modelName` in `new OpenAIChat` to a different API model if you don't have access to `gpt-4`. | ||
|
||
## Convert your PDF to embeddings | ||
|
||
1. In `docs` folder replace the pdf with your own pdf doc. | ||
|
||
2. In `scripts/ingest-data.ts` replace `filePath` with `docs/{yourdocname}.pdf` | ||
|
||
3. Run the script `npm run ingest` to 'ingest' and embed your docs | ||
|
||
4. Check Pinecone dashboard to verify your namespace and vectors have been added. | ||
|
||
## Run the app | ||
|
||
Once you've verified that the embeddings and content have been successfully added to your Pinecone index, you can run the app with `npm run dev` to launch the local dev environment, and then type a question in the chat interface. | ||
|
||
## Credit | ||
|
||
Frontend of this repo is inspired by [langchain-chat-nextjs](https://github.com/zahidkhawaja/langchain-chat-nextjs) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
/** Props accepted by the page-level `Layout` wrapper. */
interface LayoutProps {
  /** Page content rendered inside the `<main>` region. */
  children?: React.ReactNode;
}

/**
 * Page shell: a sticky header holding a single "Home" link, followed by a
 * container whose `<main>` element hosts the supplied children.
 */
export default function Layout(props: LayoutProps) {
  const { children } = props;

  // The header is built separately so the returned tree reads top-to-bottom.
  const header = (
    <header className="container sticky top-0 z-40 bg-white">
      <div className="h-16 border-b border-b-slate-200 py-4">
        <nav className="ml-4 pl-6">
          <a href="#" className="hover:text-slate-600 cursor-pointer">
            Home
          </a>
        </nav>
      </div>
    </header>
  );

  return (
    <div className="mx-auto flex flex-col space-y-4">
      {header}
      <div className="container">
        <main className="flex w-full flex-1 flex-col overflow-hidden">
          {children}
        </main>
      </div>
    </div>
  );
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import styles from '@/styles/loading-dots.module.css'; | ||
|
||
/**
 * Three-dot loading indicator.
 *
 * @param color - CSS color used as the background of each dot. Defaults to
 *   `'#000'`.
 * @param style - `'small'` selects `styles.loading2`; any other value selects
 *   `styles.loading`. Defaults to `'small'`.
 *
 * Both props are optional: the destructuring defaults below supply the
 * fallback values, so no separate `defaultProps` assignment is needed (and
 * `defaultProps` on function components is deprecated in React).
 */
const LoadingDots = ({
  color = '#000',
  style = 'small',
}: {
  color?: string;
  style?: string;
}) => {
  // Strict equality: `style` is always a string here, so no coercion is wanted.
  const wrapperClass = style === 'small' ? styles.loading2 : styles.loading;

  return (
    <span className={wrapperClass}>
      <span style={{ backgroundColor: color }} />
      <span style={{ backgroundColor: color }} />
      <span style={{ backgroundColor: color }} />
    </span>
  );
};

export default LoadingDots;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import * as React from 'react'; | ||
import { cn } from '@/utils/cn'; | ||
|
||
/**
 * Props for `Textarea`: exactly the attributes of a native `<textarea>`
 * element, with nothing added.
 */
export interface TextareaProps
  extends React.TextareaHTMLAttributes<HTMLTextAreaElement> {}

/**
 * Styled `<textarea>` that forwards its ref to the underlying DOM element.
 * The caller's `className` is combined with the base classes through `cn`;
 * all remaining props are spread onto the element unchanged.
 */
const Textarea = React.forwardRef<HTMLTextAreaElement, TextareaProps>(
  ({ className, ...rest }, forwardedRef) => (
    <textarea
      className={cn(
        'flex h-20 w-full rounded-md border border-slate-300 bg-transparent py-2 px-3 text-sm placeholder:text-slate-400 focus:outline-none focus:ring-2 focus:ring-slate-400 focus:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 dark:border-slate-700 dark:text-slate-50 dark:focus:ring-slate-400 dark:focus:ring-offset-slate-900',
        className,
      )}
      ref={forwardedRef}
      {...rest}
    />
  ),
);
Textarea.displayName = 'Textarea';

export { Textarea };
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import * as React from 'react'; | ||
import * as AccordionPrimitive from '@radix-ui/react-accordion'; | ||
import { ChevronDown } from 'lucide-react'; | ||
|
||
import { cn } from '@/utils/cn'; | ||
|
||
/** Root accordion container: a direct alias of `AccordionPrimitive.Root` from `@radix-ui/react-accordion`. */
const Accordion = AccordionPrimitive.Root; | ||
|
||
/**
 * One accordion section. Wraps `AccordionPrimitive.Item`, forwarding the ref
 * and adding a bottom border; the caller's `className` is merged via `cn`.
 */
const AccordionItem = React.forwardRef<
  React.ElementRef<typeof AccordionPrimitive.Item>,
  React.ComponentPropsWithoutRef<typeof AccordionPrimitive.Item>
>(({ className, ...rest }, forwardedRef) => {
  const itemClasses = cn(
    'border-b border-b-slate-200 dark:border-b-slate-700',
    className,
  );

  return (
    <AccordionPrimitive.Item
      ref={forwardedRef}
      className={itemClasses}
      {...rest}
    />
  );
});
AccordionItem.displayName = 'AccordionItem';
|
||
/**
 * Clickable header of an accordion section. Renders the children followed by
 * a chevron icon inside `AccordionPrimitive.Trigger`, itself wrapped in
 * `AccordionPrimitive.Header`. The ref and remaining props go to the trigger.
 */
const AccordionTrigger = React.forwardRef<
  React.ElementRef<typeof AccordionPrimitive.Trigger>,
  React.ComponentPropsWithoutRef<typeof AccordionPrimitive.Trigger>
>(({ className, children, ...rest }, forwardedRef) => {
  // The `[&[data-state=open]>svg]` selector rotates the chevron while open.
  const triggerClasses = cn(
    'flex flex-1 items-center justify-between py-4 font-medium transition-all hover:underline [&[data-state=open]>svg]:rotate-180',
    className,
  );

  return (
    <AccordionPrimitive.Header className="flex">
      <AccordionPrimitive.Trigger
        ref={forwardedRef}
        className={triggerClasses}
        {...rest}
      >
        {children}
        <ChevronDown className="h-4 w-4 transition-transform duration-200" />
      </AccordionPrimitive.Trigger>
    </AccordionPrimitive.Header>
  );
});
AccordionTrigger.displayName = AccordionPrimitive.Trigger.displayName;
|
||
/**
 * Collapsible body of an accordion section. Wraps `AccordionPrimitive.Content`
 * with open/close animation classes and places the children in a padded div.
 */
const AccordionContent = React.forwardRef<
  React.ElementRef<typeof AccordionPrimitive.Content>,
  React.ComponentPropsWithoutRef<typeof AccordionPrimitive.Content>
>(({ className, children, ...rest }, forwardedRef) => {
  const contentClasses = cn(
    'data-[state=open]:animate-accordion-down data-[state=closed]:animate-accordion-up overflow-hidden text-sm transition-all',
    className,
  );

  return (
    <AccordionPrimitive.Content
      ref={forwardedRef}
      className={contentClasses}
      {...rest}
    >
      <div className="pt-0 pb-4">{children}</div>
    </AccordionPrimitive.Content>
  );
});
AccordionContent.displayName = AccordionPrimitive.Content.displayName;
|
||
export { Accordion, AccordionItem, AccordionTrigger, AccordionContent }; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
/**
 * Pinecone settings. Change the index and namespace to your own.
 */

/** Name of the Pinecone index. */
export const PINECONE_INDEX_NAME = 'langchainjsfundamentals';

/** Namespace for your vectors; using a namespace is optional. */
export const PINECONE_NAME_SPACE = 'demo';
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
/** @type {import('next').NextConfig} */ | ||
const nextConfig = { | ||
reactStrictMode: true, | ||
swcMinify: true, | ||
webpack(config) { | ||
config.experiments = { ...config.experiments, topLevelAwait: true }; | ||
return config; | ||
}, | ||
}; | ||
|
||
export default nextConfig; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
{ | ||
"name": "gpt4-langchain-pdf-chatbot", | ||
"version": "0.1.0", | ||
"private": true, | ||
"license": "MIT", | ||
"author": "Mayooear<twitter:@mayowaoshin>", | ||
"type": "module", | ||
"scripts": { | ||
"dev": "next dev", | ||
"build": "next build", | ||
"start": "next start", | ||
"type-check": "tsc --noEmit", | ||
"lint": "eslint --ignore-path .gitignore \"**/*.+(ts|js|tsx)\"", | ||
"format": "prettier --ignore-path .gitignore \"**/*.+(ts|js|tsx)\" --write", | ||
"ingest": "tsx -r dotenv/config scripts/ingest-data.ts" | ||
}, | ||
"dependencies": { | ||
"@microsoft/fetch-event-source": "^2.0.1", | ||
"@pinecone-database/pinecone": "^0.0.10", | ||
"@radix-ui/react-accordion": "^1.1.1", | ||
"clsx": "^1.2.1", | ||
"dotenv": "^16.0.3", | ||
"langchain": "^0.0.33", | ||
"lucide-react": "^0.125.0", | ||
"next": "13.2.3", | ||
"pdf-parse": "1.1.1", | ||
"react": "18.2.0", | ||
"react-dom": "18.2.0", | ||
"react-markdown": "^8.0.5", | ||
"tailwind-merge": "^1.10.0" | ||
}, | ||
"devDependencies": { | ||
"@types/node": "^18.14.6", | ||
"@types/react": "^18.0.28", | ||
"@types/react-dom": "^18.0.11", | ||
"@typescript-eslint/parser": "^5.54.0", | ||
"autoprefixer": "^10.4.13", | ||
"eslint": "8.35.0", | ||
"eslint-config-next": "13.2.3", | ||
"postcss": "^8.4.21", | ||
"prettier": "^2.8.4", | ||
"tailwindcss": "^3.2.7", | ||
"tsx": "^3.12.3", | ||
"typescript": "^4.9.5" | ||
}, | ||
"keywords": [ | ||
"starter", | ||
"gpt4", | ||
"pinecone", | ||
"typescript", | ||
"nextjs", | ||
"langchain", | ||
"law", | ||
"legal", | ||
"pdf", | ||
"openai" | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import '@/styles/base.css'; | ||
import type { AppProps } from 'next/app'; | ||
import { Inter } from 'next/font/google'; | ||
|
||
// Load the Inter font (latin subset) and expose it via the `--font-inter`
// CSS variable.
const inter = Inter({
  subsets: ['latin'],
  variable: '--font-inter',
});

/**
 * Custom App component: renders the active page inside a `<main>` element
 * that carries the Inter font variable class.
 */
export default function MyApp(props: AppProps) {
  const { Component, pageProps } = props;

  return (
    <main className={inter.variable}>
      <Component {...pageProps} />
    </main>
  );
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import { Html, Head, Main, NextScript } from "next/document"; | ||
|
||
/**
 * Custom document: sets `lang="en"` on the `<Html>` element and renders the
 * `Head`, `Main` and `NextScript` components from `next/document`.
 */
export default function Document() {
  const body = (
    <body>
      <Main />
      <NextScript />
    </body>
  );

  return (
    <Html lang="en">
      <Head />
      {body}
    </Html>
  );
}
Oops, something went wrong.