From 06acaad07a4955d6efc088f51a7f5699e2d12f19 Mon Sep 17 00:00:00 2001 From: folajimi Date: Mon, 24 Apr 2023 19:47:20 +0100 Subject: [PATCH] added credentials --- .eslintrc.json | 3 + .gitignore | 39 + .prettierrc | 6 + README.md | 187 ++ components/Chat.tsx | 171 ++ components/ChatBotSettings.tsx | 138 + components/Credentials.tsx | 143 + components/FileUploadArea.tsx | 160 ++ components/WebsiteUrlUpload.tsx | 63 + components/layout.tsx | 42 + components/ui/accordion.tsx | 61 + components/ui/button.tsx | 53 + components/ui/command.tsx | 158 ++ components/ui/dialog.tsx | 128 + components/ui/input.tsx | 24 + components/ui/label.tsx | 23 + components/ui/loadingdots.tsx | 23 + components/ui/progress.tsx | 26 + components/ui/skeleton.tsx | 15 + components/ui/textarea.tsx | 23 + components/ui/toast.tsx | 127 + components/ui/toaster.tsx | 34 + config/fileuploadconfig.ts | 7 + config/pinecone.ts | 14 + context/credentials-context.tsx | 68 + declarations/pdf-parse.d.ts | 5 + hooks/use-toast.ts | 187 ++ license.md | 44 + next.config.js | 24 + package.json | 66 + pages/_app.tsx | 17 + pages/_document.tsx | 13 + pages/about.tsx | 98 + pages/api/chat.ts | 56 + pages/api/delete-namespace.ts | 46 + pages/api/ingest-url.ts | 56 + pages/api/ingest.ts | 111 + pages/chatbot.tsx | 170 ++ pages/index.tsx | 343 +++ postcss.config.cjs | 6 + public/bot-image.png | Bin 0 -> 9184 bytes public/favicon.ico | Bin 0 -> 25931 bytes public/usericon.png | Bin 0 -> 16770 bytes scripts/ingest-data.ts | 57 + styles/Home.module.css | 262 ++ styles/base.css | 3 + styles/chrome-bug.css | 12 + styles/loading-dots.module.css | 69 + tailwind.config.cjs | 12 + tsconfig.json | 30 + types/chat.ts | 8 + types/misc.ts | 8 + types/pinecone.ts | 5 + utils/cn.ts | 6 + utils/customPDFLoader.ts | 18 + utils/extractTextFromFiles.ts | 67 + utils/extractTextFromWebsiteUrl.ts | 10 + utils/formidable.ts | 50 + utils/helpers.ts | 9 + utils/logClass.ts | 48 + utils/makechain.ts | 40 + utils/manualPDFLoader.ts | 61 + 
utils/openai-client.ts | 9 + utils/pinecone-client.ts | 31 + utils/pinecone-local-client.ts | 27 + yarn.lock | 4194 ++++++++++++++++++++++++++++ 66 files changed, 8014 insertions(+) create mode 100644 .eslintrc.json create mode 100644 .gitignore create mode 100644 .prettierrc create mode 100644 README.md create mode 100644 components/Chat.tsx create mode 100644 components/ChatBotSettings.tsx create mode 100644 components/Credentials.tsx create mode 100644 components/FileUploadArea.tsx create mode 100644 components/WebsiteUrlUpload.tsx create mode 100644 components/layout.tsx create mode 100644 components/ui/accordion.tsx create mode 100644 components/ui/button.tsx create mode 100644 components/ui/command.tsx create mode 100644 components/ui/dialog.tsx create mode 100644 components/ui/input.tsx create mode 100644 components/ui/label.tsx create mode 100644 components/ui/loadingdots.tsx create mode 100644 components/ui/progress.tsx create mode 100644 components/ui/skeleton.tsx create mode 100644 components/ui/textarea.tsx create mode 100644 components/ui/toast.tsx create mode 100644 components/ui/toaster.tsx create mode 100644 config/fileuploadconfig.ts create mode 100644 config/pinecone.ts create mode 100644 context/credentials-context.tsx create mode 100644 declarations/pdf-parse.d.ts create mode 100644 hooks/use-toast.ts create mode 100644 license.md create mode 100644 next.config.js create mode 100644 package.json create mode 100644 pages/_app.tsx create mode 100644 pages/_document.tsx create mode 100644 pages/about.tsx create mode 100644 pages/api/chat.ts create mode 100644 pages/api/delete-namespace.ts create mode 100644 pages/api/ingest-url.ts create mode 100644 pages/api/ingest.ts create mode 100644 pages/chatbot.tsx create mode 100644 pages/index.tsx create mode 100644 postcss.config.cjs create mode 100644 public/bot-image.png create mode 100644 public/favicon.ico create mode 100644 public/usericon.png create mode 100644 scripts/ingest-data.ts create mode 
100644 styles/Home.module.css create mode 100644 styles/base.css create mode 100644 styles/chrome-bug.css create mode 100644 styles/loading-dots.module.css create mode 100644 tailwind.config.cjs create mode 100644 tsconfig.json create mode 100644 types/chat.ts create mode 100644 types/misc.ts create mode 100644 types/pinecone.ts create mode 100644 utils/cn.ts create mode 100644 utils/customPDFLoader.ts create mode 100644 utils/extractTextFromFiles.ts create mode 100644 utils/extractTextFromWebsiteUrl.ts create mode 100644 utils/formidable.ts create mode 100644 utils/helpers.ts create mode 100644 utils/logClass.ts create mode 100644 utils/makechain.ts create mode 100644 utils/manualPDFLoader.ts create mode 100644 utils/openai-client.ts create mode 100644 utils/pinecone-client.ts create mode 100644 utils/pinecone-local-client.ts create mode 100644 yarn.lock diff --git a/.eslintrc.json b/.eslintrc.json new file mode 100644 index 0000000..ea782d7 --- /dev/null +++ b/.eslintrc.json @@ -0,0 +1,3 @@ +{ + "extends": "next/core-web-vitals" +} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..be9d92d --- /dev/null +++ b/.gitignore @@ -0,0 +1,39 @@ +# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
+ +# dependencies +/node_modules +/.pnp +.pnp.js + +# testing +/coverage + +# next.js +/.next/ +/out/ + +# production +/build + +# misc +.DS_Store +*.pem + +# debug +npm-debug.log* +yarn-debug.log* +yarn-error.log* +.pnpm-debug.log* + +# local env files +.env*.local +.env +docs.json +embedding.json + +# vercel +.vercel + +# typescript +*.tsbuildinfo +next-env.d.ts diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 0000000..3fdc523 --- /dev/null +++ b/.prettierrc @@ -0,0 +1,6 @@ +{ + "trailingComma": "all", + "singleQuote": true, + "printWidth": 80, + "tabWidth": 2 +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..02bfdbb --- /dev/null +++ b/README.md @@ -0,0 +1,187 @@ +# GPT-4 & LangChain - Create a ChatGPT Chatbot for Your PDF Files + +Use the new GPT-4 api to build a chatGPT chatbot for multiple Large PDF files. + +Tech stack used includes LangChain, Pinecone, Typescript, Openai, and Next.js. LangChain is a framework that makes it easier to build scalable AI/LLM apps and chatbots. Pinecone is a vectorstore for storing embeddings and your PDF in text to later retrieve similar docs. + +The visual guide of this repo and tutorial is in the `visual guide` folder. + +**If you run into errors, please review the troubleshooting section further down this page.** + +## Development + +1. Make sure you have installed node and yarn + +[Node installation](https://nodejs.org/en/download) + +Yarn installation in your terminal after installing node + +`npm install -g yarn` + +Check that both are installed. + +``` +node -v +yarn -v +``` + +Node must be at least version 18.x.x + +Clone the repo + +2. Install packages + +``` +yarn install +``` + +You should see a `node_modules` folder afterwards. + +3. In the `config` folder, replace the `PINECONE_NAME_SPACE` with a `namespace` where you'd like to store your embeddings on Pinecone when you run `npm run ingest` manually or use the `api/ingest` via uploading on the frontend. 
This namespace will later be used for queries and retrieval. + +--- + +## If you want to "ingest" manually + +--- + +Set up your `.env` file and insert credentials + +- Copy `.env.example` into `.env` + Your `.env` file should look like this: + +``` +OPENAI_API_KEY= + +PINECONE_API_KEY= +PINECONE_ENVIRONMENT= + +PINECONE_INDEX_NAME= + +``` + +- Visit [openai](https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key) to retrieve API keys and insert into your `.env` file. +- Visit [pinecone](https://pinecone.io/) to create and retrieve your API keys, and also retrieve your environment and index name from the dashboard. + +### Convert your PDF files to embeddings + +**This repo can load multiple PDF files** + +1. Inside the `docs` folder, add your pdf files or folders that contain pdf files. + +2. Run the script `npm run ingest` to 'ingest' and embed your docs. If you run into errors, see the troubleshooting section below. + +3. Check the Pinecone dashboard to verify your namespace and vectors have been added. + +**You can also manually ingest other file types by adding more loaders to the `DirectoryLoader`** + +### Chat with your docs + +Run `npm run dev` to load `localhost:3000`, then visit the `Chatbot` page to chat with your docs. + +--- + +## If you want to "ingest" via the UI upload + +--- + +If you would prefer to use the UI upload in the `upload` page, you don't need an `.env` file. + +First, run `npm run dev` to load `localhost:3000`, then click on `Add credentials` to input your key credentials. Then click `Save.` + +Drag or upload a file into the upload area and then click `upload`. You should then be redirected to the chatbot. + +## Adapting for your use case + +In `utils/makechain.ts`, change the `QA_PROMPT` prompt for your own use case. Change `modelName` in `OpenAI` to `gpt-4`, if you have access to the `gpt-4` api. + +## Troubleshooting + +**General errors** + +- Make sure you're running the latest Node version. 
Run `node -v` +- Make sure you're using the same versions of LangChain and Pinecone as this repo. +- Check that you've created an `.env` file that contains your valid (and working) API keys, environment and index name. +- If you change `modelName` in `OpenAI` note that you need access to `gpt-4` for it to work. +- Make sure you have access to `gpt-4` if you decide to use it. Test your openAI keys outside the repo and make sure they work and that you have enough API credits. +- Check that your pdf file is not corrupted; a corrupted file cannot be parsed. + +**Pinecone errors** + +- Make sure your pinecone dashboard `environment` and `index` match the ones in the `pinecone.ts` and `.env` files. +- Check that you've set the vector dimensions to `1536`. +- Make sure your pinecone namespace is in lowercase. +- Pinecone indexes of users on the Starter (free) plan are deleted after 7 days of inactivity. To prevent this, send an API request to Pinecone to reset the counter. +- Retry from scratch with a new Pinecone index and cloned repo. + +## Deployment + +## Key files + +`config/fileuploadconfig.ts`: Controls the maxfilesize and maxnumberfiles allowed per upload. These settings are preconfigured for Vercel serverless function limits. + +`utils/extractTextFromFiles.ts`: handles the logic for 'loading' various file types. + +`utils/manualPDFLoader.ts`: this file is used for the manual ingest process run in `ingest-data.ts` + +`utils/customPDFLoader`: The PDF 'loader' that parses the uploaded files into LangChain `Documents`. Modify the `metadata` as required. + +`utils/formidable.ts`: Responsible for parsing uploaded files. + +`utils/makechain.ts`: Logic responsible for combining question to standalone question, retrieving relevant docs and then outputting a final result. Change the `OpenAIChat` `modelName` to `gpt-3.5-turbo` if you don't have access to `gpt-4`. Modify the `QA_Prompt` for your use case. + +`utils/pinecone-client.ts`: The pinecone client that takes credentials from the UI. 
+ +`utils/pinecone-local-client.ts`: The pinecone client that uses the credentials from the `.env` file. + +`api/ingest.ts`: Api route responsible for 'ingesting' the uploaded files. + +`api/ingest-url.ts`: Api route responsible for 'ingesting' the uploaded url. + +`api/delete-namespace.ts`: Api route responsible for deleting the specified namespace from the index. Uses the `pinecone-local-client.ts` + +`api/chat.ts`: Api route responsible for the 'chat' process, including retrieval of relevant documents. + +`pages/credentials.tsx`: Main page for uploading credentials from the UI. + +`components/FileUploadArea.tsx`: The file upload drop area. Modify the accepted files here as well as the number of files allowed and max file size. + +`public`: In the public folder you can change the default images of bot and user. Make sure to change the file names in the frontend `components/chat.tsx` as well: + +For example: + +``` +; + handleSubmit: (e: React.FormEvent) => void; + handleEnter: (e: React.KeyboardEvent) => void; + setQuery: React.Dispatch>; + query: string; + sourceDocs?: Document[]; + messageListRef: React.RefObject; +} + +export function Chat({ + chatMessages, + loading, + textAreaRef, + handleSubmit, + handleEnter, + setQuery, + query, + messageListRef, +}: ChatProps) { + return ( + <> +
+
+ +
+
+
+ {chatMessages.map((message, index) => { + let icon; + let className; + if (message.type === 'apiMessage') { + icon = ( + AI + ); + className = styles.apimessage; + } else { + icon = ( + Me + ); + // The latest message sent by the user will be animated while waiting for a response + className = + loading && index === chatMessages.length - 1 + ? styles.usermessagewaiting + : styles.usermessage; + } + return ( + <> +
+ {icon} +
+ + {message.message} + +
+
+ {message.sourceDocs && ( +
+ + {message.sourceDocs.map((doc, index) => ( +
+ + +

Source {index + 1}

+
+ + + {doc.pageContent} + +

+ Source: {doc.metadata.source} +

+
+
+
+ ))} +
+
+ )} + + ); + })} +
+
+
+
+
+