index + createchatchain
hellokayas committed Apr 14, 2024
1 parent 485b958 commit fba3be4
Showing 2 changed files with 102 additions and 0 deletions.
52 changes: 52 additions & 0 deletions doc_generator/Utils/createChatChain.py
@@ -0,0 +1,52 @@
from langchain.chains import ChatVectorDBChain, LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAIChat
from langchain.prompts import PromptTemplate


# Define the prompt template for condensing the follow-up question
condense_prompt = PromptTemplate(
    input_variables=["chat_history", "question"],
    template="Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.\n\n"
             "Chat History:\n{chat_history}\nFollow Up Input: {question}\nStandalone question:"
)
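# Illustrative example (values assumed): formatting this template yields the
# exact text sent to the question-generator model:
#   condense_prompt.format(
#       chat_history="Human: What is this project?\nAI: A documentation generator.",
#       question="Which models does it support?",
#   )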

def make_qa_prompt(project_name, repository_url, content_type, chat_prompt, target_audience):
    additional_instructions = (
        f"\nHere are some additional instructions for answering questions about {content_type}:\n{chat_prompt}"
        if chat_prompt else ""
    )
    # Project metadata is interpolated up front via f-strings; only {question}
    # and {context} remain as template variables filled at query time.
    return PromptTemplate(
        input_variables=["question", "context"],
        template=f"You are an AI assistant for a software project called {project_name}. You are trained on all the {content_type} that makes up this project.\n"
                 f"The {content_type} for the project is located at {repository_url}.\n"
                 f"You are given the following extracted parts of a technical summary of files in a {content_type} and a question. "
                 "Provide a conversational answer with hyperlinks back to GitHub.\n"
                 "You should only use hyperlinks that are explicitly listed in the context. Do NOT make up a hyperlink that is not listed.\n"
                 f"Include lots of {content_type} examples and links to the {content_type} examples, where appropriate.\n"
                 f"Assume the reader is a {target_audience} but is not deeply familiar with {project_name}.\n"
                 "Assume the reader does not know anything about how the project is structured or which folders/files are provided in the context.\n"
                 "Do not reference the context in your answer. Instead use the context to inform your answer.\n"
                 "If you don't know the answer, just say \"Hmm, I'm not sure.\" Don't try to make up an answer.\n"
                 f"If the question is not about {project_name}, politely inform them that you are tuned to only answer questions about {project_name}.\n"
                 "Your answer should be at least 100 words and no more than 300 words.\n"
                 "Do not include information that is not directly relevant to the question, even if the context includes it.\n"
                 "Always include a list of reference links to GitHub from the context. Links should ONLY come from the context.\n"
                 f"{additional_instructions}\n"
                 "Question: {question}\n\n"
                 "Context:\n{context}\n\n"
                 "Answer in Markdown:"
    )
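# For instance (illustrative values only), a code-focused prompt could be built as:
#   qa_prompt = make_qa_prompt(
#       "my_project", "https://github.com/me/my_project",
#       "code", None, "developer",
#   )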

def make_chain(project_name, repository_url, content_type, chat_prompt, target_audience, vectorstore, llms, on_token_stream=None):
    # Prefer the second (stronger) model when two are configured
    llm = llms[1] if len(llms) > 1 else llms[0]
    question_generator = LLMChain(
        llm=OpenAIChat(temperature=0.1, model_name=llm),
        prompt=condense_prompt
    )

    qa_prompt = make_qa_prompt(project_name, repository_url, content_type, chat_prompt, target_audience)
    doc_chain = load_qa_chain(
        llm=OpenAIChat(temperature=0.2, frequency_penalty=0, presence_penalty=0, model_name=llm, streaming=bool(on_token_stream)),
        chain_type="stuff",
        prompt=qa_prompt
    )

    return ChatVectorDBChain(
        vectorstore=vectorstore,
        combine_docs_chain=doc_chain,
        question_generator=question_generator
    )
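# End-to-end usage sketch (assumes `store` is an already-loaded langchain
# vector store and the model name is valid for your OpenAI account):
#   chain = make_chain("my_project", "https://github.com/me/my_project",
#                      "code", None, "developer", store, ["gpt-3.5-turbo"])
#   result = chain({"question": "How is the index built?", "chat_history": []})
#   print(result["answer"])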
50 changes: 50 additions & 0 deletions doc_generator/Utils/index.py
@@ -0,0 +1,50 @@
from prompt_toolkit import prompt
from prompt_toolkit.shortcuts import clear
import os
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from markdown2 import markdown
from createChatChain import make_chain

chat_history = []

def display_welcome_message(project_name):
    print(f"Welcome to the {project_name} chatbot.")
    print(f"Ask any questions related to the {project_name} codebase, and I'll try to help. Type 'exit' to quit.\n")

def query(name, repository_url, output, content_type, chat_prompt, target_audience, llms):
    data_path = os.path.join(output, 'docs', 'data')
    # Load the vector store persisted by the indexing step (FAISS assumed here;
    # load with whichever backend actually built the index)
    vector_store = FAISS.load_local(data_path, OpenAIEmbeddings())
    chain = make_chain(name, repository_url, content_type, chat_prompt, target_audience, vector_store, llms)

    clear()
    display_welcome_message(name)

    while True:
        question = prompt(f"How can I help with {name}?\n")
        if question.strip().lower() == 'exit':
            break

        print('Thinking...')
        try:
            # ChatVectorDBChain is callable and returns its result under 'answer'
            response = chain({'question': question, 'chat_history': chat_history})
            chat_history.append((question, response['answer']))
            print('\n\nMarkdown:\n')
            print(markdown(response['answer']))
        except Exception as error:
            print(f"Something went wrong: {error}")

if __name__ == "__main__":
    # Example configuration; replace these placeholder values with your project's
    repo_config = {
        "name": "ProjectName",
        "repository_url": "https://github.com/yourproject",
        "output": "/path/to/output",
        "content_type": "docs",
        "chat_prompt": "Additional instructions here",
        "target_audience": "developers",
    }
    user_config = {
        "llms": ["gpt-3.5-turbo"]
    }
    query(**repo_config, **user_config)
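
# Note (assumption): the FAISS index under <output>/docs/data must exist before
# this script runs. A minimal indexing sketch with the same embeddings:
#   store = FAISS.from_documents(docs, OpenAIEmbeddings())
#   store.save_local(os.path.join(output, 'docs', 'data'))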
