Skip to content

Commit

Permalink
fmt
Browse files Browse the repository at this point in the history
  • Loading branch information
rlancemartin committed Nov 20, 2023
1 parent b2543f6 commit a6bb9ba
Showing 1 changed file with 14 additions and 7 deletions.
21 changes: 14 additions & 7 deletions templates/rag-gpt-crawler/rag_gpt_crawler/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,24 @@
# Load output from gpt crawler
# NOTE(review): this is an absolute path under one user's home directory;
# presumably it only resolves on the author's machine — consider making it
# configurable before reusing this template.
path_to_gptcrawler = "/Users/rlm/Desktop/Code/gpt-crawler/output.json"
# Parse the file as JSON. The code below iterates over `data` and reads the
# "html", "title" and "url" keys from every entry.
data = json.loads(Path(path_to_gptcrawler).read_text())
# NOTE(review): this listing is a rendered diff without +/- markers, so the
# next two `docs = [...]` statements appear to be the removed and the added
# formatting of the same assignment — confirm against the commit itself.
docs = [Document(page_content=dict_['html'],
metadata={'title': dict_['title'],
'url': dict_['url']}) for dict_ in data]
# One Document per entry: "html" becomes the page content, while "title" and
# "url" are carried along as metadata.
docs = [
Document(
page_content=dict_["html"],
metadata={"title": dict_["title"], "url": dict_["url"]},
)
for dict_ in data
]

# Split
# Split the documents with the splitter configured with chunk_size=1000 and
# chunk_overlap=100.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
all_splits = text_splitter.split_documents(docs)

# Add to vectorDB
# NOTE(review): as above, the two `vectorstore = ...` statements look like the
# old and new formatting of one call shown together by the diff view.
vectorstore = Chroma.from_documents(documents=all_splits,
collection_name="rag-gpt-builder",
embedding=OpenAIEmbeddings(),
)
# Build the "rag-gpt-builder" Chroma collection from the splits, using
# OpenAIEmbeddings as the embedding.
vectorstore = Chroma.from_documents(
documents=all_splits,
collection_name="rag-gpt-builder",
embedding=OpenAIEmbeddings(),
)
# Expose the vector store through its retriever interface.
retriever = vectorstore.as_retriever()

# RAG prompt
Expand All @@ -48,8 +53,10 @@
| StrOutputParser()
)


# Add typing for input
# Question declares a single `__root__` field annotated as str and is passed
# to `chain.with_types` below as the chain's input type.
# NOTE(review): presumably BaseModel is pydantic's and `__root__` marks a
# custom root type — confirm against the imports, which are not shown here.
class Question(BaseModel):
__root__: str


# Rebind `chain` to the result of declaring Question as its input type.
chain = chain.with_types(input_type=Question)

0 comments on commit a6bb9ba

Please sign in to comment.