From c9ae5a1beebd82c463c46c34b21b5ce491019975 Mon Sep 17 00:00:00 2001 From: Maksym Zhytnikov <63515947+Maxxx-zh@users.noreply.github.com> Date: Mon, 20 May 2024 21:35:35 +0300 Subject: [PATCH] [FSTORE-1404] LLM PDF Tutorial (#266) * LLM PDF Search Tutorial using RAG and Fine-Tuning --- README.md | 4 +- .../llm_pdfs/1_feature_backfill.ipynb | 285 +++++++++++++ .../llm_pdfs/1a_feature_pipeline.py | 69 ++++ .../llm_pdfs/1b_dataset_generation.ipynb | 290 ++++++++++++++ .../llm_pdfs/2_training_pipeline.ipynb | 376 ++++++++++++++++++ .../llm_pdfs/3_inference_pipeline.ipynb | 366 +++++++++++++++++ advanced_tutorials/llm_pdfs/app.py | 127 ++++++ advanced_tutorials/llm_pdfs/config.py | 16 + .../functions/connect_to_google_drive.py | 19 + .../llm_pdfs/functions/llm_chain.py | 133 +++++++ .../llm_pdfs/functions/pdf_preprocess.py | 96 +++++ .../llm_pdfs/functions/prompt_engineering.py | 151 +++++++ .../llm_pdfs/functions/text_preprocess.py | 73 ++++ advanced_tutorials/llm_pdfs/requirements.txt | 22 + 14 files changed, 2026 insertions(+), 1 deletion(-) create mode 100644 advanced_tutorials/llm_pdfs/1_feature_backfill.ipynb create mode 100644 advanced_tutorials/llm_pdfs/1a_feature_pipeline.py create mode 100644 advanced_tutorials/llm_pdfs/1b_dataset_generation.ipynb create mode 100644 advanced_tutorials/llm_pdfs/2_training_pipeline.ipynb create mode 100644 advanced_tutorials/llm_pdfs/3_inference_pipeline.ipynb create mode 100644 advanced_tutorials/llm_pdfs/app.py create mode 100644 advanced_tutorials/llm_pdfs/config.py create mode 100644 advanced_tutorials/llm_pdfs/functions/connect_to_google_drive.py create mode 100644 advanced_tutorials/llm_pdfs/functions/llm_chain.py create mode 100644 advanced_tutorials/llm_pdfs/functions/pdf_preprocess.py create mode 100644 advanced_tutorials/llm_pdfs/functions/prompt_engineering.py create mode 100644 advanced_tutorials/llm_pdfs/functions/text_preprocess.py create mode 100644 advanced_tutorials/llm_pdfs/requirements.txt diff --git a/README.md b/README.md index dd8e87cc..01793905 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ In order to understand the tutorials you need to be familiar with general concep - [Iris](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/iris): Classify iris flower species. - [Loan Approval](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/loan_approval): Predict loan approvals. - Advanced Tutorials: - - [Air Quality](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/advanced_tutorials/air_quality): Predict the Air Quality value (PM2.5) in Europe and USA using weather features and air quality features of the previous days. + - [Air Quality](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/advanced_tutorials/air_quality): Creating an air quality AI assistant that displays and explains air quality indicators for specific dates or periods, using Function Calling for LLMs and a RAG approach without a vector database. - [Bitcoin](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/advanced_tutorials/bitcoin): Predict Bitcoin price using timeseries features and tweets sentiment analysis. - [Citibike](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/advanced_tutorials/citibike): Predict the number of citibike users on each citibike station in the New York City. - [Credit Scores](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/advanced_tutorials/credit_scores): Predict clients' repayment abilities. 
@@ -50,6 +50,8 @@ In order to understand the tutorials you need to be familiar with general concep - [NYC Taxi Fares](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/advanced_tutorials/nyc_taxi_fares): Predict the fare amount for a taxi ride in New York City given the pickup and dropoff locations. - [Recommender System](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/advanced_tutorials/recommender-system): Build a recommender system for fashion items. - [TimeSeries](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/advanced_tutorials/timeseries): Timeseries price prediction. + - [LLM PDF](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/advanced_tutorials/llm_pdfs): An AI assistant that utilizes a Retrieval-Augmented Generation (RAG) system to provide accurate answers to user questions by retrieving relevant context from PDF documents. + - [Fraud Cheque Detection](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/advanced_tutorials/fraud_cheque_detection): Building an AI assistant that detects fraudulent scanned cheque images and generates explanations for the fraud classification, using a fine-tuned open-source LLM. - [Keras model and Sklearn Transformation Functions with Hopsworks Model Registry](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/advanced_tutorials/transformation_functions/keras): How to register Sklearn Transformation Functions and Keras model in the Hopsworks Model Registry, how to retrieve them and then use in training and inference pipelines. - [PyTorch model and Sklearn Transformation Functions with Hopsworks Model Registry](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/advanced_tutorials/transformation_functions/pytorch): How to register Sklearn Transformation Functions and PyTorch model in the Hopsworks Model Registry, how to retrieve them and then use in training and inference pipelines. - [Sklearn Transformation Functions With Hopsworks Model Registy](https://github.com/logicalclocks/hopsworks-tutorials/tree/master/advanced_tutorials/transformation_functions/sklearn): How to register sklearn.pipeline with transformation functions and classifier in Hopsworks Model Registry and use it in training and inference pipelines. 
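
Taken together, the files added below implement a single query path: embed the user question, retrieve and rerank PDF chunks from the feature view, then prompt the fine-tuned LLM with the reranked context. The following condensed sketch of what app.py and 3_inference_pipeline.ipynb do may help orient the reader; it assumes the documents feature view and the registered mistral_model produced by the pipelines below already exist in your Hopsworks project.

import hopsworks
from sentence_transformers import SentenceTransformer
from FlagEmbedding import FlagReranker
from functions.prompt_engineering import get_context_and_source
from functions.llm_chain import get_llm_chain
import config

project = hopsworks.login()
fs = project.get_feature_store()
mr = project.get_model_registry()

# Feature view holding the embedded PDF chunks (built in 1_feature_backfill.ipynb)
feature_view = fs.get_feature_view(name="documents", version=1)
feature_view.init_serving(1)

# Fine-tuned Mistral model registered by 2_training_pipeline.ipynb
saved_model_dir = mr.get_model(name="mistral_model", version=1).download()

sentence_transformer = SentenceTransformer(config.MODEL_SENTENCE_TRANSFORMER).to(config.DEVICE)
reranker = FlagReranker('BAAI/bge-reranker-large', use_fp16=True)
llm_chain = get_llm_chain(saved_model_dir)

user_query = "What are the best risk reporting practices?"
context, source = get_context_and_source(user_query, sentence_transformer, feature_view, reranker)
model_output = llm_chain.invoke(
    {"context": context, "question": user_query},
    {"configurable": {"session_id": "default"}},
)
print(model_output.split('### RESPONSE:\n')[-1] + source)
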
diff --git a/advanced_tutorials/llm_pdfs/1_feature_backfill.ipynb b/advanced_tutorials/llm_pdfs/1_feature_backfill.ipynb new file mode 100644 index 00000000..652fec9e --- /dev/null +++ b/advanced_tutorials/llm_pdfs/1_feature_backfill.ipynb @@ -0,0 +1,285 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "82622ee3", + "metadata": {}, + "source": [ + "## 📝 Imports " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ade7fe1f", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -r requirements.txt -q" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ab771e2", + "metadata": {}, + "outputs": [], + "source": [ + "import PyPDF2\n", + "import pandas as pd\n", + "from sentence_transformers import SentenceTransformer\n", + "\n", + "from functions.pdf_preprocess import (\n", + " download_files_to_folder, \n", + " process_pdf_file,\n", + ")\n", + "from functions.text_preprocess import process_text_data\n", + "import config\n", + "\n", + "import warnings\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "markdown", + "id": "7e8f1796", + "metadata": {}, + "source": [ + "## 💾 Download files from Google Drive " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea8c756e", + "metadata": {}, + "outputs": [], + "source": [ + "# Call the function to download files\n", + "new_files = download_files_to_folder(\n", + " config.FOLDER_ID, \n", + " config.DOWNLOAD_PATH,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "f783e27e", + "metadata": {}, + "source": [ + "## 🧬 Text Extraction " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b3b6715", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize an empty list\n", + "document_text = []\n", + "\n", + "for file in new_files:\n", + " process_pdf_file(\n", + " file, \n", + " document_text, \n", + " config.DOWNLOAD_PATH,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "348b723e", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a DataFrame\n", + "columns = [\"file_name\", \"file_link\", \"page_number\", \"text\"]\n", + "df_text = pd.DataFrame(\n", + " data=document_text,\n", + " columns=columns,\n", + ")\n", + "# Display the DataFrame\n", + "df_text" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62a70763", + "metadata": {}, + "outputs": [], + "source": [ + "# Process text data using the process_text_data function\n", + "df_text_processed = process_text_data(df_text)\n", + "\n", + "# Display the processed DataFrame\n", + "df_text_processed" + ] + }, + { + "cell_type": "markdown", + "id": "10f9ea36", + "metadata": {}, + "source": [ + "## ⚙️ Embeddings Creation " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9805c689", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the SentenceTransformer model\n", + "model = SentenceTransformer(\n", + " config.MODEL_SENTENCE_TRANSFORMER,\n", + ").to(config.DEVICE)\n", + "model.device" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1b7a89a", + "metadata": {}, + "outputs": [], + "source": [ + "# Generate embeddings for the 'text' column using the SentenceTransformer model\n", + "df_text_processed['embeddings'] = pd.Series(\n", + " model.encode(df_text_processed['text']).tolist(),\n", + ")\n", + "\n", + "# Create a new column 'context_id' with values ranging from 0 to the number of rows in the DataFrame\n", + "df_text_processed['context_id'] = 
[*range(df_text_processed.shape[0])]\n", + "\n", + "# Display the resulting DataFrame with the added 'embeddings' and 'context_id' columns\n", + "df_text_processed" + ] + }, + { + "cell_type": "markdown", + "id": "d2bced31", + "metadata": {}, + "source": [ + "## 🔮 Connecting to Hopsworks Feature Store " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7caf764d", + "metadata": {}, + "outputs": [], + "source": [ + "import hopsworks\n", + "\n", + "project = hopsworks.login()\n", + "\n", + "fs = project.get_feature_store() " + ] + }, + { + "cell_type": "markdown", + "id": "0ed9ac69", + "metadata": {}, + "source": [ + "## 🪄 Feature Group Creation " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f5e486b", + "metadata": {}, + "outputs": [], + "source": [ + "from hsfs import embedding\n", + "\n", + "# Create the Embedding Index\n", + "emb = embedding.EmbeddingIndex()\n", + "\n", + "emb.add_embedding(\n", + " \"embeddings\", \n", + " model.get_sentence_embedding_dimension(),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e32b548", + "metadata": {}, + "outputs": [], + "source": [ + "# Get or create the 'documents_fg' feature group\n", + "documents_fg = fs.get_or_create_feature_group(\n", + " name=\"documents_fg\",\n", + " embedding_index=emb,\n", + " primary_key=['context_id'],\n", + " version=1,\n", + " description='Information from various files, presenting details like file names, source links, and structured text excerpts from different pages and paragraphs.',\n", + " online_enabled=True,\n", + ")\n", + "\n", + "documents_fg.insert(df_text_processed)" + ] + }, + { + "cell_type": "markdown", + "id": "d39a9ed6", + "metadata": {}, + "source": [ + "## 🪄 Feature View Creation \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a7bc2f0", + "metadata": {}, + "outputs": [], + "source": [ + "# Get or create the 'documents' feature view\n", + "feature_view = fs.get_or_create_feature_view(\n", + " name=\"documents\",\n", + " version=1,\n", + " description='Chunked context for RAG system',\n", + " query=documents_fg.select([\"file_name\", \"file_link\", \"page_number\", \"paragraph\", \"text\"]),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "708b9a5f", + "metadata": {}, + "source": [ + "---" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/advanced_tutorials/llm_pdfs/1a_feature_pipeline.py b/advanced_tutorials/llm_pdfs/1a_feature_pipeline.py new file mode 100644 index 00000000..84c90c8d --- /dev/null +++ b/advanced_tutorials/llm_pdfs/1a_feature_pipeline.py @@ -0,0 +1,69 @@ +import PyPDF2 +import pandas as pd +from sentence_transformers import SentenceTransformer + +from functions.pdf_preprocess import download_files_to_folder, process_pdf_file +from functions.text_preprocess import process_text_data +import config + +import hopsworks + +def pipeline(): + # Call the function to download files + new_files = download_files_to_folder( + config.FOLDER_ID, + config.DOWNLOAD_PATH, + ) + + if len(new_files) == 0: + print('⛳️ Your folder is up to date!') + return + + # Initialize an empty list + 
document_text = []
+
+    for file in new_files:
+        process_pdf_file(
+            file, 
+            document_text, 
+            config.DOWNLOAD_PATH,
+        )
+
+    # Create a DataFrame
+    # Include 'file_link': process_pdf_file appends four values per page
+    # (file name, file link, page number, text), so four columns are required
+    columns = ["file_name", "file_link", "page_number", "text"]
+    df_text = pd.DataFrame(
+        data=document_text,
+        columns=columns,
+    )
+
+    # Process text data using the process_text_data function
+    df_text_processed = process_text_data(df_text)
+
+    # Load the SentenceTransformer model
+    model = SentenceTransformer(
+        config.MODEL_SENTENCE_TRANSFORMER,
+    ).to(config.DEVICE)
+
+    # Generate embeddings for the 'text' column using the SentenceTransformer model
+    df_text_processed['embeddings'] = pd.Series(
+        model.encode(df_text_processed['text']).tolist(),
+    )
+
+    # Create a new column 'context_id' with values ranging from 0 to the number of rows in the DataFrame
+    df_text_processed['context_id'] = [*range(df_text_processed.shape[0])]
+
+    project = hopsworks.login()
+
+    fs = project.get_feature_store()
+
+    documents_fg = fs.get_feature_group(
+        name="documents_fg",
+        version=1,
+    )
+
+    documents_fg.insert(df_text_processed)
+    return
+
+if __name__ == '__main__':
+    pipeline()
diff --git a/advanced_tutorials/llm_pdfs/1b_dataset_generation.ipynb b/advanced_tutorials/llm_pdfs/1b_dataset_generation.ipynb
new file mode 100644
index 00000000..d2fd826e
--- /dev/null
+++ b/advanced_tutorials/llm_pdfs/1b_dataset_generation.ipynb
@@ -0,0 +1,290 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "0279e128",
+   "metadata": {},
+   "source": [
+    "## 📝 Imports "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e8efd4e5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from openai import OpenAI\n",
+    "import getpass\n",
+    "import json\n",
+    "import pandas as pd\n",
+    "import json_repair\n",
+    "from tqdm import tqdm"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4d389343",
+   "metadata": {},
+   "source": [
+    "## ⚙️ Settings "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "270b84fd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"OPENAI_API_KEY\") or getpass.getpass('🔑 Enter your OpenAI API key: ')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d58f52ef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "client = OpenAI(\n",
+    "    api_key=os.environ[\"OPENAI_API_KEY\"],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c16fbf15",
+   "metadata": {},
+   "source": [
+    "## 🔮 Connecting to Hopsworks Feature Store "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3a8916cf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import hopsworks\n",
+    "\n",
+    "project = hopsworks.login()\n",
+    "\n",
+    "fs = project.get_feature_store() "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "32f2bbae",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Retrieve the 'documents' feature view\n",
+    "feature_view = fs.get_feature_view(\n",
+    "    name='documents',\n",
+    "    version=1,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f60460ab",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize batch scoring for feature view\n",
+    "feature_view.init_batch_scoring()\n",
+    "\n",
+    "# Get batch data from the feature view\n",
+    "data = feature_view.get_batch_data()\n",
+    "\n",
+    "# Keep only chunks longer than 2500 characters; shorter chunks rarely\n",
+    "# contain enough content to support three meaningful question-answer pairs\n",
+    "data_filtered = data[data.text.str.len() > 2500]\n",
+    "\n",
+    "# Display the filtered data\n",
+    "data_filtered"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d3d2fcb2",
+   "metadata": {},
+   "source": [
+    "## 🪄 Dataset Generation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "80d80597",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def generate_questions(context):\n",
+    "\n",
+    "    instruction = \"\"\"\n",
+    "    The given text is the result of the text extraction from the PDF files. \n",
+    "    Generate 3 meaningful questions on the text and the respective answers.\n",
+    "    Reply strictly in the JSON format:\n",
+    "    {\n",
+    "        \"questions\": [\"question1\", \"question2\", \"question3\"],\n",
+    "        \"answers\": [\"answer1\", \"answer2\", \"answer3\"]\n",
+    "    }\n",
+    "\n",
+    "    Ensure that the lists of questions and answers are complete and properly formatted. \n",
+    "    DO NOT include any additional information or characters outside the specified JSON format. \n",
+    "    The response must consist only of the requested JSON structure. \n",
+    "    If the generated content does not meet the specified format, please make the necessary adjustments to ensure compliance.\"\"\"\n",
+    "\n",
+    "    prompt = f\"\\nContext: {context}\\nQuestion: {instruction}\"\n",
+    "\n",
+    "    # Request a chat completion\n",
+    "    completion = client.chat.completions.create(\n",
+    "        model=\"gpt-3.5-turbo\",\n",
+    "        # Pre-define conversation messages for the possible roles \n",
+    "        messages=[\n",
+    "            {\"role\": \"user\", \"content\": prompt},\n",
+    "        ]\n",
+    "    )\n",
+    "    # Parse the reply, repairing minor JSON formatting issues if present\n",
+    "    response = json_repair.loads(completion.choices[0].message.content)\n",
+    "    \n",
+    "    response['context'] = context\n",
+    "    \n",
+    "    return response\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8d3642f6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Generate question-answer pairs\n",
+    "generated_questions = [\n",
+    "    generate_questions(text)\n",
+    "    for text \n",
+    "    in tqdm(data_filtered['text'])\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d1f1cc46",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a DataFrame from the generated_questions\n",
+    "df = pd.DataFrame(generated_questions)\n",
+    "\n",
+    "# Display the first few rows of the DataFrame\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7f906442",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Explode the DataFrame to expand lists in specified columns ('questions' and 'answers')\n",
+    "df_expanded = df.explode(['questions', 'answers']).reset_index(drop=True)\n",
+    "\n",
+    "# Reset the index to create a new default integer index\n",
+    "df_expanded.reset_index(inplace=True)\n",
+    "\n",
+    "# Rename 'index' to 'record_id' and 'answers' to 'responses', so the column\n",
+    "# names match the 'cqa' feature view selection and the training pipeline\n",
+    "df_expanded.rename(columns={'index': 'record_id', 'answers': 'responses'}, inplace=True)\n",
+    "\n",
+    "# Display the expanded DataFrame\n",
+    "df_expanded"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4fe81b9f",
+   "metadata": {},
+   "source": [
+    "## 🪄 CQA Feature Group Creation "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0a84b387",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get or create the 'cqa_fg' feature group\n",
+    "cqa_fg = fs.get_or_create_feature_group(\n",
+    "    name=\"cqa_fg\",\n",
+    "    version=1,\n",
+    "    description='Context-Question-Response Data',\n",
+    "    primary_key=['record_id'],\n",
+    ")\n",
+    "\n",
+    "cqa_fg.insert(df_expanded)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2ed251e4",
+   "metadata": {},
+   "source": [
+    "## 🪄 CQA Feature View Creation "
+   ]
+  },
+  {
"cell_type": "code", + "execution_count": null, + "id": "ed7146f7", + "metadata": {}, + "outputs": [], + "source": [ + "# Get or create the 'cqa' feature view\n", + "feature_view = fs.get_or_create_feature_view(\n", + " name=\"cqa\",\n", + " version=1,\n", + " query=cqa_fg.select([\"context\", \"questions\", \"responses\"]),\n", + " description='Context-Question-Response pairs for model fine-tuning',\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "02f6f11a", + "metadata": {}, + "source": [ + "---" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/advanced_tutorials/llm_pdfs/2_training_pipeline.ipynb b/advanced_tutorials/llm_pdfs/2_training_pipeline.ipynb new file mode 100644 index 00000000..4177c447 --- /dev/null +++ b/advanced_tutorials/llm_pdfs/2_training_pipeline.ipynb @@ -0,0 +1,376 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cc6015d0", + "metadata": {}, + "source": [ + "## 📝 Imports " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9ba30ecb", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from datasets import Dataset\n", + "from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\n", + "from peft import LoraConfig\n", + "from transformers import TrainingArguments\n", + "from trl import SFTTrainer\n", + "\n", + "from functions.prompt_engineering import generate_prompt\n", + "import config" + ] + }, + { + "cell_type": "markdown", + "id": "1270e5f8", + "metadata": {}, + "source": [ + "## 🔮 Connecting to Hopsworks Feature Store " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e517b1cd", + "metadata": {}, + "outputs": [], + "source": [ + "import hopsworks\n", + "\n", + "project = hopsworks.login()\n", + "\n", + "fs = project.get_feature_store() \n", + "mr = project.get_model_registry()" + ] + }, + { + "cell_type": "markdown", + "id": "86043802", + "metadata": {}, + "source": [ + "## 🪝 Feature View Retrieval " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4007db72", + "metadata": {}, + "outputs": [], + "source": [ + "# Retrieve the 'cqa' feature view\n", + "feature_view = fs.get_feature_view(\n", + " name='cqa',\n", + " version=1,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "83b00e9e", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize batch scoring for the feature view\n", + "feature_view.init_batch_scoring()\n", + "\n", + "# Get batch data from the feature view\n", + "data = feature_view.get_batch_data()\n", + "\n", + "# Display the first three rows of the batch data\n", + "data.head(3)" + ] + }, + { + "cell_type": "markdown", + "id": "64dab547", + "metadata": {}, + "source": [ + "## 🗄️ Dataset Creation " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "594f4e1a", + "metadata": {}, + "outputs": [], + "source": [ + "# Generate prompts for each record in the DataFrame using context, questions, and responses\n", + "prompts = data.apply(\n", + " lambda record: generate_prompt(record['context'], record['questions']) + f'\\n### RESPONSE:\\n{record[\"responses\"]}', \n", + " 
axis=1,\n", + ").tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6bd1e493", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a dataset from a dictionary with a single column named \"text\" containing prompts\n", + "dataset = Dataset.from_dict({\n", + " \"text\": prompts,\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0756b8e7", + "metadata": {}, + "outputs": [], + "source": [ + "print(dataset[10]['text'])" + ] + }, + { + "cell_type": "markdown", + "id": "bc161e58", + "metadata": {}, + "source": [ + "## ⬇️ Model Loading " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62477b0e", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the tokenizer for Mistral-7B-Instruct model\n", + "tokenizer = AutoTokenizer.from_pretrained(\n", + " config.MODEL_ID,\n", + ")\n", + "\n", + "# Set the pad token to the unknown token to handle padding\n", + "tokenizer.pad_token = tokenizer.unk_token\n", + "\n", + "# Set the padding side to \"right\" to prevent warnings during tokenization\n", + "tokenizer.padding_side = \"right\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0153d320", + "metadata": {}, + "outputs": [], + "source": [ + "# BitsAndBytesConfig int-4 config\n", + "bnb_config = BitsAndBytesConfig(\n", + " load_in_4bit=True, \n", + " bnb_4bit_use_double_quant=True, \n", + " bnb_4bit_quant_type=\"nf4\", \n", + " bnb_4bit_compute_dtype=torch.bfloat16,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8a4d9ee", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the Mistral-7B-Instruct model with quantization configuration\n", + "model = AutoModelForCausalLM.from_pretrained(\n", + " config.MODEL_ID,\n", + " device_map=\"auto\",\n", + " quantization_config=bnb_config,\n", + ")\n", + "\n", + "# Configure the pad token ID in the model to match the tokenizer's pad token ID\n", + "model.config.pad_token_id = tokenizer.pad_token_id" + ] + }, + { + "cell_type": "markdown", + "id": "93c7ba90", + "metadata": {}, + "source": [ + "## ⚙️ Configuration " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18d24668", + "metadata": {}, + "outputs": [], + "source": [ + "peft_config = LoraConfig(\n", + " lora_alpha=64,\n", + " lora_dropout=0.1,\n", + " r=32,\n", + " bias=\"none\",\n", + " task_type=\"CAUSAL_LM\", \n", + " target_modules=[\n", + " \"q_proj\",\n", + " \"k_proj\",\n", + " \"v_proj\",\n", + " \"o_proj\",\n", + " \"gate_proj\",\n", + " \"up_proj\",\n", + " \"down_proj\",\n", + " \"lm_head\",\n", + " ],\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ebade183", + "metadata": {}, + "outputs": [], + "source": [ + "training_arguments = TrainingArguments(\n", + " output_dir=\"mistral7b_finetuned\", # directory to save and repository id\n", + " num_train_epochs=3, # number of training epochs\n", + " per_device_train_batch_size=3, # batch size per device during training\n", + " gradient_accumulation_steps=2, # number of steps before performing a backward/update pass\n", + " gradient_checkpointing=True, # use gradient checkpointing to save memory\n", + " optim=\"adamw_torch_fused\", # use fused adamw optimizer\n", + " logging_steps=10, # log every 10 steps\n", + " save_strategy=\"epoch\", # save checkpoint every epoch\n", + " learning_rate=2e-4, # learning rate, based on QLoRA paper\n", + " bf16=True, # use bfloat16 precision\n", + " tf32=True, # use tf32 precision\n", + " 
max_grad_norm=0.3, # max gradient norm based on QLoRA paper\n", + " warmup_ratio=0.03, # warmup ratio based on QLoRA paper\n", + " lr_scheduler_type=\"constant\", # use constant learning rate scheduler\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "36e79a43", + "metadata": {}, + "source": [ + "## 🏃🏻‍♂️ Training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13af595e", + "metadata": {}, + "outputs": [], + "source": [ + "# Create the Supervised Fine-tuning Trainer\n", + "trainer = SFTTrainer(\n", + " model=model,\n", + " train_dataset=dataset,\n", + " peft_config=peft_config,\n", + " max_seq_length=4096,\n", + " tokenizer=tokenizer,\n", + " args=training_arguments,\n", + " dataset_text_field='text',\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2c9a416", + "metadata": {}, + "outputs": [], + "source": [ + "# Train the model\n", + "trainer.train()" + ] + }, + { + "cell_type": "markdown", + "id": "85e840c2", + "metadata": {}, + "source": [ + "## 💾 Saving Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75940ca2", + "metadata": {}, + "outputs": [], + "source": [ + "# Save the trained model\n", + "trainer.save_model()" + ] + }, + { + "cell_type": "markdown", + "id": "bfaae161", + "metadata": {}, + "source": [ + "## 🗄️ Model Registry" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2ff14642", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a Python model in the model registry\n", + "model_llm = mr.python.create_model(\n", + " name=\"mistral_model\", \n", + " description=\"Mistral Fine-tuned Model\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fbce3ba9", + "metadata": {}, + "outputs": [], + "source": [ + "# Save the model directory with the fine-tuned model to the model registry\n", + "model_llm.save(training_arguments.output_dir)" + ] + }, + { + "cell_type": "markdown", + "id": "ecc9b1d0", + "metadata": {}, + "source": [ + "---" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/advanced_tutorials/llm_pdfs/3_inference_pipeline.ipynb b/advanced_tutorials/llm_pdfs/3_inference_pipeline.ipynb new file mode 100644 index 00000000..9e2f00dd --- /dev/null +++ b/advanced_tutorials/llm_pdfs/3_inference_pipeline.ipynb @@ -0,0 +1,366 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "be60a8be", + "metadata": {}, + "source": [ + "## 📝 Imports " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f95e1e54", + "metadata": {}, + "outputs": [], + "source": [ + "from sentence_transformers import SentenceTransformer\n", + "from FlagEmbedding import FlagReranker\n", + "\n", + "from functions.llm_chain import get_llm_chain\n", + "from functions.prompt_engineering import get_context_and_source\n", + "import config\n", + "\n", + "import warnings\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "markdown", + "id": "3f3a2715", + "metadata": {}, + "source": [ + "## 🔮 Connecting to Hopsworks Feature Store " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d292081d", + "metadata": 
{}, + "outputs": [], + "source": [ + "import hopsworks\n", + "\n", + "project = hopsworks.login()\n", + "\n", + "fs = project.get_feature_store()\n", + "mr = project.get_model_registry()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "733aa65d", + "metadata": {}, + "outputs": [], + "source": [ + "# Retrieve the 'documents' feature view\n", + "feature_view = fs.get_feature_view(\n", + " name=\"documents\", \n", + " version=1,\n", + ") \n", + "\n", + "# Initialize serving\n", + "feature_view.init_serving(1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1e562e9", + "metadata": {}, + "outputs": [], + "source": [ + "# Get the Mistral model from Model Registry\n", + "mistral_model = mr.get_model(\n", + " name=\"mistral_model\",\n", + " version=1,\n", + ")\n", + "\n", + "# Download the Mistral model files to a local directory\n", + "saved_model_dir = mistral_model.download()" + ] + }, + { + "cell_type": "markdown", + "id": "0235999b", + "metadata": {}, + "source": [ + "## ⛓️ LLM Chain " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bc70c06b", + "metadata": {}, + "outputs": [], + "source": [ + "llm_chain = get_llm_chain(saved_model_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e6b5249d", + "metadata": {}, + "outputs": [], + "source": [ + "session_id = {\n", + " \"configurable\": {\"session_id\": \"default\"}\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "d9377ab5", + "metadata": {}, + "source": [ + "## 🗄️ Sentence Transformer Loading " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89b5ce52", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the Sentence Transformer\n", + "sentence_transformer = SentenceTransformer(\n", + " config.MODEL_SENTENCE_TRANSFORMER,\n", + ").to(config.DEVICE)" + ] + }, + { + "cell_type": "markdown", + "id": "40126e56", + "metadata": {}, + "source": [ + "## 🧬 Reranking " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72cfcbd2", + "metadata": {}, + "outputs": [], + "source": [ + "def get_reranker():\n", + " reranker = FlagReranker(\n", + " 'BAAI/bge-reranker-large', \n", + " use_fp16=True,\n", + " ) \n", + " return reranker" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "491e3847", + "metadata": {}, + "outputs": [], + "source": [ + "# Retrieve a reranker\n", + "reranker = get_reranker()" + ] + }, + { + "cell_type": "markdown", + "id": "c739dd2d", + "metadata": {}, + "source": [ + "## 🗄️ Context Retrieval " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "987d3108", + "metadata": {}, + "outputs": [], + "source": [ + "# User Question Example\n", + "user_input = 'What are the best risk reporting practices?' 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02199904", + "metadata": {}, + "outputs": [], + "source": [ + "# Retrieve reranked context and source\n", + "context, source = get_context_and_source(\n", + " user_input, \n", + " sentence_transformer,\n", + " feature_view, \n", + " reranker,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "decf4d3d", + "metadata": {}, + "source": [ + "## 🚀 Model Inference " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "622bfb9a", + "metadata": {}, + "outputs": [], + "source": [ + "# Generate model response\n", + "model_output = llm_chain.invoke({\n", + " \"context\": context, \n", + " \"question\": user_input,\n", + " },\n", + " session_id,\n", + ")\n", + "\n", + "print(model_output.split('### RESPONSE:\\n')[-1] + source)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5711145e", + "metadata": {}, + "outputs": [], + "source": [ + "user_input = 'What is Adaptability?'\n", + "\n", + "context, source = get_context_and_source(\n", + " user_input, \n", + " sentence_transformer,\n", + " feature_view, \n", + " reranker,\n", + ")\n", + "\n", + "model_output = llm_chain.invoke({\n", + " \"context\": context, \n", + " \"question\": user_input,\n", + " },\n", + " session_id,\n", + ")\n", + "\n", + "print(model_output.split('### RESPONSE:\\n')[-1] + source)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "044e9b15", + "metadata": {}, + "outputs": [], + "source": [ + "user_input = 'What is a risk management?'\n", + "\n", + "context, source = get_context_and_source(\n", + " user_input, \n", + " sentence_transformer,\n", + " feature_view, \n", + " reranker,\n", + ")\n", + "\n", + "model_output = llm_chain.invoke({\n", + " \"context\": context, \n", + " \"question\": user_input,\n", + " },\n", + " session_id,\n", + ")\n", + "\n", + "print(model_output.split('### RESPONSE:\\n')[-1] + source)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02be4b75", + "metadata": {}, + "outputs": [], + "source": [ + "user_input = 'What is the purpose of maintaining an up-to-date data-flow diagram?'\n", + "\n", + "context, source = get_context_and_source(\n", + " user_input, \n", + " sentence_transformer,\n", + " feature_view, \n", + " reranker,\n", + ")\n", + "\n", + "model_output = llm_chain.invoke({\n", + " \"context\": context, \n", + " \"question\": user_input,\n", + " },\n", + " session_id,\n", + ")\n", + "\n", + "print(model_output.split('### RESPONSE:\\n')[-1] + source)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43a409ea", + "metadata": {}, + "outputs": [], + "source": [ + "user_input = 'Why are security and privacy controls important?'\n", + "\n", + "context, source = get_context_and_source(\n", + " user_input, \n", + " sentence_transformer,\n", + " feature_view, \n", + " reranker,\n", + ")\n", + "\n", + "model_output = llm_chain.invoke({\n", + " \"context\": context, \n", + " \"question\": user_input,\n", + " },\n", + " session_id,\n", + ")\n", + "\n", + "print(model_output.split('### RESPONSE:\\n')[-1] + source)" + ] + }, + { + "cell_type": "markdown", + "id": "108ca3db", + "metadata": {}, + "source": [ + "---" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/advanced_tutorials/llm_pdfs/app.py b/advanced_tutorials/llm_pdfs/app.py new file mode 100644 index 00000000..4db37f4c --- /dev/null +++ b/advanced_tutorials/llm_pdfs/app.py @@ -0,0 +1,127 @@ +import streamlit as st +import hopsworks +from sentence_transformers import SentenceTransformer +from FlagEmbedding import FlagReranker +from functions.prompt_engineering import get_context_and_source +from functions.llm_chain import get_llm_chain +import config +import warnings +warnings.filterwarnings('ignore') + +st.title("💬 AI assistant") + +@st.cache_resource() +def connect_to_hopsworks(): + # Initialize Hopsworks feature store connection + project = hopsworks.login() + fs = project.get_feature_store() + mr = project.get_model_registry() + + # Retrieve the 'documents' feature view + feature_view = fs.get_feature_view( + name="documents", + version=1, + ) + + # Initialize serving + feature_view.init_serving(1) + + # Get the Mistral model from Model Registry + mistral_model = mr.get_model( + name="mistral_model", + version=1, + ) + + # Download the Mistral model files to a local directory + saved_model_dir = mistral_model.download() + + return feature_view, saved_model_dir + + +@st.cache_resource() +def get_models(saved_model_dir): + + # Load the Sentence Transformer + sentence_transformer = SentenceTransformer( + config.MODEL_SENTENCE_TRANSFORMER, + ).to(config.DEVICE) + + llm_chain = get_llm_chain(saved_model_dir) + + return sentence_transformer, llm_chain + + +@st.cache_resource() +def get_reranker(): + reranker = FlagReranker( + 'BAAI/bge-reranker-large', + use_fp16=True, + ) + return reranker + + +def predict(user_query, sentence_transformer, feature_view, reranker, llm_chain): + + st.write('⚙️ Generating Response...') + + session_id = { + "configurable": {"session_id": "default"} + } + + # Retrieve reranked context and source + context, source = get_context_and_source( + user_query, + sentence_transformer, + feature_view, + reranker, + ) + + # Generate model response + model_output = llm_chain.invoke({ + "context": context, + "question": user_query, + }, + session_id, + ) + + return model_output.split('### RESPONSE:\n')[-1] + source + + +# Retrieve the feature view and the saved_model_dir +feature_view, saved_model_dir = connect_to_hopsworks() + +# Load and retrieve the sentence_transformer and llm_chain +sentence_transformer, llm_chain = get_models(saved_model_dir) + +# Retrieve the reranking model +reranker = get_reranker() + +# Initialize chat history +if "messages" not in st.session_state: + st.session_state.messages = [] + +# Display chat messages from history on app rerun +for message in st.session_state.messages: + with st.chat_message(message["role"]): + st.markdown(message["content"]) + +# React to user input +if user_query := st.chat_input("How can I help you?"): + # Display user message in chat message container + st.chat_message("user").markdown(user_query) + # Add user message to chat history + st.session_state.messages.append({"role": "user", "content": user_query}) + + response = predict( + user_query, + sentence_transformer, + feature_view, + reranker, + llm_chain, + ) + + # Display assistant response in chat message container + with st.chat_message("assistant"): + st.markdown(response) + # Add assistant response to chat history + st.session_state.messages.append({"role": "assistant", "content": response}) diff --git 
a/advanced_tutorials/llm_pdfs/config.py b/advanced_tutorials/llm_pdfs/config.py new file mode 100644 index 00000000..1b1ee098 --- /dev/null +++ b/advanced_tutorials/llm_pdfs/config.py @@ -0,0 +1,16 @@ +import torch + +# The unique identifier for the Google Drive folder where your PDF files are stored +FOLDER_ID = '{YOUR_FOLDER_ID}' + +# The local directory path where downloaded data will be saved. +DOWNLOAD_PATH = "data" + +# The identifier of the pre-trained sentence transformer model for producing sentence embeddings. +MODEL_SENTENCE_TRANSFORMER = 'all-MiniLM-L6-v2' + +# The computing device to be used for model inference and training. +DEVICE = "cuda" if torch.cuda.is_available() else "cpu" + +# The identifier for the Mistral-7B-Instruct model +MODEL_ID = 'mistralai/Mistral-7B-Instruct-v0.2' diff --git a/advanced_tutorials/llm_pdfs/functions/connect_to_google_drive.py b/advanced_tutorials/llm_pdfs/functions/connect_to_google_drive.py new file mode 100644 index 00000000..a20a164c --- /dev/null +++ b/advanced_tutorials/llm_pdfs/functions/connect_to_google_drive.py @@ -0,0 +1,19 @@ +from apiclient import discovery +from httplib2 import Http +from oauth2client import client, file, tools + + +# Define path variables +credentials_file_path = '../credentials/credentials.json' +clientsecret_file_path = '../credentials/client_secret.json' + +# Define API scope +SCOPE = 'https://www.googleapis.com/auth/drive' + +# Define store +store = file.Storage(credentials_file_path) +credentials = store.get() +# Get access token +if not credentials or credentials.invalid: + flow = client.flow_from_clientsecrets(clientsecret_file_path, SCOPE) + credentials = tools.run_flow(flow, store) \ No newline at end of file diff --git a/advanced_tutorials/llm_pdfs/functions/llm_chain.py b/advanced_tutorials/llm_pdfs/functions/llm_chain.py new file mode 100644 index 00000000..46e56952 --- /dev/null +++ b/advanced_tutorials/llm_pdfs/functions/llm_chain.py @@ -0,0 +1,133 @@ +import os +import getpass +import torch +import transformers +from peft import AutoPeftModelForCausalLM +from transformers import AutoTokenizer +from langchain.llms import HuggingFacePipeline +from langchain.prompts import PromptTemplate +from langchain_core.output_parsers import StrOutputParser +from langchain_core.runnables.history import RunnableWithMessageHistory +from langchain_community.chat_message_histories import ChatMessageHistory +from langchain_core.chat_history import BaseChatMessageHistory + + +def load_llm(model_dir) -> tuple: + """ + Load the LLM and its corresponding tokenizer. + + Args: + model_dir (str): Path to the pre-trained fine-tuned model. + + Returns: + tuple: A tuple containing the tokenizer and loaded model. 
+ """ + # Setup the HuggingFace API Key + os.environ["HF_API_KEY"] = os.getenv("HF_API_KEY") or getpass.getpass('🔑 Enter your HuggingFace API key: ') + + # Load a model from the saved model directory + model_llm = AutoPeftModelForCausalLM.from_pretrained( + model_dir, + device_map="auto", + torch_dtype=torch.float16, + token=os.environ["HF_API_KEY"], + ) + + # Load the tokenizer from the saved model directory + tokenizer = AutoTokenizer.from_pretrained( + model_dir, + token=os.environ["HF_API_KEY"], + ) + + # Set the pad token to the end-of-sequence token + tokenizer.pad_token = tokenizer.eos_token + + # Set the padding side to "right" to remove warnings + tokenizer.padding_side = "right" + + # Print device + print(f'⛳️ Device: {model_llm.device}') + return tokenizer, model_llm + + +def get_prompt_template(): + # Define a template for generating prompts + prompt_template = """ + [INST] + Instruction: Prioritize brevity and clarity in responses. + Avoid unnecessary repetition and keep answers concise, adhering to a maximum of 750 characters. + Eliminate redundant phrases and sentences. + If details are repeated, provide them only once for better readability. + Focus on delivering key information without unnecessary repetition. + If a concept is already conveyed, there's no need to restate it. Ensure responses remain clear and to the point. + Make sure you do not repeat any sentences in your answer. + [/INST] + + Previous conversation: + {chat_history} + + ### CONTEXT: + + {context} + + ### QUESTION: + [INST]{question}[/INST]""" + return prompt_template + + +def get_llm_chain(model_dir): + """ + Initializes and returns a language model chain for text generation using Hugging Face's transformers library. + + Parameters: + - model_dir (str): Path to the pre-trained fine-tuned model. + + Returns: + - LLMChain: A configured chain consisting of a Hugging Face pipeline for text generation and prompt handling. 
+ """ + + def get_global_history(session_id: str) -> BaseChatMessageHistory: + return global_chat_history + + # Load LLM and its corresponding tokenizer + tokenizer, model = load_llm(model_dir) + + # Create a text generation pipeline using the loaded model and tokenizer + text_generation_pipeline = transformers.pipeline( + model=model, # The pre-trained language model for text generation + tokenizer=tokenizer, # The tokenizer corresponding to the language model + task="text-generation", # Specify the task as text generation + temperature=0.2, # Controls the randomness of the generation (higher values for more randomness) + repetition_penalty=1.5, # Controls the penalty for repeating tokens in generated text + return_full_text=True, # Return the full generated text instead of just the generated tokens + max_new_tokens=750, # Limit the maximum number of newly generated tokens + pad_token_id=tokenizer.eos_token_id, # Use the end-of-sequence token as the padding token + do_sample=True, # Enable sampling during text generation + ) + + # Create a Hugging Face pipeline for Mistral LLM using the text generation pipeline + mistral_llm = HuggingFacePipeline( + pipeline=text_generation_pipeline, + ) + + # Create prompt from prompt template + prompt = PromptTemplate( + input_variables=["context", "question", "chat_history"], + template=get_prompt_template(), + ) + + # Create the runnable sequence + runnable = prompt | mistral_llm | StrOutputParser() + + # Initialize a global chat history (shared for all invocations) + global_chat_history = ChatMessageHistory() + + # Create the RunnableWithMessageHistory using the global history + llm_chain = RunnableWithMessageHistory( + runnable, + get_global_history, + input_messages_key="question", + history_messages_key="chat_history", + ) + + return llm_chain diff --git a/advanced_tutorials/llm_pdfs/functions/pdf_preprocess.py b/advanced_tutorials/llm_pdfs/functions/pdf_preprocess.py new file mode 100644 index 00000000..495207df --- /dev/null +++ b/advanced_tutorials/llm_pdfs/functions/pdf_preprocess.py @@ -0,0 +1,96 @@ +from pydrive.auth import GoogleAuth +from pydrive.drive import GoogleDrive +import PyPDF2 +import os +from typing import List, Dict, Union + +def download_files_to_folder(folder_id: str, download_path: str) -> List: + """ + Download files from a specified Google Drive folder to a local folder. + + Parameters: + - folder_id (str): The ID of the Google Drive folder. + - download_path (str): The local folder path where files will be downloaded. + + Returns: + - List: A list containing information about newly downloaded files. 
+ """ + # Authenticate with Google Drive + gauth = GoogleAuth() + gauth.LoadCredentialsFile("credentials/credentials.json") + + if gauth.credentials is None: + gauth.LocalWebserverAuth() + elif gauth.access_token_expired: + gauth.Refresh() + else: + # Initialize the saved creds + gauth.Authorize() + + # Save the current credentials to a file + gauth.SaveCredentialsFile("credentials/credentials.json") + + drive = GoogleDrive(gauth) + + # Create the local folder if it doesn't exist + if not os.path.exists(download_path): + os.makedirs(download_path) + + # List files in the specified Google Drive folder + file_list = drive.ListFile({'q': f"'{folder_id}' in parents and trashed=false"}).GetList() + + # Initialize a list to store information about new files + new_files = [] + print('⛳️ Loading...') + + # Iterate through each file in the list + for file in file_list: + # Check if the file already exists locally + local_file_path = os.path.join(download_path, file["title"]) + + if not os.path.isfile(local_file_path): + # Download the file content and save it to the local folder + file.GetContentFile(local_file_path) + + # Append information about the downloaded file to the list + new_files.append(file) + + # Print the list of newly downloaded files + if len(new_files) == 0: + print("⛳️ There are no new files") + return new_files + + print("⛳️ Newly downloaded files:") + for file in new_files: + print("title: %s, id: %s" % (file["title"], file["id"])) + + return new_files + + +def process_pdf_file(file_info: Dict, + document_text: List, + pdfs_path: str = 'data/') -> List: + """ + Process content of a PDF file and append information to the document_text list. + + Parameters: + - file_info (Dict): Information about the PDF file. + - document_text (List): List containing document information. + - pdfs_path (str): Path to the folder containing PDF files (default is 'data/'). + + Returns: + - List: Updated document_text list. + """ + file_title = file_info["title"] + + if file_title.split('.')[-1] == 'pdf': + print(f'⛳️ File Name: {file_title}') + + pdf_path = os.path.join(pdfs_path, file_title) + pdf_reader = PyPDF2.PdfReader(pdf_path) + pages_amount = len(pdf_reader.pages) + print(f'Amount of pages: {pages_amount}') + + for i, page in enumerate(pdf_reader.pages): + document_text.append([file_title, file_info['embedLink'], i+1, page.extract_text()]) + return document_text diff --git a/advanced_tutorials/llm_pdfs/functions/prompt_engineering.py b/advanced_tutorials/llm_pdfs/functions/prompt_engineering.py new file mode 100644 index 00000000..a4a0b979 --- /dev/null +++ b/advanced_tutorials/llm_pdfs/functions/prompt_engineering.py @@ -0,0 +1,151 @@ +from typing import List, Tuple +from sentence_transformers import SentenceTransformer + +def get_source(neighbors: List[Tuple[str, str, int, int]]) -> str: + """ + Generates a formatted string for the sources of the provided context. + + Args: + neighbors (List[Tuple[str, str, int, int]]): List of tuples representing document information. + + Returns: + str: Formatted string containing document names, links, pages, and paragraphs. + """ + return '\n\nReferences:\n' + '\n'.join( + [ + f' - {neighbor[0]}({neighbor[1]}): Page: {neighbor[2]}, Paragraph: {neighbor[3]}' + for neighbor + in neighbors + ] + ) + +def get_context(neighbors: List[Tuple[str]]) -> str: + """ + Generates a formatted string for the context based on the provided neighbors. + + Args: + neighbors (List[Tuple[str]]): List of tuples representing context information. 
+ + Returns: + str: Formatted string containing context information. + """ + return '\n\n'.join([neighbor[-1] for neighbor in neighbors]) + + +def generate_prompt(context: str, question: str) -> str: + """ + Generates a prompt for the AI assistant based on context and question. + + Args: + context (str): Formatted string containing context information. + question (str): The question to be included in the prompt. + + Returns: + str: Formatted prompt for the AI assistant. + """ + prompt_template = """ +[INST] +Instruction: You are an AI assistant specialized in regulatory documents. +Your role is to provide accurate and informative answers based on the given context. +[/INST] + +### CONTEXT: + +{context} + +### QUESTION: +[INST]{question}[/INST] + """ + + return prompt_template.format( + context=context, + question=question, + ) + + +def get_neighbors(query: str, sentence_transformer: SentenceTransformer, feature_view, k: int = 10) -> List[Tuple[str, float]]: + """ + Get the k closest neighbors for a given query using sentence embeddings. + + Parameters: + - query (str): The input query string. + - sentence_transformer (SentenceTransformer): The sentence transformer model. + - feature_view (FeatureView): The feature view for retrieving neighbors. + - k (int, optional): Number of neighbors to retrieve. Default is 10. + + Returns: + - List[Tuple[str, float]]: A list of tuples containing the neighbor context. + """ + question_embedding = sentence_transformer.encode(query) + + # Retrieve closest neighbors + neighbors = feature_view.find_neighbors( + question_embedding, + k=k, + ) + + return neighbors + + +def rerank(query: str, neighbors: List[str], reranker, k: int = 3) -> List[str]: + """ + Rerank a list of neighbors based on a reranking model. + + Parameters: + - query (str): The input query string. + - neighbors (List[str]): List of neighbor contexts. + - reranker (Reranker): The reranking model. + - k (int, optional): Number of top-ranked neighbors to return. Default is 3. + + Returns: + - List[str]: The top-ranked neighbor contexts after reranking. + """ + # Compute scores for each context using the reranker + scores = [reranker.compute_score([query, context[-1]]) for context in neighbors] + + combined_data = [*zip(scores, neighbors)] + + # Sort contexts based on the scores in descending order + sorted_data = sorted(combined_data, key=lambda x: x[0], reverse=True) + + # Return the top-k ranked contexts + return [context for score, context in sorted_data][:k] + + +def get_context_and_source(user_query: str, sentence_transformer: SentenceTransformer, + feature_view, reranker) -> Tuple[str, str]: + """ + Retrieve context and source based on user query using a combination of embedding, feature view, and reranking. + + Parameters: + - user_query (str): The user's input query string. + - sentence_transformer (SentenceTransformer): The sentence transformer model. + - feature_view (FeatureView): The feature view for retrieving neighbors. + - reranker (Reranker): The reranking model. + + Returns: + - Tuple[str, str]: A tuple containing the retrieved context and source. 
+ """ + # Retrieve closest neighbors + neighbors = get_neighbors( + user_query, + sentence_transformer, + feature_view, + k=10, + ) + + # Rerank the neighbors to get top-k + context_reranked = rerank( + user_query, + neighbors, + reranker, + k=3, + ) + + # Retrieve context + context = get_context(context_reranked) + + # Retrieve source + source = get_source(context_reranked) + + return context, source diff --git a/advanced_tutorials/llm_pdfs/functions/text_preprocess.py b/advanced_tutorials/llm_pdfs/functions/text_preprocess.py new file mode 100644 index 00000000..47cbd21d --- /dev/null +++ b/advanced_tutorials/llm_pdfs/functions/text_preprocess.py @@ -0,0 +1,73 @@ +import pandas as pd +from typing import List + +def split_page(document: str) -> List[str]: + """ + Splits a document into a list of paragraphs based on newline characters. + + Parameters: + - document (str): The input document to be split. + + Returns: + - List[str]: A list of paragraphs. + """ + return document.split('\n \n') + + +def get_paragraphs(data: pd.DataFrame) -> pd.DataFrame: + """ + Explodes the 'text' column in the DataFrame, adds a 'paragraph' column indicating the index + of the element in the list grouped by file_name and page_number. + + Parameters: + - data (pd.DataFrame): The input DataFrame containing 'file_name', 'page_number', and 'text' columns. + + Returns: + - pd.DataFrame: The modified DataFrame with an added 'paragraph' column. + """ + # Explode the list to separate rows + data_text_exploded = data.explode('text') + + # Add a 'paragraph' column indicating the index of the element in the list + data_text_exploded['paragraph'] = data_text_exploded.groupby( + ['file_name', 'page_number'] + ).cumcount() + 1 + + return data_text_exploded + + +def process_text_data(df: pd.DataFrame) -> pd.DataFrame: + """ + Processes text data by applying the split_page, get_paragraphs functions. + + Parameters: + - df (pd.DataFrame): The input DataFrame containing 'file_name' and 'text' columns. + + Returns: + - pd.DataFrame: The processed DataFrame with 'file_name', 'page_number', 'paragraph', and 'text' columns. 
+    """
+    # Apply split_page function to split text into paragraphs
+    df['text'] = df['text'].apply(split_page)
+
+    # Apply get_paragraphs function to explode the list and add paragraph numbers
+    df = get_paragraphs(df)
+
+    # Apply strip to remove leading and trailing spaces
+    df['text'] = df['text'].str.strip()
+
+    # Filter rows where the length of the 'text' column is greater than 500
+    df = df[df['text'].str.len() > 500]
+
+    # Regex pattern that identifies rows with 5 or more consecutive dots or dashes
+    # (kept group-free so pandas does not warn about match groups in str.contains)
+    pattern_to_remove = r'\.{5,}|-{5,}'
+
+    # Remove rows matching the pattern
+    df_filtered = df[~df['text'].str.contains(pattern_to_remove, regex=True)]
+
+    # Reset index
+    df_filtered.reset_index(drop=True, inplace=True)
+
+    # Reorder columns for better readability
+    df_filtered = df_filtered[['file_name', 'file_link', 'page_number', 'paragraph', 'text']]
+
+    return df_filtered
diff --git a/advanced_tutorials/llm_pdfs/requirements.txt b/advanced_tutorials/llm_pdfs/requirements.txt
new file mode 100644
index 00000000..8c00f616
--- /dev/null
+++ b/advanced_tutorials/llm_pdfs/requirements.txt
@@ -0,0 +1,22 @@
+google-api-python-client==2.114.0
+httplib2==0.22.0
+oauth2client==4.1.3
+pydrive==1.3.1
+PyPDF2==3.0.1
+pandas==2.1.4
+sentence-transformers==2.2.2
+accelerate==0.26.1
+peft==0.7.1
+bitsandbytes==0.40.2
+transformers==4.36.2
+flask-sqlalchemy==3.1.1
+trl==0.7.9
+langchain==0.1.1
+pyopenssl==23.3.0
+FlagEmbedding
+streamlit==1.30.0
+openai==1.9.0
+getpass4==0.0.14.1
+json_repair==0.6.1
+protobuf==3.20.0
+hopsworks
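
As a quick sanity check after running the backfill, the vector index can also be queried directly, without the LLM. A minimal sketch, assuming the documents feature view created in 1_feature_backfill.ipynb:

import hopsworks
from sentence_transformers import SentenceTransformer
import config

project = hopsworks.login()
fs = project.get_feature_store()

feature_view = fs.get_feature_view(name="documents", version=1)
feature_view.init_serving(1)

# Embed the question with the same model used at backfill time
model = SentenceTransformer(config.MODEL_SENTENCE_TRANSFORMER).to(config.DEVICE)
query_embedding = model.encode("What is risk management?")

# Fetch the 5 nearest chunks from the online index; each row mirrors the
# feature view selection: (file_name, file_link, page_number, paragraph, text)
neighbors = feature_view.find_neighbors(query_embedding, k=5)

for file_name, file_link, page_number, paragraph, text in neighbors:
    print(f"{file_name} p.{page_number} par.{paragraph}: {text[:80]}...")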