From 4899efe30e2605d30854567a799822dbef86212b Mon Sep 17 00:00:00 2001
From: Surya Prakash Pathak
Date: Wed, 14 Feb 2024 08:31:25 -0800
Subject: [PATCH] Code generation app demo (#38)

* code gen app

* Updated nb

* Added containerfile, requirements.txt and yaml file

* Updated Readme.md file

* Addressed PR feedback

* Updated prompt

* Updated prompt

---
 code-generation/README.md               | 35 ++++++++++++++++++++
 code-generation/ai-studio.yaml          | 20 ++++++++++++
 code-generation/builds/Containerfile    |  9 ++++++
 code-generation/builds/requirements.txt |  3 ++
 code-generation/codegen-app.py          | 43 +++++++++++++++++++++++++
 5 files changed, 110 insertions(+)
 create mode 100644 code-generation/README.md
 create mode 100644 code-generation/ai-studio.yaml
 create mode 100644 code-generation/builds/Containerfile
 create mode 100644 code-generation/builds/requirements.txt
 create mode 100644 code-generation/codegen-app.py

diff --git a/code-generation/README.md b/code-generation/README.md
new file mode 100644
index 00000000..ed97b0aa
--- /dev/null
+++ b/code-generation/README.md
@@ -0,0 +1,35 @@
+# Code Generation
+
+This example deploys a local code-generation application using a llama.cpp model server and a Python app built with LangChain.
+
+### Download Model
+
+- **codellama**
+  - Download URL: `https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf`
+
+```
+cd ../models
+wget https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf
+cd ../
+```
+
+### Deploy Model Service
+
+To start the model service, refer to [the playground model-service document](../playground/README.md). Deploy the LLM server and volume-mount the model of choice.
+
+```
+podman run --rm -it -d \
+        -p 8001:8001 \
+        -v Local/path/to/locallm/models:/locallm/models:ro,Z \
+        -e MODEL_PATH=models/ \
+        -e HOST=0.0.0.0 \
+        -e PORT=8001 \
+        playground:image
+```
+
+### Build Container Image
+
+Once the model service is deployed, follow the instructions below to build the container image and run it locally. The app reads the model service address from the `MODEL_SERVICE_ENDPOINT` environment variable.
+
+- `podman build -t codegen-app code-generation -f code-generation/builds/Containerfile`
+- `podman run -it -p 8501:8501 -e MODEL_SERVICE_ENDPOINT=http://10.88.0.1:8001/v1 codegen-app`
diff --git a/code-generation/ai-studio.yaml b/code-generation/ai-studio.yaml
new file mode 100644
index 00000000..4ea41f08
--- /dev/null
+++ b/code-generation/ai-studio.yaml
@@ -0,0 +1,20 @@
+application:
+  type: language
+  name: codegen-demo
+  description: This is a code-generation demo application.
+  containers:
+    - name: llamacpp-server
+      contextdir: ../playground
+      containerfile: Containerfile
+      model-service: true
+      backend:
+        - llama
+      arch:
+        - arm64
+        - amd64
+    - name: codegen-app
+      contextdir: .
+      containerfile: builds/Containerfile
+      arch:
+        - arm64
+        - amd64
diff --git a/code-generation/builds/Containerfile b/code-generation/builds/Containerfile
new file mode 100644
index 00000000..f89db27f
--- /dev/null
+++ b/code-generation/builds/Containerfile
@@ -0,0 +1,9 @@
+FROM registry.access.redhat.com/ubi9/python-39:latest
+
+WORKDIR /code-generation
+COPY builds/requirements.txt .
+RUN pip install --upgrade pip
+RUN pip install --no-cache-dir --upgrade -r /code-generation/requirements.txt
+COPY codegen-app.py .
+EXPOSE 8501
+ENTRYPOINT ["streamlit", "run", "codegen-app.py"]
\ No newline at end of file
diff --git a/code-generation/builds/requirements.txt b/code-generation/builds/requirements.txt
new file mode 100644
index 00000000..56959687
--- /dev/null
+++ b/code-generation/builds/requirements.txt
@@ -0,0 +1,3 @@
+langchain_openai
+langchain
+streamlit
\ No newline at end of file
diff --git a/code-generation/codegen-app.py b/code-generation/codegen-app.py
new file mode 100644
index 00000000..dfead6dd
--- /dev/null
+++ b/code-generation/codegen-app.py
@@ -0,0 +1,43 @@
+import os
+from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.runnables import RunnablePassthrough
+from langchain_community.callbacks import StreamlitCallbackHandler
+
+import streamlit as st
+
+model_service = os.getenv("MODEL_SERVICE_ENDPOINT", "http://localhost:8001/v1")
+
+st.title("Code Generation App")
+
+if "messages" not in st.session_state:
+    st.session_state["messages"] = [{"role": "assistant",
+                                     "content": "How can I help you?"}]
+
+for msg in st.session_state.messages:
+    st.chat_message(msg["role"]).write(msg["content"])
+
+llm = ChatOpenAI(base_url=model_service,
+                 api_key="EMPTY",
+                 streaming=True)
+
+# Define the LangChain chain
+prompt = ChatPromptTemplate.from_template("""You are a helpful code assistant that helps developers write code for a given {input}.
+    Generate the code block first, and explain the code at the end.
+    If the {input} does not make sense, ask for clarification.""")
+chain = (
+    {"input": RunnablePassthrough()}
+    | prompt
+    | llm
+)
+
+if prompt := st.chat_input():
+    st.session_state.messages.append({"role": "user", "content": prompt})
+    st.chat_message("user").markdown(prompt)
+
+    st_callback = StreamlitCallbackHandler(st.container())
+    response = chain.invoke(prompt, {"callbacks": [st_callback]})
+
+    st.chat_message("assistant").markdown(response.content)
+    st.session_state.messages.append({"role": "assistant", "content": response.content})
+    st.rerun()
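
Before building the app image, the model service can be smoke-tested on its own. The sketch below is a minimal check, assuming the llama.cpp server exposes an OpenAI-compatible `/v1` API at `localhost:8001` (that base URL matches the app's default in `codegen-app.py`; adjust it to `http://10.88.0.1:8001/v1` when calling across containers):

```python
# Minimal smoke test for the model service (assumes an OpenAI-compatible
# /v1 endpoint at localhost:8001, matching the app's default base_url).
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(base_url="http://localhost:8001/v1", api_key="EMPTY")

# If the service is up, this prints a completion from the served model.
print(llm.invoke("Write a Python function that reverses a string.").content)
```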
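
The chain in `codegen-app.py` is composed with the LangChain Expression Language: a dict of runnables fills the `{input}` slot, pipes through the prompt template, and then into the chat model. Here is a minimal sketch of the same pattern run outside Streamlit (the endpoint and the sample task are illustrative assumptions):

```python
# Sketch of the LCEL chain from codegen-app.py, run without Streamlit.
# RunnablePassthrough() forwards the raw user string into the prompt's
# {input} placeholder before the formatted messages reach the model.
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

llm = ChatOpenAI(base_url="http://localhost:8001/v1", api_key="EMPTY")
prompt = ChatPromptTemplate.from_template(
    "You are a helpful code assistant. Generate the code block first, "
    "then explain it. Task: {input}"
)
chain = {"input": RunnablePassthrough()} | prompt | llm

# invoke() returns an AIMessage; .content holds the generated text.
print(chain.invoke("a function that checks whether a word is a palindrome").content)
```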