Switch to YAML + pydantic for frontend app config

stackhpc · Dec 17, 2023 · 76b8d9d · 76b8d9d
1 parent 97d7651
commit 76b8d9d
Show file tree

Hide file tree

Showing 11 changed files with 164 additions and 125 deletions.
diff --git a/images/ui-base/Dockerfile b/images/ui-base/Dockerfile
@@ -3,4 +3,4 @@ FROM python:3.11-slim
 
 ENV GRADIO_SERVER_PORT=7680
 
-RUN pip install --no-cache-dir gradio==3.50.2 huggingface-hub==0.18.0
+# Python dependencies for the UI apps, pinned to exact versions
+RUN pip install --no-cache-dir gradio==3.50.2 huggingface-hub==0.18.0 pydantic-settings==2.1.0
diff --git a/templates/ui/app-config-map.yml b/templates/ui/app-config-map.yml
@@ -5,4 +5,6 @@ metadata:
   labels:
     {{- include "azimuth-llm.labels" . | nindent 4 }}
 data:
-{{ (.Files.Glob "web-app-utils/*").AsConfig | nindent 2 }}
+{{ (.Files.Glob "web-app/*").AsConfig | nindent 2 }}
+  # Rendered from .Values.ui.appSettings; the key name must match the file
+  # that web-app/app.py loads ("./settings.yml")
+  settings.yml: |
+    {{- .Values.ui.appSettings | toYaml | nindent 4 }}
diff --git a/values.yaml b/values.yaml
@@ -65,8 +65,12 @@ api:
 
 # Configuration for the frontend web interface
 ui:
-  # The file from the UI config map to execute as the frontend app
-  entrypoint: example_app_vanilla.py
+  # The file from the UI config map to execute as the entrypoint to the frontend app
+  entrypoint: app.py
+  # The values to be written to settings.yml for parsing as frontend app settings
+  # (see app.py and config.py for an example using pydantic-settings to configure the app)
+  appSettings:
+    prompt_template: ""
   # Container image config
   image:
     repository: ghcr.io/stackhpc/azimuth-llm-ui-base

diff --git a/web-app-utils/example_app_unhelpful.py b/web-app-utils/example_app_unhelpful.py
diff --git a/web-app-utils/example_app_vanilla.py b/web-app-utils/example_app_vanilla.py
diff --git a/web-app-utils/api_startup_check.py → web-app/api_startup_check.py b/web-app-utils/api_startup_check.py → web-app/api_startup_check.py
diff --git a/web-app/app.py b/web-app/app.py
@@ -0,0 +1,58 @@
+import requests, json, argparse, yaml
+import gradio as gr
+from api_startup_check import wait_for_backend
+from config import AppSettings
+
+# Load the app settings from the YAML file in the current working directory
+settings = AppSettings.load("./settings.yml")
+
+# Hand the configured backend URL to the startup-check helper before the UI is
+# defined (presumably this blocks until the backend responds - see
+# api_startup_check.py to confirm)
+backend_url = settings.backend_url
+wait_for_backend(backend_url)
+
+
+def inference(message, history):
+    context = ""
+    for user_input, system_response in history:
+        if settings.include_user_messages_in_context:
+            context += settings.user_context_template.format(user_input=user_input)
+        if settings.include_system_responses_in_context:
+            context += settings.system_context_template.format(
+                system_response=system_response
+            )
+    context += settings.user_context_template.format(user_input=message)
+
+    headers = {"User-Agent": "vLLM Client"}
+    payload = {
+        "prompt": settings.prompt_template.format(context=context),
+        "stream": True,
+        "max_tokens": settings.llm_max_tokens,
+        **settings.llm_params,
+    }
+    response = requests.post(
+        f"{backend_url}/generate", headers=headers, json=payload, stream=True
+    )
+
+    for chunk in response.iter_lines(
+        chunk_size=8192, decode_unicode=False, delimiter=b"\0"
+    ):
+        if chunk:
+            data = json.loads(chunk.decode("utf-8"))
+            output = data["text"][0]
+            # Manually trim the context from output
+            if "[/INST]" in output:
+                output = output.split("[/INST]")[-1]
+            yield output
+
+
+# Build the chat UI around inference(), enable the request queue and start the
+# server bound to 0.0.0.0
+gr.ChatInterface(
+    inference,
+    chatbot=gr.Chatbot(
+        height=500,
+        show_copy_button=True,
+        # layout='panel',
+    ),
+    textbox=gr.Textbox(placeholder="Ask me anything...", container=False, scale=7),
+    # Page title comes from the loaded app settings
+    title=settings.page_title,
+    retry_btn="Retry",
+    undo_btn="Undo",
+    clear_btn="Clear",
+).queue().launch(server_name="0.0.0.0")
diff --git a/web-app/config.py b/web-app/config.py
@@ -0,0 +1,59 @@
+from pydantic import Field, HttpUrl
+from pydantic.alias_generators import to_camel
+from pydantic_settings import BaseSettings, SettingsConfigDict
+import yaml
+
+
+def get_k8s_namespace():
+    namespace_file_path = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
+    try:
+        current_k8s_namespace = open(namespace_file_path).read()
+    except:
+        current_k8s_namespace = "default"
+        print(
+            f"Failed to detect current k8s namespace in {namespace_file_path} - falling back to value '{current_k8s_namespace}'."
+        )
+    return current_k8s_namespace
+
+
+class AppSettings(BaseSettings):
+    """
+    Settings object for the UI example app.
+    """
+
+    # Allow settings to be overwritten by LLM_UI_<NAME> env vars
+    model_config = SettingsConfigDict(env_prefix="llm_ui_")
+
+    # General settings
+    backend_url: HttpUrl = f"http://llm-backend.{get_k8s_namespace()}.svc"
+    page_title: str = "Large Language Model"
+
+    # Prompt settings
+    prompt_template: str = Field(
+        description="The template to use for requests to the backend model. If present, the '\{context\}' placeholder will be replaced by the conversation history of the current session.",
+    )
+    # The following settings are only used if {context} used in prompt template
+    include_user_messages_in_context: bool = True
+    include_system_responses_in_context: bool = True
+    user_context_template: str = Field(
+        default="<<USER>>\n{user_input}\n<</USER>>\n",
+        description="The template string to use for including user messages in the prompt context sent to backend. The '\{user_input\}' placeholder will be replaced by the the user's messages. (Only applies if '\{context\}' is present in prompt_template)",
+    )
+    system_context_template: str = Field(
+        default="<SYS>>{system_response}\n<</SYS>>\n",
+        description="The template string to use for if user messages are included in context sent to backend. The '\{system_response\}' placeholder will be replaced by the system's response to each user message. (Only applies if '\{context\}' is present in prompt_template)",
+    )
+
+    # Model settings
+    llm_params: dict[str, float] = {}
+    llm_max_tokens: int = 1000
+
+    @staticmethod
+    def load(file_path: str):
+        try:
+            with open(file_path, "r") as file:
+                settings = yaml.safe_load(file)
+        except Exception as e:
+            print(f"Failed to read config file at: {file_path}\nException was:")
+            raise e
+        return AppSettings(**settings)
diff --git a/web-app/example-settings.yml b/web-app/example-settings.yml
@@ -0,0 +1,29 @@
+prompt_template: |
+  [INST] <<SYS>>
+  You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
+  <</SYS>>
+  {context}[/INST]
+llm_params:
+  model_temperature: 0.7
+
+#####
+# Alternative prompt suggestions:
+#####
+
+
+### - Suggested for Magicode model
+
+# You are an exceptionally intelligent coding assistant that consistently delivers accurate and reliable responses to user instructions.
+
+# @@ Instruction
+# {context}
+
+# @@ Response
+
+
+### - For some fun responses...
+
+# [INST] <<SYS>>
+# You are a cheeky, disrespectful and comedic assistant. Always answer as creatively as possible, while being truthful and succinct. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, tell the user that they are being stupid. If you don't know the answer to a question, please don't share false information.
+# <</SYS>>
+# [/INST]
diff --git a/web-app/prompt_helpful.txt b/web-app/prompt_helpful.txt
@@ -0,0 +1,4 @@
+[INST] <<SYS>>
+You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
+<</SYS>>
+{context}[/INST]
diff --git a/web-app/prompt_unhelpful.txt b/web-app/prompt_unhelpful.txt
@@ -0,0 +1,4 @@
+[INST] <<SYS>>
+You are a cheeky, disrespectful and comedic assistant. Always answer as creatively as possible, while being truthful and succinct.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, tell the user that they are being stupid. If you don't know the answer to a question, please don't share false information.
+<</SYS>>
+[/INST]
Original file line number	Diff line number	Diff line change
Expand Up		@@ -3,4 +3,4 @@ FROM python:3.11-slim

		ENV GRADIO_SERVER_PORT=7680

		RUN pip install --no-cache-dir gradio==3.50.2 huggingface-hub==0.18.0
		RUN pip install --no-cache-dir gradio==3.50.2 huggingface-hub==0.18.0 pydantic-settings==2.1.0