From 82d045c75bbad09fe53bb560373c9e49569c0c1a Mon Sep 17 00:00:00 2001 From: sd109 Date: Wed, 30 Oct 2024 17:52:14 +0000 Subject: [PATCH 01/34] Refactor basic chat web app and add image analysis app --- .gitignore | 6 + .../{chat-interface => basic-chat}/Dockerfile | 10 +- .../{chat-interface => basic-chat}/app.py | 166 +++++++++++++----- web-apps/basic-chat/defaults.yml | 31 ++++ .../gradio-client-test.py | 0 .../requirements.txt | 2 +- web-apps/build.sh | 6 +- web-apps/chat-interface/config.py | 97 ---------- web-apps/chat-interface/defaults.yml | 36 ---- web-apps/image-analysis/Dockerfile | 18 ++ web-apps/image-analysis/app.py | 121 +++++++++++++ web-apps/image-analysis/defaults.yml | 36 ++++ web-apps/image-analysis/requirements.txt | 6 + .../purge-google-fonts.sh | 0 web-apps/run.sh | 4 +- web-apps/utils/setup.py | 8 + web-apps/utils/utils.py | 93 ++++++++++ 17 files changed, 450 insertions(+), 190 deletions(-) rename web-apps/{chat-interface => basic-chat}/Dockerfile (61%) rename web-apps/{chat-interface => basic-chat}/app.py (54%) create mode 100644 web-apps/basic-chat/defaults.yml rename web-apps/{chat-interface => basic-chat}/gradio-client-test.py (100%) rename web-apps/{chat-interface => basic-chat}/requirements.txt (78%) delete mode 100644 web-apps/chat-interface/config.py delete mode 100644 web-apps/chat-interface/defaults.yml create mode 100644 web-apps/image-analysis/Dockerfile create mode 100644 web-apps/image-analysis/app.py create mode 100644 web-apps/image-analysis/defaults.yml create mode 100644 web-apps/image-analysis/requirements.txt rename web-apps/{chat-interface => }/purge-google-fonts.sh (100%) create mode 100644 web-apps/utils/setup.py create mode 100644 web-apps/utils/utils.py diff --git a/.gitignore b/.gitignore index 7d21b1b..9a57ecc 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,9 @@ test-values.y[a]ml # Helm chart stuff chart/Chart.lock chart/charts + +# Python stuff +**/build/ +**/*.egg-info/ +**/flagged/ 
+web-apps/**/overrides.yml diff --git a/web-apps/chat-interface/Dockerfile b/web-apps/basic-chat/Dockerfile similarity index 61% rename from web-apps/chat-interface/Dockerfile rename to web-apps/basic-chat/Dockerfile index 803d58f..7e9ed84 100644 --- a/web-apps/chat-interface/Dockerfile +++ b/web-apps/basic-chat/Dockerfile @@ -1,6 +1,9 @@ FROM python:3.11-slim -COPY requirements.txt requirements.txt +ARG DIR=chat-interface + +COPY $DIR/requirements.txt requirements.txt +COPY utils utils RUN pip install --no-cache-dir -r requirements.txt COPY purge-google-fonts.sh purge-google-fonts.sh @@ -8,9 +11,8 @@ RUN bash purge-google-fonts.sh WORKDIR /app -COPY *.py . +COPY $DIR/*.py . -COPY defaults.yml . -# COPY overrides.yml . +COPY $DIR/defaults.yml . ENTRYPOINT ["python3", "app.py"] diff --git a/web-apps/chat-interface/app.py b/web-apps/basic-chat/app.py similarity index 54% rename from web-apps/chat-interface/app.py rename to web-apps/basic-chat/app.py index 3ead467..dbfae8f 100644 --- a/web-apps/chat-interface/app.py +++ b/web-apps/basic-chat/app.py @@ -1,23 +1,77 @@ -import sys import logging +import openai + import gradio as gr -from urllib.parse import urljoin -from config import AppSettings +from urllib.parse import urljoin from langchain.schema import HumanMessage, AIMessage, SystemMessage from langchain_openai import ChatOpenAI -import openai +from typing import Dict, List +from pydantic import BaseModel, ConfigDict +from utils import LLMParams, load_settings logging.basicConfig() logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -logger.info("Starting app") -settings = AppSettings.load() -if len(sys.argv) > 1: - settings.hf_model_name = sys.argv[1] -logger.info("App settings: %s", settings) +class AppSettings(BaseModel): + # Basic config + host_address: str + backend_url: str + model_name: str + model_instruction: str + page_title: str + llm_params: LLMParams + # Theme customisation + theme_params: Dict[str, str | list] + theme_params_extended: 
Dict[str, str] + css_overrides: str | None + custom_javascript: str | None + # Error on typos and suppress warnings for fields with 'model_' prefix + model_config = ConfigDict(protected_namespaces=(), extra="forbid") + + +# class AppSettings(BaseModel): +# hf_model_name: str = Field( +# description="The model to use when constructing the LLM Chat client. This should match the model name running on the vLLM backend", +# ) +# backend_url: HttpUrl = Field( +# description="The address of the OpenAI compatible API server (either in-cluster or externally hosted)" +# ) +# page_title: str = Field(default="Large Language Model") +# page_description: Optional[str] = Field(default=None) +# hf_model_instruction: str = Field( +# default="You are a helpful and cheerful AI assistant. Please respond appropriately." +# ) + +# # Model settings + +# # For available parameters, see https://docs.vllm.ai/en/latest/dev/sampling_params.html +# # which is based on https://platform.openai.com/docs/api-reference/completions/create +# llm_max_tokens: int = Field(default=500) +# llm_temperature: float = Field(default=0) +# llm_top_p: float = Field(default=1) +# llm_top_k: float = Field(default=-1) +# llm_presence_penalty: float = Field(default=0, ge=-2, le=2) +# llm_frequency_penalty: float = Field(default=0, ge=-2, le=2) + +# # UI theming + +# # Variables explicitly passed to gradio.theme.Default() +# # For example: +# # {"primary_hue": "red"} +# theme_params: dict[str, Union[str, List[str]]] = Field(default_factory=dict) +# # Overrides for theme.body_background_fill property +# theme_background_colour: Optional[str] = Field(default=None) +# # Provides arbitrary CSS and JS overrides to the UI, +# # see https://www.gradio.app/guides/custom-CSS-and-JS +# css_overrides: Optional[str] = Field(default=None) +# custom_javascript: Optional[str] = Field(default=None) + + +settings = AppSettings(**load_settings()) +logger.info(settings) backend_url = str(settings.backend_url) backend_health_endpoint = 
urljoin(backend_url, "/health") @@ -36,29 +90,19 @@ class PossibleSystemPromptException(Exception): llm = ChatOpenAI( base_url=urljoin(backend_url, "v1"), - model=settings.hf_model_name, + model=settings.model_name, openai_api_key="required-but-not-used", - temperature=settings.llm_temperature, - max_tokens=settings.llm_max_tokens, - # model_kwargs={ - # "top_p": settings.llm_top_p, - # "frequency_penalty": settings.llm_frequency_penalty, - # "presence_penalty": settings.llm_presence_penalty, - # # Additional parameters supported by vLLM but not OpenAI API - # # https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#extra-parameters - # "extra_body": { - # "top_k": settings.llm_top_k, - # } - top_p=settings.llm_top_p, - frequency_penalty=settings.llm_frequency_penalty, - presence_penalty=settings.llm_presence_penalty, - # Additional parameters supported by vLLM but not OpenAI API - # https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#extra-parameters + temperature=settings.llm_params.temperature, + max_tokens=settings.llm_params.max_tokens, + top_p=settings.llm_params.top_p, + frequency_penalty=settings.llm_params.frequency_penalty, + presence_penalty=settings.llm_params.presence_penalty, extra_body={ - "top_k": settings.llm_top_k, + "top_k": settings.llm_params.top_k, }, streaming=True, ) +logger.info(llm) def inference(latest_message, history): @@ -67,13 +111,13 @@ def inference(latest_message, history): try: if INCLUDE_SYSTEM_PROMPT: - context = [SystemMessage(content=settings.hf_model_instruction)] + context = [SystemMessage(content=settings.model_instruction)] else: context = [] for i, (human, ai) in enumerate(history): if not INCLUDE_SYSTEM_PROMPT and i == 0: # Mimic system prompt by prepending it to first human message - human = f"{settings.hf_model_instruction}\n\n{human}" + human = f"{settings.model_instruction}\n\n{human}" context.append(HumanMessage(content=human)) context.append(AIMessage(content=(ai or ""))) 
context.append(HumanMessage(content=latest_message)) @@ -131,8 +175,8 @@ def inference(latest_message, history): # UI theming theme = gr.themes.Default(**settings.theme_params) -if settings.theme_background_colour: - theme.body_background_fill = settings.theme_background_colour +theme.set(**settings.theme_params_extended) +# theme.set(text) def inference_wrapper(*args): @@ -153,7 +197,7 @@ def inference_wrapper(*args): # Build main chat interface -with gr.ChatInterface( +app = gr.ChatInterface( inference_wrapper, chatbot=gr.Chatbot( # Height of conversation window in CSS units (string) or pixels (int) @@ -167,7 +211,6 @@ def inference_wrapper(*args): scale=7, ), title=settings.page_title, - description=settings.page_description, retry_btn="Retry", undo_btn="Undo", clear_btn="Clear", @@ -175,16 +218,47 @@ def inference_wrapper(*args): theme=theme, css=settings.css_overrides, js=settings.custom_javascript, -) as app: - logger.debug("Gradio chat interface config: %s", app.config) - # For running locally in tilt dev setup - if len(sys.argv) > 2 and sys.argv[2] == "localhost": - app.launch() - # For running on cluster - else: - app.queue( - # Allow 10 concurrent requests to backend - # vLLM backend should be clever enough to - # batch these requests appropriately. - default_concurrency_limit=10, - ).launch(server_name="0.0.0.0") +) +logger.debug("Gradio chat interface config: %s", app.config) +app.queue( + # Allow 10 concurrent requests to backend + # vLLM backend should be clever enough to + # batch these requests appropriately. 
+ default_concurrency_limit=10, +).launch(server_name=settings.host_address) + +# with gr.ChatInterface( +# inference_wrapper, +# chatbot=gr.Chatbot( +# # Height of conversation window in CSS units (string) or pixels (int) +# height="68vh", +# show_copy_button=True, +# ), +# textbox=gr.Textbox( +# placeholder="Ask me anything...", +# container=False, +# # Ratio of text box to submit button width +# scale=7, +# ), +# title=settings.page_title, +# description=settings.page_description, +# retry_btn="Retry", +# undo_btn="Undo", +# clear_btn="Clear", +# analytics_enabled=False, +# theme=theme, +# css=settings.css_overrides, +# js=settings.custom_javascript, +# ) as app: +# logger.debug("Gradio chat interface config: %s", app.config) +# # For running locally in tilt dev setup +# if len(sys.argv) > 2 and sys.argv[2] == "localhost": +# app.launch() +# # For running on cluster +# else: +# app.queue( +# # Allow 10 concurrent requests to backend +# # vLLM backend should be clever enough to +# # batch these requests appropriately. +# default_concurrency_limit=10, +# ).launch(server_name=settings.host_address) diff --git a/web-apps/basic-chat/defaults.yml b/web-apps/basic-chat/defaults.yml new file mode 100644 index 0000000..83f0e46 --- /dev/null +++ b/web-apps/basic-chat/defaults.yml @@ -0,0 +1,31 @@ + +model_name: +model_instruction: "You are a helpful and cheerful AI assistant. Please respond appropriately." +backend_url: +host_address: 0.0.0.0 + +page_title: Large Language Model + +# LLM request parameters +# See https://platform.openai.com/docs/api-reference/chat/create +# and https://docs.vllm.ai/en/v0.6.0/serving/openai_compatible_server.html#extra-parameters +llm_params: + max_tokens: + temperature: 0 + top_p: + top_k: + frequency_penalty: + presence_penalty: + +# Gradio theme constructor parameters (e.g. 'primary_hue') +# See https://www.gradio.app/guides/theming-guide +theme_params: {} + +# Gradio theme .set(...) 
parameters +# See https://www.gradio.app/guides/theming-guide#extending-themes-via-set +theme_params_extended: {} + +# Additional CSS and JS overrides +# See https://www.gradio.app/guides/custom-CSS-and-JS +css_overrides: +custom_javascript: diff --git a/web-apps/chat-interface/gradio-client-test.py b/web-apps/basic-chat/gradio-client-test.py similarity index 100% rename from web-apps/chat-interface/gradio-client-test.py rename to web-apps/basic-chat/gradio-client-test.py diff --git a/web-apps/chat-interface/requirements.txt b/web-apps/basic-chat/requirements.txt similarity index 78% rename from web-apps/chat-interface/requirements.txt rename to web-apps/basic-chat/requirements.txt index 3f34151..f37169c 100644 --- a/web-apps/chat-interface/requirements.txt +++ b/web-apps/basic-chat/requirements.txt @@ -4,4 +4,4 @@ openai langchain langchain_openai pydantic -pydantic_settings +-e ../utils diff --git a/web-apps/build.sh b/web-apps/build.sh index 5fe3c98..0dd5d4a 100755 --- a/web-apps/build.sh +++ b/web-apps/build.sh @@ -2,14 +2,12 @@ set -e build() { - pushd $1 > /dev/null - if [[ -f Dockerfile ]]; then + if [[ -f $1/Dockerfile ]]; then echo Building $1 docker image - docker build . -t ghcr.io/stackhpc/azimuth-llm-$1 + docker build . 
-t ghcr.io/stackhpc/azimuth-llm-$1 -f $1/Dockerfile else echo No Dockerfile found for $1 fi - popd > /dev/null } # If a single app is provided as a diff --git a/web-apps/chat-interface/config.py b/web-apps/chat-interface/config.py deleted file mode 100644 index 8592884..0000000 --- a/web-apps/chat-interface/config.py +++ /dev/null @@ -1,97 +0,0 @@ -import logging -import yaml -from pydantic import Field, HttpUrl -from pydantic_settings import BaseSettings, SettingsConfigDict - -from typing import Optional, Union, List - -logging.basicConfig() -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -NAMESPACE_FILE_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/namespace" -def get_k8s_namespace(): - try: - current_k8s_namespace = open(NAMESPACE_FILE_PATH).read() - return current_k8s_namespace - except FileNotFoundError as err: - return None - -def default_backend(): - k8s_ns = get_k8s_namespace() - if k8s_ns: - return f"http://llm-backend.{k8s_ns}.svc" - else: - logger.warning('Failed to determine k8s namespace from %s - assuming non-kubernetes environment.', NAMESPACE_FILE_PATH) - - -class AppSettings(BaseSettings): - """ - Settings object for the UI example app. - """ - - # # Allow settings to be overwritten by LLM_UI_ env vars - # model_config = SettingsConfigDict(env_prefix="llm_ui_") - - # General settings - hf_model_name: str = Field( - description="The model to use when constructing the LLM Chat client. This should match the model name running on the vLLM backend", - ) - backend_url: HttpUrl = Field( - description="The address of the OpenAI compatible API server (either in-cluster or externally hosted)" - ) - page_title: str = Field(default="Large Language Model") - page_description: Optional[str] = Field(default=None) - hf_model_instruction: str = Field( - default="You are a helpful and cheerful AI assistant. Please respond appropriately." 
- ) - - # Model settings - - # For available parameters, see https://docs.vllm.ai/en/latest/dev/sampling_params.html - # which is based on https://platform.openai.com/docs/api-reference/completions/create - llm_max_tokens: int = Field(default=500) - llm_temperature: float = Field(default=0) - llm_top_p: float = Field(default=1) - llm_top_k: float = Field(default=-1) - llm_presence_penalty: float = Field(default=0, ge=-2, le=2) - llm_frequency_penalty: float = Field(default=0, ge=-2, le=2) - - # UI theming - - # Variables explicitly passed to gradio.theme.Default() - # For example: - # {"primary_hue": "red"} - theme_params: dict[str, Union[str, List[str]]] = Field(default_factory=dict) - # Overrides for theme.body_background_fill property - theme_background_colour: Optional[str] = Field(default=None) - # Provides arbitrary CSS and JS overrides to the UI, - # see https://www.gradio.app/guides/custom-CSS-and-JS - css_overrides: Optional[str] = Field(default=None) - custom_javascript: Optional[str] = Field(default=None) - - - # Method for loading settings from files - @staticmethod - def _load_yaml(file_path: str): - with open(file_path, "r") as file: - content = yaml.safe_load(file) or {} - return content - - @staticmethod - def load(): - defaults = AppSettings._load_yaml('./defaults.yml') - overrides = {} - try: - overrides = AppSettings._load_yaml('/etc/web-app/overrides.yml') - except FileNotFoundError: - pass - settings = {**defaults, **overrides} - # Sanity checks on settings - if 'backend_url' not in settings: - in_cluster_backend = default_backend() - if not in_cluster_backend: - raise Exception('Backend URL must be provided in settings when running this app outside of Kubernetes') - settings['backend_url'] = in_cluster_backend - return AppSettings(**settings) diff --git a/web-apps/chat-interface/defaults.yml b/web-apps/chat-interface/defaults.yml deleted file mode 100644 index 9520b39..0000000 --- a/web-apps/chat-interface/defaults.yml +++ /dev/null @@ -1,36 
+0,0 @@ - -hf_model_name: "microsoft/Phi-3.5-mini-instruct" -hf_model_instruction: "You are a pirate" - -# UI theming tweaks -# css_overrides: | -# h1 { -# color: white; -# padding-top: 1em; -# } -# a { -# color: yellow; -# } -# theme_background_colour: "#00376c" -# theme_params: -# # primary_hue: blue -# font: -# - sans-serif -# font_mono: -# - sans-serif - -# custom_javascript: | -# function addPrivacyStatement() { -# var footer = document.querySelector('footer'); -# footer.appendChild(footer.children[1].cloneNode(deep=true)); -# var item = footer.children[2].cloneNode(); -# item.href = 'https://google.com'; -# item.textContent = 'Privacy Statement'; -# footer.appendChild(item); -# } - -# llm_max_tokens: -# llm_temperature: -# llm_top_p: -# llm_frequency_penalty: -# llm_presence_penalty: diff --git a/web-apps/image-analysis/Dockerfile b/web-apps/image-analysis/Dockerfile new file mode 100644 index 0000000..acc8559 --- /dev/null +++ b/web-apps/image-analysis/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.11-slim + +ARG DIR=image-interface + +COPY $DIR/requirements.txt requirements.txt +COPY utils utils +RUN pip install --no-cache-dir -r requirements.txt + +COPY purge-google-fonts.sh purge-google-fonts.sh +RUN bash purge-google-fonts.sh + +WORKDIR /app + +COPY $DIR/*.py . + +COPY $DIR/defaults.yml . 
+ +ENTRYPOINT ["python3", "app.py"] diff --git a/web-apps/image-analysis/app.py b/web-apps/image-analysis/app.py new file mode 100644 index 0000000..77cda84 --- /dev/null +++ b/web-apps/image-analysis/app.py @@ -0,0 +1,121 @@ +import base64 +import logging +import requests + +import gradio as gr + +from typing import List, Dict +from io import BytesIO +from PIL import Image +from pydantic import BaseModel, ConfigDict +from urllib.parse import urljoin + +from utils import load_settings, LLMParams + +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class PromptExample(BaseModel): + image_url: str + prompt: str + + +class AppSettings(BaseModel): + # Basic config + host_address: str + backend_url: str + model_name: str + page_title: str + page_description: str + examples: List[PromptExample] + llm_params: LLMParams + # Theme customisation + theme_params: Dict[str, str | list] + theme_params_extended: Dict[str, str] + css_overrides: str | None + custom_javascript: str | None + # Error on typos and suppress warnings for fields with 'model_' prefix + model_config = ConfigDict(protected_namespaces=(), extra="forbid") + + +settings = AppSettings(**load_settings()) +logger.info(settings) + + +# TODO: Rewrite this to stream output? 
+def analyze_image(image_url, prompt): + try: + # Download the image + response = requests.get(image_url) + response.raise_for_status() + image = Image.open(BytesIO(response.content)) + + # Convert image to base64 + buffered = BytesIO() + image.save(buffered, format="PNG") + img_str = base64.b64encode(buffered.getvalue()).decode() + + # Prepare the payload for the vision model + payload = { + "model": settings.model_name, + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": prompt}, + { + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{img_str}"}, + }, + ], + } + ], + **{k: v for k, v in settings.llm_params if k != "top_k" and v is not None}, + } + if settings.llm_params.top_k: + payload["extra_body"] = { + "top_k": settings.llm_params.top_k, + } + + # Make the API call to the vision model + headers = {"Content-Type": "application/json"} + response = requests.post( + urljoin(settings.backend_url, "/v1/chat/completions"), + json=payload, + headers=headers, + ) + response.raise_for_status() + + # Extract and return the model's response + result = response.json() + return result["choices"][0]["message"]["content"] + + except Exception as e: + return f"An error occurred: {str(e)}" + + +# UI theming +theme = gr.themes.Default(**settings.theme_params) +theme.set(**settings.theme_params_extended) + +# Set up the Gradio interface +app = gr.Interface( + fn=analyze_image, + inputs=[ + gr.Textbox(label="Image URL"), + gr.Textbox(label="Prompt/Question", elem_id="prompt", scale=2), + ], + outputs=gr.Textbox(label="Results"), + title=settings.page_title, + description=settings.page_description, + examples=[[ex.image_url, ex.prompt] for ex in settings.examples], + theme=theme, + css=settings.css_overrides, + js=settings.custom_javascript, + analytics_enabled=False, +) + +# Launch the interface +app.queue(default_concurrency_limit=10).launch(server_name=settings.host_address) diff --git a/web-apps/image-analysis/defaults.yml 
b/web-apps/image-analysis/defaults.yml new file mode 100644 index 0000000..e6f2791 --- /dev/null +++ b/web-apps/image-analysis/defaults.yml @@ -0,0 +1,36 @@ + +model_name: +backend_url: +host_address: 0.0.0.0 + +page_title: Image analysis with a vision model +page_description: This model can be used to analyse image files. + +# Example inputs to render in the UI +examples: + - image_url: https://www.myplace.de/sites/default/files/styles/blog_hero_bild_slideshow/public/blog/Platzprofessor-MyPlace-SelfStorage-Die-Stadt-als-Raum-der-Begegnung-H.jpg?itok=ibY2Hoy9 + prompt: Conduct a detailed image analysis and describe all parts of the image that you can identify. Count all occurrences of the entities, which you can identify. Make a guess about the provenance or location of the image. + +# LLM request parameters +# See https://platform.openai.com/docs/api-reference/chat/create +# and https://docs.vllm.ai/en/v0.6.0/serving/openai_compatible_server.html#extra-parameters +llm_params: + max_tokens: + temperature: + top_p: + top_k: + frequency_penalty: + presence_penalty: + +# Gradio theme constructor parameters (e.g. 'primary_hue') +# See https://www.gradio.app/guides/theming-guide +theme_params: {} + +# Gradio theme .set(...) 
parameters +# See https://www.gradio.app/guides/theming-guide#extending-themes-via-set +theme_params_extended: {} + +# Additional CSS and JS overrides +# See https://www.gradio.app/guides/custom-CSS-and-JS +css_overrides: +custom_javascript: diff --git a/web-apps/image-analysis/requirements.txt b/web-apps/image-analysis/requirements.txt new file mode 100644 index 0000000..9196eda --- /dev/null +++ b/web-apps/image-analysis/requirements.txt @@ -0,0 +1,6 @@ +pillow +requests +gradio<5 +gradio_client +-e ../utils +pydantic diff --git a/web-apps/chat-interface/purge-google-fonts.sh b/web-apps/purge-google-fonts.sh similarity index 100% rename from web-apps/chat-interface/purge-google-fonts.sh rename to web-apps/purge-google-fonts.sh diff --git a/web-apps/run.sh b/web-apps/run.sh index 5baa0c6..e6e20b5 100755 --- a/web-apps/run.sh +++ b/web-apps/run.sh @@ -1,7 +1,7 @@ #!/bin/bash set -e -IMAGE_TAG=azimuth-llm-$1 +IMAGE_TAG=ghcr.io/stackhpc/azimuth-llm-$1 error() { echo $1 @@ -18,4 +18,4 @@ else echo "Found local $IMAGE_TAG docker image" fi -docker run -p 7860:7860 $IMAGE_TAG +docker run --rm -p 7860:7860 $IMAGE_TAG diff --git a/web-apps/utils/setup.py b/web-apps/utils/setup.py new file mode 100644 index 0000000..515d709 --- /dev/null +++ b/web-apps/utils/setup.py @@ -0,0 +1,8 @@ +from setuptools import setup, find_packages + +setup( + name='web-app-utils', + version='0.0.1', + py_modules=["utils"], + requires=["pydantic"] +) diff --git a/web-apps/utils/utils.py b/web-apps/utils/utils.py new file mode 100644 index 0000000..252ccf4 --- /dev/null +++ b/web-apps/utils/utils.py @@ -0,0 +1,93 @@ +##### +# Shared utility functions and models for re-use by multiple web apps +##### + +import logging +import pathlib +import yaml +from typing import Annotated +from pydantic import BaseModel, ConfigDict, PositiveInt, Field + +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class LLMParams(BaseModel): + """ + Parameters for vLLM API 
requests. For details see + https://platform.openai.com/docs/api-reference/chat/create + https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#extra-parameters + """ + + max_tokens: PositiveInt | None + temperature: Annotated[float, Field(ge=0, le=2)] | None + top_p: Annotated[float, Field(gt=0, le=1)] | None + top_k: Annotated[int, Field(ge=-1)] | None + frequency_penalty: Annotated[float, Field(ge=-2, le=2)] | None + presence_penalty: Annotated[float, Field(ge=0 - 2, le=2)] | None + # Make sure we can't smuggle in extra request params / typos + model_config = ConfigDict(extra="forbid") + + +NAMESPACE_FILE_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/namespace" + + +def get_k8s_namespace(): + try: + current_k8s_namespace = open(NAMESPACE_FILE_PATH).read() + return current_k8s_namespace + except FileNotFoundError as err: + return None + + +def api_address_in_cluster(): + k8s_ns = get_k8s_namespace() + if k8s_ns: + return f"http://llm-backend.{k8s_ns}.svc" + else: + logger.warning( + "Failed to determine k8s namespace from %s - assuming non-kubernetes environment.", + NAMESPACE_FILE_PATH, + ) + + +# Method for loading settings from files +def load_yaml(file_path: str) -> dict: + with open(file_path, "r") as file: + content = yaml.safe_load(file) or {} + return content + + +def load_settings() -> dict: + + defaults = load_yaml("./defaults.yml") + overrides = {} + # Path must match the one used in the Helm chart's + # app-config-map.yml template + path = pathlib.Path("/etc/web-app/overrides.yml") + if path.exists(): + overrides = load_yaml(path) + else: + # Allow local overrides for dev/testing + path = pathlib.Path("./overrides.yml") + if path.exists(): + overrides = load_yaml(path) + + # Sanity checks on settings + unused_overrides = [k for k in overrides.keys() if k not in defaults.keys()] + if unused_overrides: + logger.warning( + f"Overrides {unused_overrides} not part of default settings so may be ignored." 
+ "Please check for typos" + ) + settings = {**defaults, **overrides} + if "backend_url" not in settings or not settings["backend_url"]: + # Try to detect in-cluster address + in_cluster_backend = api_address_in_cluster() + if not in_cluster_backend: + raise Exception( + "Backend URL must be provided in settings when running outside of Kubernetes." + ) + settings["backend_url"] = in_cluster_backend + return settings From 7ecfb9250b25b8e9b8a1620738e02065bed12a13 Mon Sep 17 00:00:00 2001 From: sd109 Date: Wed, 30 Oct 2024 17:55:57 +0000 Subject: [PATCH 02/34] Update image build matrix --- .github/workflows/build-push-artifacts.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index 10d5c1e..c1e2e74 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -39,7 +39,8 @@ jobs: strategy: matrix: include: - - component: chat-interface + - component: basic-chat + - component: image-analysis steps: - name: Check out the repository uses: actions/checkout@v4 From 1596c08b82faca87af58ecfcfe9826f72021ff7d Mon Sep 17 00:00:00 2001 From: sd109 Date: Wed, 30 Oct 2024 17:57:17 +0000 Subject: [PATCH 03/34] Disable change check for testing --- .github/workflows/build-push-artifacts.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index c1e2e74..d0cff1d 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -35,7 +35,7 @@ jobs: name: Build and push images runs-on: ubuntu-latest needs: changes - if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} + # if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} strategy: matrix: include: From 6ff5623b65df1f7df7e2cdef174169b7a91d43e6 Mon Sep 17 00:00:00 2001 From: sd109 Date: 
Wed, 30 Oct 2024 18:00:53 +0000 Subject: [PATCH 04/34] Rename web apps and dockerfile targets --- web-apps/image-analysis/Dockerfile | 2 +- web-apps/{basic-chat => text-chat}/Dockerfile | 2 +- web-apps/{basic-chat => text-chat}/app.py | 0 web-apps/{basic-chat => text-chat}/defaults.yml | 0 web-apps/{basic-chat => text-chat}/gradio-client-test.py | 0 web-apps/{basic-chat => text-chat}/requirements.txt | 0 6 files changed, 2 insertions(+), 2 deletions(-) rename web-apps/{basic-chat => text-chat}/Dockerfile (93%) rename web-apps/{basic-chat => text-chat}/app.py (100%) rename web-apps/{basic-chat => text-chat}/defaults.yml (100%) rename web-apps/{basic-chat => text-chat}/gradio-client-test.py (100%) rename web-apps/{basic-chat => text-chat}/requirements.txt (100%) diff --git a/web-apps/image-analysis/Dockerfile b/web-apps/image-analysis/Dockerfile index acc8559..5f858f8 100644 --- a/web-apps/image-analysis/Dockerfile +++ b/web-apps/image-analysis/Dockerfile @@ -1,6 +1,6 @@ FROM python:3.11-slim -ARG DIR=image-interface +ARG DIR=image-analysis COPY $DIR/requirements.txt requirements.txt COPY utils utils diff --git a/web-apps/basic-chat/Dockerfile b/web-apps/text-chat/Dockerfile similarity index 93% rename from web-apps/basic-chat/Dockerfile rename to web-apps/text-chat/Dockerfile index 7e9ed84..294fecd 100644 --- a/web-apps/basic-chat/Dockerfile +++ b/web-apps/text-chat/Dockerfile @@ -1,6 +1,6 @@ FROM python:3.11-slim -ARG DIR=chat-interface +ARG DIR=text-chat COPY $DIR/requirements.txt requirements.txt COPY utils utils diff --git a/web-apps/basic-chat/app.py b/web-apps/text-chat/app.py similarity index 100% rename from web-apps/basic-chat/app.py rename to web-apps/text-chat/app.py diff --git a/web-apps/basic-chat/defaults.yml b/web-apps/text-chat/defaults.yml similarity index 100% rename from web-apps/basic-chat/defaults.yml rename to web-apps/text-chat/defaults.yml diff --git a/web-apps/basic-chat/gradio-client-test.py b/web-apps/text-chat/gradio-client-test.py 
similarity index 100% rename from web-apps/basic-chat/gradio-client-test.py rename to web-apps/text-chat/gradio-client-test.py diff --git a/web-apps/basic-chat/requirements.txt b/web-apps/text-chat/requirements.txt similarity index 100% rename from web-apps/basic-chat/requirements.txt rename to web-apps/text-chat/requirements.txt From 87075618dc9e692e7feafbdeeb20546f1b141176 Mon Sep 17 00:00:00 2001 From: sd109 Date: Wed, 30 Oct 2024 18:01:06 +0000 Subject: [PATCH 05/34] Revert "Disable change check for testing" This reverts commit 1596c08b82faca87af58ecfcfe9826f72021ff7d. --- .github/workflows/build-push-artifacts.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index d0cff1d..c1e2e74 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -35,7 +35,7 @@ jobs: name: Build and push images runs-on: ubuntu-latest needs: changes - # if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} + if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} strategy: matrix: include: From 96ac9090e864a950889a749fb0e2f2b615c750ab Mon Sep 17 00:00:00 2001 From: sd109 Date: Wed, 30 Oct 2024 18:06:20 +0000 Subject: [PATCH 06/34] Update image build workflow paths --- .github/workflows/build-push-artifacts.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index c1e2e74..67c17ef 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -35,11 +35,11 @@ jobs: name: Build and push images runs-on: ubuntu-latest needs: changes - if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} + # if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} strategy: matrix: include: - - component: 
basic-chat + - component: text-chat - component: image-analysis steps: - name: Check out the repository @@ -56,7 +56,7 @@ jobs: id: image-meta uses: docker/metadata-action@v5 with: - images: ghcr.io/stackhpc/azimuth-llm-${{ matrix.component }} + images: ghcr.io/stackhpc/azimuth-llm-ui-${{ matrix.component }} # Produce the branch name or tag and the SHA as tags tags: | type=ref,event=branch @@ -67,7 +67,8 @@ jobs: uses: azimuth-cloud/github-actions/docker-multiarch-build-push@update-trivy-action with: cache-key: ${{ matrix.component }} - context: ./web-apps/${{ matrix.component }} + context: ./web-apps/ + file: ./web-apps/${{ matrix.component }}/Dockerfile platforms: linux/amd64,linux/arm64 push: true tags: ${{ steps.image-meta.outputs.tags }} From afc646928d37703453a22b76e977c87291852ae0 Mon Sep 17 00:00:00 2001 From: sd109 Date: Wed, 30 Oct 2024 18:19:01 +0000 Subject: [PATCH 07/34] Update appSettings and related comments --- chart/values.yaml | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/chart/values.yaml b/chart/values.yaml index 12db8b3..936cfbf 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -80,13 +80,21 @@ api: ui: # Toggles installation of the gradio web UI enabled: true - # The file from the UI config map to execute as the entrypoint to the frontend app - entrypoint: app.py - # The values to be written to settings.yml for parsing as frontend app setting - # (see example_app.py and config.py for example using pydantic-settings to configure app) + # Container image config + image: + repository: ghcr.io/stackhpc/azimuth-llm-chat-interface + version: 87a0342 + imagePullPolicy: + # The settings to be passed to the frontend web app. + # Format depends on the chosen UI image above. For each of the UIs + # included in the web-apps/ folder of this git repository there is a + # defaults.yml file (e.g. web-apps/text-chat/defaults.yml) listing all + # available configuration options. 
+ # FIXME: Figure out how to make JSON schema fit + # with different config options for each web app. appSettings: - hf_model_name: *model-name - hf_model_instruction: "You are a helpful AI assistant. Please response appropriately." + model_name: *model-name + model_instruction: "You are a helpful AI assistant. Please respond appropriately." # Use local system fonts by default to avoid GDPR issues # with Gradio's defaults fonts which require fetching from # the Google fonts API. To restore default Gradio theme @@ -98,11 +106,6 @@ ui: font_mono: - sans-serif - Arial - # Container image config - image: - repository: ghcr.io/stackhpc/azimuth-llm-chat-interface - version: 87a0342 - imagePullPolicy: # Service config service: name: web-app From 58c4dcb2a3926745c483756b31501ef5c6c4786a Mon Sep 17 00:00:00 2001 From: sd109 Date: Wed, 30 Oct 2024 21:08:02 +0000 Subject: [PATCH 08/34] Rename published docker images --- .github/workflows/build-push-artifacts.yml | 6 +++--- web-apps/{text-chat => chat}/Dockerfile | 2 +- web-apps/{text-chat => chat}/app.py | 0 web-apps/{text-chat => chat}/defaults.yml | 0 web-apps/{text-chat => chat}/gradio-client-test.py | 0 web-apps/{text-chat => chat}/requirements.txt | 2 +- web-apps/image-analysis/requirements.txt | 2 +- 7 files changed, 6 insertions(+), 6 deletions(-) rename web-apps/{text-chat => chat}/Dockerfile (94%) rename web-apps/{text-chat => chat}/app.py (100%) rename web-apps/{text-chat => chat}/defaults.yml (100%) rename web-apps/{text-chat => chat}/gradio-client-test.py (100%) rename web-apps/{text-chat => chat}/requirements.txt (84%) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index 67c17ef..ad8a592 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -35,11 +35,11 @@ jobs: name: Build and push images runs-on: ubuntu-latest needs: changes - # if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag'
}} + if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} strategy: matrix: include: - - component: text-chat + - component: chat - component: image-analysis steps: - name: Check out the repository @@ -56,7 +56,7 @@ jobs: id: image-meta uses: docker/metadata-action@v5 with: - images: ghcr.io/stackhpc/azimuth-llm-ui-${{ matrix.component }} + images: ghcr.io/stackhpc/azimuth-llm-${{ matrix.component }}-ui # Produce the branch name or tag and the SHA as tags tags: | type=ref,event=branch diff --git a/web-apps/text-chat/Dockerfile b/web-apps/chat/Dockerfile similarity index 94% rename from web-apps/text-chat/Dockerfile rename to web-apps/chat/Dockerfile index 294fecd..c963b29 100644 --- a/web-apps/text-chat/Dockerfile +++ b/web-apps/chat/Dockerfile @@ -1,6 +1,6 @@ FROM python:3.11-slim -ARG DIR=text-chat +ARG DIR=chat COPY $DIR/requirements.txt requirements.txt COPY utils utils diff --git a/web-apps/text-chat/app.py b/web-apps/chat/app.py similarity index 100% rename from web-apps/text-chat/app.py rename to web-apps/chat/app.py diff --git a/web-apps/text-chat/defaults.yml b/web-apps/chat/defaults.yml similarity index 100% rename from web-apps/text-chat/defaults.yml rename to web-apps/chat/defaults.yml diff --git a/web-apps/text-chat/gradio-client-test.py b/web-apps/chat/gradio-client-test.py similarity index 100% rename from web-apps/text-chat/gradio-client-test.py rename to web-apps/chat/gradio-client-test.py diff --git a/web-apps/text-chat/requirements.txt b/web-apps/chat/requirements.txt similarity index 84% rename from web-apps/text-chat/requirements.txt rename to web-apps/chat/requirements.txt index f37169c..a82255b 100644 --- a/web-apps/text-chat/requirements.txt +++ b/web-apps/chat/requirements.txt @@ -4,4 +4,4 @@ openai langchain langchain_openai pydantic --e ../utils +../utils diff --git a/web-apps/image-analysis/requirements.txt b/web-apps/image-analysis/requirements.txt index 9196eda..006c6a9 100644 --- 
a/web-apps/image-analysis/requirements.txt +++ b/web-apps/image-analysis/requirements.txt @@ -2,5 +2,5 @@ pillow requests gradio<5 gradio_client --e ../utils pydantic +../utils From a8c5a77274638d0eba0d7253445a489c6d73fda9 Mon Sep 17 00:00:00 2001 From: sd109 Date: Wed, 30 Oct 2024 21:39:59 +0000 Subject: [PATCH 09/34] Add chart test for image-analysis UI --- chart/ci/image-analysis-ui-values.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 chart/ci/image-analysis-ui-values.yaml diff --git a/chart/ci/image-analysis-ui-values.yaml b/chart/ci/image-analysis-ui-values.yaml new file mode 100644 index 0000000..43e4eed --- /dev/null +++ b/chart/ci/image-analysis-ui-values.yaml @@ -0,0 +1,8 @@ +api: + enabled: false +ui: + image: + repository: ghcr.io/stackhpc/azimuth-llm-image-analysis-ui + service: + zenith: + enabled: false From b462043f0a4267bb03f43d0ee5fd07bf9ae3d5b8 Mon Sep 17 00:00:00 2001 From: sd109 Date: Wed, 30 Oct 2024 21:40:18 +0000 Subject: [PATCH 10/34] Fixup failing chart tests --- .../ci/{web-apps-only-values.yaml => chat-ui-values.yaml} | 0 chart/values.schema.json | 7 +++---- chart/values.yaml | 5 ++--- 3 files changed, 5 insertions(+), 7 deletions(-) rename chart/ci/{web-apps-only-values.yaml => chat-ui-values.yaml} (100%) diff --git a/chart/ci/web-apps-only-values.yaml b/chart/ci/chat-ui-values.yaml similarity index 100% rename from chart/ci/web-apps-only-values.yaml rename to chart/ci/chat-ui-values.yaml diff --git a/chart/values.schema.json b/chart/values.schema.json index 8d20cf7..f6fb6a9 100644 --- a/chart/values.schema.json +++ b/chart/values.schema.json @@ -30,12 +30,12 @@ "appSettings": { "type": "object", "properties": { - "hf_model_name": { + "model_name": { "type": "string", "title": "Model Name", "description": "Model name supplied to the OpenAI client in frontend web app. Should match huggingface.model above." 
}, - "hf_model_instruction": { + "model_instruction": { "type": "string", "title": "Instruction", "description": "The initial system prompt (i.e. the hidden instruction) to use when generating responses.", @@ -94,8 +94,7 @@ } }, "required": [ - "hf_model_name", - "hf_model_instruction" + "model_name" ] } } diff --git a/chart/values.yaml b/chart/values.yaml index 936cfbf..ed66c5b 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -82,8 +82,8 @@ ui: enabled: true # Container image config image: - repository: ghcr.io/stackhpc/azimuth-llm-chat-interface - version: 87a0342 + repository: ghcr.io/stackhpc/azimuth-llm-chat-ui + version: 58c4dcb imagePullPolicy: # The settings to be passed to the frontend web app. # Format depends on the chosen UI image above. For each of the UIs @@ -94,7 +94,6 @@ ui: # with different config options for each web app. appSettings: model_name: *model-name - model_instruction: "You are a helpful AI assistant. Please response appropriately." # Use local system fonts by default to avoid GDPR issues # with Gradio's defaults fonts which require fetching from # the Google fonts API. To restore default Gradio theme From 9cd569439d536fe643ddada64c7e03c71344bca0 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 00:15:07 +0000 Subject: [PATCH 11/34] Update docs --- README.md | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 58b0812..98068a8 100644 --- a/README.md +++ b/README.md @@ -34,38 +34,36 @@ ui: enabled: false ``` -***Warning*** - Exposing the services in this way provides no authentication mechanism and anyone with access to the load balancer IPs will be able to query the language model. It is up to you to secure the running service in your own way. 
In contrast, when deploying via Azimuth, authentication is provided via the standard Azimuth Identity Provider mechanisms and the authenticated services are exposed via [Zenith](https://github.com/stackhpc/zenith). +> [!WARNING] Exposing the services in this way provides no authentication mechanism and anyone with access to the load balancer IPs will be able to query the language model. It is up to you to secure the running service as appropriate for your use case. In contrast, when deployed via Azimuth, authentication is provided via the standard Azimuth Identity Provider mechanisms and the authenticated services are exposed via [Zenith](https://github.com/stackhpc/zenith). -The UI can also optionally be exposed using a Kubernetes Ingress resource. See the `ui.ingress` section in `values.yml` for available config options. +Both the web-based interface and the backend OpenAI-compatible vLLM API server can also optionally be exposed using [Kubernetes Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/). See the `ingress` section in `values.yml` for available config options. ## Tested Models -The following is a non-exhaustive list of models which have been tested with this app: - [Llama 2 7B chat](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) - [AWQ Quantized Llama 2 70B](https://huggingface.co/TheBloke/Llama-2-70B-Chat-AWQ) - [Magicoder 6.7B](https://huggingface.co/ise-uiuc/Magicoder-S-DS-6.7B) - [Mistral 7B Instruct v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) - [WizardCoder Python 34B](https://huggingface.co/WizardLM/WizardCoder-Python-34B-V1.0) - [AWQ Quantized Mixtral 8x7B Instruct v0.1](https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-AWQ) +The application uses [vLLM](https://docs.vllm.ai/en/latest/index.html) for model serving, therefore any of the vLLM [supported models](https://docs.vllm.ai/en/latest/models/supported_models.html) should work.
Since vLLM pulls the model files directly from [HuggingFace](https://huggingface.co/models) it is likely that some other models will also be compatible with vLLM but mileage may vary between models and model architectures. If a model is incompatible with vLLM then the API pod will likely enter a `CrashLoopBackoff` state and any relevant error information will be found in the API pod logs. These logs can be viewed with -Due to the combination of [components](##Components) used in this app, some HuggingFace models may not work as expected (usually due to the way in which LangChain formats the prompt messages). Any errors when using a new model will appear in the logs for either the web-app pod or the backend API pod. Please open an issue if you would like explicit support for a specific model that is not in the above list. +``` +kubectl (-n ) logs deploy/-api +``` + +If you suspect that a given error is not caused by the upstream vLLM support but is instead a problem with this Helm chart then please [open an issue](https://github.com/stackhpc/azimuth-llm/issues). ## Monitoring -The LLM chart integrates with [kube-prometheus-stack](https://artifacthub.io/packages/helm/prometheus-community/kube-prometheus-stack) by creating a `ServiceMonitor` resource and installing a custom Grafana dashboard as a Kubernetes `ConfigMap`. If the target cluster has an existing `kube-prometheus-stack` deployment which is appropriately configured to watch all namespaces for new Grafana dashboards, the custom LLM dashboard provided here will automatically picked up by Grafana. It will appear in the Grafana dashboard list with the name 'LLM dashboard'. +The LLM chart integrates with [kube-prometheus-stack](https://artifacthub.io/packages/helm/prometheus-community/kube-prometheus-stack) by creating a `ServiceMonitor` resource and installing two custom Grafana dashboards as Kubernetes `ConfigMap`s.
If the target cluster has an existing `kube-prometheus-stack` deployment which is appropriately configured to watch all namespaces for new Grafana dashboards, the LLM dashboards will automatically appear in Grafana's dashboard list. To disable the monitoring integrations, set the `api.monitoring.enabled` value to `false`. ## Components The Helm chart consists of the following components: -- A backend web API which runs [vLLM](https://github.com/vllm-project/vllm)'s [OpenAI compatible web server](https://docs.vllm.ai/en/latest/getting_started/quickstart.html#openai-compatible-server). +- A backend web API which runs [vLLM](https://github.com/vllm-project/vllm)'s [OpenAI compatible web server](https://docs.vllm.ai/en/stable/getting_started/quickstart.html#openai-compatible-server). -- A frontend web-app built using [Gradio](https://www.gradio.app) and [LangChain](https://www.langchain.com). The web app source code can be found in `chart/web-app` and gets written to a ConfigMap during the chart build and is then mounted into the UI pod and executed as the entry point for the UI docker image (built from `images/ui-base/Dockerfile`). +- A choice of frontend web-apps built using [Gradio](https://www.gradio.app) (see [web-apps](./web-apps/)). Each web interface is available as a pre-built container image [hosted on ghcr.io](https://github.com/orgs/stackhpc/packages?repo_name=azimuth-llm) and can be configured for each Helm release by changing the `ui.image` section of the chart values. -- A [stakater/Reloader](https://github.com/stakater/Reloader) instance which monitors the web-app ConfigMap for changes and restarts the frontend when the app code changes (i.e. whenever the Helm values are updated).
+ + + From c85bffdda3acbc7cb8f973e2a32f250914e09557 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 12:35:09 +0000 Subject: [PATCH 12/34] Refactor Helm charts to allow a different chart schema per web app --- .github/workflows/build-push-artifacts.yml | 3 +- .gitignore | 4 +- chart/ci/image-analysis-ui-values.yaml | 8 -- chart/values.schema.json | 123 ----------------- charts/azimuth-chat/Chart.yaml | 22 +++ charts/azimuth-chat/azimuth-ui.schema.yaml | 34 +++++ charts/azimuth-chat/ci/chat-ui-values.yaml | 7 + charts/azimuth-chat/values.schema.json | 128 ++++++++++++++++++ charts/azimuth-chat/values.yaml | 9 ++ charts/azimuth-image-analysis/Chart.yaml | 22 +++ .../azimuth-ui.schema.yaml | 26 ++++ .../ci/image-analysis-ui-values.yaml | 7 + .../azimuth-image-analysis/values.schema.json | 111 +++++++++++++++ charts/azimuth-image-analysis/values.yaml | 8 ++ {chart => charts/azimuth-llm}/.helmignore | 0 {chart => charts/azimuth-llm}/Chart.yaml | 12 +- .../azimuth-llm}/azimuth-ui.schema.yaml | 1 - .../azimuth-llm/ci/no-api-values.yaml | 0 .../azimuth-llm}/templates/NOTES.txt | 0 .../azimuth-llm}/templates/_helpers.tpl | 0 .../config-map-grafana-dashboard-details.yml | 0 .../config-map-grafana-dashboard-summary.yml | 20 +-- .../azimuth-llm}/templates/api/deployment.yml | 0 .../azimuth-llm}/templates/api/ingress.yml | 0 .../templates/api/service-monitor.yml | 0 .../azimuth-llm}/templates/api/service.yml | 0 .../templates/api/zenith-client.yml | 0 .../templates/api/zenith-reservation.yml | 0 .../templates/test/end-to-end.yml | 0 .../azimuth-llm}/templates/test/web-app.yml | 0 .../templates/ui/app-config-map.yml | 0 .../azimuth-llm}/templates/ui/deployment.yml | 4 + .../azimuth-llm}/templates/ui/ingress.yml | 0 .../azimuth-llm}/templates/ui/service.yml | 0 .../templates/ui/ui-zenith-client.yml | 0 .../templates/ui/ui-zenith-reservation.yml | 0 {chart => charts/azimuth-llm}/values.yaml | 7 +- ct.yaml | 8 +- 38 files changed, 410 insertions(+), 154 deletions(-) 
delete mode 100644 chart/ci/image-analysis-ui-values.yaml delete mode 100644 chart/values.schema.json create mode 100644 charts/azimuth-chat/Chart.yaml create mode 100644 charts/azimuth-chat/azimuth-ui.schema.yaml create mode 100644 charts/azimuth-chat/ci/chat-ui-values.yaml create mode 100644 charts/azimuth-chat/values.schema.json create mode 100644 charts/azimuth-chat/values.yaml create mode 100644 charts/azimuth-image-analysis/Chart.yaml create mode 100644 charts/azimuth-image-analysis/azimuth-ui.schema.yaml create mode 100644 charts/azimuth-image-analysis/ci/image-analysis-ui-values.yaml create mode 100644 charts/azimuth-image-analysis/values.schema.json create mode 100644 charts/azimuth-image-analysis/values.yaml rename {chart => charts/azimuth-llm}/.helmignore (100%) rename {chart => charts/azimuth-llm}/Chart.yaml (89%) rename {chart => charts/azimuth-llm}/azimuth-ui.schema.yaml (98%) rename chart/ci/chat-ui-values.yaml => charts/azimuth-llm/ci/no-api-values.yaml (100%) rename {chart => charts/azimuth-llm}/templates/NOTES.txt (100%) rename {chart => charts/azimuth-llm}/templates/_helpers.tpl (100%) rename {chart => charts/azimuth-llm}/templates/api/config-map-grafana-dashboard-details.yml (100%) rename {chart => charts/azimuth-llm}/templates/api/config-map-grafana-dashboard-summary.yml (98%) rename {chart => charts/azimuth-llm}/templates/api/deployment.yml (100%) rename {chart => charts/azimuth-llm}/templates/api/ingress.yml (100%) rename {chart => charts/azimuth-llm}/templates/api/service-monitor.yml (100%) rename {chart => charts/azimuth-llm}/templates/api/service.yml (100%) rename {chart => charts/azimuth-llm}/templates/api/zenith-client.yml (100%) rename {chart => charts/azimuth-llm}/templates/api/zenith-reservation.yml (100%) rename {chart => charts/azimuth-llm}/templates/test/end-to-end.yml (100%) rename {chart => charts/azimuth-llm}/templates/test/web-app.yml (100%) rename {chart => charts/azimuth-llm}/templates/ui/app-config-map.yml (100%) rename 
{chart => charts/azimuth-llm}/templates/ui/deployment.yml (83%) rename {chart => charts/azimuth-llm}/templates/ui/ingress.yml (100%) rename {chart => charts/azimuth-llm}/templates/ui/service.yml (100%) rename {chart => charts/azimuth-llm}/templates/ui/ui-zenith-client.yml (100%) rename {chart => charts/azimuth-llm}/templates/ui/ui-zenith-reservation.yml (100%) rename {chart => charts/azimuth-llm}/values.yaml (97%) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index ad8a592..fa2cca4 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -28,7 +28,7 @@ jobs: images: - 'web-apps/**' chart: - - 'chart/**' + - 'charts/**' # Job to build container images build_push_images: @@ -96,6 +96,7 @@ jobs: - name: Publish Helm charts uses: azimuth-cloud/github-actions/helm-publish@master with: + directory: charts token: ${{ secrets.GITHUB_TOKEN }} version: ${{ steps.semver.outputs.version }} app-version: ${{ steps.semver.outputs.short-sha }} diff --git a/.gitignore b/.gitignore index 9a57ecc..b6862d1 100644 --- a/.gitignore +++ b/.gitignore @@ -11,8 +11,8 @@ test-values.y[a]ml **venv*/ # Helm chart stuff -chart/Chart.lock -chart/charts +charts/*/Chart.lock +charts/*/charts # Python stuff **/build/ diff --git a/chart/ci/image-analysis-ui-values.yaml b/chart/ci/image-analysis-ui-values.yaml deleted file mode 100644 index 43e4eed..0000000 --- a/chart/ci/image-analysis-ui-values.yaml +++ /dev/null @@ -1,8 +0,0 @@ -api: - enabled: false -ui: - image: - repository: ghcr.io/stackhpc/azimuth-llm-image-analysis-ui - service: - zenith: - enabled: false diff --git a/chart/values.schema.json b/chart/values.schema.json deleted file mode 100644 index f6fb6a9..0000000 --- a/chart/values.schema.json +++ /dev/null @@ -1,123 +0,0 @@ -{ - "$schema": "http://json-schema.org/schema#", - "type": "object", - "properties": { - "huggingface": { - "type": "object", - "properties": { - "model": { - 
"type": "string", - "title": "Model", - "description": "The [HuggingFace model](https://huggingface.co/models) to deploy (see [here](https://github.com/stackhpc/azimuth-llm?tab=readme-ov-file#tested-models) for a list of tested models).", - "default": "microsoft/Phi-3.5-mini-instruct" - }, - "token": { - "type": [ - "string", - "null" - ], - "title": "Access Token", - "description": "A HuggingFace [access token](https://huggingface.co/docs/hub/security-tokens). Required for [gated models](https://huggingface.co/docs/hub/en/models-gated) (e.g. Llama 3)." - } - }, - "required": [ - "model" - ] - }, - "ui": { - "type": "object", - "properties": { - "appSettings": { - "type": "object", - "properties": { - "model_name": { - "type": "string", - "title": "Model Name", - "description": "Model name supplied to the OpenAI client in frontend web app. Should match huggingface.model above." - }, - "model_instruction": { - "type": "string", - "title": "Instruction", - "description": "The initial system prompt (i.e. the hidden instruction) to use when generating responses.", - "default": "You are a helpful AI assistant. Please respond appropriately." 
- }, - "page_title": { - "type": "string", - "title": "Page Title", - "description": "The title to display at the top of the chat interface.", - "default": "Large Language Model" - }, - "llm_max_tokens": { - "type": "integer", - "title": "Max Tokens", - "description": "The maximum number of new [tokens](https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens) to generate for each LLM responses.", - "default": 1000 - }, - "llm_temperature": { - "type": "number", - "title": "LLM Temperature", - "description": "The [temperature](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature) value to use when generating LLM responses.", - "default": 0, - "minimum": 0, - "maximum": 2 - }, - "llm_top_p": { - "type": "number", - "title": "LLM Top P", - "description": "The [top p](https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p) value to use when generating LLM responses.", - "default": 1, - "exclusiveMinimum": 0, - "maximum": 1 - }, - "llm_top_k": { - "type": "integer", - "title": "LLM Top K", - "description": "The [top k](https://docs.vllm.ai/en/stable/dev/sampling_params.html) value to use when generating LLM responses (must be an integer).", - "default": -1, - "minimum": -1 - }, - "llm_presence_penalty": { - "type": "number", - "title": "LLM Presence Penalty", - "description": "The [presence penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-presence_penalty) to use when generating LLM responses.", - "default": 0, - "minimum": -2, - "maximum": 2 - }, - "llm_frequency_penalty": { - "type": "number", - "title": "LLM Frequency Penalty", - "description": "The [frequency_penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-frequency_penalty) to use when generating LLM responses.", - "default": 0, - "minimum": -2, - "maximum": 2 - } - }, - "required": [ - "model_name" - ] - } - } - }, - "api": { - "type": "object", - "properties": { - 
"modelMaxContextLength": { - "title": "Model Context Length", - "description": "An override for the maximum context length to allow, if the model's default is not suitable." - }, - "image": { - "type": "object", - "properties": { - "version": { - "type": "string", - "title": "Backend vLLM version", - "description": "The vLLM version to use as a backend. Must be a version tag from [this list](https://github.com/vllm-project/vllm/tags)", - "default": "v0.6.3" - } - } - } - } - } - } -} diff --git a/charts/azimuth-chat/Chart.yaml b/charts/azimuth-chat/Chart.yaml new file mode 100644 index 0000000..b3e9fa0 --- /dev/null +++ b/charts/azimuth-chat/Chart.yaml @@ -0,0 +1,22 @@ +apiVersion: v2 +name: azimuth-llm-chat +description: HuggingFace vision model serving along with a simple web interface. +maintainers: + - name: "Scott Davidson" + email: scott@stackhpc.com + +type: application + +version: 0.1.0 + +appVersion: "0.1.0" + +icon: https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg + +annotations: + azimuth.stackhpc.com/label: HuggingFace Image Analysis + +dependencies: + - name: azimuth-llm + version: 0.1.0 + repository: "file://../azimuth-llm/" diff --git a/charts/azimuth-chat/azimuth-ui.schema.yaml b/charts/azimuth-chat/azimuth-ui.schema.yaml new file mode 100644 index 0000000..061a8ce --- /dev/null +++ b/charts/azimuth-chat/azimuth-ui.schema.yaml @@ -0,0 +1,34 @@ +controls: + /azimuth-llm/huggingface/model: + type: TextControl + required: true + /azimuth-llm/huggingface/token: + type: TextControl + secret: true + # Use mirror to mimic yaml anchor in base Helm chart + /azimuth-llm/ui/appSettings/hf_model_name: + type: MirrorControl + path: /azimuth-llm/huggingface/model + visuallyHidden: true + # Azimuth UI doesn't handle json type ["integer","null"] + # properly so we allow any type in JSON schema then + # constrain to (optional) integer here. 
+ /azimuth-llm/api/modelMaxContextLength: + type: IntegerControl + minimum: 100 + step: 100 + required: false + +sortOrder: + - /azimuth-llm/huggingface/model + - /azimuth-llm/huggingface/token + - /azimuth-llm/ui/appSettings/hf_model_instruction + - /azimuth-llm/ui/appSettings/page_title + - /azimuth-llm/api/image/version + - /azimuth-llm/ui/appSettings/llm_temperature + - /azimuth-llm/ui/appSettings/llm_max_tokens + - /azimuth-llm/ui/appSettings/llm_frequency_penalty + - /azimuth-llm/ui/appSettings/llm_presence_penalty + - /azimuth-llm/ui/appSettings/llm_top_p + - /azimuth-llm/ui/appSettings/llm_top_k + - /azimuth-llm/api/modelMaxContextLength diff --git a/charts/azimuth-chat/ci/chat-ui-values.yaml b/charts/azimuth-chat/ci/chat-ui-values.yaml new file mode 100644 index 0000000..bf30ede --- /dev/null +++ b/charts/azimuth-chat/ci/chat-ui-values.yaml @@ -0,0 +1,7 @@ +azimuth-llm: + api: + enabled: false + ui: + service: + zenith: + enabled: false diff --git a/charts/azimuth-chat/values.schema.json b/charts/azimuth-chat/values.schema.json new file mode 100644 index 0000000..96e5882 --- /dev/null +++ b/charts/azimuth-chat/values.schema.json @@ -0,0 +1,128 @@ +{ + "type": "object", + "properties": { + "azimuth-llm": { + "type": "object", + "properties": { + "huggingface": { + "type": "object", + "properties": { + "model": { + "type": "string", + "title": "Model", + "description": "The [HuggingFace model](https://huggingface.co/models) to deploy (see [here](https://github.com/stackhpc/azimuth-llm?tab=readme-ov-file#tested-models) for a list of tested models).", + "default": "microsoft/Phi-3.5-mini-instruct" + }, + "token": { + "type": [ + "string", + "null" + ], + "title": "Access Token", + "description": "A HuggingFace [access token](https://huggingface.co/docs/hub/security-tokens). Required for [gated models](https://huggingface.co/docs/hub/en/models-gated) (e.g. Llama 3)." 
+ } + }, + "required": [ + "model" + ] + }, + "api": { + "type": "object", + "properties": { + "modelMaxContextLength": { + "title": "Model Context Length", + "description": "An override for the maximum context length to allow, if the model's default is not suitable." + }, + "image": { + "type": "object", + "properties": { + "version": { + "type": "string", + "title": "Backend vLLM version", + "description": "The vLLM version to use as a backend. Must be a version tag from [this list](https://github.com/vllm-project/vllm/tags)", + "default": "v0.6.3" + } + } + } + } + }, + "ui": { + "type": "object", + "properties": { + "appSettings": { + "type": "object", + "properties": { + "model_name": { + "type": "string", + "title": "Model Name", + "description": "Model name supplied to the OpenAI client in frontend web app. Should match huggingface.model above." + }, + "model_instruction": { + "type": "string", + "title": "Instruction", + "description": "The initial system prompt (i.e. the hidden instruction) to use when generating responses.", + "default": "You are a helpful AI assistant. Please respond appropriately." 
+ }, + "page_title": { + "type": "string", + "title": "Page Title", + "description": "The title to display at the top of the chat interface.", + "default": "Large Language Model" + }, + "llm_max_tokens": { + "type": "integer", + "title": "Max Tokens", + "description": "The maximum number of new [tokens](https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens) to generate for each LLM responses.", + "default": 1000 + }, + "llm_temperature": { + "type": "number", + "title": "LLM Temperature", + "description": "The [temperature](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature) value to use when generating LLM responses.", + "default": 0, + "minimum": 0, + "maximum": 2 + }, + "llm_top_p": { + "type": "number", + "title": "LLM Top P", + "description": "The [top p](https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p) value to use when generating LLM responses.", + "default": 1, + "exclusiveMinimum": 0, + "maximum": 1 + }, + "llm_top_k": { + "type": "integer", + "title": "LLM Top K", + "description": "The [top k](https://docs.vllm.ai/en/stable/dev/sampling_params.html) value to use when generating LLM responses (must be an integer).", + "default": -1, + "minimum": -1 + }, + "llm_presence_penalty": { + "type": "number", + "title": "LLM Presence Penalty", + "description": "The [presence penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-presence_penalty) to use when generating LLM responses.", + "default": 0, + "minimum": -2, + "maximum": 2 + }, + "llm_frequency_penalty": { + "type": "number", + "title": "LLM Frequency Penalty", + "description": "The [frequency_penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-frequency_penalty) to use when generating LLM responses.", + "default": 0, + "minimum": -2, + "maximum": 2 + } + }, + "required": [ + "model_name", + "model_instruction" + ] + } + } + } + } + } + } +} diff --git 
a/charts/azimuth-chat/values.yaml b/charts/azimuth-chat/values.yaml new file mode 100644 index 0000000..9a17317 --- /dev/null +++ b/charts/azimuth-chat/values.yaml @@ -0,0 +1,9 @@ +azimuth-llm: + huggingface: + model: &model-name microsoft/Phi-3.5-mini-instruct + ui: + image: + repository: ghcr.io/stackhpc/azimuth-llm-chat-ui + appSettings: + model_name: *model-name + model_instruction: "You are a helpful AI assistant; please respond appropriately." diff --git a/charts/azimuth-image-analysis/Chart.yaml b/charts/azimuth-image-analysis/Chart.yaml new file mode 100644 index 0000000..c2681ca --- /dev/null +++ b/charts/azimuth-image-analysis/Chart.yaml @@ -0,0 +1,22 @@ +apiVersion: v2 +name: azimuth-llm-image-analysis +description: HuggingFace vision model serving along with a simple web interface. +maintainers: + - name: "Scott Davidson" + email: scott@stackhpc.com + +type: application + +version: 0.1.0 + +appVersion: "0.1.0" + +icon: https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg + +annotations: + azimuth.stackhpc.com/label: HuggingFace Image Analysis + +dependencies: + - name: azimuth-llm + version: 0.1.0 + repository: "file://../azimuth-llm/" diff --git a/charts/azimuth-image-analysis/azimuth-ui.schema.yaml b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml new file mode 100644 index 0000000..5c4799c --- /dev/null +++ b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml @@ -0,0 +1,26 @@ +controls: + /azimuth-llm/huggingface/model: + type: TextControl + required: true + /azimuth-llm/huggingface/token: + type: TextControl + secret: true + # Use mirror to mimic yaml anchor in base Helm chart + /azimuth-llm/ui/appSettings/hf_model_name: + type: MirrorControl + path: /azimuth-llm/huggingface/model + visuallyHidden: true + +sortOrder: + - /huggingface/model + - /huggingface/token + - /ui/appSettings/hf_model_instruction + - /ui/appSettings/page_title + - /api/image/version + - /ui/appSettings/llm_temperature + - 
/ui/appSettings/llm_max_tokens + - /ui/appSettings/llm_frequency_penalty + - /ui/appSettings/llm_presence_penalty + - /ui/appSettings/llm_top_p + - /ui/appSettings/llm_top_k + - /api/modelMaxContextLength diff --git a/charts/azimuth-image-analysis/ci/image-analysis-ui-values.yaml b/charts/azimuth-image-analysis/ci/image-analysis-ui-values.yaml new file mode 100644 index 0000000..bf30ede --- /dev/null +++ b/charts/azimuth-image-analysis/ci/image-analysis-ui-values.yaml @@ -0,0 +1,7 @@ +azimuth-llm: + api: + enabled: false + ui: + service: + zenith: + enabled: false diff --git a/charts/azimuth-image-analysis/values.schema.json b/charts/azimuth-image-analysis/values.schema.json new file mode 100644 index 0000000..2ddda05 --- /dev/null +++ b/charts/azimuth-image-analysis/values.schema.json @@ -0,0 +1,111 @@ +{ + "type": "object", + "properties": { + "azimuth-llm": { + "type": "object", + "properties": { + "huggingface": { + "type": "object", + "properties": { + "model": { + "type": "string", + "title": "Model", + "description": "The [HuggingFace model](https://huggingface.co/models) to deploy (see [here](https://github.com/stackhpc/azimuth-llm?tab=readme-ov-file#tested-models) for a list of tested models).", + "default": "microsoft/Phi-3.5-vision-instruct" + }, + "token": { + "type": [ + "string", + "null" + ], + "title": "Access Token", + "description": "A HuggingFace [access token](https://huggingface.co/docs/hub/security-tokens). Required for [gated models](https://huggingface.co/docs/hub/en/models-gated) (e.g. Llama 3)." + } + }, + "required": [ + "model" + ] + }, + "api": { + "type": "object", + "properties": { + "image": { + "type": "object", + "properties": { + "version": { + "type": "string", + "title": "Backend vLLM version", + "description": "The vLLM version to use as a backend. 
Must be a version tag from [this list](https://github.com/vllm-project/vllm/tags)", + "default": "v0.6.3" + } + } + } + } + }, + "ui": { + "type": "object", + "properties": { + "appSettings": { + "type": "object", + "properties": { + "model_name": { + "type": "string", + "title": "Model Name", + "description": "Model name supplied to the OpenAI client in frontend web app. Should match huggingface.model above." + }, + "llm_max_tokens": { + "type": "integer", + "title": "Max Tokens", + "description": "The maximum number of new [tokens](https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens) to generate for each LLM responses.", + "default": 1000 + }, + "llm_temperature": { + "type": "number", + "title": "LLM Temperature", + "description": "The [temperature](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature) value to use when generating LLM responses.", + "default": 0, + "minimum": 0, + "maximum": 2 + }, + "llm_top_p": { + "type": "number", + "title": "LLM Top P", + "description": "The [top p](https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p) value to use when generating LLM responses.", + "default": 1, + "exclusiveMinimum": 0, + "maximum": 1 + }, + "llm_top_k": { + "type": "integer", + "title": "LLM Top K", + "description": "The [top k](https://docs.vllm.ai/en/stable/dev/sampling_params.html) value to use when generating LLM responses (must be an integer).", + "default": -1, + "minimum": -1 + }, + "llm_presence_penalty": { + "type": "number", + "title": "LLM Presence Penalty", + "description": "The [presence penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-presence_penalty) to use when generating LLM responses.", + "default": 0, + "minimum": -2, + "maximum": 2 + }, + "llm_frequency_penalty": { + "type": "number", + "title": "LLM Frequency Penalty", + "description": "The 
[frequency_penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-frequency_penalty) to use when generating LLM responses.", + "default": 0, + "minimum": -2, + "maximum": 2 + } + }, + "required": [ + "model_name" + ] + } + } + } + } + } + } +} diff --git a/charts/azimuth-image-analysis/values.yaml b/charts/azimuth-image-analysis/values.yaml new file mode 100644 index 0000000..c5a770c --- /dev/null +++ b/charts/azimuth-image-analysis/values.yaml @@ -0,0 +1,8 @@ +azimuth-llm: + huggingface: + model: &model-name microsoft/Phi-3.5-vision-instruct + ui: + image: + repository: ghcr.io/stackhpc/azimuth-llm-image-analysis-ui + appSettings: + model_name: *model-name diff --git a/chart/.helmignore b/charts/azimuth-llm/.helmignore similarity index 100% rename from chart/.helmignore rename to charts/azimuth-llm/.helmignore diff --git a/chart/Chart.yaml b/charts/azimuth-llm/Chart.yaml similarity index 89% rename from chart/Chart.yaml rename to charts/azimuth-llm/Chart.yaml index a6542df..637db49 100644 --- a/chart/Chart.yaml +++ b/charts/azimuth-llm/Chart.yaml @@ -18,7 +18,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.2.0 +version: 0.1.0 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
Versions are not expected to @@ -31,8 +31,8 @@ icon: https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-l annotations: azimuth.stackhpc.com/label: HuggingFace LLM -dependencies: - - name: reloader - version: 1.0.63 - repository: https://stakater.github.io/stakater-charts - condition: ui.enabled +# dependencies: +# - name: reloader +# version: 1.0.63 +# repository: https://stakater.github.io/stakater-charts +# condition: ui.enabled diff --git a/chart/azimuth-ui.schema.yaml b/charts/azimuth-llm/azimuth-ui.schema.yaml similarity index 98% rename from chart/azimuth-ui.schema.yaml rename to charts/azimuth-llm/azimuth-ui.schema.yaml index de283f1..a633139 100644 --- a/chart/azimuth-ui.schema.yaml +++ b/charts/azimuth-llm/azimuth-ui.schema.yaml @@ -16,7 +16,6 @@ controls: /api/modelMaxContextLength: type: IntegerControl minimum: 100 - step: 100 required: false sortOrder: diff --git a/chart/ci/chat-ui-values.yaml b/charts/azimuth-llm/ci/no-api-values.yaml similarity index 100% rename from chart/ci/chat-ui-values.yaml rename to charts/azimuth-llm/ci/no-api-values.yaml diff --git a/chart/templates/NOTES.txt b/charts/azimuth-llm/templates/NOTES.txt similarity index 100% rename from chart/templates/NOTES.txt rename to charts/azimuth-llm/templates/NOTES.txt diff --git a/chart/templates/_helpers.tpl b/charts/azimuth-llm/templates/_helpers.tpl similarity index 100% rename from chart/templates/_helpers.tpl rename to charts/azimuth-llm/templates/_helpers.tpl diff --git a/chart/templates/api/config-map-grafana-dashboard-details.yml b/charts/azimuth-llm/templates/api/config-map-grafana-dashboard-details.yml similarity index 100% rename from chart/templates/api/config-map-grafana-dashboard-details.yml rename to charts/azimuth-llm/templates/api/config-map-grafana-dashboard-details.yml diff --git a/chart/templates/api/config-map-grafana-dashboard-summary.yml b/charts/azimuth-llm/templates/api/config-map-grafana-dashboard-summary.yml similarity index 98% rename from 
chart/templates/api/config-map-grafana-dashboard-summary.yml rename to charts/azimuth-llm/templates/api/config-map-grafana-dashboard-summary.yml index 9a2002e..672d862 100644 --- a/chart/templates/api/config-map-grafana-dashboard-summary.yml +++ b/charts/azimuth-llm/templates/api/config-map-grafana-dashboard-summary.yml @@ -184,7 +184,8 @@ data: "value": 80 } ] - } + }, + "unit": "locale" }, "overrides": [] }, @@ -279,7 +280,8 @@ data: "value": 80 } ] - } + }, + "unit": "locale" }, "overrides": [] }, @@ -378,7 +380,8 @@ data: "value": 80 } ] - } + }, + "unit": "locale" }, "overrides": [] }, @@ -477,7 +480,8 @@ data: "value": 80 } ] - } + }, + "unit": "locale" }, "overrides": [] }, @@ -518,7 +522,7 @@ data: "useBackend": false } ], - "title": "Tokens Generated (total)", + "title": "Generated Tokens (total)", "type": "timeseries" }, { @@ -814,14 +818,14 @@ data: "list": [] }, "time": { - "from": "now-120d", + "from": "now-90d", "to": "now" }, "timepicker": {}, "timezone": "", - "title": "vLLM Dashboard - Summary", + "title": "Scott test 1", "uid": "ee0cbu8l3b400dasdasfas", - "version": 1, + "version": 5, "weekStart": "" } {{- end -}} diff --git a/chart/templates/api/deployment.yml b/charts/azimuth-llm/templates/api/deployment.yml similarity index 100% rename from chart/templates/api/deployment.yml rename to charts/azimuth-llm/templates/api/deployment.yml diff --git a/chart/templates/api/ingress.yml b/charts/azimuth-llm/templates/api/ingress.yml similarity index 100% rename from chart/templates/api/ingress.yml rename to charts/azimuth-llm/templates/api/ingress.yml diff --git a/chart/templates/api/service-monitor.yml b/charts/azimuth-llm/templates/api/service-monitor.yml similarity index 100% rename from chart/templates/api/service-monitor.yml rename to charts/azimuth-llm/templates/api/service-monitor.yml diff --git a/chart/templates/api/service.yml b/charts/azimuth-llm/templates/api/service.yml similarity index 100% rename from chart/templates/api/service.yml rename 
to charts/azimuth-llm/templates/api/service.yml diff --git a/chart/templates/api/zenith-client.yml b/charts/azimuth-llm/templates/api/zenith-client.yml similarity index 100% rename from chart/templates/api/zenith-client.yml rename to charts/azimuth-llm/templates/api/zenith-client.yml diff --git a/chart/templates/api/zenith-reservation.yml b/charts/azimuth-llm/templates/api/zenith-reservation.yml similarity index 100% rename from chart/templates/api/zenith-reservation.yml rename to charts/azimuth-llm/templates/api/zenith-reservation.yml diff --git a/chart/templates/test/end-to-end.yml b/charts/azimuth-llm/templates/test/end-to-end.yml similarity index 100% rename from chart/templates/test/end-to-end.yml rename to charts/azimuth-llm/templates/test/end-to-end.yml diff --git a/chart/templates/test/web-app.yml b/charts/azimuth-llm/templates/test/web-app.yml similarity index 100% rename from chart/templates/test/web-app.yml rename to charts/azimuth-llm/templates/test/web-app.yml diff --git a/chart/templates/ui/app-config-map.yml b/charts/azimuth-llm/templates/ui/app-config-map.yml similarity index 100% rename from chart/templates/ui/app-config-map.yml rename to charts/azimuth-llm/templates/ui/app-config-map.yml diff --git a/chart/templates/ui/deployment.yml b/charts/azimuth-llm/templates/ui/deployment.yml similarity index 83% rename from chart/templates/ui/deployment.yml rename to charts/azimuth-llm/templates/ui/deployment.yml index aa52e02..bed4167 100644 --- a/chart/templates/ui/deployment.yml +++ b/charts/azimuth-llm/templates/ui/deployment.yml @@ -19,6 +19,10 @@ spec: metadata: labels: {{- include "azimuth-llm.ui-selectorLabels" . | nindent 8 }} + # Restart deployment when settings config map changes + # https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments + annotations: + checksum/config: {{ include (print $.Template.BasePath "/ui/app-config-map.yml") . 
| sha256sum }} spec: containers: - name: {{ .Release.Name }}-ui diff --git a/chart/templates/ui/ingress.yml b/charts/azimuth-llm/templates/ui/ingress.yml similarity index 100% rename from chart/templates/ui/ingress.yml rename to charts/azimuth-llm/templates/ui/ingress.yml diff --git a/chart/templates/ui/service.yml b/charts/azimuth-llm/templates/ui/service.yml similarity index 100% rename from chart/templates/ui/service.yml rename to charts/azimuth-llm/templates/ui/service.yml diff --git a/chart/templates/ui/ui-zenith-client.yml b/charts/azimuth-llm/templates/ui/ui-zenith-client.yml similarity index 100% rename from chart/templates/ui/ui-zenith-client.yml rename to charts/azimuth-llm/templates/ui/ui-zenith-client.yml diff --git a/chart/templates/ui/ui-zenith-reservation.yml b/charts/azimuth-llm/templates/ui/ui-zenith-reservation.yml similarity index 100% rename from chart/templates/ui/ui-zenith-reservation.yml rename to charts/azimuth-llm/templates/ui/ui-zenith-reservation.yml diff --git a/chart/values.yaml b/charts/azimuth-llm/values.yaml similarity index 97% rename from chart/values.yaml rename to charts/azimuth-llm/values.yaml index ed66c5b..116385b 100644 --- a/chart/values.yaml +++ b/charts/azimuth-llm/values.yaml @@ -90,8 +90,6 @@ ui: # included in the web-apps/ folder of this git repository there is a # defaults.yml file (e.g. web-apps/text-chat/defaults.yml) listing all # available configuration options. - # FIXME: Figure out how to make JSON schema fit - # with different config options for each web app. appSettings: model_name: *model-name # Use local system fonts by default to avoid GDPR issues @@ -151,5 +149,6 @@ ingress: # Annotations to apply to the ingress resource # e.g. 
for cert-manager integration annotations: -reloader: - watchGlobally: false + +# reloader: +# watchGlobally: false diff --git a/ct.yaml b/ct.yaml index 866e08c..f5fada9 100644 --- a/ct.yaml +++ b/ct.yaml @@ -1,2 +1,8 @@ +# Complains about invalid maintainer URLs validate-maintainers: false -charts: chart/ +# Skip version bump detection and lint all charts +# since we're using the azimuth-cloud Helm chart publish +# workflow which doesn't use Chart.yaml's version key +all: true +# Split output to make it look nice in GitHub Actions tab +github-groups: true From 40f9b33c38674e08a20e21172b64c3ee70dc32b5 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 12:36:07 +0000 Subject: [PATCH 13/34] Mount local overrides as volume for testing --- web-apps/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web-apps/run.sh b/web-apps/run.sh index e6e20b5..877bf5d 100755 --- a/web-apps/run.sh +++ b/web-apps/run.sh @@ -18,4 +18,4 @@ else echo "Found local $IMAGE_TAG docker image" fi -docker run --rm -p 7860:7860 $IMAGE_TAG +docker run --rm -v ./$1/overrides.yml:/etc/web-app/overrides.yml -p 7860:7860 $IMAGE_TAG From 0f6882c1739da6b9e63dcf3b6ecfcc7cf1fcaa24 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 12:36:47 +0000 Subject: [PATCH 14/34] Clean up and formatting --- web-apps/chat/app.py | 79 -------------------------------------------- 1 file changed, 79 deletions(-) diff --git a/web-apps/chat/app.py b/web-apps/chat/app.py index dbfae8f..8894fef 100644 --- a/web-apps/chat/app.py +++ b/web-apps/chat/app.py @@ -32,44 +32,6 @@ class AppSettings(BaseModel): model_config = ConfigDict(protected_namespaces=(), extra="forbid") -# class AppSettings(BaseModel): -# hf_model_name: str = Field( -# description="The model to use when constructing the LLM Chat client. 
This should match the model name running on the vLLM backend", -# ) -# backend_url: HttpUrl = Field( -# description="The address of the OpenAI compatible API server (either in-cluster or externally hosted)" -# ) -# page_title: str = Field(default="Large Language Model") -# page_description: Optional[str] = Field(default=None) -# hf_model_instruction: str = Field( -# default="You are a helpful and cheerful AI assistant. Please respond appropriately." -# ) - -# # Model settings - -# # For available parameters, see https://docs.vllm.ai/en/latest/dev/sampling_params.html -# # which is based on https://platform.openai.com/docs/api-reference/completions/create -# llm_max_tokens: int = Field(default=500) -# llm_temperature: float = Field(default=0) -# llm_top_p: float = Field(default=1) -# llm_top_k: float = Field(default=-1) -# llm_presence_penalty: float = Field(default=0, ge=-2, le=2) -# llm_frequency_penalty: float = Field(default=0, ge=-2, le=2) - -# # UI theming - -# # Variables explicitly passed to gradio.theme.Default() -# # For example: -# # {"primary_hue": "red"} -# theme_params: dict[str, Union[str, List[str]]] = Field(default_factory=dict) -# # Overrides for theme.body_background_fill property -# theme_background_colour: Optional[str] = Field(default=None) -# # Provides arbitrary CSS and JS overrides to the UI, -# # see https://www.gradio.app/guides/custom-CSS-and-JS -# css_overrides: Optional[str] = Field(default=None) -# custom_javascript: Optional[str] = Field(default=None) - - settings = AppSettings(**load_settings()) logger.info(settings) @@ -102,7 +64,6 @@ class PossibleSystemPromptException(Exception): }, streaming=True, ) -logger.info(llm) def inference(latest_message, history): @@ -176,7 +137,6 @@ def inference(latest_message, history): # UI theming theme = gr.themes.Default(**settings.theme_params) theme.set(**settings.theme_params_extended) -# theme.set(text) def inference_wrapper(*args): @@ -221,44 +181,5 @@ def inference_wrapper(*args): ) 
logger.debug("Gradio chat interface config: %s", app.config) app.queue( - # Allow 10 concurrent requests to backend - # vLLM backend should be clever enough to - # batch these requests appropriately. default_concurrency_limit=10, ).launch(server_name=settings.host_address) - -# with gr.ChatInterface( -# inference_wrapper, -# chatbot=gr.Chatbot( -# # Height of conversation window in CSS units (string) or pixels (int) -# height="68vh", -# show_copy_button=True, -# ), -# textbox=gr.Textbox( -# placeholder="Ask me anything...", -# container=False, -# # Ratio of text box to submit button width -# scale=7, -# ), -# title=settings.page_title, -# description=settings.page_description, -# retry_btn="Retry", -# undo_btn="Undo", -# clear_btn="Clear", -# analytics_enabled=False, -# theme=theme, -# css=settings.css_overrides, -# js=settings.custom_javascript, -# ) as app: -# logger.debug("Gradio chat interface config: %s", app.config) -# # For running locally in tilt dev setup -# if len(sys.argv) > 2 and sys.argv[2] == "localhost": -# app.launch() -# # For running on cluster -# else: -# app.queue( -# # Allow 10 concurrent requests to backend -# # vLLM backend should be clever enough to -# # batch these requests appropriately. 
-# default_concurrency_limit=10, -# ).launch(server_name=settings.host_address) From b8737bc45d34e37742b03a580fa5cb3d677199dc Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 13:55:31 +0000 Subject: [PATCH 15/34] Rename CI test values files --- .../azimuth-chat/ci/{chat-ui-values.yaml => ui-only-values.yaml} | 0 .../ci/{image-analysis-ui-values.yaml => ui-only-values.yaml} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename charts/azimuth-chat/ci/{chat-ui-values.yaml => ui-only-values.yaml} (100%) rename charts/azimuth-image-analysis/ci/{image-analysis-ui-values.yaml => ui-only-values.yaml} (100%) diff --git a/charts/azimuth-chat/ci/chat-ui-values.yaml b/charts/azimuth-chat/ci/ui-only-values.yaml similarity index 100% rename from charts/azimuth-chat/ci/chat-ui-values.yaml rename to charts/azimuth-chat/ci/ui-only-values.yaml diff --git a/charts/azimuth-image-analysis/ci/image-analysis-ui-values.yaml b/charts/azimuth-image-analysis/ci/ui-only-values.yaml similarity index 100% rename from charts/azimuth-image-analysis/ci/image-analysis-ui-values.yaml rename to charts/azimuth-image-analysis/ci/ui-only-values.yaml From 9dfcf3afcdaaf6ac79b77f5cbf397933abfee9d2 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 13:56:44 +0000 Subject: [PATCH 16/34] Update Azimuth UI config --- charts/azimuth-chat/azimuth-ui.schema.yaml | 5 ++--- charts/azimuth-image-analysis/azimuth-ui.schema.yaml | 5 +---- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/charts/azimuth-chat/azimuth-ui.schema.yaml b/charts/azimuth-chat/azimuth-ui.schema.yaml index 061a8ce..1c0da6a 100644 --- a/charts/azimuth-chat/azimuth-ui.schema.yaml +++ b/charts/azimuth-chat/azimuth-ui.schema.yaml @@ -6,7 +6,7 @@ controls: type: TextControl secret: true # Use mirror to mimic yaml anchor in base Helm chart - /azimuth-llm/ui/appSettings/hf_model_name: + /azimuth-llm/ui/appSettings/model_name: type: MirrorControl path: /azimuth-llm/huggingface/model visuallyHidden: true @@ -16,13 
+16,12 @@ controls: /azimuth-llm/api/modelMaxContextLength: type: IntegerControl minimum: 100 - step: 100 required: false sortOrder: - /azimuth-llm/huggingface/model - /azimuth-llm/huggingface/token - - /azimuth-llm/ui/appSettings/hf_model_instruction + - /azimuth-llm/ui/appSettings/model_instruction - /azimuth-llm/ui/appSettings/page_title - /azimuth-llm/api/image/version - /azimuth-llm/ui/appSettings/llm_temperature diff --git a/charts/azimuth-image-analysis/azimuth-ui.schema.yaml b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml index 5c4799c..d244a5a 100644 --- a/charts/azimuth-image-analysis/azimuth-ui.schema.yaml +++ b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml @@ -6,7 +6,7 @@ controls: type: TextControl secret: true # Use mirror to mimic yaml anchor in base Helm chart - /azimuth-llm/ui/appSettings/hf_model_name: + /azimuth-llm/ui/appSettings/model_name: type: MirrorControl path: /azimuth-llm/huggingface/model visuallyHidden: true @@ -14,8 +14,6 @@ controls: sortOrder: - /huggingface/model - /huggingface/token - - /ui/appSettings/hf_model_instruction - - /ui/appSettings/page_title - /api/image/version - /ui/appSettings/llm_temperature - /ui/appSettings/llm_max_tokens @@ -23,4 +21,3 @@ sortOrder: - /ui/appSettings/llm_presence_penalty - /ui/appSettings/llm_top_p - /ui/appSettings/llm_top_k - - /api/modelMaxContextLength From 00972e24d821c0f0e8a9a2b7a1ccd60127f426fb Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 14:11:33 +0000 Subject: [PATCH 17/34] Fix chart dependency versions --- charts/azimuth-chat/Chart.yaml | 2 +- charts/azimuth-image-analysis/Chart.yaml | 2 +- charts/azimuth-llm/Chart.yaml | 8 +------- charts/azimuth-llm/values.yaml | 3 --- 4 files changed, 3 insertions(+), 12 deletions(-) diff --git a/charts/azimuth-chat/Chart.yaml b/charts/azimuth-chat/Chart.yaml index b3e9fa0..97dd341 100644 --- a/charts/azimuth-chat/Chart.yaml +++ b/charts/azimuth-chat/Chart.yaml @@ -18,5 +18,5 @@ annotations: dependencies: - name: 
azimuth-llm - version: 0.1.0 + version: ">=0-0" repository: "file://../azimuth-llm/" diff --git a/charts/azimuth-image-analysis/Chart.yaml b/charts/azimuth-image-analysis/Chart.yaml index c2681ca..238016b 100644 --- a/charts/azimuth-image-analysis/Chart.yaml +++ b/charts/azimuth-image-analysis/Chart.yaml @@ -18,5 +18,5 @@ annotations: dependencies: - name: azimuth-llm - version: 0.1.0 + version: ">=0-0" repository: "file://../azimuth-llm/" diff --git a/charts/azimuth-llm/Chart.yaml b/charts/azimuth-llm/Chart.yaml index 637db49..6c92b69 100644 --- a/charts/azimuth-llm/Chart.yaml +++ b/charts/azimuth-llm/Chart.yaml @@ -18,7 +18,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0 +version: 0.2.0 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to @@ -30,9 +30,3 @@ icon: https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-l annotations: azimuth.stackhpc.com/label: HuggingFace LLM - -# dependencies: -# - name: reloader -# version: 1.0.63 -# repository: https://stakater.github.io/stakater-charts -# condition: ui.enabled diff --git a/charts/azimuth-llm/values.yaml b/charts/azimuth-llm/values.yaml index 116385b..a037d46 100644 --- a/charts/azimuth-llm/values.yaml +++ b/charts/azimuth-llm/values.yaml @@ -149,6 +149,3 @@ ingress: # Annotations to apply to the ingress resource # e.g. 
for cert-manager integration annotations: - -# reloader: -# watchGlobally: false From 60204342560e8172d20ec6a55083560eefb14f1f Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 14:47:00 +0000 Subject: [PATCH 18/34] Fix scheme for passing custom LLM params --- charts/azimuth-chat/azimuth-ui.schema.yaml | 12 +++---- charts/azimuth-chat/ci/ui-only-values.yaml | 9 +++++ .../azimuth-ui.schema.yaml | 12 +++---- .../ci/ui-only-values.yaml | 9 +++++ charts/azimuth-llm/azimuth-ui.schema.yaml | 33 ------------------- 5 files changed, 30 insertions(+), 45 deletions(-) delete mode 100644 charts/azimuth-llm/azimuth-ui.schema.yaml diff --git a/charts/azimuth-chat/azimuth-ui.schema.yaml b/charts/azimuth-chat/azimuth-ui.schema.yaml index 1c0da6a..74bd573 100644 --- a/charts/azimuth-chat/azimuth-ui.schema.yaml +++ b/charts/azimuth-chat/azimuth-ui.schema.yaml @@ -24,10 +24,10 @@ sortOrder: - /azimuth-llm/ui/appSettings/model_instruction - /azimuth-llm/ui/appSettings/page_title - /azimuth-llm/api/image/version - - /azimuth-llm/ui/appSettings/llm_temperature - - /azimuth-llm/ui/appSettings/llm_max_tokens - - /azimuth-llm/ui/appSettings/llm_frequency_penalty - - /azimuth-llm/ui/appSettings/llm_presence_penalty - - /azimuth-llm/ui/appSettings/llm_top_p - - /azimuth-llm/ui/appSettings/llm_top_k + - /azimuth-llm/ui/appSettings/llm_params/temperature + - /azimuth-llm/ui/appSettings/llm_params/max_tokens + - /azimuth-llm/ui/appSettings/llm_params/frequency_penalty + - /azimuth-llm/ui/appSettings/llm_params/presence_penalty + - /azimuth-llm/ui/appSettings/llm_params/top_p + - /azimuth-llm/ui/appSettings/llm_params/top_k - /azimuth-llm/api/modelMaxContextLength diff --git a/charts/azimuth-chat/ci/ui-only-values.yaml b/charts/azimuth-chat/ci/ui-only-values.yaml index bf30ede..b66347d 100644 --- a/charts/azimuth-chat/ci/ui-only-values.yaml +++ b/charts/azimuth-chat/ci/ui-only-values.yaml @@ -5,3 +5,12 @@ azimuth-llm: service: zenith: enabled: false + appSettings: + # Verify that we 
can set non-standard LLM params + llm_params: + max_tokens: 101 + temperature: 0.1 + top_p: 0.15 + top_k: 1 + presence_penalty: 0.9 + frequency_penalty: 1 diff --git a/charts/azimuth-image-analysis/azimuth-ui.schema.yaml b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml index d244a5a..f1068c2 100644 --- a/charts/azimuth-image-analysis/azimuth-ui.schema.yaml +++ b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml @@ -15,9 +15,9 @@ sortOrder: - /huggingface/model - /huggingface/token - /api/image/version - - /ui/appSettings/llm_temperature - - /ui/appSettings/llm_max_tokens - - /ui/appSettings/llm_frequency_penalty - - /ui/appSettings/llm_presence_penalty - - /ui/appSettings/llm_top_p - - /ui/appSettings/llm_top_k + - /ui/appSettings/llm_params/temperature + - /ui/appSettings/llm_params/max_tokens + - /ui/appSettings/llm_params/frequency_penalty + - /ui/appSettings/llm_params/presence_penalty + - /ui/appSettings/llm_params/top_p + - /ui/appSettings/llm_params/top_k diff --git a/charts/azimuth-image-analysis/ci/ui-only-values.yaml b/charts/azimuth-image-analysis/ci/ui-only-values.yaml index bf30ede..b66347d 100644 --- a/charts/azimuth-image-analysis/ci/ui-only-values.yaml +++ b/charts/azimuth-image-analysis/ci/ui-only-values.yaml @@ -5,3 +5,12 @@ azimuth-llm: service: zenith: enabled: false + appSettings: + # Verify that we can set non-standard LLM params + llm_params: + max_tokens: 101 + temperature: 0.1 + top_p: 0.15 + top_k: 1 + presence_penalty: 0.9 + frequency_penalty: 1 diff --git a/charts/azimuth-llm/azimuth-ui.schema.yaml b/charts/azimuth-llm/azimuth-ui.schema.yaml deleted file mode 100644 index a633139..0000000 --- a/charts/azimuth-llm/azimuth-ui.schema.yaml +++ /dev/null @@ -1,33 +0,0 @@ -controls: - /huggingface/model: - type: TextControl - required: true - /huggingface/token: - type: TextControl - secret: true - # Use mirror to mimic yaml anchor in base Helm chart - /ui/appSettings/hf_model_name: - type: MirrorControl - path: /huggingface/model - 
visuallyHidden: true - # Azimuth UI doesn't handle json type ["integer","null"] - # properly so we allow any type in JSON schema then - # constrain to (optional) integer here. - /api/modelMaxContextLength: - type: IntegerControl - minimum: 100 - required: false - -sortOrder: - - /huggingface/model - - /huggingface/token - - /ui/appSettings/hf_model_instruction - - /ui/appSettings/page_title - - /api/image/version - - /ui/appSettings/llm_temperature - - /ui/appSettings/llm_max_tokens - - /ui/appSettings/llm_frequency_penalty - - /ui/appSettings/llm_presence_penalty - - /ui/appSettings/llm_top_p - - /ui/appSettings/llm_top_k - - /api/modelMaxContextLength From 456b2212bfcbbcaab9c83751a8929b61d82d564c Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 15:01:37 +0000 Subject: [PATCH 19/34] Fix scheme for passing custom LLM params --- charts/azimuth-chat/values.schema.json | 93 ++++++++++--------- .../azimuth-image-analysis/values.schema.json | 93 ++++++++++--------- 2 files changed, 98 insertions(+), 88 deletions(-) diff --git a/charts/azimuth-chat/values.schema.json b/charts/azimuth-chat/values.schema.json index 96e5882..ebc2622 100644 --- a/charts/azimuth-chat/values.schema.json +++ b/charts/azimuth-chat/values.schema.json @@ -69,50 +69,55 @@ "description": "The title to display at the top of the chat interface.", "default": "Large Language Model" }, - "llm_max_tokens": { - "type": "integer", - "title": "Max Tokens", - "description": "The maximum number of new [tokens](https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens) to generate for each LLM responses.", - "default": 1000 - }, - "llm_temperature": { - "type": "number", - "title": "LLM Temperature", - "description": "The [temperature](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature) value to use when generating LLM responses.", - "default": 0, - "minimum": 0, - "maximum": 2 - }, - "llm_top_p": { - "type": "number", - "title": "LLM Top P", - 
"description": "The [top p](https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p) value to use when generating LLM responses.", - "default": 1, - "exclusiveMinimum": 0, - "maximum": 1 - }, - "llm_top_k": { - "type": "integer", - "title": "LLM Top K", - "description": "The [top k](https://docs.vllm.ai/en/stable/dev/sampling_params.html) value to use when generating LLM responses (must be an integer).", - "default": -1, - "minimum": -1 - }, - "llm_presence_penalty": { - "type": "number", - "title": "LLM Presence Penalty", - "description": "The [presence penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-presence_penalty) to use when generating LLM responses.", - "default": 0, - "minimum": -2, - "maximum": 2 - }, - "llm_frequency_penalty": { - "type": "number", - "title": "LLM Frequency Penalty", - "description": "The [frequency_penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-frequency_penalty) to use when generating LLM responses.", - "default": 0, - "minimum": -2, - "maximum": 2 + "llm_params": { + "type": "object", + "properties": { + "max_tokens": { + "type": "integer", + "title": "Max Tokens", + "description": "The maximum number of new [tokens](https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens) to generate for each LLM responses.", + "default": 1000 + }, + "temperature": { + "type": "number", + "title": "LLM Temperature", + "description": "The [temperature](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature) value to use when generating LLM responses.", + "default": 0, + "minimum": 0, + "maximum": 2 + }, + "top_p": { + "type": "number", + "title": "LLM Top P", + "description": "The [top p](https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p) value to use when generating LLM responses.", + "default": 1, + "exclusiveMinimum": 0, + "maximum": 1 + }, + "top_k": { + "type": "integer", + "title": 
"LLM Top K", + "description": "The [top k](https://docs.vllm.ai/en/stable/dev/sampling_params.html) value to use when generating LLM responses (must be an integer).", + "default": -1, + "minimum": -1 + }, + "presence_penalty": { + "type": "number", + "title": "LLM Presence Penalty", + "description": "The [presence penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-presence_penalty) to use when generating LLM responses.", + "default": 0, + "minimum": -2, + "maximum": 2 + }, + "frequency_penalty": { + "type": "number", + "title": "LLM Frequency Penalty", + "description": "The [frequency_penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-frequency_penalty) to use when generating LLM responses.", + "default": 0, + "minimum": -2, + "maximum": 2 + } + } } }, "required": [ diff --git a/charts/azimuth-image-analysis/values.schema.json b/charts/azimuth-image-analysis/values.schema.json index 2ddda05..029a7ae 100644 --- a/charts/azimuth-image-analysis/values.schema.json +++ b/charts/azimuth-image-analysis/values.schema.json @@ -53,50 +53,55 @@ "title": "Model Name", "description": "Model name supplied to the OpenAI client in frontend web app. Should match huggingface.model above." 
}, - "llm_max_tokens": { - "type": "integer", - "title": "Max Tokens", - "description": "The maximum number of new [tokens](https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens) to generate for each LLM responses.", - "default": 1000 - }, - "llm_temperature": { - "type": "number", - "title": "LLM Temperature", - "description": "The [temperature](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature) value to use when generating LLM responses.", - "default": 0, - "minimum": 0, - "maximum": 2 - }, - "llm_top_p": { - "type": "number", - "title": "LLM Top P", - "description": "The [top p](https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p) value to use when generating LLM responses.", - "default": 1, - "exclusiveMinimum": 0, - "maximum": 1 - }, - "llm_top_k": { - "type": "integer", - "title": "LLM Top K", - "description": "The [top k](https://docs.vllm.ai/en/stable/dev/sampling_params.html) value to use when generating LLM responses (must be an integer).", - "default": -1, - "minimum": -1 - }, - "llm_presence_penalty": { - "type": "number", - "title": "LLM Presence Penalty", - "description": "The [presence penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-presence_penalty) to use when generating LLM responses.", - "default": 0, - "minimum": -2, - "maximum": 2 - }, - "llm_frequency_penalty": { - "type": "number", - "title": "LLM Frequency Penalty", - "description": "The [frequency_penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-frequency_penalty) to use when generating LLM responses.", - "default": 0, - "minimum": -2, - "maximum": 2 + "llm_params": { + "type": "object", + "properties": { + "max_tokens": { + "type": "integer", + "title": "Max Tokens", + "description": "The maximum number of new [tokens](https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens) to generate for each LLM responses.", + 
"default": 1000 + }, + "temperature": { + "type": "number", + "title": "LLM Temperature", + "description": "The [temperature](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature) value to use when generating LLM responses.", + "default": 0, + "minimum": 0, + "maximum": 2 + }, + "top_p": { + "type": "number", + "title": "LLM Top P", + "description": "The [top p](https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p) value to use when generating LLM responses.", + "default": 1, + "exclusiveMinimum": 0, + "maximum": 1 + }, + "top_k": { + "type": "integer", + "title": "LLM Top K", + "description": "The [top k](https://docs.vllm.ai/en/stable/dev/sampling_params.html) value to use when generating LLM responses (must be an integer).", + "default": -1, + "minimum": -1 + }, + "presence_penalty": { + "type": "number", + "title": "LLM Presence Penalty", + "description": "The [presence penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-presence_penalty) to use when generating LLM responses.", + "default": 0, + "minimum": -2, + "maximum": 2 + }, + "frequency_penalty": { + "type": "number", + "title": "LLM Frequency Penalty", + "description": "The [frequency_penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-frequency_penalty) to use when generating LLM responses.", + "default": 0, + "minimum": -2, + "maximum": 2 + } + } } }, "required": [ From f5c86549098417608ee6dd988c6b3e49c86b19f7 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 16:10:34 +0000 Subject: [PATCH 20/34] Add model context length option to Azimuth UI --- .../azimuth-ui.schema.yaml | 26 ++++++++++++------- .../azimuth-image-analysis/values.schema.json | 4 +++ 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/charts/azimuth-image-analysis/azimuth-ui.schema.yaml b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml index f1068c2..885ca8e 100644 --- 
a/charts/azimuth-image-analysis/azimuth-ui.schema.yaml +++ b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml @@ -10,14 +10,22 @@ controls: type: MirrorControl path: /azimuth-llm/huggingface/model visuallyHidden: true + # Azimuth UI doesn't handle json type ["integer","null"] + # properly so we allow any type in JSON schema then + # constrain to (optional) integer here. + /azimuth-llm/api/modelMaxContextLength: + type: IntegerControl + minimum: 100 + required: false sortOrder: - - /huggingface/model - - /huggingface/token - - /api/image/version - - /ui/appSettings/llm_params/temperature - - /ui/appSettings/llm_params/max_tokens - - /ui/appSettings/llm_params/frequency_penalty - - /ui/appSettings/llm_params/presence_penalty - - /ui/appSettings/llm_params/top_p - - /ui/appSettings/llm_params/top_k + - /azimuth-llm/huggingface/model + - /azimuth-llm/huggingface/token + - /azimuth-llm/api/image/version + - /azimuth-llm/ui/appSettings/llm_params/temperature + - /azimuth-llm/ui/appSettings/llm_params/max_tokens + - /azimuth-llm/ui/appSettings/llm_params/frequency_penalty + - /azimuth-llm/ui/appSettings/llm_params/presence_penalty + - /azimuth-llm/ui/appSettings/llm_params/top_p + - /azimuth-llm/ui/appSettings/llm_params/top_k + - /azimuth-llm/api/modelMaxContextLength diff --git a/charts/azimuth-image-analysis/values.schema.json b/charts/azimuth-image-analysis/values.schema.json index 029a7ae..7f747e4 100644 --- a/charts/azimuth-image-analysis/values.schema.json +++ b/charts/azimuth-image-analysis/values.schema.json @@ -39,6 +39,10 @@ "default": "v0.6.3" } } + }, + "modelMaxContextLength": { + "title": "Model Context Length", + "description": "An override for the maximum context length to allow, if the model's default is not suitable." 
} } }, From 8e58066329e8bfc9e5101783ab421d69a4aee5ae Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 16:44:13 +0000 Subject: [PATCH 21/34] Fix defaults for LLMParams data model --- web-apps/image-analysis/app.py | 2 +- web-apps/utils/utils.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/web-apps/image-analysis/app.py b/web-apps/image-analysis/app.py index 77cda84..c60f412 100644 --- a/web-apps/image-analysis/app.py +++ b/web-apps/image-analysis/app.py @@ -30,7 +30,7 @@ class AppSettings(BaseModel): page_title: str page_description: str examples: List[PromptExample] - llm_params: LLMParams + llm_params: LLMParams | None # Theme customisation theme_params: Dict[str, str | list] theme_params_extended: Dict[str, str] diff --git a/web-apps/utils/utils.py b/web-apps/utils/utils.py index 252ccf4..de299f8 100644 --- a/web-apps/utils/utils.py +++ b/web-apps/utils/utils.py @@ -20,12 +20,12 @@ class LLMParams(BaseModel): https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#extra-parameters """ - max_tokens: PositiveInt | None - temperature: Annotated[float, Field(ge=0, le=2)] | None - top_p: Annotated[float, Field(gt=0, le=1)] | None - top_k: Annotated[int, Field(ge=-1)] | None - frequency_penalty: Annotated[float, Field(ge=-2, le=2)] | None - presence_penalty: Annotated[float, Field(ge=0 - 2, le=2)] | None + max_tokens: PositiveInt | None = None + temperature: Annotated[float, Field(ge=0, le=2)] | None = None + top_p: Annotated[float, Field(gt=0, le=1)] | None = None + top_k: Annotated[int, Field(ge=-1)] | None = None + frequency_penalty: Annotated[float, Field(ge=-2, le=2)] | None = None + presence_penalty: Annotated[float, Field(ge=0 - 2, le=2)] | None = None # Make sure we can't smuggle in extra request params / typos model_config = ConfigDict(extra="forbid") From ef832886997a302e061101b1f39cd493049fc573 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 16:45:06 +0000 Subject: [PATCH 22/34] Bump UI 
image tag --- charts/azimuth-llm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/azimuth-llm/values.yaml b/charts/azimuth-llm/values.yaml index a037d46..6c8f508 100644 --- a/charts/azimuth-llm/values.yaml +++ b/charts/azimuth-llm/values.yaml @@ -83,7 +83,7 @@ ui: # Container image config image: repository: ghcr.io/stackhpc/azimuth-llm-chat-ui - version: 58c4dcb + version: 8e58066 imagePullPolicy: # The settings to be passed to the frontend web app. # Format depends on the chosen UI image above. For each of the UIs From 35a14389bef050c462addd538988f21dba05158f Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 16:53:16 +0000 Subject: [PATCH 23/34] Bump UI image tag --- charts/azimuth-llm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/azimuth-llm/values.yaml b/charts/azimuth-llm/values.yaml index 6c8f508..dc3a95f 100644 --- a/charts/azimuth-llm/values.yaml +++ b/charts/azimuth-llm/values.yaml @@ -83,7 +83,7 @@ ui: # Container image config image: repository: ghcr.io/stackhpc/azimuth-llm-chat-ui - version: 8e58066 + version: ef83288 imagePullPolicy: # The settings to be passed to the frontend web app. # Format depends on the chosen UI image above. 
For each of the UIs From f3d5544849fa2e61966dd50cc9f2fa1546b419c0 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 17:44:09 +0000 Subject: [PATCH 24/34] Remove top_k from vision model UI options --- charts/azimuth-image-analysis/azimuth-ui.schema.yaml | 4 +++- charts/azimuth-image-analysis/ci/ui-only-values.yaml | 1 - charts/azimuth-image-analysis/values.schema.json | 8 +------- web-apps/image-analysis/defaults.yml | 4 +++- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/charts/azimuth-image-analysis/azimuth-ui.schema.yaml b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml index 885ca8e..a960081 100644 --- a/charts/azimuth-image-analysis/azimuth-ui.schema.yaml +++ b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml @@ -27,5 +27,7 @@ sortOrder: - /azimuth-llm/ui/appSettings/llm_params/frequency_penalty - /azimuth-llm/ui/appSettings/llm_params/presence_penalty - /azimuth-llm/ui/appSettings/llm_params/top_p - - /azimuth-llm/ui/appSettings/llm_params/top_k + # vLLM responds with HTTP 400 BadRequest when top_k is + # passed to a vision model (but ollama accepts it) + # - /azimuth-llm/ui/appSettings/llm_params/top_k - /azimuth-llm/api/modelMaxContextLength diff --git a/charts/azimuth-image-analysis/ci/ui-only-values.yaml b/charts/azimuth-image-analysis/ci/ui-only-values.yaml index b66347d..96f716d 100644 --- a/charts/azimuth-image-analysis/ci/ui-only-values.yaml +++ b/charts/azimuth-image-analysis/ci/ui-only-values.yaml @@ -11,6 +11,5 @@ azimuth-llm: max_tokens: 101 temperature: 0.1 top_p: 0.15 - top_k: 1 presence_penalty: 0.9 frequency_penalty: 1 diff --git a/charts/azimuth-image-analysis/values.schema.json b/charts/azimuth-image-analysis/values.schema.json index 7f747e4..c8be1ac 100644 --- a/charts/azimuth-image-analysis/values.schema.json +++ b/charts/azimuth-image-analysis/values.schema.json @@ -58,6 +58,7 @@ "description": "Model name supplied to the OpenAI client in frontend web app. Should match huggingface.model above." 
}, "llm_params": { + "$comment": "top_k parameter causes vLLM to error for most (all?) vision models so is excluded here", "type": "object", "properties": { "max_tokens": { @@ -82,13 +83,6 @@ "exclusiveMinimum": 0, "maximum": 1 }, - "top_k": { - "type": "integer", - "title": "LLM Top K", - "description": "The [top k](https://docs.vllm.ai/en/stable/dev/sampling_params.html) value to use when generating LLM responses (must be an integer).", - "default": -1, - "minimum": -1 - }, "presence_penalty": { "type": "number", "title": "LLM Presence Penalty", diff --git a/web-apps/image-analysis/defaults.yml b/web-apps/image-analysis/defaults.yml index e6f2791..21d233c 100644 --- a/web-apps/image-analysis/defaults.yml +++ b/web-apps/image-analysis/defaults.yml @@ -18,7 +18,9 @@ llm_params: max_tokens: temperature: top_p: - top_k: + # vLLM rejects requests with top_k parameter for + # most (all?) vision models so can't use it here + # top_k: frequency_penalty: presence_penalty: From 0dfd58a2123671ce3545bbded3eda736a3f3f382 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 19:47:53 +0000 Subject: [PATCH 25/34] Update workflow permissions avoid device-flow auth --- .github/workflows/build-push-artifacts.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index fa2cca4..4027fdd 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -34,8 +34,13 @@ jobs: build_push_images: name: Build and push images runs-on: ubuntu-latest + permissions: + contents: read + id-token: write # needed for signing the images with GitHub OIDC Token + packages: write # required for pushing container images + security-events: write # required for pushing SARIF files needs: changes - if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} + # if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 
'tag' }} strategy: matrix: include: From bd6accb229c515524f10348adfc4fd209a4378a5 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 19:48:34 +0000 Subject: [PATCH 26/34] Re-enable change detection on image builds --- .github/workflows/build-push-artifacts.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index 4027fdd..75c561f 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -40,7 +40,7 @@ jobs: packages: write # required for pushing container images security-events: write # required for pushing SARIF files needs: changes - # if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} + if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} strategy: matrix: include: From d6202c0083c471761e381b8534caaccb4d17b607 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 19:55:34 +0000 Subject: [PATCH 27/34] Remove redundant helm template check --- .github/workflows/test-pr.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/test-pr.yml b/.github/workflows/test-pr.yml index 1187e7e..a064702 100644 --- a/.github/workflows/test-pr.yml +++ b/.github/workflows/test-pr.yml @@ -28,10 +28,6 @@ jobs: - name: Run chart linting run: ct lint --config ct.yaml - - name: Run helm template with default values - run: helm template ci-test . 
- working-directory: chart - - name: Create Kind Cluster uses: helm/kind-action@v1 with: From 96ca80a4e31eaf39afbf2e7372a3ff0915512b21 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 20:04:45 +0000 Subject: [PATCH 28/34] Skip change detection on tags --- .github/workflows/build-push-artifacts.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index 75c561f..412566e 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -7,6 +7,7 @@ jobs: changes: name: Check for relevant changes runs-on: ubuntu-latest + if: ${{ github.ref_type != 'tag' }} # Required permissions permissions: pull-requests: read From 24b4f2c98807154c14edaa1afcb09fb137518bb8 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 20:06:42 +0000 Subject: [PATCH 29/34] Always run artifact publishing on tags --- .github/workflows/build-push-artifacts.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index 412566e..358fce0 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -41,7 +41,7 @@ jobs: packages: write # required for pushing container images security-events: write # required for pushing SARIF files needs: changes - if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} + if: ${{ github.ref_type == 'tag' || needs.changes.outputs.images == 'true' }} strategy: matrix: include: @@ -86,7 +86,7 @@ jobs: runs-on: ubuntu-latest # Only build and push the chart if chart files have changed needs: [changes] - if: ${{ needs.changes.outputs.chart == 'true' || github.ref_type == 'tag' }} + if: ${{ github.ref_type == 'tag' || needs.changes.outputs.chart == 'true' }} steps: - name: Check out the repository uses: actions/checkout@v4 From 
df7b8eb7fd7e6698af74cf969f1479ca0bf4bb7d Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 20:09:30 +0000 Subject: [PATCH 30/34] Revert "Skip change detection on tags" This reverts commit 96ca80a4e31eaf39afbf2e7372a3ff0915512b21. --- .github/workflows/build-push-artifacts.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index 358fce0..fd8969b 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -7,7 +7,6 @@ jobs: changes: name: Check for relevant changes runs-on: ubuntu-latest - if: ${{ github.ref_type != 'tag' }} # Required permissions permissions: pull-requests: read From f9eb0aa2c2722e6e994093f3759a6210b9d826cc Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 21:21:06 +0000 Subject: [PATCH 31/34] Remove unused reloader annotation --- charts/azimuth-llm/templates/ui/deployment.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/charts/azimuth-llm/templates/ui/deployment.yml b/charts/azimuth-llm/templates/ui/deployment.yml index bed4167..3938893 100644 --- a/charts/azimuth-llm/templates/ui/deployment.yml +++ b/charts/azimuth-llm/templates/ui/deployment.yml @@ -5,9 +5,6 @@ metadata: name: {{ .Release.Name }}-ui labels: {{- include "azimuth-llm.labels" . 
| nindent 4 }} - annotations: - # Make sure UI is reloaded when app settings are updated - reloader.stakater.com/auto: "true" spec: replicas: 1 selector: From d0d1fd024dc7037547ac37e1449c66424a3fdf7f Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 23:45:20 +0000 Subject: [PATCH 32/34] Test alternative docker cache settings --- .github/workflows/build-push-artifacts.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index fd8969b..9c81fe3 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -40,7 +40,7 @@ jobs: packages: write # required for pushing container images security-events: write # required for pushing SARIF files needs: changes - if: ${{ github.ref_type == 'tag' || needs.changes.outputs.images == 'true' }} + # if: ${{ github.ref_type == 'tag' || needs.changes.outputs.images == 'true' }} strategy: matrix: include: @@ -69,7 +69,8 @@ jobs: type=sha,prefix= - name: Build and push image - uses: azimuth-cloud/github-actions/docker-multiarch-build-push@update-trivy-action + # uses: azimuth-cloud/github-actions/docker-multiarch-build-push@update-trivy-action + uses: azimuth-cloud/github-actions/docker-multiarch-build-push@test/docker-build-cache with: cache-key: ${{ matrix.component }} context: ./web-apps/ From 7f915535832d0d539980e25616c19f0073b1075d Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 23:54:31 +0000 Subject: [PATCH 33/34] Dummy change for cache testing --- .github/workflows/build-push-artifacts.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index 9c81fe3..fdbf892 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -69,7 +69,6 @@ jobs: type=sha,prefix= - name: Build and push image - # uses: 
azimuth-cloud/github-actions/docker-multiarch-build-push@update-trivy-action uses: azimuth-cloud/github-actions/docker-multiarch-build-push@test/docker-build-cache with: cache-key: ${{ matrix.component }} From 717e582bac5dc7fe38931d21b3e5f7cce941b435 Mon Sep 17 00:00:00 2001 From: sd109 Date: Fri, 1 Nov 2024 10:18:29 +0000 Subject: [PATCH 34/34] Revert to master branch of docker build action --- .github/workflows/build-push-artifacts.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index fdbf892..73d8370 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -40,7 +40,7 @@ jobs: packages: write # required for pushing container images security-events: write # required for pushing SARIF files needs: changes - # if: ${{ github.ref_type == 'tag' || needs.changes.outputs.images == 'true' }} + if: ${{ github.ref_type == 'tag' || needs.changes.outputs.images == 'true' }} strategy: matrix: include: @@ -69,7 +69,7 @@ jobs: type=sha,prefix= - name: Build and push image - uses: azimuth-cloud/github-actions/docker-multiarch-build-push@test/docker-build-cache + uses: azimuth-cloud/github-actions/docker-multiarch-build-push@master with: cache-key: ${{ matrix.component }} context: ./web-apps/