From 82d045c75bbad09fe53bb560373c9e49569c0c1a Mon Sep 17 00:00:00 2001 From: sd109 Date: Wed, 30 Oct 2024 17:52:14 +0000 Subject: [PATCH 01/34] Refactor basic chat web app and add image analysis app --- .gitignore | 6 + .../{chat-interface => basic-chat}/Dockerfile | 10 +- .../{chat-interface => basic-chat}/app.py | 166 +++++++++++++----- web-apps/basic-chat/defaults.yml | 31 ++++ .../gradio-client-test.py | 0 .../requirements.txt | 2 +- web-apps/build.sh | 6 +- web-apps/chat-interface/config.py | 97 ---------- web-apps/chat-interface/defaults.yml | 36 ---- web-apps/image-analysis/Dockerfile | 18 ++ web-apps/image-analysis/app.py | 121 +++++++++++++ web-apps/image-analysis/defaults.yml | 36 ++++ web-apps/image-analysis/requirements.txt | 6 + .../purge-google-fonts.sh | 0 web-apps/run.sh | 4 +- web-apps/utils/setup.py | 8 + web-apps/utils/utils.py | 93 ++++++++++ 17 files changed, 450 insertions(+), 190 deletions(-) rename web-apps/{chat-interface => basic-chat}/Dockerfile (61%) rename web-apps/{chat-interface => basic-chat}/app.py (54%) create mode 100644 web-apps/basic-chat/defaults.yml rename web-apps/{chat-interface => basic-chat}/gradio-client-test.py (100%) rename web-apps/{chat-interface => basic-chat}/requirements.txt (78%) delete mode 100644 web-apps/chat-interface/config.py delete mode 100644 web-apps/chat-interface/defaults.yml create mode 100644 web-apps/image-analysis/Dockerfile create mode 100644 web-apps/image-analysis/app.py create mode 100644 web-apps/image-analysis/defaults.yml create mode 100644 web-apps/image-analysis/requirements.txt rename web-apps/{chat-interface => }/purge-google-fonts.sh (100%) create mode 100644 web-apps/utils/setup.py create mode 100644 web-apps/utils/utils.py diff --git a/.gitignore b/.gitignore index 7d21b1b..9a57ecc 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,9 @@ test-values.y[a]ml # Helm chart stuff chart/Chart.lock chart/charts + +# Python stuff +**/build/ +**/*.egg-info/ +**/flagged/ 
+web-apps/**/overrides.yml diff --git a/web-apps/chat-interface/Dockerfile b/web-apps/basic-chat/Dockerfile similarity index 61% rename from web-apps/chat-interface/Dockerfile rename to web-apps/basic-chat/Dockerfile index 803d58f..7e9ed84 100644 --- a/web-apps/chat-interface/Dockerfile +++ b/web-apps/basic-chat/Dockerfile @@ -1,6 +1,9 @@ FROM python:3.11-slim -COPY requirements.txt requirements.txt +ARG DIR=chat-interface + +COPY $DIR/requirements.txt requirements.txt +COPY utils utils RUN pip install --no-cache-dir -r requirements.txt COPY purge-google-fonts.sh purge-google-fonts.sh @@ -8,9 +11,8 @@ RUN bash purge-google-fonts.sh WORKDIR /app -COPY *.py . +COPY $DIR/*.py . -COPY defaults.yml . -# COPY overrides.yml . +COPY $DIR/defaults.yml . ENTRYPOINT ["python3", "app.py"] diff --git a/web-apps/chat-interface/app.py b/web-apps/basic-chat/app.py similarity index 54% rename from web-apps/chat-interface/app.py rename to web-apps/basic-chat/app.py index 3ead467..dbfae8f 100644 --- a/web-apps/chat-interface/app.py +++ b/web-apps/basic-chat/app.py @@ -1,23 +1,77 @@ -import sys import logging +import openai + import gradio as gr -from urllib.parse import urljoin -from config import AppSettings +from urllib.parse import urljoin from langchain.schema import HumanMessage, AIMessage, SystemMessage from langchain_openai import ChatOpenAI -import openai +from typing import Dict, List +from pydantic import BaseModel, ConfigDict +from utils import LLMParams, load_settings logging.basicConfig() logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -logger.info("Starting app") -settings = AppSettings.load() -if len(sys.argv) > 1: - settings.hf_model_name = sys.argv[1] -logger.info("App settings: %s", settings) +class AppSettings(BaseModel): + # Basic config + host_address: str + backend_url: str + model_name: str + model_instruction: str + page_title: str + llm_params: LLMParams + # Theme customisation + theme_params: Dict[str, str | list] + theme_params_extended: 
Dict[str, str] + css_overrides: str | None + custom_javascript: str | None + # Error on typos and suppress warnings for fields with 'model_' prefix + model_config = ConfigDict(protected_namespaces=(), extra="forbid") + + +# class AppSettings(BaseModel): +# hf_model_name: str = Field( +# description="The model to use when constructing the LLM Chat client. This should match the model name running on the vLLM backend", +# ) +# backend_url: HttpUrl = Field( +# description="The address of the OpenAI compatible API server (either in-cluster or externally hosted)" +# ) +# page_title: str = Field(default="Large Language Model") +# page_description: Optional[str] = Field(default=None) +# hf_model_instruction: str = Field( +# default="You are a helpful and cheerful AI assistant. Please respond appropriately." +# ) + +# # Model settings + +# # For available parameters, see https://docs.vllm.ai/en/latest/dev/sampling_params.html +# # which is based on https://platform.openai.com/docs/api-reference/completions/create +# llm_max_tokens: int = Field(default=500) +# llm_temperature: float = Field(default=0) +# llm_top_p: float = Field(default=1) +# llm_top_k: float = Field(default=-1) +# llm_presence_penalty: float = Field(default=0, ge=-2, le=2) +# llm_frequency_penalty: float = Field(default=0, ge=-2, le=2) + +# # UI theming + +# # Variables explicitly passed to gradio.theme.Default() +# # For example: +# # {"primary_hue": "red"} +# theme_params: dict[str, Union[str, List[str]]] = Field(default_factory=dict) +# # Overrides for theme.body_background_fill property +# theme_background_colour: Optional[str] = Field(default=None) +# # Provides arbitrary CSS and JS overrides to the UI, +# # see https://www.gradio.app/guides/custom-CSS-and-JS +# css_overrides: Optional[str] = Field(default=None) +# custom_javascript: Optional[str] = Field(default=None) + + +settings = AppSettings(**load_settings()) +logger.info(settings) backend_url = str(settings.backend_url) backend_health_endpoint = 
urljoin(backend_url, "/health") @@ -36,29 +90,19 @@ class PossibleSystemPromptException(Exception): llm = ChatOpenAI( base_url=urljoin(backend_url, "v1"), - model=settings.hf_model_name, + model=settings.model_name, openai_api_key="required-but-not-used", - temperature=settings.llm_temperature, - max_tokens=settings.llm_max_tokens, - # model_kwargs={ - # "top_p": settings.llm_top_p, - # "frequency_penalty": settings.llm_frequency_penalty, - # "presence_penalty": settings.llm_presence_penalty, - # # Additional parameters supported by vLLM but not OpenAI API - # # https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#extra-parameters - # "extra_body": { - # "top_k": settings.llm_top_k, - # } - top_p=settings.llm_top_p, - frequency_penalty=settings.llm_frequency_penalty, - presence_penalty=settings.llm_presence_penalty, - # Additional parameters supported by vLLM but not OpenAI API - # https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#extra-parameters + temperature=settings.llm_params.temperature, + max_tokens=settings.llm_params.max_tokens, + top_p=settings.llm_params.top_p, + frequency_penalty=settings.llm_params.frequency_penalty, + presence_penalty=settings.llm_params.presence_penalty, extra_body={ - "top_k": settings.llm_top_k, + "top_k": settings.llm_params.top_k, }, streaming=True, ) +logger.info(llm) def inference(latest_message, history): @@ -67,13 +111,13 @@ def inference(latest_message, history): try: if INCLUDE_SYSTEM_PROMPT: - context = [SystemMessage(content=settings.hf_model_instruction)] + context = [SystemMessage(content=settings.model_instruction)] else: context = [] for i, (human, ai) in enumerate(history): if not INCLUDE_SYSTEM_PROMPT and i == 0: # Mimic system prompt by prepending it to first human message - human = f"{settings.hf_model_instruction}\n\n{human}" + human = f"{settings.model_instruction}\n\n{human}" context.append(HumanMessage(content=human)) context.append(AIMessage(content=(ai or ""))) 
context.append(HumanMessage(content=latest_message)) @@ -131,8 +175,8 @@ def inference(latest_message, history): # UI theming theme = gr.themes.Default(**settings.theme_params) -if settings.theme_background_colour: - theme.body_background_fill = settings.theme_background_colour +theme.set(**settings.theme_params_extended) +# theme.set(text) def inference_wrapper(*args): @@ -153,7 +197,7 @@ def inference_wrapper(*args): # Build main chat interface -with gr.ChatInterface( +app = gr.ChatInterface( inference_wrapper, chatbot=gr.Chatbot( # Height of conversation window in CSS units (string) or pixels (int) @@ -167,7 +211,6 @@ def inference_wrapper(*args): scale=7, ), title=settings.page_title, - description=settings.page_description, retry_btn="Retry", undo_btn="Undo", clear_btn="Clear", @@ -175,16 +218,47 @@ def inference_wrapper(*args): theme=theme, css=settings.css_overrides, js=settings.custom_javascript, -) as app: - logger.debug("Gradio chat interface config: %s", app.config) - # For running locally in tilt dev setup - if len(sys.argv) > 2 and sys.argv[2] == "localhost": - app.launch() - # For running on cluster - else: - app.queue( - # Allow 10 concurrent requests to backend - # vLLM backend should be clever enough to - # batch these requests appropriately. - default_concurrency_limit=10, - ).launch(server_name="0.0.0.0") +) +logger.debug("Gradio chat interface config: %s", app.config) +app.queue( + # Allow 10 concurrent requests to backend + # vLLM backend should be clever enough to + # batch these requests appropriately. 
+ default_concurrency_limit=10, +).launch(server_name=settings.host_address) + +# with gr.ChatInterface( +# inference_wrapper, +# chatbot=gr.Chatbot( +# # Height of conversation window in CSS units (string) or pixels (int) +# height="68vh", +# show_copy_button=True, +# ), +# textbox=gr.Textbox( +# placeholder="Ask me anything...", +# container=False, +# # Ratio of text box to submit button width +# scale=7, +# ), +# title=settings.page_title, +# description=settings.page_description, +# retry_btn="Retry", +# undo_btn="Undo", +# clear_btn="Clear", +# analytics_enabled=False, +# theme=theme, +# css=settings.css_overrides, +# js=settings.custom_javascript, +# ) as app: +# logger.debug("Gradio chat interface config: %s", app.config) +# # For running locally in tilt dev setup +# if len(sys.argv) > 2 and sys.argv[2] == "localhost": +# app.launch() +# # For running on cluster +# else: +# app.queue( +# # Allow 10 concurrent requests to backend +# # vLLM backend should be clever enough to +# # batch these requests appropriately. +# default_concurrency_limit=10, +# ).launch(server_name=settings.host_address) diff --git a/web-apps/basic-chat/defaults.yml b/web-apps/basic-chat/defaults.yml new file mode 100644 index 0000000..83f0e46 --- /dev/null +++ b/web-apps/basic-chat/defaults.yml @@ -0,0 +1,31 @@ + +model_name: +model_instruction: "You are a helpful and cheerful AI assistant. Please respond appropriately." +backend_url: +host_address: 0.0.0.0 + +page_title: Large Language Model + +# LLM request parameters +# See https://platform.openai.com/docs/api-reference/chat/create +# and https://docs.vllm.ai/en/v0.6.0/serving/openai_compatible_server.html#extra-parameters +llm_params: + max_tokens: + temperature: 0 + top_p: + top_k: + frequency_penalty: + presence_penalty: + +# Gradio theme constructor parameters (e.g. 'primary_hue') +# See https://www.gradio.app/guides/theming-guide +theme_params: {} + +# Gradio theme .set(...) 
parameters +# See https://www.gradio.app/guides/theming-guide#extending-themes-via-set +theme_params_extended: {} + +# Additional CSS and JS overrides +# See https://www.gradio.app/guides/custom-CSS-and-JS +css_overrides: +custom_javascript: diff --git a/web-apps/chat-interface/gradio-client-test.py b/web-apps/basic-chat/gradio-client-test.py similarity index 100% rename from web-apps/chat-interface/gradio-client-test.py rename to web-apps/basic-chat/gradio-client-test.py diff --git a/web-apps/chat-interface/requirements.txt b/web-apps/basic-chat/requirements.txt similarity index 78% rename from web-apps/chat-interface/requirements.txt rename to web-apps/basic-chat/requirements.txt index 3f34151..f37169c 100644 --- a/web-apps/chat-interface/requirements.txt +++ b/web-apps/basic-chat/requirements.txt @@ -4,4 +4,4 @@ openai langchain langchain_openai pydantic -pydantic_settings +-e ../utils diff --git a/web-apps/build.sh b/web-apps/build.sh index 5fe3c98..0dd5d4a 100755 --- a/web-apps/build.sh +++ b/web-apps/build.sh @@ -2,14 +2,12 @@ set -e build() { - pushd $1 > /dev/null - if [[ -f Dockerfile ]]; then + if [[ -f $1/Dockerfile ]]; then echo Building $1 docker image - docker build . -t ghcr.io/stackhpc/azimuth-llm-$1 + docker build . 
-t ghcr.io/stackhpc/azimuth-llm-$1 -f $1/Dockerfile else echo No Dockerfile found for $1 fi - popd > /dev/null } # If a single app is provided as a diff --git a/web-apps/chat-interface/config.py b/web-apps/chat-interface/config.py deleted file mode 100644 index 8592884..0000000 --- a/web-apps/chat-interface/config.py +++ /dev/null @@ -1,97 +0,0 @@ -import logging -import yaml -from pydantic import Field, HttpUrl -from pydantic_settings import BaseSettings, SettingsConfigDict - -from typing import Optional, Union, List - -logging.basicConfig() -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -NAMESPACE_FILE_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/namespace" -def get_k8s_namespace(): - try: - current_k8s_namespace = open(NAMESPACE_FILE_PATH).read() - return current_k8s_namespace - except FileNotFoundError as err: - return None - -def default_backend(): - k8s_ns = get_k8s_namespace() - if k8s_ns: - return f"http://llm-backend.{k8s_ns}.svc" - else: - logger.warning('Failed to determine k8s namespace from %s - assuming non-kubernetes environment.', NAMESPACE_FILE_PATH) - - -class AppSettings(BaseSettings): - """ - Settings object for the UI example app. - """ - - # # Allow settings to be overwritten by LLM_UI_ env vars - # model_config = SettingsConfigDict(env_prefix="llm_ui_") - - # General settings - hf_model_name: str = Field( - description="The model to use when constructing the LLM Chat client. This should match the model name running on the vLLM backend", - ) - backend_url: HttpUrl = Field( - description="The address of the OpenAI compatible API server (either in-cluster or externally hosted)" - ) - page_title: str = Field(default="Large Language Model") - page_description: Optional[str] = Field(default=None) - hf_model_instruction: str = Field( - default="You are a helpful and cheerful AI assistant. Please respond appropriately." 
- ) - - # Model settings - - # For available parameters, see https://docs.vllm.ai/en/latest/dev/sampling_params.html - # which is based on https://platform.openai.com/docs/api-reference/completions/create - llm_max_tokens: int = Field(default=500) - llm_temperature: float = Field(default=0) - llm_top_p: float = Field(default=1) - llm_top_k: float = Field(default=-1) - llm_presence_penalty: float = Field(default=0, ge=-2, le=2) - llm_frequency_penalty: float = Field(default=0, ge=-2, le=2) - - # UI theming - - # Variables explicitly passed to gradio.theme.Default() - # For example: - # {"primary_hue": "red"} - theme_params: dict[str, Union[str, List[str]]] = Field(default_factory=dict) - # Overrides for theme.body_background_fill property - theme_background_colour: Optional[str] = Field(default=None) - # Provides arbitrary CSS and JS overrides to the UI, - # see https://www.gradio.app/guides/custom-CSS-and-JS - css_overrides: Optional[str] = Field(default=None) - custom_javascript: Optional[str] = Field(default=None) - - - # Method for loading settings from files - @staticmethod - def _load_yaml(file_path: str): - with open(file_path, "r") as file: - content = yaml.safe_load(file) or {} - return content - - @staticmethod - def load(): - defaults = AppSettings._load_yaml('./defaults.yml') - overrides = {} - try: - overrides = AppSettings._load_yaml('/etc/web-app/overrides.yml') - except FileNotFoundError: - pass - settings = {**defaults, **overrides} - # Sanity checks on settings - if 'backend_url' not in settings: - in_cluster_backend = default_backend() - if not in_cluster_backend: - raise Exception('Backend URL must be provided in settings when running this app outside of Kubernetes') - settings['backend_url'] = in_cluster_backend - return AppSettings(**settings) diff --git a/web-apps/chat-interface/defaults.yml b/web-apps/chat-interface/defaults.yml deleted file mode 100644 index 9520b39..0000000 --- a/web-apps/chat-interface/defaults.yml +++ /dev/null @@ -1,36 
+0,0 @@ - -hf_model_name: "microsoft/Phi-3.5-mini-instruct" -hf_model_instruction: "You are a pirate" - -# UI theming tweaks -# css_overrides: | -# h1 { -# color: white; -# padding-top: 1em; -# } -# a { -# color: yellow; -# } -# theme_background_colour: "#00376c" -# theme_params: -# # primary_hue: blue -# font: -# - sans-serif -# font_mono: -# - sans-serif - -# custom_javascript: | -# function addPrivacyStatement() { -# var footer = document.querySelector('footer'); -# footer.appendChild(footer.children[1].cloneNode(deep=true)); -# var item = footer.children[2].cloneNode(); -# item.href = 'https://google.com'; -# item.textContent = 'Privacy Statement'; -# footer.appendChild(item); -# } - -# llm_max_tokens: -# llm_temperature: -# llm_top_p: -# llm_frequency_penalty: -# llm_presence_penalty: diff --git a/web-apps/image-analysis/Dockerfile b/web-apps/image-analysis/Dockerfile new file mode 100644 index 0000000..acc8559 --- /dev/null +++ b/web-apps/image-analysis/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.11-slim + +ARG DIR=image-interface + +COPY $DIR/requirements.txt requirements.txt +COPY utils utils +RUN pip install --no-cache-dir -r requirements.txt + +COPY purge-google-fonts.sh purge-google-fonts.sh +RUN bash purge-google-fonts.sh + +WORKDIR /app + +COPY $DIR/*.py . + +COPY $DIR/defaults.yml . 
+ +ENTRYPOINT ["python3", "app.py"] diff --git a/web-apps/image-analysis/app.py b/web-apps/image-analysis/app.py new file mode 100644 index 0000000..77cda84 --- /dev/null +++ b/web-apps/image-analysis/app.py @@ -0,0 +1,121 @@ +import base64 +import logging +import requests + +import gradio as gr + +from typing import List, Dict +from io import BytesIO +from PIL import Image +from pydantic import BaseModel, ConfigDict +from urllib.parse import urljoin + +from utils import load_settings, LLMParams + +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class PromptExample(BaseModel): + image_url: str + prompt: str + + +class AppSettings(BaseModel): + # Basic config + host_address: str + backend_url: str + model_name: str + page_title: str + page_description: str + examples: List[PromptExample] + llm_params: LLMParams + # Theme customisation + theme_params: Dict[str, str | list] + theme_params_extended: Dict[str, str] + css_overrides: str | None + custom_javascript: str | None + # Error on typos and suppress warnings for fields with 'model_' prefix + model_config = ConfigDict(protected_namespaces=(), extra="forbid") + + +settings = AppSettings(**load_settings()) +logger.info(settings) + + +# TODO: Rewrite this to stream output? 
+def analyze_image(image_url, prompt): + try: + # Download the image + response = requests.get(image_url) + response.raise_for_status() + image = Image.open(BytesIO(response.content)) + + # Convert image to base64 + buffered = BytesIO() + image.save(buffered, format="PNG") + img_str = base64.b64encode(buffered.getvalue()).decode() + + # Prepare the payload for the vision model + payload = { + "model": settings.model_name, + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": prompt}, + { + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{img_str}"}, + }, + ], + } + ], + **{k: v for k, v in settings.llm_params if k != "top_k" and v is not None}, + } + if settings.llm_params.top_k: + payload["extra_body"] = { + "top_k": settings.llm_params.top_k, + } + + # Make the API call to the vision model + headers = {"Content-Type": "application/json"} + response = requests.post( + urljoin(settings.backend_url, "/v1/chat/completions"), + json=payload, + headers=headers, + ) + response.raise_for_status() + + # Extract and return the model's response + result = response.json() + return result["choices"][0]["message"]["content"] + + except Exception as e: + return f"An error occurred: {str(e)}" + + +# UI theming +theme = gr.themes.Default(**settings.theme_params) +theme.set(**settings.theme_params_extended) + +# Set up the Gradio interface +app = gr.Interface( + fn=analyze_image, + inputs=[ + gr.Textbox(label="Image URL"), + gr.Textbox(label="Prompt/Question", elem_id="prompt", scale=2), + ], + outputs=gr.Textbox(label="Results"), + title=settings.page_title, + description=settings.page_description, + examples=[[ex.image_url, ex.prompt] for ex in settings.examples], + theme=theme, + css=settings.css_overrides, + js=settings.custom_javascript, + analytics_enabled=False, +) + +# Launch the interface +app.queue(default_concurrency_limit=10).launch(server_name=settings.host_address) diff --git a/web-apps/image-analysis/defaults.yml 
b/web-apps/image-analysis/defaults.yml new file mode 100644 index 0000000..e6f2791 --- /dev/null +++ b/web-apps/image-analysis/defaults.yml @@ -0,0 +1,36 @@ + +model_name: +backend_url: +host_address: 0.0.0.0 + +page_title: Image analysis with a vision model +page_description: This model can be used to analyse image files. + +# Example inputs to render in the UI +examples: + - image_url: https://www.myplace.de/sites/default/files/styles/blog_hero_bild_slideshow/public/blog/Platzprofessor-MyPlace-SelfStorage-Die-Stadt-als-Raum-der-Begegnung-H.jpg?itok=ibY2Hoy9 + prompt: Conduct a detailed image analysis and describe all parts of the image that you can identify. Count all occurrences of the entities, which you can identify. Make a guess about the provenance or location of the image. + +# LLM request parameters +# See https://platform.openai.com/docs/api-reference/chat/create +# and https://docs.vllm.ai/en/v0.6.0/serving/openai_compatible_server.html#extra-parameters +llm_params: + max_tokens: + temperature: + top_p: + top_k: + frequency_penalty: + presence_penalty: + +# Gradio theme constructor parameters (e.g. 'primary_hue') +# See https://www.gradio.app/guides/theming-guide +theme_params: {} + +# Gradio theme .set(...) 
parameters +# See https://www.gradio.app/guides/theming-guide#extending-themes-via-set +theme_params_extended: {} + +# Additional CSS and JS overrides +# See https://www.gradio.app/guides/custom-CSS-and-JS +css_overrides: +custom_javascript: diff --git a/web-apps/image-analysis/requirements.txt b/web-apps/image-analysis/requirements.txt new file mode 100644 index 0000000..9196eda --- /dev/null +++ b/web-apps/image-analysis/requirements.txt @@ -0,0 +1,6 @@ +pillow +requests +gradio<5 +gradio_client +-e ../utils +pydantic diff --git a/web-apps/chat-interface/purge-google-fonts.sh b/web-apps/purge-google-fonts.sh similarity index 100% rename from web-apps/chat-interface/purge-google-fonts.sh rename to web-apps/purge-google-fonts.sh diff --git a/web-apps/run.sh b/web-apps/run.sh index 5baa0c6..e6e20b5 100755 --- a/web-apps/run.sh +++ b/web-apps/run.sh @@ -1,7 +1,7 @@ #!/bin/bash set -e -IMAGE_TAG=azimuth-llm-$1 +IMAGE_TAG=ghcr.io/stackhpc/azimuth-llm-$1 error() { echo $1 @@ -18,4 +18,4 @@ else echo "Found local $IMAGE_TAG docker image" fi -docker run -p 7860:7860 $IMAGE_TAG +docker run --rm -p 7860:7860 $IMAGE_TAG diff --git a/web-apps/utils/setup.py b/web-apps/utils/setup.py new file mode 100644 index 0000000..515d709 --- /dev/null +++ b/web-apps/utils/setup.py @@ -0,0 +1,8 @@ +from setuptools import setup, find_packages + +setup( + name='web-app-utils', + version='0.0.1', + py_modules=["utils"], + requires=["pydantic"] +) diff --git a/web-apps/utils/utils.py b/web-apps/utils/utils.py new file mode 100644 index 0000000..252ccf4 --- /dev/null +++ b/web-apps/utils/utils.py @@ -0,0 +1,93 @@ +##### +# Shared utility functions and models for re-use by multiple web apps +##### + +import logging +import pathlib +import yaml +from typing import Annotated +from pydantic import BaseModel, ConfigDict, PositiveInt, Field + +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class LLMParams(BaseModel): + """ + Parameters for vLLM API 
requests. For details see + https://platform.openai.com/docs/api-reference/chat/create + https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#extra-parameters + """ + + max_tokens: PositiveInt | None + temperature: Annotated[float, Field(ge=0, le=2)] | None + top_p: Annotated[float, Field(gt=0, le=1)] | None + top_k: Annotated[int, Field(ge=-1)] | None + frequency_penalty: Annotated[float, Field(ge=-2, le=2)] | None + presence_penalty: Annotated[float, Field(ge=0 - 2, le=2)] | None + # Make sure we can't smuggle in extra request params / typos + model_config = ConfigDict(extra="forbid") + + +NAMESPACE_FILE_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/namespace" + + +def get_k8s_namespace(): + try: + current_k8s_namespace = open(NAMESPACE_FILE_PATH).read() + return current_k8s_namespace + except FileNotFoundError as err: + return None + + +def api_address_in_cluster(): + k8s_ns = get_k8s_namespace() + if k8s_ns: + return f"http://llm-backend.{k8s_ns}.svc" + else: + logger.warning( + "Failed to determine k8s namespace from %s - assuming non-kubernetes environment.", + NAMESPACE_FILE_PATH, + ) + + +# Method for loading settings from files +def load_yaml(file_path: str) -> dict: + with open(file_path, "r") as file: + content = yaml.safe_load(file) or {} + return content + + +def load_settings() -> dict: + + defaults = load_yaml("./defaults.yml") + overrides = {} + # Path must match the one used in the Helm chart's + # app-config-map.yml template + path = pathlib.Path("/etc/web-app/overrides.yml") + if path.exists(): + overrides = load_yaml(path) + else: + # Allow local overrides for dev/testing + path = pathlib.Path("./overrides.yml") + if path.exists(): + overrides = load_yaml(path) + + # Sanity checks on settings + unused_overrides = [k for k in overrides.keys() if k not in defaults.keys()] + if unused_overrides: + logger.warning( + f"Overrides {unused_overrides} not part of default settings so may be ignored." 
+ "Please check for typos" + ) + settings = {**defaults, **overrides} + if "backend_url" not in settings or not settings["backend_url"]: + # Try to detect in-cluster address + in_cluster_backend = api_address_in_cluster() + if not in_cluster_backend: + raise Exception( + "Backend URL must be provided in settings when running outside of Kubernetes." + ) + settings["backend_url"] = in_cluster_backend + return settings From 7ecfb9250b25b8e9b8a1620738e02065bed12a13 Mon Sep 17 00:00:00 2001 From: sd109 Date: Wed, 30 Oct 2024 17:55:57 +0000 Subject: [PATCH 02/34] Update image build matrix --- .github/workflows/build-push-artifacts.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index 10d5c1e..c1e2e74 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -39,7 +39,8 @@ jobs: strategy: matrix: include: - - component: chat-interface + - component: basic-chat + - component: image-analysis steps: - name: Check out the repository uses: actions/checkout@v4 From 1596c08b82faca87af58ecfcfe9826f72021ff7d Mon Sep 17 00:00:00 2001 From: sd109 Date: Wed, 30 Oct 2024 17:57:17 +0000 Subject: [PATCH 03/34] Disable change check for testing --- .github/workflows/build-push-artifacts.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index c1e2e74..d0cff1d 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -35,7 +35,7 @@ jobs: name: Build and push images runs-on: ubuntu-latest needs: changes - if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} + # if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} strategy: matrix: include: From 6ff5623b65df1f7df7e2cdef174169b7a91d43e6 Mon Sep 17 00:00:00 2001 From: sd109 Date: 
Wed, 30 Oct 2024 18:00:53 +0000 Subject: [PATCH 04/34] Rename web apps and dockerfile targets --- web-apps/image-analysis/Dockerfile | 2 +- web-apps/{basic-chat => text-chat}/Dockerfile | 2 +- web-apps/{basic-chat => text-chat}/app.py | 0 web-apps/{basic-chat => text-chat}/defaults.yml | 0 web-apps/{basic-chat => text-chat}/gradio-client-test.py | 0 web-apps/{basic-chat => text-chat}/requirements.txt | 0 6 files changed, 2 insertions(+), 2 deletions(-) rename web-apps/{basic-chat => text-chat}/Dockerfile (93%) rename web-apps/{basic-chat => text-chat}/app.py (100%) rename web-apps/{basic-chat => text-chat}/defaults.yml (100%) rename web-apps/{basic-chat => text-chat}/gradio-client-test.py (100%) rename web-apps/{basic-chat => text-chat}/requirements.txt (100%) diff --git a/web-apps/image-analysis/Dockerfile b/web-apps/image-analysis/Dockerfile index acc8559..5f858f8 100644 --- a/web-apps/image-analysis/Dockerfile +++ b/web-apps/image-analysis/Dockerfile @@ -1,6 +1,6 @@ FROM python:3.11-slim -ARG DIR=image-interface +ARG DIR=image-analysis COPY $DIR/requirements.txt requirements.txt COPY utils utils diff --git a/web-apps/basic-chat/Dockerfile b/web-apps/text-chat/Dockerfile similarity index 93% rename from web-apps/basic-chat/Dockerfile rename to web-apps/text-chat/Dockerfile index 7e9ed84..294fecd 100644 --- a/web-apps/basic-chat/Dockerfile +++ b/web-apps/text-chat/Dockerfile @@ -1,6 +1,6 @@ FROM python:3.11-slim -ARG DIR=chat-interface +ARG DIR=text-chat COPY $DIR/requirements.txt requirements.txt COPY utils utils diff --git a/web-apps/basic-chat/app.py b/web-apps/text-chat/app.py similarity index 100% rename from web-apps/basic-chat/app.py rename to web-apps/text-chat/app.py diff --git a/web-apps/basic-chat/defaults.yml b/web-apps/text-chat/defaults.yml similarity index 100% rename from web-apps/basic-chat/defaults.yml rename to web-apps/text-chat/defaults.yml diff --git a/web-apps/basic-chat/gradio-client-test.py b/web-apps/text-chat/gradio-client-test.py 
similarity index 100% rename from web-apps/basic-chat/gradio-client-test.py rename to web-apps/text-chat/gradio-client-test.py diff --git a/web-apps/basic-chat/requirements.txt b/web-apps/text-chat/requirements.txt similarity index 100% rename from web-apps/basic-chat/requirements.txt rename to web-apps/text-chat/requirements.txt From 87075618dc9e692e7feafbdeeb20546f1b141176 Mon Sep 17 00:00:00 2001 From: sd109 Date: Wed, 30 Oct 2024 18:01:06 +0000 Subject: [PATCH 05/34] Revert "Disable change check for testing" This reverts commit 1596c08b82faca87af58ecfcfe9826f72021ff7d. --- .github/workflows/build-push-artifacts.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index d0cff1d..c1e2e74 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -35,7 +35,7 @@ jobs: name: Build and push images runs-on: ubuntu-latest needs: changes - # if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} + if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} strategy: matrix: include: From 96ac9090e864a950889a749fb0e2f2b615c750ab Mon Sep 17 00:00:00 2001 From: sd109 Date: Wed, 30 Oct 2024 18:06:20 +0000 Subject: [PATCH 06/34] Update image build workflow paths --- .github/workflows/build-push-artifacts.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index c1e2e74..67c17ef 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -35,11 +35,11 @@ jobs: name: Build and push images runs-on: ubuntu-latest needs: changes - if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} + # if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} strategy: matrix: include: - - component: 
basic-chat + - component: text-chat - component: image-analysis steps: - name: Check out the repository @@ -56,7 +56,7 @@ jobs: id: image-meta uses: docker/metadata-action@v5 with: - images: ghcr.io/stackhpc/azimuth-llm-${{ matrix.component }} + images: ghcr.io/stackhpc/azimuth-llm-ui-${{ matrix.component }} # Produce the branch name or tag and the SHA as tags tags: | type=ref,event=branch @@ -67,7 +67,8 @@ jobs: uses: azimuth-cloud/github-actions/docker-multiarch-build-push@update-trivy-action with: cache-key: ${{ matrix.component }} - context: ./web-apps/${{ matrix.component }} + context: ./web-apps/ + file: ./web-apps/${{ matrix.component }}/Dockerfile platforms: linux/amd64,linux/arm64 push: true tags: ${{ steps.image-meta.outputs.tags }} From afc646928d37703453a22b76e977c87291852ae0 Mon Sep 17 00:00:00 2001 From: sd109 Date: Wed, 30 Oct 2024 18:19:01 +0000 Subject: [PATCH 07/34] Update appSettings and related comments --- chart/values.yaml | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/chart/values.yaml b/chart/values.yaml index 12db8b3..936cfbf 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -80,13 +80,21 @@ api: ui: # Toggles installation of the gradio web UI enabled: true - # The file from the UI config map to execute as the entrypoint to the frontend app - entrypoint: app.py - # The values to be written to settings.yml for parsing as frontend app setting - # (see example_app.py and config.py for example using pydantic-settings to configure app) + # Container image config + image: + repository: ghcr.io/stackhpc/azimuth-llm-chat-interface + version: 87a0342 + imagePullPolicy: + # The settings to be passed to the frontend web app. + # Format depends on the chosen UI image above. For each of the UIs + # included in the web-apps/ folder of this git repository there is a + # defaults.yml file (e.g. web-apps/text-chat/defaults.yml) listing all + # available configuration options. 
+ # FIXME: Figure out how to make JSON schema fit + # with different config options for each web app. appSettings: - hf_model_name: *model-name - hf_model_instruction: "You are a helpful AI assistant. Please response appropriately." + model_name: *model-name + model_instruction: "You are a helpful AI assistant. Please respond appropriately." # Use local system fonts by default to avoid GDPR issues # with Gradio's defaults fonts which require fetching from # the Google fonts API. To restore default Gradio theme @@ -98,11 +106,6 @@ ui: font_mono: - sans-serif - Arial - # Container image config - image: - repository: ghcr.io/stackhpc/azimuth-llm-chat-interface - version: 87a0342 - imagePullPolicy: # Service config service: name: web-app From 58c4dcb2a3926745c483756b31501ef5c6c4786a Mon Sep 17 00:00:00 2001 From: sd109 Date: Wed, 30 Oct 2024 21:08:02 +0000 Subject: [PATCH 08/34] Rename published docker images --- .github/workflows/build-push-artifacts.yml | 6 +++--- web-apps/{text-chat => chat}/Dockerfile | 2 +- web-apps/{text-chat => chat}/app.py | 0 web-apps/{text-chat => chat}/defaults.yml | 0 web-apps/{text-chat => chat}/gradio-client-test.py | 0 web-apps/{text-chat => chat}/requirements.txt | 2 +- web-apps/image-analysis/requirements.txt | 2 +- 7 files changed, 6 insertions(+), 6 deletions(-) rename web-apps/{text-chat => chat}/Dockerfile (94%) rename web-apps/{text-chat => chat}/app.py (100%) rename web-apps/{text-chat => chat}/defaults.yml (100%) rename web-apps/{text-chat => chat}/gradio-client-test.py (100%) rename web-apps/{text-chat => chat}/requirements.txt (84%) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index 67c17ef..ad8a592 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -35,11 +35,11 @@ jobs: name: Build and push images runs-on: ubuntu-latest needs: changes - # if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag'
}} + if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} strategy: matrix: include: - - component: text-chat + - component: chat - component: image-analysis steps: - name: Check out the repository @@ -56,7 +56,7 @@ jobs: id: image-meta uses: docker/metadata-action@v5 with: - images: ghcr.io/stackhpc/azimuth-llm-ui-${{ matrix.component }} + images: ghcr.io/stackhpc/azimuth-llm-${{ matrix.component }}-ui # Produce the branch name or tag and the SHA as tags tags: | type=ref,event=branch diff --git a/web-apps/text-chat/Dockerfile b/web-apps/chat/Dockerfile similarity index 94% rename from web-apps/text-chat/Dockerfile rename to web-apps/chat/Dockerfile index 294fecd..c963b29 100644 --- a/web-apps/text-chat/Dockerfile +++ b/web-apps/chat/Dockerfile @@ -1,6 +1,6 @@ FROM python:3.11-slim -ARG DIR=text-chat +ARG DIR=chat COPY $DIR/requirements.txt requirements.txt COPY utils utils diff --git a/web-apps/text-chat/app.py b/web-apps/chat/app.py similarity index 100% rename from web-apps/text-chat/app.py rename to web-apps/chat/app.py diff --git a/web-apps/text-chat/defaults.yml b/web-apps/chat/defaults.yml similarity index 100% rename from web-apps/text-chat/defaults.yml rename to web-apps/chat/defaults.yml diff --git a/web-apps/text-chat/gradio-client-test.py b/web-apps/chat/gradio-client-test.py similarity index 100% rename from web-apps/text-chat/gradio-client-test.py rename to web-apps/chat/gradio-client-test.py diff --git a/web-apps/text-chat/requirements.txt b/web-apps/chat/requirements.txt similarity index 84% rename from web-apps/text-chat/requirements.txt rename to web-apps/chat/requirements.txt index f37169c..a82255b 100644 --- a/web-apps/text-chat/requirements.txt +++ b/web-apps/chat/requirements.txt @@ -4,4 +4,4 @@ openai langchain langchain_openai pydantic --e ../utils +../utils diff --git a/web-apps/image-analysis/requirements.txt b/web-apps/image-analysis/requirements.txt index 9196eda..006c6a9 100644 --- 
a/web-apps/image-analysis/requirements.txt +++ b/web-apps/image-analysis/requirements.txt @@ -2,5 +2,5 @@ pillow requests gradio<5 gradio_client --e ../utils pydantic +../utils From a8c5a77274638d0eba0d7253445a489c6d73fda9 Mon Sep 17 00:00:00 2001 From: sd109 Date: Wed, 30 Oct 2024 21:39:59 +0000 Subject: [PATCH 09/34] Add chart test for image-analysis UI --- chart/ci/image-analysis-ui-values.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 chart/ci/image-analysis-ui-values.yaml diff --git a/chart/ci/image-analysis-ui-values.yaml b/chart/ci/image-analysis-ui-values.yaml new file mode 100644 index 0000000..43e4eed --- /dev/null +++ b/chart/ci/image-analysis-ui-values.yaml @@ -0,0 +1,8 @@ +api: + enabled: false +ui: + image: + repository: ghcr.io/stackhpc/azimuth-llm-image-analysis-ui + service: + zenith: + enabled: false From b462043f0a4267bb03f43d0ee5fd07bf9ae3d5b8 Mon Sep 17 00:00:00 2001 From: sd109 Date: Wed, 30 Oct 2024 21:40:18 +0000 Subject: [PATCH 10/34] Fixup failing chart tests --- .../ci/{web-apps-only-values.yaml => chat-ui-values.yaml} | 0 chart/values.schema.json | 7 +++---- chart/values.yaml | 5 ++--- 3 files changed, 5 insertions(+), 7 deletions(-) rename chart/ci/{web-apps-only-values.yaml => chat-ui-values.yaml} (100%) diff --git a/chart/ci/web-apps-only-values.yaml b/chart/ci/chat-ui-values.yaml similarity index 100% rename from chart/ci/web-apps-only-values.yaml rename to chart/ci/chat-ui-values.yaml diff --git a/chart/values.schema.json b/chart/values.schema.json index 8d20cf7..f6fb6a9 100644 --- a/chart/values.schema.json +++ b/chart/values.schema.json @@ -30,12 +30,12 @@ "appSettings": { "type": "object", "properties": { - "hf_model_name": { + "model_name": { "type": "string", "title": "Model Name", "description": "Model name supplied to the OpenAI client in frontend web app. Should match huggingface.model above." 
}, - "hf_model_instruction": { + "model_instruction": { "type": "string", "title": "Instruction", "description": "The initial system prompt (i.e. the hidden instruction) to use when generating responses.", @@ -94,8 +94,7 @@ } }, "required": [ - "hf_model_name", - "hf_model_instruction" + "model_name" ] } } diff --git a/chart/values.yaml b/chart/values.yaml index 936cfbf..ed66c5b 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -82,8 +82,8 @@ ui: enabled: true # Container image config image: - repository: ghcr.io/stackhpc/azimuth-llm-chat-interface - version: 87a0342 + repository: ghcr.io/stackhpc/azimuth-llm-chat-ui + version: 58c4dcb imagePullPolicy: # The settings to be passed to the frontend web app. # Format depends on the chosen UI image above. For each of the UIs @@ -94,7 +94,6 @@ ui: # with different config options for each web app. appSettings: model_name: *model-name - model_instruction: "You are a helpful AI assistant. Please response appropriately." # Use local system fonts by default to avoid GDPR issues # with Gradio's defaults fonts which require fetching from # the Google fonts API. To restore default Gradio theme From 9cd569439d536fe643ddada64c7e03c71344bca0 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 00:15:07 +0000 Subject: [PATCH 11/34] Update docs --- README.md | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 58b0812..98068a8 100644 --- a/README.md +++ b/README.md @@ -34,38 +34,36 @@ ui: enabled: false ``` -***Warning*** - Exposing the services in this way provides no authentication mechanism and anyone with access to the load balancer IPs will be able to query the language model. It is up to you to secure the running service in your own way. 
In contrast, when deploying via Azimuth, authentication is provided via the standard Azimuth Identity Provider mechanisms and the authenticated services are exposed via [Zenith](https://github.com/stackhpc/zenith). +> [!WARNING] Exposing the services in this way provides no authentication mechanism and anyone with access to the load balancer IPs will be able to query the language model. It is up to you to secure the running service as appropriate for your use case. In contrast, when deployed via Azimuth, authentication is provided via the standard Azimuth Identity Provider mechanisms and the authenticated services are exposed via [Zenith](https://github.com/stackhpc/zenith). -The UI can also optionally be exposed using a Kubernetes Ingress resource. See the `ui.ingress` section in `values.yml` for available config options. +Both the web-based interface and the backend OpenAI-compatible vLLM API server can also optionally be exposed using [Kubernetes Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/). See the `ingress` section in `values.yml` for available config options. ## Tested Models -The following is a non-exhaustive list of models which have been tested with this app: - [Llama 2 7B chat](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) - [AWQ Quantized Llama 2 70B](https://huggingface.co/TheBloke/Llama-2-70B-Chat-AWQ) - [Magicoder 6.7B](https://huggingface.co/ise-uiuc/Magicoder-S-DS-6.7B) - [Mistral 7B Instruct v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) - [WizardCoder Python 34B](https://huggingface.co/WizardLM/WizardCoder-Python-34B-V1.0) - [AWQ Quantized Mixtral 8x7B Instruct v0.1](https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-AWQ) +The application uses [vLLM](https://docs.vllm.ai/en/latest/index.html) for model serving, therefore any of the vLLM [supported models](https://docs.vllm.ai/en/latest/models/supported_models.html) should work.
Since vLLM pulls the model files directly from [HuggingFace](https://huggingface.co/models) it is likely that some other models will also be compatible with vLLM but mileage may vary between models and model architectures. If a model is incompatible with vLLM then the API pod will likely enter a `CrashLoopBackoff` state and any relevant error information will be found in the API pod logs. These logs can be viewed with -Due to the combination of [components](##Components) used in this app, some HuggingFace models may not work as expected (usually due to the way in which LangChain formats the prompt messages). Any errors when using a new model will appear in the logs for either the web-app pod or the backend API pod. Please open an issue if you would like explicit support for a specific model that is not in the above list. +``` +kubectl (-n ) logs deploy/-api +``` + +If you suspect that a given error is not caused by the upstream vLLM support but is instead a problem with this Helm chart then please [open an issue](https://github.com/stackhpc/azimuth-llm/issues). ## Monitoring -The LLM chart integrates with [kube-prometheus-stack](https://artifacthub.io/packages/helm/prometheus-community/kube-prometheus-stack) by creating a `ServiceMonitor` resource and installing a custom Grafana dashboard as a Kubernetes `ConfigMap`. If the target cluster has an existing `kube-prometheus-stack` deployment which is appropriately configured to watch all namespaces for new Grafana dashboards, the custom LLM dashboard provided here will automatically picked up by Grafana. It will appear in the Grafana dashboard list with the name 'LLM dashboard'. +The LLM chart integrates with [kube-prometheus-stack](https://artifacthub.io/packages/helm/prometheus-community/kube-prometheus-stack) by creating a `ServiceMonitor` resource and installing two custom Grafana dashboards as Kubernetes `ConfigMap`s.
If the target cluster has an existing `kube-prometheus-stack` deployment which is appropriately configured to watch all namespaces for new Grafana dashboards, the LLM dashboards will automatically appear in Grafana's dashboard list. To disable the monitoring integrations, set the `api.monitoring.enabled` value to `false`. ## Components The Helm chart consists of the following components: -- A backend web API which runs [vLLM](https://github.com/vllm-project/vllm)'s [OpenAI compatible web server](https://docs.vllm.ai/en/latest/getting_started/quickstart.html#openai-compatible-server). +- A backend web API which runs [vLLM](https://github.com/vllm-project/vllm)'s [OpenAI compatible web server](https://docs.vllm.ai/en/stable/getting_started/quickstart.html#openai-compatible-server). -- A frontend web-app built using [Gradio](https://www.gradio.app) and [LangChain](https://www.langchain.com). The web app source code can be found in `chart/web-app` and gets written to a ConfigMap during the chart build and is then mounted into the UI pod and executed as the entry point for the UI docker image (built from `images/ui-base/Dockerfile`). +- A choice of frontend web-apps built using [Gradio](https://www.gradio.app) (see [web-apps](./web-apps/)). Each web interface is available as a pre-built container image [hosted on ghcr.io](https://github.com/orgs/stackhpc/packages?repo_name=azimuth-llm) and can be configured for each Helm release by changing the `ui.image` section of the chart values. -- A [stakater/Reloader](https://github.com/stakater/Reloader) instance which monitors the web-app ConfigMap for changes and restarts the frontend when the app code changes (i.e. whenever the Helm values are updated).
+ + + From c85bffdda3acbc7cb8f973e2a32f250914e09557 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 12:35:09 +0000 Subject: [PATCH 12/34] Refactor Helm charts to allow a different chart schema per web app --- .github/workflows/build-push-artifacts.yml | 3 +- .gitignore | 4 +- chart/ci/image-analysis-ui-values.yaml | 8 -- chart/values.schema.json | 123 ----------------- charts/azimuth-chat/Chart.yaml | 22 +++ charts/azimuth-chat/azimuth-ui.schema.yaml | 34 +++++ charts/azimuth-chat/ci/chat-ui-values.yaml | 7 + charts/azimuth-chat/values.schema.json | 128 ++++++++++++++++++ charts/azimuth-chat/values.yaml | 9 ++ charts/azimuth-image-analysis/Chart.yaml | 22 +++ .../azimuth-ui.schema.yaml | 26 ++++ .../ci/image-analysis-ui-values.yaml | 7 + .../azimuth-image-analysis/values.schema.json | 111 +++++++++++++++ charts/azimuth-image-analysis/values.yaml | 8 ++ {chart => charts/azimuth-llm}/.helmignore | 0 {chart => charts/azimuth-llm}/Chart.yaml | 12 +- .../azimuth-llm}/azimuth-ui.schema.yaml | 1 - .../azimuth-llm/ci/no-api-values.yaml | 0 .../azimuth-llm}/templates/NOTES.txt | 0 .../azimuth-llm}/templates/_helpers.tpl | 0 .../config-map-grafana-dashboard-details.yml | 0 .../config-map-grafana-dashboard-summary.yml | 20 +-- .../azimuth-llm}/templates/api/deployment.yml | 0 .../azimuth-llm}/templates/api/ingress.yml | 0 .../templates/api/service-monitor.yml | 0 .../azimuth-llm}/templates/api/service.yml | 0 .../templates/api/zenith-client.yml | 0 .../templates/api/zenith-reservation.yml | 0 .../templates/test/end-to-end.yml | 0 .../azimuth-llm}/templates/test/web-app.yml | 0 .../templates/ui/app-config-map.yml | 0 .../azimuth-llm}/templates/ui/deployment.yml | 4 + .../azimuth-llm}/templates/ui/ingress.yml | 0 .../azimuth-llm}/templates/ui/service.yml | 0 .../templates/ui/ui-zenith-client.yml | 0 .../templates/ui/ui-zenith-reservation.yml | 0 {chart => charts/azimuth-llm}/values.yaml | 7 +- ct.yaml | 8 +- 38 files changed, 410 insertions(+), 154 deletions(-) 
delete mode 100644 chart/ci/image-analysis-ui-values.yaml delete mode 100644 chart/values.schema.json create mode 100644 charts/azimuth-chat/Chart.yaml create mode 100644 charts/azimuth-chat/azimuth-ui.schema.yaml create mode 100644 charts/azimuth-chat/ci/chat-ui-values.yaml create mode 100644 charts/azimuth-chat/values.schema.json create mode 100644 charts/azimuth-chat/values.yaml create mode 100644 charts/azimuth-image-analysis/Chart.yaml create mode 100644 charts/azimuth-image-analysis/azimuth-ui.schema.yaml create mode 100644 charts/azimuth-image-analysis/ci/image-analysis-ui-values.yaml create mode 100644 charts/azimuth-image-analysis/values.schema.json create mode 100644 charts/azimuth-image-analysis/values.yaml rename {chart => charts/azimuth-llm}/.helmignore (100%) rename {chart => charts/azimuth-llm}/Chart.yaml (89%) rename {chart => charts/azimuth-llm}/azimuth-ui.schema.yaml (98%) rename chart/ci/chat-ui-values.yaml => charts/azimuth-llm/ci/no-api-values.yaml (100%) rename {chart => charts/azimuth-llm}/templates/NOTES.txt (100%) rename {chart => charts/azimuth-llm}/templates/_helpers.tpl (100%) rename {chart => charts/azimuth-llm}/templates/api/config-map-grafana-dashboard-details.yml (100%) rename {chart => charts/azimuth-llm}/templates/api/config-map-grafana-dashboard-summary.yml (98%) rename {chart => charts/azimuth-llm}/templates/api/deployment.yml (100%) rename {chart => charts/azimuth-llm}/templates/api/ingress.yml (100%) rename {chart => charts/azimuth-llm}/templates/api/service-monitor.yml (100%) rename {chart => charts/azimuth-llm}/templates/api/service.yml (100%) rename {chart => charts/azimuth-llm}/templates/api/zenith-client.yml (100%) rename {chart => charts/azimuth-llm}/templates/api/zenith-reservation.yml (100%) rename {chart => charts/azimuth-llm}/templates/test/end-to-end.yml (100%) rename {chart => charts/azimuth-llm}/templates/test/web-app.yml (100%) rename {chart => charts/azimuth-llm}/templates/ui/app-config-map.yml (100%) rename 
{chart => charts/azimuth-llm}/templates/ui/deployment.yml (83%) rename {chart => charts/azimuth-llm}/templates/ui/ingress.yml (100%) rename {chart => charts/azimuth-llm}/templates/ui/service.yml (100%) rename {chart => charts/azimuth-llm}/templates/ui/ui-zenith-client.yml (100%) rename {chart => charts/azimuth-llm}/templates/ui/ui-zenith-reservation.yml (100%) rename {chart => charts/azimuth-llm}/values.yaml (97%) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index ad8a592..fa2cca4 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -28,7 +28,7 @@ jobs: images: - 'web-apps/**' chart: - - 'chart/**' + - 'charts/**' # Job to build container images build_push_images: @@ -96,6 +96,7 @@ jobs: - name: Publish Helm charts uses: azimuth-cloud/github-actions/helm-publish@master with: + directory: charts token: ${{ secrets.GITHUB_TOKEN }} version: ${{ steps.semver.outputs.version }} app-version: ${{ steps.semver.outputs.short-sha }} diff --git a/.gitignore b/.gitignore index 9a57ecc..b6862d1 100644 --- a/.gitignore +++ b/.gitignore @@ -11,8 +11,8 @@ test-values.y[a]ml **venv*/ # Helm chart stuff -chart/Chart.lock -chart/charts +charts/*/Chart.lock +charts/*/charts # Python stuff **/build/ diff --git a/chart/ci/image-analysis-ui-values.yaml b/chart/ci/image-analysis-ui-values.yaml deleted file mode 100644 index 43e4eed..0000000 --- a/chart/ci/image-analysis-ui-values.yaml +++ /dev/null @@ -1,8 +0,0 @@ -api: - enabled: false -ui: - image: - repository: ghcr.io/stackhpc/azimuth-llm-image-analysis-ui - service: - zenith: - enabled: false diff --git a/chart/values.schema.json b/chart/values.schema.json deleted file mode 100644 index f6fb6a9..0000000 --- a/chart/values.schema.json +++ /dev/null @@ -1,123 +0,0 @@ -{ - "$schema": "http://json-schema.org/schema#", - "type": "object", - "properties": { - "huggingface": { - "type": "object", - "properties": { - "model": { - 
"type": "string", - "title": "Model", - "description": "The [HuggingFace model](https://huggingface.co/models) to deploy (see [here](https://github.com/stackhpc/azimuth-llm?tab=readme-ov-file#tested-models) for a list of tested models).", - "default": "microsoft/Phi-3.5-mini-instruct" - }, - "token": { - "type": [ - "string", - "null" - ], - "title": "Access Token", - "description": "A HuggingFace [access token](https://huggingface.co/docs/hub/security-tokens). Required for [gated models](https://huggingface.co/docs/hub/en/models-gated) (e.g. Llama 3)." - } - }, - "required": [ - "model" - ] - }, - "ui": { - "type": "object", - "properties": { - "appSettings": { - "type": "object", - "properties": { - "model_name": { - "type": "string", - "title": "Model Name", - "description": "Model name supplied to the OpenAI client in frontend web app. Should match huggingface.model above." - }, - "model_instruction": { - "type": "string", - "title": "Instruction", - "description": "The initial system prompt (i.e. the hidden instruction) to use when generating responses.", - "default": "You are a helpful AI assistant. Please respond appropriately." 
- }, - "page_title": { - "type": "string", - "title": "Page Title", - "description": "The title to display at the top of the chat interface.", - "default": "Large Language Model" - }, - "llm_max_tokens": { - "type": "integer", - "title": "Max Tokens", - "description": "The maximum number of new [tokens](https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens) to generate for each LLM responses.", - "default": 1000 - }, - "llm_temperature": { - "type": "number", - "title": "LLM Temperature", - "description": "The [temperature](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature) value to use when generating LLM responses.", - "default": 0, - "minimum": 0, - "maximum": 2 - }, - "llm_top_p": { - "type": "number", - "title": "LLM Top P", - "description": "The [top p](https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p) value to use when generating LLM responses.", - "default": 1, - "exclusiveMinimum": 0, - "maximum": 1 - }, - "llm_top_k": { - "type": "integer", - "title": "LLM Top K", - "description": "The [top k](https://docs.vllm.ai/en/stable/dev/sampling_params.html) value to use when generating LLM responses (must be an integer).", - "default": -1, - "minimum": -1 - }, - "llm_presence_penalty": { - "type": "number", - "title": "LLM Presence Penalty", - "description": "The [presence penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-presence_penalty) to use when generating LLM responses.", - "default": 0, - "minimum": -2, - "maximum": 2 - }, - "llm_frequency_penalty": { - "type": "number", - "title": "LLM Frequency Penalty", - "description": "The [frequency_penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-frequency_penalty) to use when generating LLM responses.", - "default": 0, - "minimum": -2, - "maximum": 2 - } - }, - "required": [ - "model_name" - ] - } - } - }, - "api": { - "type": "object", - "properties": { - 
"modelMaxContextLength": { - "title": "Model Context Length", - "description": "An override for the maximum context length to allow, if the model's default is not suitable." - }, - "image": { - "type": "object", - "properties": { - "version": { - "type": "string", - "title": "Backend vLLM version", - "description": "The vLLM version to use as a backend. Must be a version tag from [this list](https://github.com/vllm-project/vllm/tags)", - "default": "v0.6.3" - } - } - } - } - } - } -} diff --git a/charts/azimuth-chat/Chart.yaml b/charts/azimuth-chat/Chart.yaml new file mode 100644 index 0000000..b3e9fa0 --- /dev/null +++ b/charts/azimuth-chat/Chart.yaml @@ -0,0 +1,22 @@ +apiVersion: v2 +name: azimuth-llm-chat +description: HuggingFace vision model serving along with a simple web interface. +maintainers: + - name: "Scott Davidson" + email: scott@stackhpc.com + +type: application + +version: 0.1.0 + +appVersion: "0.1.0" + +icon: https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg + +annotations: + azimuth.stackhpc.com/label: HuggingFace Image Analysis + +dependencies: + - name: azimuth-llm + version: 0.1.0 + repository: "file://../azimuth-llm/" diff --git a/charts/azimuth-chat/azimuth-ui.schema.yaml b/charts/azimuth-chat/azimuth-ui.schema.yaml new file mode 100644 index 0000000..061a8ce --- /dev/null +++ b/charts/azimuth-chat/azimuth-ui.schema.yaml @@ -0,0 +1,34 @@ +controls: + /azimuth-llm/huggingface/model: + type: TextControl + required: true + /azimuth-llm/huggingface/token: + type: TextControl + secret: true + # Use mirror to mimic yaml anchor in base Helm chart + /azimuth-llm/ui/appSettings/hf_model_name: + type: MirrorControl + path: /azimuth-llm/huggingface/model + visuallyHidden: true + # Azimuth UI doesn't handle json type ["integer","null"] + # properly so we allow any type in JSON schema then + # constrain to (optional) integer here. 
+ /azimuth-llm/api/modelMaxContextLength: + type: IntegerControl + minimum: 100 + step: 100 + required: false + +sortOrder: + - /azimuth-llm/huggingface/model + - /azimuth-llm/huggingface/token + - /azimuth-llm/ui/appSettings/hf_model_instruction + - /azimuth-llm/ui/appSettings/page_title + - /azimuth-llm/api/image/version + - /azimuth-llm/ui/appSettings/llm_temperature + - /azimuth-llm/ui/appSettings/llm_max_tokens + - /azimuth-llm/ui/appSettings/llm_frequency_penalty + - /azimuth-llm/ui/appSettings/llm_presence_penalty + - /azimuth-llm/ui/appSettings/llm_top_p + - /azimuth-llm/ui/appSettings/llm_top_k + - /azimuth-llm/api/modelMaxContextLength diff --git a/charts/azimuth-chat/ci/chat-ui-values.yaml b/charts/azimuth-chat/ci/chat-ui-values.yaml new file mode 100644 index 0000000..bf30ede --- /dev/null +++ b/charts/azimuth-chat/ci/chat-ui-values.yaml @@ -0,0 +1,7 @@ +azimuth-llm: + api: + enabled: false + ui: + service: + zenith: + enabled: false diff --git a/charts/azimuth-chat/values.schema.json b/charts/azimuth-chat/values.schema.json new file mode 100644 index 0000000..96e5882 --- /dev/null +++ b/charts/azimuth-chat/values.schema.json @@ -0,0 +1,128 @@ +{ + "type": "object", + "properties": { + "azimuth-llm": { + "type": "object", + "properties": { + "huggingface": { + "type": "object", + "properties": { + "model": { + "type": "string", + "title": "Model", + "description": "The [HuggingFace model](https://huggingface.co/models) to deploy (see [here](https://github.com/stackhpc/azimuth-llm?tab=readme-ov-file#tested-models) for a list of tested models).", + "default": "microsoft/Phi-3.5-mini-instruct" + }, + "token": { + "type": [ + "string", + "null" + ], + "title": "Access Token", + "description": "A HuggingFace [access token](https://huggingface.co/docs/hub/security-tokens). Required for [gated models](https://huggingface.co/docs/hub/en/models-gated) (e.g. Llama 3)." 
+ } + }, + "required": [ + "model" + ] + }, + "api": { + "type": "object", + "properties": { + "modelMaxContextLength": { + "title": "Model Context Length", + "description": "An override for the maximum context length to allow, if the model's default is not suitable." + }, + "image": { + "type": "object", + "properties": { + "version": { + "type": "string", + "title": "Backend vLLM version", + "description": "The vLLM version to use as a backend. Must be a version tag from [this list](https://github.com/vllm-project/vllm/tags)", + "default": "v0.6.3" + } + } + } + } + }, + "ui": { + "type": "object", + "properties": { + "appSettings": { + "type": "object", + "properties": { + "model_name": { + "type": "string", + "title": "Model Name", + "description": "Model name supplied to the OpenAI client in frontend web app. Should match huggingface.model above." + }, + "model_instruction": { + "type": "string", + "title": "Instruction", + "description": "The initial system prompt (i.e. the hidden instruction) to use when generating responses.", + "default": "You are a helpful AI assistant. Please respond appropriately." 
+ }, + "page_title": { + "type": "string", + "title": "Page Title", + "description": "The title to display at the top of the chat interface.", + "default": "Large Language Model" + }, + "llm_max_tokens": { + "type": "integer", + "title": "Max Tokens", + "description": "The maximum number of new [tokens](https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens) to generate for each LLM responses.", + "default": 1000 + }, + "llm_temperature": { + "type": "number", + "title": "LLM Temperature", + "description": "The [temperature](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature) value to use when generating LLM responses.", + "default": 0, + "minimum": 0, + "maximum": 2 + }, + "llm_top_p": { + "type": "number", + "title": "LLM Top P", + "description": "The [top p](https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p) value to use when generating LLM responses.", + "default": 1, + "exclusiveMinimum": 0, + "maximum": 1 + }, + "llm_top_k": { + "type": "integer", + "title": "LLM Top K", + "description": "The [top k](https://docs.vllm.ai/en/stable/dev/sampling_params.html) value to use when generating LLM responses (must be an integer).", + "default": -1, + "minimum": -1 + }, + "llm_presence_penalty": { + "type": "number", + "title": "LLM Presence Penalty", + "description": "The [presence penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-presence_penalty) to use when generating LLM responses.", + "default": 0, + "minimum": -2, + "maximum": 2 + }, + "llm_frequency_penalty": { + "type": "number", + "title": "LLM Frequency Penalty", + "description": "The [frequency_penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-frequency_penalty) to use when generating LLM responses.", + "default": 0, + "minimum": -2, + "maximum": 2 + } + }, + "required": [ + "model_name", + "model_instruction" + ] + } + } + } + } + } + } +} diff --git 
a/charts/azimuth-chat/values.yaml b/charts/azimuth-chat/values.yaml new file mode 100644 index 0000000..9a17317 --- /dev/null +++ b/charts/azimuth-chat/values.yaml @@ -0,0 +1,9 @@ +azimuth-llm: + huggingface: + model: &model-name microsoft/Phi-3.5-mini-instruct + ui: + image: + repository: ghcr.io/stackhpc/azimuth-llm-chat-ui + appSettings: + model_name: *model-name + model_instruction: "You are a helpful AI assistant; please respond appropriately." diff --git a/charts/azimuth-image-analysis/Chart.yaml b/charts/azimuth-image-analysis/Chart.yaml new file mode 100644 index 0000000..c2681ca --- /dev/null +++ b/charts/azimuth-image-analysis/Chart.yaml @@ -0,0 +1,22 @@ +apiVersion: v2 +name: azimuth-llm-image-analysis +description: HuggingFace vision model serving along with a simple web interface. +maintainers: + - name: "Scott Davidson" + email: scott@stackhpc.com + +type: application + +version: 0.1.0 + +appVersion: "0.1.0" + +icon: https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg + +annotations: + azimuth.stackhpc.com/label: HuggingFace Image Analysis + +dependencies: + - name: azimuth-llm + version: 0.1.0 + repository: "file://../azimuth-llm/" diff --git a/charts/azimuth-image-analysis/azimuth-ui.schema.yaml b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml new file mode 100644 index 0000000..5c4799c --- /dev/null +++ b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml @@ -0,0 +1,26 @@ +controls: + /azimuth-llm/huggingface/model: + type: TextControl + required: true + /azimuth-llm/huggingface/token: + type: TextControl + secret: true + # Use mirror to mimic yaml anchor in base Helm chart + /azimuth-llm/ui/appSettings/hf_model_name: + type: MirrorControl + path: /azimuth-llm/huggingface/model + visuallyHidden: true + +sortOrder: + - /huggingface/model + - /huggingface/token + - /ui/appSettings/hf_model_instruction + - /ui/appSettings/page_title + - /api/image/version + - /ui/appSettings/llm_temperature + - 
/ui/appSettings/llm_max_tokens + - /ui/appSettings/llm_frequency_penalty + - /ui/appSettings/llm_presence_penalty + - /ui/appSettings/llm_top_p + - /ui/appSettings/llm_top_k + - /api/modelMaxContextLength diff --git a/charts/azimuth-image-analysis/ci/image-analysis-ui-values.yaml b/charts/azimuth-image-analysis/ci/image-analysis-ui-values.yaml new file mode 100644 index 0000000..bf30ede --- /dev/null +++ b/charts/azimuth-image-analysis/ci/image-analysis-ui-values.yaml @@ -0,0 +1,7 @@ +azimuth-llm: + api: + enabled: false + ui: + service: + zenith: + enabled: false diff --git a/charts/azimuth-image-analysis/values.schema.json b/charts/azimuth-image-analysis/values.schema.json new file mode 100644 index 0000000..2ddda05 --- /dev/null +++ b/charts/azimuth-image-analysis/values.schema.json @@ -0,0 +1,111 @@ +{ + "type": "object", + "properties": { + "azimuth-llm": { + "type": "object", + "properties": { + "huggingface": { + "type": "object", + "properties": { + "model": { + "type": "string", + "title": "Model", + "description": "The [HuggingFace model](https://huggingface.co/models) to deploy (see [here](https://github.com/stackhpc/azimuth-llm?tab=readme-ov-file#tested-models) for a list of tested models).", + "default": "microsoft/Phi-3.5-vision-instruct" + }, + "token": { + "type": [ + "string", + "null" + ], + "title": "Access Token", + "description": "A HuggingFace [access token](https://huggingface.co/docs/hub/security-tokens). Required for [gated models](https://huggingface.co/docs/hub/en/models-gated) (e.g. Llama 3)." + } + }, + "required": [ + "model" + ] + }, + "api": { + "type": "object", + "properties": { + "image": { + "type": "object", + "properties": { + "version": { + "type": "string", + "title": "Backend vLLM version", + "description": "The vLLM version to use as a backend. 
Must be a version tag from [this list](https://github.com/vllm-project/vllm/tags)", + "default": "v0.6.3" + } + } + } + } + }, + "ui": { + "type": "object", + "properties": { + "appSettings": { + "type": "object", + "properties": { + "model_name": { + "type": "string", + "title": "Model Name", + "description": "Model name supplied to the OpenAI client in frontend web app. Should match huggingface.model above." + }, + "llm_max_tokens": { + "type": "integer", + "title": "Max Tokens", + "description": "The maximum number of new [tokens](https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens) to generate for each LLM responses.", + "default": 1000 + }, + "llm_temperature": { + "type": "number", + "title": "LLM Temperature", + "description": "The [temperature](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature) value to use when generating LLM responses.", + "default": 0, + "minimum": 0, + "maximum": 2 + }, + "llm_top_p": { + "type": "number", + "title": "LLM Top P", + "description": "The [top p](https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p) value to use when generating LLM responses.", + "default": 1, + "exclusiveMinimum": 0, + "maximum": 1 + }, + "llm_top_k": { + "type": "integer", + "title": "LLM Top K", + "description": "The [top k](https://docs.vllm.ai/en/stable/dev/sampling_params.html) value to use when generating LLM responses (must be an integer).", + "default": -1, + "minimum": -1 + }, + "llm_presence_penalty": { + "type": "number", + "title": "LLM Presence Penalty", + "description": "The [presence penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-presence_penalty) to use when generating LLM responses.", + "default": 0, + "minimum": -2, + "maximum": 2 + }, + "llm_frequency_penalty": { + "type": "number", + "title": "LLM Frequency Penalty", + "description": "The 
[frequency_penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-frequency_penalty) to use when generating LLM responses.", + "default": 0, + "minimum": -2, + "maximum": 2 + } + }, + "required": [ + "model_name" + ] + } + } + } + } + } + } +} diff --git a/charts/azimuth-image-analysis/values.yaml b/charts/azimuth-image-analysis/values.yaml new file mode 100644 index 0000000..c5a770c --- /dev/null +++ b/charts/azimuth-image-analysis/values.yaml @@ -0,0 +1,8 @@ +azimuth-llm: + huggingface: + model: &model-name microsoft/Phi-3.5-vision-instruct + ui: + image: + repository: ghcr.io/stackhpc/azimuth-llm-image-analysis-ui + appSettings: + model_name: *model-name diff --git a/chart/.helmignore b/charts/azimuth-llm/.helmignore similarity index 100% rename from chart/.helmignore rename to charts/azimuth-llm/.helmignore diff --git a/chart/Chart.yaml b/charts/azimuth-llm/Chart.yaml similarity index 89% rename from chart/Chart.yaml rename to charts/azimuth-llm/Chart.yaml index a6542df..637db49 100644 --- a/chart/Chart.yaml +++ b/charts/azimuth-llm/Chart.yaml @@ -18,7 +18,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.2.0 +version: 0.1.0 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
Versions are not expected to @@ -31,8 +31,8 @@ icon: https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-l annotations: azimuth.stackhpc.com/label: HuggingFace LLM -dependencies: - - name: reloader - version: 1.0.63 - repository: https://stakater.github.io/stakater-charts - condition: ui.enabled +# dependencies: +# - name: reloader +# version: 1.0.63 +# repository: https://stakater.github.io/stakater-charts +# condition: ui.enabled diff --git a/chart/azimuth-ui.schema.yaml b/charts/azimuth-llm/azimuth-ui.schema.yaml similarity index 98% rename from chart/azimuth-ui.schema.yaml rename to charts/azimuth-llm/azimuth-ui.schema.yaml index de283f1..a633139 100644 --- a/chart/azimuth-ui.schema.yaml +++ b/charts/azimuth-llm/azimuth-ui.schema.yaml @@ -16,7 +16,6 @@ controls: /api/modelMaxContextLength: type: IntegerControl minimum: 100 - step: 100 required: false sortOrder: diff --git a/chart/ci/chat-ui-values.yaml b/charts/azimuth-llm/ci/no-api-values.yaml similarity index 100% rename from chart/ci/chat-ui-values.yaml rename to charts/azimuth-llm/ci/no-api-values.yaml diff --git a/chart/templates/NOTES.txt b/charts/azimuth-llm/templates/NOTES.txt similarity index 100% rename from chart/templates/NOTES.txt rename to charts/azimuth-llm/templates/NOTES.txt diff --git a/chart/templates/_helpers.tpl b/charts/azimuth-llm/templates/_helpers.tpl similarity index 100% rename from chart/templates/_helpers.tpl rename to charts/azimuth-llm/templates/_helpers.tpl diff --git a/chart/templates/api/config-map-grafana-dashboard-details.yml b/charts/azimuth-llm/templates/api/config-map-grafana-dashboard-details.yml similarity index 100% rename from chart/templates/api/config-map-grafana-dashboard-details.yml rename to charts/azimuth-llm/templates/api/config-map-grafana-dashboard-details.yml diff --git a/chart/templates/api/config-map-grafana-dashboard-summary.yml b/charts/azimuth-llm/templates/api/config-map-grafana-dashboard-summary.yml similarity index 98% rename from 
chart/templates/api/config-map-grafana-dashboard-summary.yml rename to charts/azimuth-llm/templates/api/config-map-grafana-dashboard-summary.yml index 9a2002e..672d862 100644 --- a/chart/templates/api/config-map-grafana-dashboard-summary.yml +++ b/charts/azimuth-llm/templates/api/config-map-grafana-dashboard-summary.yml @@ -184,7 +184,8 @@ data: "value": 80 } ] - } + }, + "unit": "locale" }, "overrides": [] }, @@ -279,7 +280,8 @@ data: "value": 80 } ] - } + }, + "unit": "locale" }, "overrides": [] }, @@ -378,7 +380,8 @@ data: "value": 80 } ] - } + }, + "unit": "locale" }, "overrides": [] }, @@ -477,7 +480,8 @@ data: "value": 80 } ] - } + }, + "unit": "locale" }, "overrides": [] }, @@ -518,7 +522,7 @@ data: "useBackend": false } ], - "title": "Tokens Generated (total)", + "title": "Generated Tokens (total)", "type": "timeseries" }, { @@ -814,14 +818,14 @@ data: "list": [] }, "time": { - "from": "now-120d", + "from": "now-90d", "to": "now" }, "timepicker": {}, "timezone": "", - "title": "vLLM Dashboard - Summary", + "title": "Scott test 1", "uid": "ee0cbu8l3b400dasdasfas", - "version": 1, + "version": 5, "weekStart": "" } {{- end -}} diff --git a/chart/templates/api/deployment.yml b/charts/azimuth-llm/templates/api/deployment.yml similarity index 100% rename from chart/templates/api/deployment.yml rename to charts/azimuth-llm/templates/api/deployment.yml diff --git a/chart/templates/api/ingress.yml b/charts/azimuth-llm/templates/api/ingress.yml similarity index 100% rename from chart/templates/api/ingress.yml rename to charts/azimuth-llm/templates/api/ingress.yml diff --git a/chart/templates/api/service-monitor.yml b/charts/azimuth-llm/templates/api/service-monitor.yml similarity index 100% rename from chart/templates/api/service-monitor.yml rename to charts/azimuth-llm/templates/api/service-monitor.yml diff --git a/chart/templates/api/service.yml b/charts/azimuth-llm/templates/api/service.yml similarity index 100% rename from chart/templates/api/service.yml rename 
to charts/azimuth-llm/templates/api/service.yml diff --git a/chart/templates/api/zenith-client.yml b/charts/azimuth-llm/templates/api/zenith-client.yml similarity index 100% rename from chart/templates/api/zenith-client.yml rename to charts/azimuth-llm/templates/api/zenith-client.yml diff --git a/chart/templates/api/zenith-reservation.yml b/charts/azimuth-llm/templates/api/zenith-reservation.yml similarity index 100% rename from chart/templates/api/zenith-reservation.yml rename to charts/azimuth-llm/templates/api/zenith-reservation.yml diff --git a/chart/templates/test/end-to-end.yml b/charts/azimuth-llm/templates/test/end-to-end.yml similarity index 100% rename from chart/templates/test/end-to-end.yml rename to charts/azimuth-llm/templates/test/end-to-end.yml diff --git a/chart/templates/test/web-app.yml b/charts/azimuth-llm/templates/test/web-app.yml similarity index 100% rename from chart/templates/test/web-app.yml rename to charts/azimuth-llm/templates/test/web-app.yml diff --git a/chart/templates/ui/app-config-map.yml b/charts/azimuth-llm/templates/ui/app-config-map.yml similarity index 100% rename from chart/templates/ui/app-config-map.yml rename to charts/azimuth-llm/templates/ui/app-config-map.yml diff --git a/chart/templates/ui/deployment.yml b/charts/azimuth-llm/templates/ui/deployment.yml similarity index 83% rename from chart/templates/ui/deployment.yml rename to charts/azimuth-llm/templates/ui/deployment.yml index aa52e02..bed4167 100644 --- a/chart/templates/ui/deployment.yml +++ b/charts/azimuth-llm/templates/ui/deployment.yml @@ -19,6 +19,10 @@ spec: metadata: labels: {{- include "azimuth-llm.ui-selectorLabels" . | nindent 8 }} + # Restart deployment when settings config map changes + # https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments + annotations: + checksum/config: {{ include (print $.Template.BasePath "/ui/app-config-map.yml") . 
| sha256sum }} spec: containers: - name: {{ .Release.Name }}-ui diff --git a/chart/templates/ui/ingress.yml b/charts/azimuth-llm/templates/ui/ingress.yml similarity index 100% rename from chart/templates/ui/ingress.yml rename to charts/azimuth-llm/templates/ui/ingress.yml diff --git a/chart/templates/ui/service.yml b/charts/azimuth-llm/templates/ui/service.yml similarity index 100% rename from chart/templates/ui/service.yml rename to charts/azimuth-llm/templates/ui/service.yml diff --git a/chart/templates/ui/ui-zenith-client.yml b/charts/azimuth-llm/templates/ui/ui-zenith-client.yml similarity index 100% rename from chart/templates/ui/ui-zenith-client.yml rename to charts/azimuth-llm/templates/ui/ui-zenith-client.yml diff --git a/chart/templates/ui/ui-zenith-reservation.yml b/charts/azimuth-llm/templates/ui/ui-zenith-reservation.yml similarity index 100% rename from chart/templates/ui/ui-zenith-reservation.yml rename to charts/azimuth-llm/templates/ui/ui-zenith-reservation.yml diff --git a/chart/values.yaml b/charts/azimuth-llm/values.yaml similarity index 97% rename from chart/values.yaml rename to charts/azimuth-llm/values.yaml index ed66c5b..116385b 100644 --- a/chart/values.yaml +++ b/charts/azimuth-llm/values.yaml @@ -90,8 +90,6 @@ ui: # included in the web-apps/ folder of this git repository there is a # defaults.yml file (e.g. web-apps/text-chat/defaults.yml) listing all # available configuration options. - # FIXME: Figure out how to make JSON schema fit - # with different config options for each web app. appSettings: model_name: *model-name # Use local system fonts by default to avoid GDPR issues @@ -151,5 +149,6 @@ ingress: # Annotations to apply to the ingress resource # e.g. 
for cert-manager integration annotations: -reloader: - watchGlobally: false + +# reloader: +# watchGlobally: false diff --git a/ct.yaml b/ct.yaml index 866e08c..f5fada9 100644 --- a/ct.yaml +++ b/ct.yaml @@ -1,2 +1,8 @@ +# Complains about invalid maintainer URLs validate-maintainers: false -charts: chart/ +# Skip version bump detection and lint all charts +# since we're using the azimuth-cloud Helm chart publish +# workflow which doesn't use Chart.yaml's version key +all: true +# Split output to make it look nice in GitHub Actions tab +github-groups: true From 40f9b33c38674e08a20e21172b64c3ee70dc32b5 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 12:36:07 +0000 Subject: [PATCH 13/34] Mount local overrides as volume for testing --- web-apps/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web-apps/run.sh b/web-apps/run.sh index e6e20b5..877bf5d 100755 --- a/web-apps/run.sh +++ b/web-apps/run.sh @@ -18,4 +18,4 @@ else echo "Found local $IMAGE_TAG docker image" fi -docker run --rm -p 7860:7860 $IMAGE_TAG +docker run --rm -v ./$1/overrides.yml:/etc/web-app/overrides.yml -p 7860:7860 $IMAGE_TAG From 0f6882c1739da6b9e63dcf3b6ecfcc7cf1fcaa24 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 12:36:47 +0000 Subject: [PATCH 14/34] Clean up and formatting --- web-apps/chat/app.py | 79 -------------------------------------------- 1 file changed, 79 deletions(-) diff --git a/web-apps/chat/app.py b/web-apps/chat/app.py index dbfae8f..8894fef 100644 --- a/web-apps/chat/app.py +++ b/web-apps/chat/app.py @@ -32,44 +32,6 @@ class AppSettings(BaseModel): model_config = ConfigDict(protected_namespaces=(), extra="forbid") -# class AppSettings(BaseModel): -# hf_model_name: str = Field( -# description="The model to use when constructing the LLM Chat client. 
This should match the model name running on the vLLM backend", -# ) -# backend_url: HttpUrl = Field( -# description="The address of the OpenAI compatible API server (either in-cluster or externally hosted)" -# ) -# page_title: str = Field(default="Large Language Model") -# page_description: Optional[str] = Field(default=None) -# hf_model_instruction: str = Field( -# default="You are a helpful and cheerful AI assistant. Please respond appropriately." -# ) - -# # Model settings - -# # For available parameters, see https://docs.vllm.ai/en/latest/dev/sampling_params.html -# # which is based on https://platform.openai.com/docs/api-reference/completions/create -# llm_max_tokens: int = Field(default=500) -# llm_temperature: float = Field(default=0) -# llm_top_p: float = Field(default=1) -# llm_top_k: float = Field(default=-1) -# llm_presence_penalty: float = Field(default=0, ge=-2, le=2) -# llm_frequency_penalty: float = Field(default=0, ge=-2, le=2) - -# # UI theming - -# # Variables explicitly passed to gradio.theme.Default() -# # For example: -# # {"primary_hue": "red"} -# theme_params: dict[str, Union[str, List[str]]] = Field(default_factory=dict) -# # Overrides for theme.body_background_fill property -# theme_background_colour: Optional[str] = Field(default=None) -# # Provides arbitrary CSS and JS overrides to the UI, -# # see https://www.gradio.app/guides/custom-CSS-and-JS -# css_overrides: Optional[str] = Field(default=None) -# custom_javascript: Optional[str] = Field(default=None) - - settings = AppSettings(**load_settings()) logger.info(settings) @@ -102,7 +64,6 @@ class PossibleSystemPromptException(Exception): }, streaming=True, ) -logger.info(llm) def inference(latest_message, history): @@ -176,7 +137,6 @@ def inference(latest_message, history): # UI theming theme = gr.themes.Default(**settings.theme_params) theme.set(**settings.theme_params_extended) -# theme.set(text) def inference_wrapper(*args): @@ -221,44 +181,5 @@ def inference_wrapper(*args): ) 
logger.debug("Gradio chat interface config: %s", app.config) app.queue( - # Allow 10 concurrent requests to backend - # vLLM backend should be clever enough to - # batch these requests appropriately. default_concurrency_limit=10, ).launch(server_name=settings.host_address) - -# with gr.ChatInterface( -# inference_wrapper, -# chatbot=gr.Chatbot( -# # Height of conversation window in CSS units (string) or pixels (int) -# height="68vh", -# show_copy_button=True, -# ), -# textbox=gr.Textbox( -# placeholder="Ask me anything...", -# container=False, -# # Ratio of text box to submit button width -# scale=7, -# ), -# title=settings.page_title, -# description=settings.page_description, -# retry_btn="Retry", -# undo_btn="Undo", -# clear_btn="Clear", -# analytics_enabled=False, -# theme=theme, -# css=settings.css_overrides, -# js=settings.custom_javascript, -# ) as app: -# logger.debug("Gradio chat interface config: %s", app.config) -# # For running locally in tilt dev setup -# if len(sys.argv) > 2 and sys.argv[2] == "localhost": -# app.launch() -# # For running on cluster -# else: -# app.queue( -# # Allow 10 concurrent requests to backend -# # vLLM backend should be clever enough to -# # batch these requests appropriately. 
-# default_concurrency_limit=10, -# ).launch(server_name=settings.host_address) From b8737bc45d34e37742b03a580fa5cb3d677199dc Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 13:55:31 +0000 Subject: [PATCH 15/34] Rename CI test values files --- .../azimuth-chat/ci/{chat-ui-values.yaml => ui-only-values.yaml} | 0 .../ci/{image-analysis-ui-values.yaml => ui-only-values.yaml} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename charts/azimuth-chat/ci/{chat-ui-values.yaml => ui-only-values.yaml} (100%) rename charts/azimuth-image-analysis/ci/{image-analysis-ui-values.yaml => ui-only-values.yaml} (100%) diff --git a/charts/azimuth-chat/ci/chat-ui-values.yaml b/charts/azimuth-chat/ci/ui-only-values.yaml similarity index 100% rename from charts/azimuth-chat/ci/chat-ui-values.yaml rename to charts/azimuth-chat/ci/ui-only-values.yaml diff --git a/charts/azimuth-image-analysis/ci/image-analysis-ui-values.yaml b/charts/azimuth-image-analysis/ci/ui-only-values.yaml similarity index 100% rename from charts/azimuth-image-analysis/ci/image-analysis-ui-values.yaml rename to charts/azimuth-image-analysis/ci/ui-only-values.yaml From 9dfcf3afcdaaf6ac79b77f5cbf397933abfee9d2 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 13:56:44 +0000 Subject: [PATCH 16/34] Update Azimuth UI config --- charts/azimuth-chat/azimuth-ui.schema.yaml | 5 ++--- charts/azimuth-image-analysis/azimuth-ui.schema.yaml | 5 +---- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/charts/azimuth-chat/azimuth-ui.schema.yaml b/charts/azimuth-chat/azimuth-ui.schema.yaml index 061a8ce..1c0da6a 100644 --- a/charts/azimuth-chat/azimuth-ui.schema.yaml +++ b/charts/azimuth-chat/azimuth-ui.schema.yaml @@ -6,7 +6,7 @@ controls: type: TextControl secret: true # Use mirror to mimic yaml anchor in base Helm chart - /azimuth-llm/ui/appSettings/hf_model_name: + /azimuth-llm/ui/appSettings/model_name: type: MirrorControl path: /azimuth-llm/huggingface/model visuallyHidden: true @@ -16,13 
+16,12 @@ controls: /azimuth-llm/api/modelMaxContextLength: type: IntegerControl minimum: 100 - step: 100 required: false sortOrder: - /azimuth-llm/huggingface/model - /azimuth-llm/huggingface/token - - /azimuth-llm/ui/appSettings/hf_model_instruction + - /azimuth-llm/ui/appSettings/model_instruction - /azimuth-llm/ui/appSettings/page_title - /azimuth-llm/api/image/version - /azimuth-llm/ui/appSettings/llm_temperature diff --git a/charts/azimuth-image-analysis/azimuth-ui.schema.yaml b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml index 5c4799c..d244a5a 100644 --- a/charts/azimuth-image-analysis/azimuth-ui.schema.yaml +++ b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml @@ -6,7 +6,7 @@ controls: type: TextControl secret: true # Use mirror to mimic yaml anchor in base Helm chart - /azimuth-llm/ui/appSettings/hf_model_name: + /azimuth-llm/ui/appSettings/model_name: type: MirrorControl path: /azimuth-llm/huggingface/model visuallyHidden: true @@ -14,8 +14,6 @@ controls: sortOrder: - /huggingface/model - /huggingface/token - - /ui/appSettings/hf_model_instruction - - /ui/appSettings/page_title - /api/image/version - /ui/appSettings/llm_temperature - /ui/appSettings/llm_max_tokens @@ -23,4 +21,3 @@ sortOrder: - /ui/appSettings/llm_presence_penalty - /ui/appSettings/llm_top_p - /ui/appSettings/llm_top_k - - /api/modelMaxContextLength From 00972e24d821c0f0e8a9a2b7a1ccd60127f426fb Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 14:11:33 +0000 Subject: [PATCH 17/34] Fix chart dependency versions --- charts/azimuth-chat/Chart.yaml | 2 +- charts/azimuth-image-analysis/Chart.yaml | 2 +- charts/azimuth-llm/Chart.yaml | 8 +------- charts/azimuth-llm/values.yaml | 3 --- 4 files changed, 3 insertions(+), 12 deletions(-) diff --git a/charts/azimuth-chat/Chart.yaml b/charts/azimuth-chat/Chart.yaml index b3e9fa0..97dd341 100644 --- a/charts/azimuth-chat/Chart.yaml +++ b/charts/azimuth-chat/Chart.yaml @@ -18,5 +18,5 @@ annotations: dependencies: - name: 
azimuth-llm - version: 0.1.0 + version: ">=0-0" repository: "file://../azimuth-llm/" diff --git a/charts/azimuth-image-analysis/Chart.yaml b/charts/azimuth-image-analysis/Chart.yaml index c2681ca..238016b 100644 --- a/charts/azimuth-image-analysis/Chart.yaml +++ b/charts/azimuth-image-analysis/Chart.yaml @@ -18,5 +18,5 @@ annotations: dependencies: - name: azimuth-llm - version: 0.1.0 + version: ">=0-0" repository: "file://../azimuth-llm/" diff --git a/charts/azimuth-llm/Chart.yaml b/charts/azimuth-llm/Chart.yaml index 637db49..6c92b69 100644 --- a/charts/azimuth-llm/Chart.yaml +++ b/charts/azimuth-llm/Chart.yaml @@ -18,7 +18,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0 +version: 0.2.0 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to @@ -30,9 +30,3 @@ icon: https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-l annotations: azimuth.stackhpc.com/label: HuggingFace LLM - -# dependencies: -# - name: reloader -# version: 1.0.63 -# repository: https://stakater.github.io/stakater-charts -# condition: ui.enabled diff --git a/charts/azimuth-llm/values.yaml b/charts/azimuth-llm/values.yaml index 116385b..a037d46 100644 --- a/charts/azimuth-llm/values.yaml +++ b/charts/azimuth-llm/values.yaml @@ -149,6 +149,3 @@ ingress: # Annotations to apply to the ingress resource # e.g. 
for cert-manager integration annotations: - -# reloader: -# watchGlobally: false From 60204342560e8172d20ec6a55083560eefb14f1f Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 14:47:00 +0000 Subject: [PATCH 18/34] Fix scheme for passing custom LLM params --- charts/azimuth-chat/azimuth-ui.schema.yaml | 12 +++---- charts/azimuth-chat/ci/ui-only-values.yaml | 9 +++++ .../azimuth-ui.schema.yaml | 12 +++---- .../ci/ui-only-values.yaml | 9 +++++ charts/azimuth-llm/azimuth-ui.schema.yaml | 33 ------------------- 5 files changed, 30 insertions(+), 45 deletions(-) delete mode 100644 charts/azimuth-llm/azimuth-ui.schema.yaml diff --git a/charts/azimuth-chat/azimuth-ui.schema.yaml b/charts/azimuth-chat/azimuth-ui.schema.yaml index 1c0da6a..74bd573 100644 --- a/charts/azimuth-chat/azimuth-ui.schema.yaml +++ b/charts/azimuth-chat/azimuth-ui.schema.yaml @@ -24,10 +24,10 @@ sortOrder: - /azimuth-llm/ui/appSettings/model_instruction - /azimuth-llm/ui/appSettings/page_title - /azimuth-llm/api/image/version - - /azimuth-llm/ui/appSettings/llm_temperature - - /azimuth-llm/ui/appSettings/llm_max_tokens - - /azimuth-llm/ui/appSettings/llm_frequency_penalty - - /azimuth-llm/ui/appSettings/llm_presence_penalty - - /azimuth-llm/ui/appSettings/llm_top_p - - /azimuth-llm/ui/appSettings/llm_top_k + - /azimuth-llm/ui/appSettings/llm_params/temperature + - /azimuth-llm/ui/appSettings/llm_params/max_tokens + - /azimuth-llm/ui/appSettings/llm_params/frequency_penalty + - /azimuth-llm/ui/appSettings/llm_params/presence_penalty + - /azimuth-llm/ui/appSettings/llm_params/top_p + - /azimuth-llm/ui/appSettings/llm_params/top_k - /azimuth-llm/api/modelMaxContextLength diff --git a/charts/azimuth-chat/ci/ui-only-values.yaml b/charts/azimuth-chat/ci/ui-only-values.yaml index bf30ede..b66347d 100644 --- a/charts/azimuth-chat/ci/ui-only-values.yaml +++ b/charts/azimuth-chat/ci/ui-only-values.yaml @@ -5,3 +5,12 @@ azimuth-llm: service: zenith: enabled: false + appSettings: + # Verify that we 
can set non-standard LLM params + llm_params: + max_tokens: 101 + temperature: 0.1 + top_p: 0.15 + top_k: 1 + presence_penalty: 0.9 + frequency_penalty: 1 diff --git a/charts/azimuth-image-analysis/azimuth-ui.schema.yaml b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml index d244a5a..f1068c2 100644 --- a/charts/azimuth-image-analysis/azimuth-ui.schema.yaml +++ b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml @@ -15,9 +15,9 @@ sortOrder: - /huggingface/model - /huggingface/token - /api/image/version - - /ui/appSettings/llm_temperature - - /ui/appSettings/llm_max_tokens - - /ui/appSettings/llm_frequency_penalty - - /ui/appSettings/llm_presence_penalty - - /ui/appSettings/llm_top_p - - /ui/appSettings/llm_top_k + - /ui/appSettings/llm_params/temperature + - /ui/appSettings/llm_params/max_tokens + - /ui/appSettings/llm_params/frequency_penalty + - /ui/appSettings/llm_params/presence_penalty + - /ui/appSettings/llm_params/top_p + - /ui/appSettings/llm_params/top_k diff --git a/charts/azimuth-image-analysis/ci/ui-only-values.yaml b/charts/azimuth-image-analysis/ci/ui-only-values.yaml index bf30ede..b66347d 100644 --- a/charts/azimuth-image-analysis/ci/ui-only-values.yaml +++ b/charts/azimuth-image-analysis/ci/ui-only-values.yaml @@ -5,3 +5,12 @@ azimuth-llm: service: zenith: enabled: false + appSettings: + # Verify that we can set non-standard LLM params + llm_params: + max_tokens: 101 + temperature: 0.1 + top_p: 0.15 + top_k: 1 + presence_penalty: 0.9 + frequency_penalty: 1 diff --git a/charts/azimuth-llm/azimuth-ui.schema.yaml b/charts/azimuth-llm/azimuth-ui.schema.yaml deleted file mode 100644 index a633139..0000000 --- a/charts/azimuth-llm/azimuth-ui.schema.yaml +++ /dev/null @@ -1,33 +0,0 @@ -controls: - /huggingface/model: - type: TextControl - required: true - /huggingface/token: - type: TextControl - secret: true - # Use mirror to mimic yaml anchor in base Helm chart - /ui/appSettings/hf_model_name: - type: MirrorControl - path: /huggingface/model - 
visuallyHidden: true - # Azimuth UI doesn't handle json type ["integer","null"] - # properly so we allow any type in JSON schema then - # constrain to (optional) integer here. - /api/modelMaxContextLength: - type: IntegerControl - minimum: 100 - required: false - -sortOrder: - - /huggingface/model - - /huggingface/token - - /ui/appSettings/hf_model_instruction - - /ui/appSettings/page_title - - /api/image/version - - /ui/appSettings/llm_temperature - - /ui/appSettings/llm_max_tokens - - /ui/appSettings/llm_frequency_penalty - - /ui/appSettings/llm_presence_penalty - - /ui/appSettings/llm_top_p - - /ui/appSettings/llm_top_k - - /api/modelMaxContextLength From 456b2212bfcbbcaab9c83751a8929b61d82d564c Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 15:01:37 +0000 Subject: [PATCH 19/34] Fix scheme for passing custom LLM params --- charts/azimuth-chat/values.schema.json | 93 ++++++++++--------- .../azimuth-image-analysis/values.schema.json | 93 ++++++++++--------- 2 files changed, 98 insertions(+), 88 deletions(-) diff --git a/charts/azimuth-chat/values.schema.json b/charts/azimuth-chat/values.schema.json index 96e5882..ebc2622 100644 --- a/charts/azimuth-chat/values.schema.json +++ b/charts/azimuth-chat/values.schema.json @@ -69,50 +69,55 @@ "description": "The title to display at the top of the chat interface.", "default": "Large Language Model" }, - "llm_max_tokens": { - "type": "integer", - "title": "Max Tokens", - "description": "The maximum number of new [tokens](https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens) to generate for each LLM responses.", - "default": 1000 - }, - "llm_temperature": { - "type": "number", - "title": "LLM Temperature", - "description": "The [temperature](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature) value to use when generating LLM responses.", - "default": 0, - "minimum": 0, - "maximum": 2 - }, - "llm_top_p": { - "type": "number", - "title": "LLM Top P", - 
"description": "The [top p](https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p) value to use when generating LLM responses.", - "default": 1, - "exclusiveMinimum": 0, - "maximum": 1 - }, - "llm_top_k": { - "type": "integer", - "title": "LLM Top K", - "description": "The [top k](https://docs.vllm.ai/en/stable/dev/sampling_params.html) value to use when generating LLM responses (must be an integer).", - "default": -1, - "minimum": -1 - }, - "llm_presence_penalty": { - "type": "number", - "title": "LLM Presence Penalty", - "description": "The [presence penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-presence_penalty) to use when generating LLM responses.", - "default": 0, - "minimum": -2, - "maximum": 2 - }, - "llm_frequency_penalty": { - "type": "number", - "title": "LLM Frequency Penalty", - "description": "The [frequency_penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-frequency_penalty) to use when generating LLM responses.", - "default": 0, - "minimum": -2, - "maximum": 2 + "llm_params": { + "type": "object", + "properties": { + "max_tokens": { + "type": "integer", + "title": "Max Tokens", + "description": "The maximum number of new [tokens](https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens) to generate for each LLM responses.", + "default": 1000 + }, + "temperature": { + "type": "number", + "title": "LLM Temperature", + "description": "The [temperature](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature) value to use when generating LLM responses.", + "default": 0, + "minimum": 0, + "maximum": 2 + }, + "top_p": { + "type": "number", + "title": "LLM Top P", + "description": "The [top p](https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p) value to use when generating LLM responses.", + "default": 1, + "exclusiveMinimum": 0, + "maximum": 1 + }, + "top_k": { + "type": "integer", + "title": 
"LLM Top K", + "description": "The [top k](https://docs.vllm.ai/en/stable/dev/sampling_params.html) value to use when generating LLM responses (must be an integer).", + "default": -1, + "minimum": -1 + }, + "presence_penalty": { + "type": "number", + "title": "LLM Presence Penalty", + "description": "The [presence penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-presence_penalty) to use when generating LLM responses.", + "default": 0, + "minimum": -2, + "maximum": 2 + }, + "frequency_penalty": { + "type": "number", + "title": "LLM Frequency Penalty", + "description": "The [frequency_penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-frequency_penalty) to use when generating LLM responses.", + "default": 0, + "minimum": -2, + "maximum": 2 + } + } } }, "required": [ diff --git a/charts/azimuth-image-analysis/values.schema.json b/charts/azimuth-image-analysis/values.schema.json index 2ddda05..029a7ae 100644 --- a/charts/azimuth-image-analysis/values.schema.json +++ b/charts/azimuth-image-analysis/values.schema.json @@ -53,50 +53,55 @@ "title": "Model Name", "description": "Model name supplied to the OpenAI client in frontend web app. Should match huggingface.model above." 
}, - "llm_max_tokens": { - "type": "integer", - "title": "Max Tokens", - "description": "The maximum number of new [tokens](https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens) to generate for each LLM responses.", - "default": 1000 - }, - "llm_temperature": { - "type": "number", - "title": "LLM Temperature", - "description": "The [temperature](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature) value to use when generating LLM responses.", - "default": 0, - "minimum": 0, - "maximum": 2 - }, - "llm_top_p": { - "type": "number", - "title": "LLM Top P", - "description": "The [top p](https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p) value to use when generating LLM responses.", - "default": 1, - "exclusiveMinimum": 0, - "maximum": 1 - }, - "llm_top_k": { - "type": "integer", - "title": "LLM Top K", - "description": "The [top k](https://docs.vllm.ai/en/stable/dev/sampling_params.html) value to use when generating LLM responses (must be an integer).", - "default": -1, - "minimum": -1 - }, - "llm_presence_penalty": { - "type": "number", - "title": "LLM Presence Penalty", - "description": "The [presence penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-presence_penalty) to use when generating LLM responses.", - "default": 0, - "minimum": -2, - "maximum": 2 - }, - "llm_frequency_penalty": { - "type": "number", - "title": "LLM Frequency Penalty", - "description": "The [frequency_penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-frequency_penalty) to use when generating LLM responses.", - "default": 0, - "minimum": -2, - "maximum": 2 + "llm_params": { + "type": "object", + "properties": { + "max_tokens": { + "type": "integer", + "title": "Max Tokens", + "description": "The maximum number of new [tokens](https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens) to generate for each LLM responses.", + 
"default": 1000 + }, + "temperature": { + "type": "number", + "title": "LLM Temperature", + "description": "The [temperature](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature) value to use when generating LLM responses.", + "default": 0, + "minimum": 0, + "maximum": 2 + }, + "top_p": { + "type": "number", + "title": "LLM Top P", + "description": "The [top p](https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p) value to use when generating LLM responses.", + "default": 1, + "exclusiveMinimum": 0, + "maximum": 1 + }, + "top_k": { + "type": "integer", + "title": "LLM Top K", + "description": "The [top k](https://docs.vllm.ai/en/stable/dev/sampling_params.html) value to use when generating LLM responses (must be an integer).", + "default": -1, + "minimum": -1 + }, + "presence_penalty": { + "type": "number", + "title": "LLM Presence Penalty", + "description": "The [presence penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-presence_penalty) to use when generating LLM responses.", + "default": 0, + "minimum": -2, + "maximum": 2 + }, + "frequency_penalty": { + "type": "number", + "title": "LLM Frequency Penalty", + "description": "The [frequency_penalty](https://platform.openai.com/docs/api-reference/chat/create#chat-create-frequency_penalty) to use when generating LLM responses.", + "default": 0, + "minimum": -2, + "maximum": 2 + } + } } }, "required": [ From f5c86549098417608ee6dd988c6b3e49c86b19f7 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 16:10:34 +0000 Subject: [PATCH 20/34] Add model context length option to Azimuth UI --- .../azimuth-ui.schema.yaml | 26 ++++++++++++------- .../azimuth-image-analysis/values.schema.json | 4 +++ 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/charts/azimuth-image-analysis/azimuth-ui.schema.yaml b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml index f1068c2..885ca8e 100644 --- 
a/charts/azimuth-image-analysis/azimuth-ui.schema.yaml +++ b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml @@ -10,14 +10,22 @@ controls: type: MirrorControl path: /azimuth-llm/huggingface/model visuallyHidden: true + # Azimuth UI doesn't handle json type ["integer","null"] + # properly so we allow any type in JSON schema then + # constrain to (optional) integer here. + /azimuth-llm/api/modelMaxContextLength: + type: IntegerControl + minimum: 100 + required: false sortOrder: - - /huggingface/model - - /huggingface/token - - /api/image/version - - /ui/appSettings/llm_params/temperature - - /ui/appSettings/llm_params/max_tokens - - /ui/appSettings/llm_params/frequency_penalty - - /ui/appSettings/llm_params/presence_penalty - - /ui/appSettings/llm_params/top_p - - /ui/appSettings/llm_params/top_k + - /azimuth-llm/huggingface/model + - /azimuth-llm/huggingface/token + - /azimuth-llm/api/image/version + - /azimuth-llm/ui/appSettings/llm_params/temperature + - /azimuth-llm/ui/appSettings/llm_params/max_tokens + - /azimuth-llm/ui/appSettings/llm_params/frequency_penalty + - /azimuth-llm/ui/appSettings/llm_params/presence_penalty + - /azimuth-llm/ui/appSettings/llm_params/top_p + - /azimuth-llm/ui/appSettings/llm_params/top_k + - /azimuth-llm/api/modelMaxContextLength diff --git a/charts/azimuth-image-analysis/values.schema.json b/charts/azimuth-image-analysis/values.schema.json index 029a7ae..7f747e4 100644 --- a/charts/azimuth-image-analysis/values.schema.json +++ b/charts/azimuth-image-analysis/values.schema.json @@ -39,6 +39,10 @@ "default": "v0.6.3" } } + }, + "modelMaxContextLength": { + "title": "Model Context Length", + "description": "An override for the maximum context length to allow, if the model's default is not suitable." 
} } }, From 8e58066329e8bfc9e5101783ab421d69a4aee5ae Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 16:44:13 +0000 Subject: [PATCH 21/34] Fix defaults for LLMParams data model --- web-apps/image-analysis/app.py | 2 +- web-apps/utils/utils.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/web-apps/image-analysis/app.py b/web-apps/image-analysis/app.py index 77cda84..c60f412 100644 --- a/web-apps/image-analysis/app.py +++ b/web-apps/image-analysis/app.py @@ -30,7 +30,7 @@ class AppSettings(BaseModel): page_title: str page_description: str examples: List[PromptExample] - llm_params: LLMParams + llm_params: LLMParams | None # Theme customisation theme_params: Dict[str, str | list] theme_params_extended: Dict[str, str] diff --git a/web-apps/utils/utils.py b/web-apps/utils/utils.py index 252ccf4..de299f8 100644 --- a/web-apps/utils/utils.py +++ b/web-apps/utils/utils.py @@ -20,12 +20,12 @@ class LLMParams(BaseModel): https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#extra-parameters """ - max_tokens: PositiveInt | None - temperature: Annotated[float, Field(ge=0, le=2)] | None - top_p: Annotated[float, Field(gt=0, le=1)] | None - top_k: Annotated[int, Field(ge=-1)] | None - frequency_penalty: Annotated[float, Field(ge=-2, le=2)] | None - presence_penalty: Annotated[float, Field(ge=0 - 2, le=2)] | None + max_tokens: PositiveInt | None = None + temperature: Annotated[float, Field(ge=0, le=2)] | None = None + top_p: Annotated[float, Field(gt=0, le=1)] | None = None + top_k: Annotated[int, Field(ge=-1)] | None = None + frequency_penalty: Annotated[float, Field(ge=-2, le=2)] | None = None + presence_penalty: Annotated[float, Field(ge=0 - 2, le=2)] | None = None # Make sure we can't smuggle in extra request params / typos model_config = ConfigDict(extra="forbid") From ef832886997a302e061101b1f39cd493049fc573 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 16:45:06 +0000 Subject: [PATCH 22/34] Bump UI 
image tag --- charts/azimuth-llm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/azimuth-llm/values.yaml b/charts/azimuth-llm/values.yaml index a037d46..6c8f508 100644 --- a/charts/azimuth-llm/values.yaml +++ b/charts/azimuth-llm/values.yaml @@ -83,7 +83,7 @@ ui: # Container image config image: repository: ghcr.io/stackhpc/azimuth-llm-chat-ui - version: 58c4dcb + version: 8e58066 imagePullPolicy: # The settings to be passed to the frontend web app. # Format depends on the chosen UI image above. For each of the UIs From 35a14389bef050c462addd538988f21dba05158f Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 16:53:16 +0000 Subject: [PATCH 23/34] Bump UI image tag --- charts/azimuth-llm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/azimuth-llm/values.yaml b/charts/azimuth-llm/values.yaml index 6c8f508..dc3a95f 100644 --- a/charts/azimuth-llm/values.yaml +++ b/charts/azimuth-llm/values.yaml @@ -83,7 +83,7 @@ ui: # Container image config image: repository: ghcr.io/stackhpc/azimuth-llm-chat-ui - version: 8e58066 + version: ef83288 imagePullPolicy: # The settings to be passed to the frontend web app. # Format depends on the chosen UI image above. 
For each of the UIs From f3d5544849fa2e61966dd50cc9f2fa1546b419c0 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 17:44:09 +0000 Subject: [PATCH 24/34] Remove top_k from vision model UI options --- charts/azimuth-image-analysis/azimuth-ui.schema.yaml | 4 +++- charts/azimuth-image-analysis/ci/ui-only-values.yaml | 1 - charts/azimuth-image-analysis/values.schema.json | 8 +------- web-apps/image-analysis/defaults.yml | 4 +++- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/charts/azimuth-image-analysis/azimuth-ui.schema.yaml b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml index 885ca8e..a960081 100644 --- a/charts/azimuth-image-analysis/azimuth-ui.schema.yaml +++ b/charts/azimuth-image-analysis/azimuth-ui.schema.yaml @@ -27,5 +27,7 @@ sortOrder: - /azimuth-llm/ui/appSettings/llm_params/frequency_penalty - /azimuth-llm/ui/appSettings/llm_params/presence_penalty - /azimuth-llm/ui/appSettings/llm_params/top_p - - /azimuth-llm/ui/appSettings/llm_params/top_k + # vLLM responds with HTTP 400 BadRequest when top_k is + # passed to a vision model (but ollama accepts it) + # - /azimuth-llm/ui/appSettings/llm_params/top_k - /azimuth-llm/api/modelMaxContextLength diff --git a/charts/azimuth-image-analysis/ci/ui-only-values.yaml b/charts/azimuth-image-analysis/ci/ui-only-values.yaml index b66347d..96f716d 100644 --- a/charts/azimuth-image-analysis/ci/ui-only-values.yaml +++ b/charts/azimuth-image-analysis/ci/ui-only-values.yaml @@ -11,6 +11,5 @@ azimuth-llm: max_tokens: 101 temperature: 0.1 top_p: 0.15 - top_k: 1 presence_penalty: 0.9 frequency_penalty: 1 diff --git a/charts/azimuth-image-analysis/values.schema.json b/charts/azimuth-image-analysis/values.schema.json index 7f747e4..c8be1ac 100644 --- a/charts/azimuth-image-analysis/values.schema.json +++ b/charts/azimuth-image-analysis/values.schema.json @@ -58,6 +58,7 @@ "description": "Model name supplied to the OpenAI client in frontend web app. Should match huggingface.model above." 
}, "llm_params": { + "$comment": "top_k parameter causes vLLM to error for most (all?) vision models so is excluded here", "type": "object", "properties": { "max_tokens": { @@ -82,13 +83,6 @@ "exclusiveMinimum": 0, "maximum": 1 }, - "top_k": { - "type": "integer", - "title": "LLM Top K", - "description": "The [top k](https://docs.vllm.ai/en/stable/dev/sampling_params.html) value to use when generating LLM responses (must be an integer).", - "default": -1, - "minimum": -1 - }, "presence_penalty": { "type": "number", "title": "LLM Presence Penalty", diff --git a/web-apps/image-analysis/defaults.yml b/web-apps/image-analysis/defaults.yml index e6f2791..21d233c 100644 --- a/web-apps/image-analysis/defaults.yml +++ b/web-apps/image-analysis/defaults.yml @@ -18,7 +18,9 @@ llm_params: max_tokens: temperature: top_p: - top_k: + # vLLM rejects requests with top_k parameter for + # most (all?) vision models so can't use it here + # top_k: frequency_penalty: presence_penalty: From 0dfd58a2123671ce3545bbded3eda736a3f3f382 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 19:47:53 +0000 Subject: [PATCH 25/34] Update workflow permissions avoid device-flow auth --- .github/workflows/build-push-artifacts.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index fa2cca4..4027fdd 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -34,8 +34,13 @@ jobs: build_push_images: name: Build and push images runs-on: ubuntu-latest + permissions: + contents: read + id-token: write # needed for signing the images with GitHub OIDC Token + packages: write # required for pushing container images + security-events: write # required for pushing SARIF files needs: changes - if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} + # if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 
'tag' }} strategy: matrix: include: From bd6accb229c515524f10348adfc4fd209a4378a5 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 19:48:34 +0000 Subject: [PATCH 26/34] Re-enable change detection on image builds --- .github/workflows/build-push-artifacts.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index 4027fdd..75c561f 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -40,7 +40,7 @@ jobs: packages: write # required for pushing container images security-events: write # required for pushing SARIF files needs: changes - # if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} + if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} strategy: matrix: include: From d6202c0083c471761e381b8534caaccb4d17b607 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 19:55:34 +0000 Subject: [PATCH 27/34] Remove redundant helm template check --- .github/workflows/test-pr.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/test-pr.yml b/.github/workflows/test-pr.yml index 1187e7e..a064702 100644 --- a/.github/workflows/test-pr.yml +++ b/.github/workflows/test-pr.yml @@ -28,10 +28,6 @@ jobs: - name: Run chart linting run: ct lint --config ct.yaml - - name: Run helm template with default values - run: helm template ci-test . 
- working-directory: chart - - name: Create Kind Cluster uses: helm/kind-action@v1 with: From 96ca80a4e31eaf39afbf2e7372a3ff0915512b21 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 20:04:45 +0000 Subject: [PATCH 28/34] Skip change detection on tags --- .github/workflows/build-push-artifacts.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index 75c561f..412566e 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -7,6 +7,7 @@ jobs: changes: name: Check for relevant changes runs-on: ubuntu-latest + if: ${{ github.ref_type != 'tag' }} # Required permissions permissions: pull-requests: read From 24b4f2c98807154c14edaa1afcb09fb137518bb8 Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 20:06:42 +0000 Subject: [PATCH 29/34] Always run artifact publishing on tags --- .github/workflows/build-push-artifacts.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index 412566e..358fce0 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -41,7 +41,7 @@ jobs: packages: write # required for pushing container images security-events: write # required for pushing SARIF files needs: changes - if: ${{ needs.changes.outputs.images == 'true' || github.ref_type == 'tag' }} + if: ${{ github.ref_type == 'tag' || needs.changes.outputs.images == 'true' }} strategy: matrix: include: @@ -86,7 +86,7 @@ jobs: runs-on: ubuntu-latest # Only build and push the chart if chart files have changed needs: [changes] - if: ${{ needs.changes.outputs.chart == 'true' || github.ref_type == 'tag' }} + if: ${{ github.ref_type == 'tag' || needs.changes.outputs.chart == 'true' }} steps: - name: Check out the repository uses: actions/checkout@v4 From 
df7b8eb7fd7e6698af74cf969f1479ca0bf4bb7d Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 20:09:30 +0000 Subject: [PATCH 30/34] Revert "Skip change detection on tags" This reverts commit 96ca80a4e31eaf39afbf2e7372a3ff0915512b21. --- .github/workflows/build-push-artifacts.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index 358fce0..fd8969b 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -7,7 +7,6 @@ jobs: changes: name: Check for relevant changes runs-on: ubuntu-latest - if: ${{ github.ref_type != 'tag' }} # Required permissions permissions: pull-requests: read From f9eb0aa2c2722e6e994093f3759a6210b9d826cc Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 21:21:06 +0000 Subject: [PATCH 31/34] Remove unused reloader annotation --- charts/azimuth-llm/templates/ui/deployment.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/charts/azimuth-llm/templates/ui/deployment.yml b/charts/azimuth-llm/templates/ui/deployment.yml index bed4167..3938893 100644 --- a/charts/azimuth-llm/templates/ui/deployment.yml +++ b/charts/azimuth-llm/templates/ui/deployment.yml @@ -5,9 +5,6 @@ metadata: name: {{ .Release.Name }}-ui labels: {{- include "azimuth-llm.labels" . 
| nindent 4 }} - annotations: - # Make sure UI is reloaded when app settings are updated - reloader.stakater.com/auto: "true" spec: replicas: 1 selector: From d0d1fd024dc7037547ac37e1449c66424a3fdf7f Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 23:45:20 +0000 Subject: [PATCH 32/34] Test alternative docker cache settings --- .github/workflows/build-push-artifacts.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index fd8969b..9c81fe3 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -40,7 +40,7 @@ jobs: packages: write # required for pushing container images security-events: write # required for pushing SARIF files needs: changes - if: ${{ github.ref_type == 'tag' || needs.changes.outputs.images == 'true' }} + # if: ${{ github.ref_type == 'tag' || needs.changes.outputs.images == 'true' }} strategy: matrix: include: @@ -69,7 +69,8 @@ jobs: type=sha,prefix= - name: Build and push image - uses: azimuth-cloud/github-actions/docker-multiarch-build-push@update-trivy-action + # uses: azimuth-cloud/github-actions/docker-multiarch-build-push@update-trivy-action + uses: azimuth-cloud/github-actions/docker-multiarch-build-push@test/docker-build-cache with: cache-key: ${{ matrix.component }} context: ./web-apps/ From 7f915535832d0d539980e25616c19f0073b1075d Mon Sep 17 00:00:00 2001 From: sd109 Date: Thu, 31 Oct 2024 23:54:31 +0000 Subject: [PATCH 33/34] Dummy change for cache testing --- .github/workflows/build-push-artifacts.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index 9c81fe3..fdbf892 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -69,7 +69,6 @@ jobs: type=sha,prefix= - name: Build and push image - # uses: 
azimuth-cloud/github-actions/docker-multiarch-build-push@update-trivy-action uses: azimuth-cloud/github-actions/docker-multiarch-build-push@test/docker-build-cache with: cache-key: ${{ matrix.component }} From 717e582bac5dc7fe38931d21b3e5f7cce941b435 Mon Sep 17 00:00:00 2001 From: sd109 Date: Fri, 1 Nov 2024 10:18:29 +0000 Subject: [PATCH 34/34] Revert to master branch of docker build action --- .github/workflows/build-push-artifacts.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-push-artifacts.yml b/.github/workflows/build-push-artifacts.yml index fdbf892..73d8370 100644 --- a/.github/workflows/build-push-artifacts.yml +++ b/.github/workflows/build-push-artifacts.yml @@ -40,7 +40,7 @@ jobs: packages: write # required for pushing container images security-events: write # required for pushing SARIF files needs: changes - # if: ${{ github.ref_type == 'tag' || needs.changes.outputs.images == 'true' }} + if: ${{ github.ref_type == 'tag' || needs.changes.outputs.images == 'true' }} strategy: matrix: include: @@ -69,7 +69,7 @@ jobs: type=sha,prefix= - name: Build and push image - uses: azimuth-cloud/github-actions/docker-multiarch-build-push@test/docker-build-cache + uses: azimuth-cloud/github-actions/docker-multiarch-build-push@master with: cache-key: ${{ matrix.component }} context: ./web-apps/