diff --git a/.gitignore b/.gitignore
index b6862d1..84f43a8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ __pycache__/
 **/*.secret
 .DS_Store
 .tox/
+**/.ruff_cache
 
 # Ignore local dev helpers
 test-values.y[a]ml
diff --git a/charts/azimuth-llm/values.schema.json b/charts/azimuth-llm/values.schema.json
index eeac476..fe5cbff 100644
--- a/charts/azimuth-llm/values.schema.json
+++ b/charts/azimuth-llm/values.schema.json
@@ -118,9 +118,7 @@
         }
       },
       "required": [
-        "model_name",
-        "model_instruction"
-      ]
+        "model_name" ]
     }
   }
 }
diff --git a/charts/azimuth-llm/values.yaml b/charts/azimuth-llm/values.yaml
index b193c9d..253056c 100644
--- a/charts/azimuth-llm/values.yaml
+++ b/charts/azimuth-llm/values.yaml
@@ -92,8 +92,6 @@ ui:
   # available configuration options.
   appSettings:
     model_name: *model-name
-    model_instruction: |
-      You are a helpful AI assistant. Please respond appropriately.
   # Use local system fonts by default to avoid GDPR issues
   # with Gradio's defaults fonts which require fetching from
   # the Google fonts API. To restore default Gradio theme
diff --git a/web-apps/chat/app.py b/web-apps/chat/app.py
index 8894fef..99bc8d3 100644
--- a/web-apps/chat/app.py
+++ b/web-apps/chat/app.py
@@ -1,18 +1,14 @@
-import logging
 import openai
-
+import utils
 import gradio as gr
 
 from urllib.parse import urljoin
 from langchain.schema import HumanMessage, AIMessage, SystemMessage
 from langchain_openai import ChatOpenAI
-from typing import Dict, List
+from typing import Dict
 from pydantic import BaseModel, ConfigDict
 
-from utils import LLMParams, load_settings
 
-logging.basicConfig()
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
+log = utils.get_logger()
 
 class AppSettings(BaseModel):
@@ -22,7 +18,7 @@ class AppSettings(BaseModel):
     model_name: str
     model_instruction: str
     page_title: str
-    llm_params: LLMParams
+    llm_params: utils.LLMParams
     # Theme customisation
     theme_params: Dict[str, str | list]
     theme_params_extended: Dict[str, str]
@@ -32,8 +28,8 @@
     model_config = ConfigDict(protected_namespaces=(), extra="forbid")
 
 
-settings = AppSettings(**load_settings())
-logger.info(settings)
+settings = AppSettings(**utils.load_settings())
+log.info(settings)
 
 backend_url = str(settings.backend_url)
 backend_health_endpoint = urljoin(backend_url, "/health")
@@ -82,7 +78,7 @@ def inference(latest_message, history):
             context.append(HumanMessage(content=human))
             context.append(AIMessage(content=(ai or "")))
         context.append(HumanMessage(content=latest_message))
-        logger.debug("Chat context: %s", context)
+        log.debug("Chat context: %s", context)
 
         response = ""
         for chunk in llm.stream(context):
@@ -104,7 +100,7 @@
 
     # https://github.com/openai/openai-python/tree/e8e5a0dc7ccf2db19d7f81991ee0987f9c3ae375?tab=readme-ov-file#handling-errors
     except openai.BadRequestError as err:
-        logger.error("Received BadRequestError from backend API: %s", err)
+        log.error("Received BadRequestError from backend API: %s", err)
         message = err.response.json()["message"]
         if INCLUDE_SYSTEM_PROMPT:
             raise PossibleSystemPromptException()
@@ -115,12 +111,12 @@
 
     except openai.APIConnectionError as err:
         if not BACKEND_INITIALISED:
-            logger.info("Backend API not yet ready")
+            log.info("Backend API not yet ready")
             gr.Info(
                 "Backend not ready - model may still be initialising - please try again later."
             )
         else:
-            logger.error("Failed to connect to backend API: %s", err)
+            log.error("Failed to connect to backend API: %s", err)
             gr.Warning("Failed to connect to backend API.")
 
     except openai.InternalServerError as err:
@@ -130,7 +126,7 @@
 
     # Catch-all for unexpected exceptions
     except Exception as err:
-        logger.error("Unexpected error during inference: %s", err)
+        log.error("Unexpected error during inference: %s", err)
         raise gr.Error("Unexpected error encountered - see logs for details.")
 
 
@@ -150,7 +146,7 @@ def inference_wrapper(*args):
         for chunk in inference(*args):
             yield chunk
     except PossibleSystemPromptException:
-        logger.warning("Disabling system prompt and retrying previous request")
+        log.warning("Disabling system prompt and retrying previous request")
         INCLUDE_SYSTEM_PROMPT = False
         for chunk in inference(*args):
             yield chunk
@@ -179,7 +175,7 @@
     css=settings.css_overrides,
     js=settings.custom_javascript,
 )
-logger.debug("Gradio chat interface config: %s", app.config)
+log.debug("Gradio chat interface config: %s", app.config)
 app.queue(
     default_concurrency_limit=10,
 ).launch(server_name=settings.host_address)
diff --git a/web-apps/chat/requirements.txt b/web-apps/chat/requirements.txt
index a82255b..9e080e4 100644
--- a/web-apps/chat/requirements.txt
+++ b/web-apps/chat/requirements.txt
@@ -4,4 +4,5 @@ openai
 langchain
 langchain_openai
 pydantic
+structlog
 ../utils
diff --git a/web-apps/image-analysis/app.py b/web-apps/image-analysis/app.py
index c60f412..3602dc9 100644
--- a/web-apps/image-analysis/app.py
+++ b/web-apps/image-analysis/app.py
@@ -1,8 +1,8 @@
 import base64
+import gradio as gr
 import logging
 import requests
-
-import gradio as gr
+import utils
 
 from typing import List, Dict
 from io import BytesIO
@@ -10,11 +10,8 @@
 from pydantic import BaseModel, ConfigDict
 
 from urllib.parse import urljoin
 
-from utils import load_settings, LLMParams
 
-logging.basicConfig()
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
+log = utils.get_logger()
 
 class PromptExample(BaseModel):
@@ -30,7 +27,7 @@
     page_title: str
     page_description: str
     examples: List[PromptExample]
-    llm_params: LLMParams | None
+    llm_params: utils.LLMParams | None
     # Theme customisation
     theme_params: Dict[str, str | list]
     theme_params_extended: Dict[str, str]
@@ -40,8 +37,8 @@
     model_config = ConfigDict(protected_namespaces=(), extra="forbid")
 
 
-settings = AppSettings(**load_settings())
-logger.info(settings)
+settings = AppSettings(**utils.load_settings())
+log.info(settings)
 
 
 # TODO: Rewrite this to stream output?
@@ -78,6 +75,7 @@ def analyze_image(image_url, prompt):
         payload["extra_body"] = {
             "top_k": settings.llm_params.top_k,
         }
+    log.debug("Request payload: %s", payload)
 
     # Make the API call to the vision model
     headers = {"Content-Type": "application/json"}
@@ -86,7 +84,15 @@
     response = requests.post(
         json=payload,
         headers=headers,
    )
-    response.raise_for_status()
+    try:
+        response.raise_for_status()
+    except Exception as e:
+        log.debug(
+            "Received HTTP %s response with content: %s",
+            response.status_code,
+            response.json(),
+        )
+        raise e
     # Extract and return the model's response
     result = response.json()
diff --git a/web-apps/image-analysis/requirements.txt b/web-apps/image-analysis/requirements.txt
index 006c6a9..a54cba5 100644
--- a/web-apps/image-analysis/requirements.txt
+++ b/web-apps/image-analysis/requirements.txt
@@ -3,4 +3,5 @@ requests
 gradio<5
 gradio_client
 pydantic
+structlog
 ../utils
diff --git a/web-apps/utils/utils.py b/web-apps/utils/utils.py
index de299f8..28e7a90 100644
--- a/web-apps/utils/utils.py
+++ b/web-apps/utils/utils.py
@@ -3,16 +3,29 @@
 #####
 
 import logging
+import os
 import pathlib
+import structlog
 import yaml
 
 from typing import Annotated
 from pydantic import BaseModel, ConfigDict, PositiveInt, Field
 
-logging.basicConfig()
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
+LOG_LEVELS = {
+    "debug": logging.DEBUG,
+    "info": logging.INFO,
+    "warn": logging.WARN,
+    "error": logging.ERROR,
+}
+
+def get_logger():
+    # Allow overwriting log level via env var
+    log_level = LOG_LEVELS[os.environ.get("PYTHON_GRADIO_LOG_LEVEL", "info").lower()]
+    structlog.configure(wrapper_class=structlog.make_filtering_bound_logger(log_level))
+    return structlog.get_logger()
+
+log = get_logger()
 
 class LLMParams(BaseModel):
     """
     Parameters for vLLM API requests. For details see
@@ -37,7 +50,7 @@ def get_k8s_namespace():
     try:
         current_k8s_namespace = open(NAMESPACE_FILE_PATH).read()
         return current_k8s_namespace
-    except FileNotFoundError as err:
+    except FileNotFoundError:
         return None
 
 
@@ -46,7 +59,7 @@ def api_address_in_cluster():
     if k8s_ns:
         return f"http://llm-backend.{k8s_ns}.svc"
     else:
-        logger.warning(
+        log.warning(
             "Failed to determine k8s namespace from %s - assuming non-kubernetes environment.",
             NAMESPACE_FILE_PATH,
         )
@@ -77,7 +90,7 @@ def load_settings() -> dict:
     # Sanity checks on settings
     unused_overrides = [k for k in overrides.keys() if k not in defaults.keys()]
     if unused_overrides:
-        logger.warning(
+        log.warning(
             f"Overrides {unused_overrides} not part of default settings so may be ignored."
             "Please check for typos"
         )