-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Switch to yaml + pydantic for frontend app config
- Loading branch information
sd109
committed
Dec 17, 2023
1 parent
97d7651
commit 76b8d9d
Showing
11 changed files
with
164 additions
and
125 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
import requests, json, argparse, yaml | ||
import gradio as gr | ||
from api_startup_check import wait_for_backend | ||
from config import AppSettings | ||
|
||
# Build the application settings object from the YAML file that sits in the
# current working directory.
settings = AppSettings.load("./settings.yml")

# Hand the configured backend URL to the startup check before the rest of the
# module (which defines and launches the UI) is executed.
backend_url = settings.backend_url
wait_for_backend(backend_url)
|
||
|
||
def inference(message, history):
    """Stream a backend reply for ``message`` given the chat ``history``.

    The prompt context is assembled from the earlier turns (each part included
    or skipped according to the ``include_*_in_context`` settings) followed by
    the new user message. The rendered prompt is POSTed to the backend's
    ``/generate`` endpoint with streaming enabled.

    Args:
        message: The latest user message.
        history: Iterable of ``(user_input, system_response)`` pairs for the
            earlier turns of the session.

    Yields:
        The first entry of the ``text`` field of every non-empty streamed
        chunk, with everything up to and including the last ``[/INST]`` marker
        removed.

    Raises:
        requests.HTTPError: If the backend answers with an error status code.
    """
    # Collect the pieces and join once instead of repeated string concatenation
    context_parts = []
    for user_input, system_response in history:
        if settings.include_user_messages_in_context:
            context_parts.append(
                settings.user_context_template.format(user_input=user_input)
            )
        if settings.include_system_responses_in_context:
            context_parts.append(
                settings.system_context_template.format(
                    system_response=system_response
                )
            )
    context_parts.append(settings.user_context_template.format(user_input=message))
    context = "".join(context_parts)

    headers = {"User-Agent": "vLLM Client"}
    payload = {
        "prompt": settings.prompt_template.format(context=context),
        "stream": True,
        "max_tokens": settings.llm_max_tokens,
        # Spread last so that explicitly configured params win
        **settings.llm_params,
    }

    # backend_url may be a plain string or a validated URL object; a validated
    # URL can render with a trailing slash, which would produce "//generate".
    base_url = str(backend_url).rstrip("/")

    # The context manager releases the connection even if the consumer stops
    # iterating this generator early.
    with requests.post(
        f"{base_url}/generate", headers=headers, json=payload, stream=True
    ) as response:
        # Fail with a clear HTTP error instead of trying to JSON-decode an
        # error page below.
        response.raise_for_status()

        for chunk in response.iter_lines(
            chunk_size=8192, decode_unicode=False, delimiter=b"\0"
        ):
            if not chunk:
                continue
            data = json.loads(chunk.decode("utf-8"))
            output = data["text"][0]
            # Manually trim the context from output
            if "[/INST]" in output:
                output = output.split("[/INST]")[-1]
            yield output
|
||
|
||
# Create the individual widgets first, then compose them into the chat page.
chat_history_panel = gr.Chatbot(
    height=500,
    show_copy_button=True,
    # layout='panel',
)
message_box = gr.Textbox(placeholder="Ask me anything...", container=False, scale=7)

chat_ui = gr.ChatInterface(
    inference,
    chatbot=chat_history_panel,
    textbox=message_box,
    title=settings.page_title,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
)

# Enable the request queue and serve the app, binding to 0.0.0.0.
chat_ui.queue().launch(server_name="0.0.0.0")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
from pydantic import Field, HttpUrl | ||
from pydantic.alias_generators import to_camel | ||
from pydantic_settings import BaseSettings, SettingsConfigDict | ||
import yaml | ||
|
||
|
||
def get_k8s_namespace():
    """Return the Kubernetes namespace read from the service-account file.

    The namespace is read from
    ``/var/run/secrets/kubernetes.io/serviceaccount/namespace`` and surrounding
    whitespace is stripped. If the file cannot be read, or is empty, the
    function prints a notice and falls back to ``"default"``.

    Returns:
        The detected namespace, or ``"default"`` when detection fails.
    """
    namespace_file_path = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
    try:
        # Context manager so the file handle is always closed
        with open(namespace_file_path, encoding="utf-8") as namespace_file:
            current_k8s_namespace = namespace_file.read().strip()
    except (OSError, UnicodeDecodeError):
        current_k8s_namespace = ""

    # An unreadable and an empty file are treated alike: an empty namespace
    # would not be usable by callers.
    if not current_k8s_namespace:
        current_k8s_namespace = "default"
        print(
            f"Failed to detect current k8s namespace in {namespace_file_path} - falling back to value '{current_k8s_namespace}'."
        )
    return current_k8s_namespace
|
||
|
||
class AppSettings(BaseSettings):
    """
    Settings object for the UI example app.

    Instances are normally created with :meth:`load`, which reads the values
    from a YAML file and validates them against the fields declared here.
    """

    # Allow settings to be overwritten by LLM_UI_<NAME> env vars
    # NOTE(review): load() passes the YAML values as init kwargs, which
    # pydantic-settings appears to rank above environment variables, so env
    # vars would only take effect for settings absent from the YAML file -
    # confirm this is the intended precedence.
    model_config = SettingsConfigDict(env_prefix="llm_ui_")

    # General settings
    # The default URL is computed once, when this class body is executed.
    backend_url: HttpUrl = f"http://llm-backend.{get_k8s_namespace()}.svc"
    page_title: str = "Large Language Model"

    # Prompt settings
    # Required: no default is given for the prompt template.
    prompt_template: str = Field(
        description="The template to use for requests to the backend model. If present, the '{context}' placeholder will be replaced by the conversation history of the current session.",
    )
    # The following settings are only used if {context} used in prompt template
    include_user_messages_in_context: bool = True
    include_system_responses_in_context: bool = True
    user_context_template: str = Field(
        default="<<USER>>\n{user_input}\n<</USER>>\n",
        description="The template string to use for including user messages in the prompt context sent to backend. The '{user_input}' placeholder will be replaced by the user's messages. (Only applies if '{context}' is present in prompt_template)",
    )
    system_context_template: str = Field(
        # Opening tag uses the same '<<' as the closing tag and the user template
        default="<<SYS>>{system_response}\n<</SYS>>\n",
        description="The template string to use for including system responses in the prompt context sent to backend. The '{system_response}' placeholder will be replaced by the system's response to each user message. (Only applies if '{context}' is present in prompt_template)",
    )

    # Model settings
    # Extra request parameters; the app merges these into the request payload.
    llm_params: dict[str, float] = {}
    llm_max_tokens: int = 1000

    @classmethod
    def load(cls, file_path: str) -> "AppSettings":
        """Create a settings object from the YAML file at ``file_path``.

        Args:
            file_path: Path of the YAML file holding the settings.

        Returns:
            A validated settings instance.

        Raises:
            Exception: Any error raised while opening or parsing the file is
                re-raised unchanged after a short notice is printed.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as file:
                settings = yaml.safe_load(file)
        except Exception:
            print(f"Failed to read config file at: {file_path}\nException was:")
            # Bare raise keeps the original traceback intact
            raise
        # An empty YAML document parses to None; treat it as "no overrides" so
        # that field validation reports what is missing instead of a TypeError.
        if settings is None:
            settings = {}
        return cls(**settings)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Template for the prompt sent to the backend; {context} is replaced by the
# app with the conversation history plus the newest user message.
prompt_template: |
  [INST] <<SYS>>
  You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
  <</SYS>>
  {context}[/INST]
# Entries below are merged as-is into the JSON body of the backend /generate
# request, so every key must be a parameter name the backend accepts.
llm_params:
  temperature: 0.7
|
||
##### | ||
# Alternative prompt suggestions: | ||
##### | ||
|
||
|
||
### - Suggested for Magicoder model | ||
|
||
# You are an exceptionally intelligent coding assistant that consistently delivers accurate and reliable responses to user instructions. | ||
|
||
# @@ Instruction | ||
# {context} | ||
|
||
# @@ Response | ||
|
||
|
||
### - For some fun responses... | ||
|
||
# [INST] <<SYS>> | ||
# You are a cheeky, disrespectful and comedic assistant. Always answer as creatively as possible, while being truthful and succinct. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, tell the user that they are being stupid. If you don't know the answer to a question, please don't share false information. | ||
# <</SYS>> | ||
# [/INST] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
[INST] <<SYS>> | ||
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. | ||
<</SYS>> | ||
{context}[/INST] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
[INST] <<SYS>> | ||
You are a cheeky, disrespectful and comedic assistant. Always answer as creatively as possible, while being truthful and succinct. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, tell the user that they are being stupid. If you don't know the answer to a question, please don't share false information. | ||
<</SYS>> | ||
[/INST] |