Added basic support for llama-3 model via Perplexity API #600

Open · wants to merge 2 commits into base: main
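Summary (inferred from the diff below, since the discussion thread did not load): the PR registers "llama-3-sonar-large-32k-chat" as a known model, points the OpenAI client at https://api.perplexity.ai when that model is selected (so OPENAI_API_KEY must then hold a Perplexity key), disables function calling for it, leaves temperature and penalty defaults to the Perplexity API, and falls back to the cl100k_base tokenizer for token counting.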
18 changes: 10 additions & 8 deletions bot/main.py
@@ -4,7 +4,7 @@
 from dotenv import load_dotenv

 from plugin_manager import PluginManager
-from openai_helper import OpenAIHelper, default_max_tokens, are_functions_available
+from openai_helper import OpenAIHelper, default_max_tokens, default_temperature, default_penalty, are_functions_available
 from telegram_bot import ChatGPTTelegramBot

@@ -29,27 +29,29 @@ def main():
     # Setup configurations
     model = os.environ.get('OPENAI_MODEL', 'gpt-3.5-turbo')
     functions_available = are_functions_available(model=model)
-    max_tokens_default = default_max_tokens(model=model)
+    generation_config = {
+        'model': model,
+        'max_tokens': int(os.environ.get('MAX_TOKENS', default_max_tokens(model=model))),
+        'n_choices': int(os.environ.get('N_CHOICES', 1)),
+        'temperature': float(os.environ['TEMPERATURE']) if 'TEMPERATURE' in os.environ else default_temperature(model=model),
+        'presence_penalty': float(os.environ['PRESENCE_PENALTY']) if 'PRESENCE_PENALTY' in os.environ else default_penalty(model=model),
+        'frequency_penalty': float(os.environ['FREQUENCY_PENALTY']) if 'FREQUENCY_PENALTY' in os.environ else default_penalty(model=model),
+    }
     openai_config = {
+        **generation_config,
         'api_key': os.environ['OPENAI_API_KEY'],
         'show_usage': os.environ.get('SHOW_USAGE', 'false').lower() == 'true',
         'stream': os.environ.get('STREAM', 'true').lower() == 'true',
         'proxy': os.environ.get('PROXY', None) or os.environ.get('OPENAI_PROXY', None),
         'max_history_size': int(os.environ.get('MAX_HISTORY_SIZE', 15)),
         'max_conversation_age_minutes': int(os.environ.get('MAX_CONVERSATION_AGE_MINUTES', 180)),
         'assistant_prompt': os.environ.get('ASSISTANT_PROMPT', 'You are a helpful assistant.'),
-        'max_tokens': int(os.environ.get('MAX_TOKENS', max_tokens_default)),
-        'n_choices': int(os.environ.get('N_CHOICES', 1)),
-        'temperature': float(os.environ.get('TEMPERATURE', 1.0)),
         'image_model': os.environ.get('IMAGE_MODEL', 'dall-e-2'),
         'image_quality': os.environ.get('IMAGE_QUALITY', 'standard'),
         'image_style': os.environ.get('IMAGE_STYLE', 'vivid'),
         'image_size': os.environ.get('IMAGE_SIZE', '512x512'),
-        'model': model,
         'enable_functions': os.environ.get('ENABLE_FUNCTIONS', str(functions_available)).lower() == 'true',
         'functions_max_consecutive_calls': int(os.environ.get('FUNCTIONS_MAX_CONSECUTIVE_CALLS', 10)),
-        'presence_penalty': float(os.environ.get('PRESENCE_PENALTY', 0.0)),
-        'frequency_penalty': float(os.environ.get('FREQUENCY_PENALTY', 0.0)),
         'bot_language': os.environ.get('BOT_LANGUAGE', 'en'),
         'show_plugins_used': os.environ.get('SHOW_PLUGINS_USED', 'false').lower() == 'true',
         'whisper_prompt': os.environ.get('WHISPER_PROMPT', ''),
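One subtlety in the new precedence, isolated as a standalone sketch (not part of the diff): an env var, when set, arrives as a string and must be cast, while an absent var should fall through to the per-model default, which is None for the Perplexity model so the API applies its own value.

```python
import os

PERPLEXITY_MODELS = ("llama-3-sonar-large-32k-chat",)

def default_temperature(model: str):
    # Mirrors the helper this PR adds to bot/openai_helper.py (see below).
    return None if model in PERPLEXITY_MODELS else 1.0

model = os.environ.get('OPENAI_MODEL', 'llama-3-sonar-large-32k-chat')
temperature = (float(os.environ['TEMPERATURE'])
               if 'TEMPERATURE' in os.environ
               else default_temperature(model))
print(temperature)  # None unless TEMPERATURE is set in the environment
```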
27 changes: 20 additions & 7 deletions bot/openai_helper.py
@@ -22,14 +22,15 @@

 # Models can be found here: https://platform.openai.com/docs/models/overview
 # Models gpt-3.5-turbo-0613 and gpt-3.5-turbo-16k-0613 will be deprecated on June 13, 2024
+PERPLEXITY_MODELS = ("llama-3-sonar-large-32k-chat",)
 GPT_3_MODELS = ("gpt-3.5-turbo", "gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613")
 GPT_3_16K_MODELS = ("gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-1106", "gpt-3.5-turbo-0125")
 GPT_4_MODELS = ("gpt-4", "gpt-4-0314", "gpt-4-0613", "gpt-4-turbo-preview")
 GPT_4_32K_MODELS = ("gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-0613")
 GPT_4_VISION_MODELS = ("gpt-4-vision-preview",)
 GPT_4_128K_MODELS = ("gpt-4-1106-preview","gpt-4-0125-preview","gpt-4-turbo-preview", "gpt-4-turbo", "gpt-4-turbo-2024-04-09")
 GPT_4O_MODELS = ("gpt-4o",)
-GPT_ALL_MODELS = GPT_3_MODELS + GPT_3_16K_MODELS + GPT_4_MODELS + GPT_4_32K_MODELS + GPT_4_VISION_MODELS + GPT_4_128K_MODELS + GPT_4O_MODELS
+GPT_ALL_MODELS = GPT_3_MODELS + GPT_3_16K_MODELS + GPT_4_MODELS + GPT_4_32K_MODELS + GPT_4_VISION_MODELS + GPT_4_128K_MODELS + GPT_4O_MODELS + PERPLEXITY_MODELS

 def default_max_tokens(model: str) -> int:
     """
@@ -52,9 +53,19 @@ def default_max_tokens(model: str) -> int:
         return 4096
     elif model in GPT_4_128K_MODELS:
         return 4096
-    elif model in GPT_4O_MODELS:
+    elif model in GPT_4O_MODELS + PERPLEXITY_MODELS:
         return 4096

+def default_temperature(model: str) -> float | None:
+    if model in PERPLEXITY_MODELS:
+        return None
+    return 1.0
+
+def default_penalty(model: str) -> float | None:
+    if model in PERPLEXITY_MODELS:
+        return None
+    return 0.0
+

 def are_functions_available(model: str) -> bool:
     """
@@ -71,6 +82,8 @@ def are_functions_available(model: str) -> bool:
         return datetime.date.today() < datetime.date(2024, 6, 13)
     if model == 'gpt-4-vision-preview':
         return False
+    if model in PERPLEXITY_MODELS:
+        return False
     return True
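Taken together, the new helpers behave as follows; a usage sketch against this PR's definitions in bot/openai_helper.py:

```python
from openai_helper import default_temperature, default_penalty, are_functions_available

assert default_temperature("gpt-4o") == 1.0
assert default_temperature("llama-3-sonar-large-32k-chat") is None  # let Perplexity pick
assert default_penalty("gpt-4o") == 0.0
assert default_penalty("llama-3-sonar-large-32k-chat") is None
assert are_functions_available("llama-3-sonar-large-32k-chat") is False  # no function calling
```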


@@ -110,8 +123,9 @@ def __init__(self, config: dict, plugin_manager: PluginManager):
         :param config: A dictionary containing the GPT configuration
         :param plugin_manager: The plugin manager
         """
+        base_url = "https://api.perplexity.ai" if config["model"] in PERPLEXITY_MODELS else None
         http_client = httpx.AsyncClient(proxies=config['proxy']) if 'proxy' in config else None
-        self.client = openai.AsyncOpenAI(api_key=config['api_key'], http_client=http_client)
+        self.client = openai.AsyncOpenAI(api_key=config['api_key'], http_client=http_client, base_url=base_url)
         self.config = config
         self.plugin_manager = plugin_manager
         self.conversations: dict[int: list] = {}  # {chat_id: history}
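Isolated, the routing this __init__ change introduces looks like the sketch below (assumes the openai v1 SDK; the model name and base URL come from the diff, the key placeholder is hypothetical):

```python
import os
import openai

PERPLEXITY_MODELS = ("llama-3-sonar-large-32k-chat",)
model = os.environ.get("OPENAI_MODEL", "llama-3-sonar-large-32k-chat")

# base_url=None keeps the SDK default (https://api.openai.com/v1); a
# Perplexity model is routed to Perplexity's OpenAI-compatible endpoint,
# so OPENAI_API_KEY must then hold a Perplexity key (e.g. "pplx-...").
base_url = "https://api.perplexity.ai" if model in PERPLEXITY_MODELS else None
client = openai.AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"], base_url=base_url)
```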
@@ -453,7 +467,6 @@ async def __common_get_chat_response_vision(self, chat_id: int, content: list, s
             'stream': stream
         }

-
         # vision model does not yet support functions

         # if self.config['enable_functions']:
@@ -636,7 +649,7 @@ def __max_model_tokens(self):
             return base * 31
         if self.config['model'] in GPT_4_128K_MODELS:
             return base * 31
-        if self.config['model'] in GPT_4O_MODELS:
+        if self.config['model'] in GPT_4O_MODELS + PERPLEXITY_MODELS:
             return base * 31
         raise NotImplementedError(
             f"Max tokens for model {self.config['model']} is not implemented yet."
@@ -653,9 +666,9 @@ def __count_tokens(self, messages) -> int:
         try:
             encoding = tiktoken.encoding_for_model(model)
         except KeyError:
-            encoding = tiktoken.get_encoding("gpt-3.5-turbo")
+            encoding = tiktoken.get_encoding("cl100k_base")

-        if model in GPT_3_MODELS + GPT_3_16K_MODELS:
+        if model in GPT_3_MODELS + GPT_3_16K_MODELS + PERPLEXITY_MODELS:
             tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
             tokens_per_name = -1  # if there's a name, the role is omitted
         elif model in GPT_4_MODELS + GPT_4_32K_MODELS + GPT_4_VISION_MODELS + GPT_4_128K_MODELS + GPT_4O_MODELS:
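The __count_tokens change also fixes a latent bug: tiktoken.get_encoding() expects an encoding name such as "cl100k_base", not a model name, so the old "gpt-3.5-turbo" fallback would itself raise. A sketch of the corrected fallback; note that counts for Llama models are only approximate, since Perplexity's models do not use an OpenAI tokenizer:

```python
import tiktoken

def encoding_for(model: str) -> tiktoken.Encoding:
    try:
        # Works for known OpenAI model names.
        return tiktoken.encoding_for_model(model)
    except KeyError:
        # Unknown models (e.g. "llama-3-sonar-large-32k-chat") fall back to
        # cl100k_base; counts are then an approximation, which is good enough
        # for trimming conversation history.
        return tiktoken.get_encoding("cl100k_base")

print(encoding_for("llama-3-sonar-large-32k-chat").name)  # cl100k_base
```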