Added basic support for llama-3 model via Perplexity API #600

Open · wants to merge 2 commits into base: main
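Summary (inferred from the diff below, since the discussion thread did not load): the PR registers "llama-3-sonar-large-32k-chat" as a known model, points the OpenAI client at https://api.perplexity.ai when that model is selected (so OPENAI_API_KEY must then hold a Perplexity key), disables function calling for it, leaves temperature and penalty defaults to the Perplexity API, and falls back to the cl100k_base tokenizer for token counting.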
18 changes: 10 additions & 8 deletions bot/main.py
@@ -4,7 +4,7 @@
 from dotenv import load_dotenv

 from plugin_manager import PluginManager
-from openai_helper import OpenAIHelper, default_max_tokens, are_functions_available
+from openai_helper import OpenAIHelper, default_max_tokens, default_temperature, default_penalty, are_functions_available
 from telegram_bot import ChatGPTTelegramBot

@@ -29,27 +29,29 @@ def main():
     # Setup configurations
     model = os.environ.get('OPENAI_MODEL', 'gpt-3.5-turbo')
     functions_available = are_functions_available(model=model)
-    max_tokens_default = default_max_tokens(model=model)
+    generation_config = {
+        'model': model,
+        'max_tokens': int(os.environ.get('MAX_TOKENS', default_max_tokens(model=model))),
+        'n_choices': int(os.environ.get('N_CHOICES', 1)),
+        'temperature': float(os.environ['TEMPERATURE']) if 'TEMPERATURE' in os.environ else default_temperature(model=model),
+        'presence_penalty': float(os.environ['PRESENCE_PENALTY']) if 'PRESENCE_PENALTY' in os.environ else default_penalty(model=model),
+        'frequency_penalty': float(os.environ['FREQUENCY_PENALTY']) if 'FREQUENCY_PENALTY' in os.environ else default_penalty(model=model),
+    }
     openai_config = {
+        **generation_config,
         'api_key': os.environ['OPENAI_API_KEY'],
         'show_usage': os.environ.get('SHOW_USAGE', 'false').lower() == 'true',
         'stream': os.environ.get('STREAM', 'true').lower() == 'true',
         'proxy': os.environ.get('PROXY', None) or os.environ.get('OPENAI_PROXY', None),
         'max_history_size': int(os.environ.get('MAX_HISTORY_SIZE', 15)),
         'max_conversation_age_minutes': int(os.environ.get('MAX_CONVERSATION_AGE_MINUTES', 180)),
         'assistant_prompt': os.environ.get('ASSISTANT_PROMPT', 'You are a helpful assistant.'),
-        'max_tokens': int(os.environ.get('MAX_TOKENS', max_tokens_default)),
-        'n_choices': int(os.environ.get('N_CHOICES', 1)),
-        'temperature': float(os.environ.get('TEMPERATURE', 1.0)),
         'image_model': os.environ.get('IMAGE_MODEL', 'dall-e-2'),
         'image_quality': os.environ.get('IMAGE_QUALITY', 'standard'),
         'image_style': os.environ.get('IMAGE_STYLE', 'vivid'),
         'image_size': os.environ.get('IMAGE_SIZE', '512x512'),
-        'model': model,
         'enable_functions': os.environ.get('ENABLE_FUNCTIONS', str(functions_available)).lower() == 'true',
         'functions_max_consecutive_calls': int(os.environ.get('FUNCTIONS_MAX_CONSECUTIVE_CALLS', 10)),
-        'presence_penalty': float(os.environ.get('PRESENCE_PENALTY', 0.0)),
-        'frequency_penalty': float(os.environ.get('FREQUENCY_PENALTY', 0.0)),
         'bot_language': os.environ.get('BOT_LANGUAGE', 'en'),
         'show_plugins_used': os.environ.get('SHOW_PLUGINS_USED', 'false').lower() == 'true',
         'whisper_prompt': os.environ.get('WHISPER_PROMPT', ''),
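One subtlety in the new precedence, isolated as a standalone sketch (not part of the diff): an env var, when set, arrives as a string and must be cast, while an absent var should fall through to the per-model default, which is None for the Perplexity model so the API applies its own value.

```python
import os

PERPLEXITY_MODELS = ("llama-3-sonar-large-32k-chat",)

def default_temperature(model: str):
    # Mirrors the helper this PR adds to bot/openai_helper.py (see below).
    return None if model in PERPLEXITY_MODELS else 1.0

model = os.environ.get('OPENAI_MODEL', 'llama-3-sonar-large-32k-chat')
temperature = (float(os.environ['TEMPERATURE'])
               if 'TEMPERATURE' in os.environ
               else default_temperature(model))
print(temperature)  # None unless TEMPERATURE is set in the environment
```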
27 changes: 20 additions & 7 deletions bot/openai_helper.py
@@ -22,14 +22,15 @@

 # Models can be found here: https://platform.openai.com/docs/models/overview
 # Models gpt-3.5-turbo-0613 and gpt-3.5-turbo-16k-0613 will be deprecated on June 13, 2024
+PERPLEXITY_MODELS = ("llama-3-sonar-large-32k-chat",)
 GPT_3_MODELS = ("gpt-3.5-turbo", "gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613")
 GPT_3_16K_MODELS = ("gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-1106", "gpt-3.5-turbo-0125")
 GPT_4_MODELS = ("gpt-4", "gpt-4-0314", "gpt-4-0613", "gpt-4-turbo-preview")
 GPT_4_32K_MODELS = ("gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-0613")
 GPT_4_VISION_MODELS = ("gpt-4-vision-preview",)
 GPT_4_128K_MODELS = ("gpt-4-1106-preview","gpt-4-0125-preview","gpt-4-turbo-preview", "gpt-4-turbo", "gpt-4-turbo-2024-04-09")
 GPT_4O_MODELS = ("gpt-4o",)
-GPT_ALL_MODELS = GPT_3_MODELS + GPT_3_16K_MODELS + GPT_4_MODELS + GPT_4_32K_MODELS + GPT_4_VISION_MODELS + GPT_4_128K_MODELS + GPT_4O_MODELS
+GPT_ALL_MODELS = GPT_3_MODELS + GPT_3_16K_MODELS + GPT_4_MODELS + GPT_4_32K_MODELS + GPT_4_VISION_MODELS + GPT_4_128K_MODELS + GPT_4O_MODELS + PERPLEXITY_MODELS

 def default_max_tokens(model: str) -> int:
     """
@@ -52,9 +53,19 @@ def default_max_tokens(model: str) -> int:
         return 4096
     elif model in GPT_4_128K_MODELS:
         return 4096
-    elif model in GPT_4O_MODELS:
+    elif model in GPT_4O_MODELS + PERPLEXITY_MODELS:
         return 4096

+def default_temperature(model: str) -> float | None:
+    if model in PERPLEXITY_MODELS:
+        return None
+    return 1.0
+
+def default_penalty(model: str) -> float | None:
+    if model in PERPLEXITY_MODELS:
+        return None
+    return 0.0
+

 def are_functions_available(model: str) -> bool:
     """
@@ -71,6 +82,8 @@ def are_functions_available(model: str) -> bool:
         return datetime.date.today() < datetime.date(2024, 6, 13)
     if model == 'gpt-4-vision-preview':
         return False
+    if model in PERPLEXITY_MODELS:
+        return False
     return True
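Taken together, the new helpers behave as follows; a usage sketch against this PR's definitions in bot/openai_helper.py:

```python
from openai_helper import default_temperature, default_penalty, are_functions_available

assert default_temperature("gpt-4o") == 1.0
assert default_temperature("llama-3-sonar-large-32k-chat") is None  # let Perplexity pick
assert default_penalty("gpt-4o") == 0.0
assert default_penalty("llama-3-sonar-large-32k-chat") is None
assert are_functions_available("llama-3-sonar-large-32k-chat") is False  # no function calling
```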


@@ -110,8 +123,9 @@ def __init__(self, config: dict, plugin_manager: PluginManager):
         :param config: A dictionary containing the GPT configuration
         :param plugin_manager: The plugin manager
         """
+        base_url = "https://api.perplexity.ai" if config["model"] in PERPLEXITY_MODELS else None
         http_client = httpx.AsyncClient(proxies=config['proxy']) if 'proxy' in config else None
-        self.client = openai.AsyncOpenAI(api_key=config['api_key'], http_client=http_client)
+        self.client = openai.AsyncOpenAI(api_key=config['api_key'], http_client=http_client, base_url=base_url)
         self.config = config
         self.plugin_manager = plugin_manager
         self.conversations: dict[int: list] = {}  # {chat_id: history}
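Isolated, the routing this __init__ change introduces looks like the sketch below (assumes the openai v1 SDK; the model name and base URL come from the diff, the key placeholder is hypothetical):

```python
import os
import openai

PERPLEXITY_MODELS = ("llama-3-sonar-large-32k-chat",)
model = os.environ.get("OPENAI_MODEL", "llama-3-sonar-large-32k-chat")

# base_url=None keeps the SDK default (https://api.openai.com/v1); a
# Perplexity model is routed to Perplexity's OpenAI-compatible endpoint,
# so OPENAI_API_KEY must then hold a Perplexity key (e.g. "pplx-...").
base_url = "https://api.perplexity.ai" if model in PERPLEXITY_MODELS else None
client = openai.AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"], base_url=base_url)
```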
@@ -453,7 +467,6 @@ async def __common_get_chat_response_vision(self, chat_id: int, content: list, s
             'stream': stream
         }

-
         # vision model does not yet support functions

         # if self.config['enable_functions']:
@@ -636,7 +649,7 @@ def __max_model_tokens(self):
             return base * 31
         if self.config['model'] in GPT_4_128K_MODELS:
             return base * 31
-        if self.config['model'] in GPT_4O_MODELS:
+        if self.config['model'] in GPT_4O_MODELS + PERPLEXITY_MODELS:
             return base * 31
         raise NotImplementedError(
             f"Max tokens for model {self.config['model']} is not implemented yet."
@@ -653,9 +666,9 @@ def __count_tokens(self, messages) -> int:
         try:
             encoding = tiktoken.encoding_for_model(model)
         except KeyError:
-            encoding = tiktoken.get_encoding("gpt-3.5-turbo")
+            encoding = tiktoken.get_encoding("cl100k_base")

-        if model in GPT_3_MODELS + GPT_3_16K_MODELS:
+        if model in GPT_3_MODELS + GPT_3_16K_MODELS + PERPLEXITY_MODELS:
             tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
             tokens_per_name = -1  # if there's a name, the role is omitted
         elif model in GPT_4_MODELS + GPT_4_32K_MODELS + GPT_4_VISION_MODELS + GPT_4_128K_MODELS + GPT_4O_MODELS:
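The __count_tokens change also fixes a latent bug: tiktoken.get_encoding() expects an encoding name such as "cl100k_base", not a model name, so the old "gpt-3.5-turbo" fallback would itself raise. A sketch of the corrected fallback; note that counts for Llama models are only approximate, since Perplexity's models do not use an OpenAI tokenizer:

```python
import tiktoken

def encoding_for(model: str) -> tiktoken.Encoding:
    try:
        # Works for known OpenAI model names.
        return tiktoken.encoding_for_model(model)
    except KeyError:
        # Unknown models (e.g. "llama-3-sonar-large-32k-chat") fall back to
        # cl100k_base; counts are then an approximation, which is good enough
        # for trimming conversation history.
        return tiktoken.get_encoding("cl100k_base")

print(encoding_for("llama-3-sonar-large-32k-chat").name)  # cl100k_base
```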