feat: add dolphin and neuralbeagle
umbertogriffo committed Jan 18, 2024
1 parent c2396d8 commit 66ea81f
Showing 12 changed files with 185 additions and 133 deletions.
3 changes: 2 additions & 1 deletion README.md
@@ -109,7 +109,8 @@ The quantized models are stored in [GGML/GGUF](https://medium.com/@phillipgimmi/

### Supported Models
* [(Recommended) OpenChat 3.5 7B - GGUF](https://huggingface.co/TheBloke/openchat_3.5-GGUF)
* [NeuralMarcoro14 7B - GGUF](https://huggingface.co/mlabonne/NeuralMarcoro14-7B-GGUF)
* [NeuralBeagle14 7B - GGUF](https://huggingface.co/TheBloke/NeuralBeagle14-7B-GGUF)
* [Dolphin 2.6 Mistral 7B DPO Laser - GGUF](https://huggingface.co/TheBloke/dolphin-2.6-mistral-7B-dpo-laser-GGUF)
* [Zephyr 7B Beta - GGUF](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF)
* [Mistral 7B OpenOrca - GGUF](https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF)
* [StableLM Zephyr 3B - GGUF](https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF)
12 changes: 8 additions & 4 deletions chatbot/bot/client/lama_cpp_client.py
@@ -33,7 +33,7 @@ def generate_answer(self, prompt: str, max_new_tokens: int = 512) -> str:
Returns:
str: The generated answer.
"""
output = self.llm(prompt, max_tokens=max_new_tokens, temperature=0.7, echo=False)
output = self.llm(prompt, max_tokens=max_new_tokens, echo=False, **self.model_settings.config_answer)

answer = output["choices"][0]["text"]

@@ -50,7 +50,7 @@ async def async_generate_answer(self, prompt: str, max_new_tokens: int = 512) ->
Returns:
str: The generated answer.
"""
output = self.llm(prompt, max_tokens=max_new_tokens, temperature=0.7, echo=False)
output = self.llm(prompt, max_tokens=max_new_tokens, echo=False, **self.model_settings.config_answer)

answer = output["choices"][0]["text"]

@@ -81,13 +81,17 @@ def stream_answer(self, prompt: str, skip_prompt: bool = True, max_new_tokens: i
def start_answer_iterator_streamer(
self, prompt: str, skip_prompt: bool = True, max_new_tokens: int = 512
) -> Union[CreateCompletionResponse, Iterator[CreateCompletionStreamResponse]]:
stream = self.llm.create_completion(prompt, max_tokens=max_new_tokens, temperature=0.7, stream=True)
stream = self.llm.create_completion(
prompt, max_tokens=max_new_tokens, stream=True, **self.model_settings.config_answer
)
return stream

async def async_start_answer_iterator_streamer(
self, prompt: str, skip_prompt: bool = True, max_new_tokens: int = 512
) -> Union[CreateCompletionResponse, Iterator[CreateCompletionStreamResponse]]:
stream = self.llm.create_completion(prompt, max_tokens=max_new_tokens, temperature=0.7, stream=True)
stream = self.llm.create_completion(
prompt, max_tokens=max_new_tokens, stream=True, **self.model_settings.config_answer
)
return stream

def parse_token(self, token):
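For context, the client changes above replace the hard-coded `temperature=0.7` with the per-model `config_answer` dictionary. A minimal sketch of the resulting call pattern, assuming a llama-cpp-python `Llama` instance; the model path, variable names, and prompt text are illustrative, not taken from the repository:

```python
# Illustrative sketch only; the model path and prompt are assumptions.
from llama_cpp import Llama

llm = Llama(
    model_path="models/dolphin-2.6-mistral-7b-dpo-laser.Q4_K_M.gguf",  # assumed local path
    n_ctx=4096,
)

# Sampling options now travel with the model settings instead of being
# hard-coded in the client.
config_answer = {"temperature": 0.7, "stop": ["<|im_end|>"]}

output = llm("What does GGUF stand for?", max_tokens=512, echo=False, **config_answer)
print(output["choices"][0]["text"])
```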
71 changes: 71 additions & 0 deletions chatbot/bot/model/dolphin.py
@@ -0,0 +1,71 @@
from bot.client.llm_client import LlmClientType
from bot.model.model import Model


class DolphinSettings(Model):
url = "https://huggingface.co/TheBloke/dolphin-2.6-mistral-7B-dpo-laser-GGUF/resolve/main/dolphin-2.6-mistral-7b-dpo-laser.Q4_K_M.gguf"
file_name = "dolphin-2.6-mistral-7b-dpo-laser.Q4_K_M.gguf"
clients = [LlmClientType.LAMA_CPP]
config = {
"n_ctx": 4096, # The max sequence length to use - note that longer sequence lengths require much more resources
"n_threads": 8, # The number of CPU threads to use, tailor to your system and the resulting performance
"n_gpu_layers": -1, # The number of layers to offload to GPU, if you have GPU acceleration available
}
config_answer = {"temperature": 0.7, "stop": ["<|im_end|>"]}
system_template = "You are a helpful, respectful and honest assistant."
qa_prompt_template = """<|im_start|>system
{system}<|im_end|>
<|im_start|>user
{question}<|im_end|>
<|im_start|>assistant"""

ctx_prompt_template = """<|im_start|>system
{system}<|im_end|>
<|im_start|>user
Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the question below:
{question}<|im_end|>
<|im_start|>assistant
"""
refined_ctx_prompt_template = """<|im_start|>system
{system}<|im_end|>
<|im_start|>user
The original query is as follows: {question}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer
(only if needed) with some more context below.
---------------------
{context}
---------------------
Given the new context, refine the original answer to better answer the query.
If the context isn't useful, return the original answer.
Refined Answer:<|im_end|>
<|im_start|>assistant
"""
refined_question_conversation_awareness_prompt_template = """<|im_start|>system
{system}<|im_end|>
<|im_start|>user
Chat History:
---------------------
{chat_history}
---------------------
Follow Up Question: {question}
Given the above conversation and a follow up question, rephrase the follow up question to be a standalone question.
Standalone question:<|im_end|>
<|im_start|>assistant
"""

refined_answer_conversation_awareness_prompt_template = """<|im_start|>system
{system}<|im_end|>
<|im_start|>user
Chat History:
---------------------
{chat_history}
---------------------
Considering the context provided in the Chat History, answer the question below with conversation awareness:
{question}<|im_end|>
<|im_start|>assistant
"""
5 changes: 3 additions & 2 deletions chatbot/bot/model/model.py
@@ -1,13 +1,14 @@
from abc import ABC
from typing import Any, Dict
from typing import Any, Dict, Optional


class Model(ABC):
url: str
file_name: str
clients: list[str]
config: Dict[str, Any]
type: str
config_answer: Optional[Dict[str, Any]]
type: Optional[str]
system_template: str
qa_prompt_template: str
ctx_prompt_template: str
9 changes: 6 additions & 3 deletions chatbot/bot/model/model_settings.py
@@ -1,7 +1,8 @@
from enum import Enum

from bot.model.dolphin import DolphinSettings
from bot.model.mistral import MistralSettings
from bot.model.neural_marcoro import NeuralMarcoroSettings
from bot.model.neural_beagle import NeuralBeagleSettings
from bot.model.openchat import OpenChatSettings
from bot.model.stablelm_zephyr import StableLMZephyrSettings
from bot.model.zephyr import ZephyrSettings
@@ -10,17 +11,19 @@
class ModelType(Enum):
ZEPHYR = "zephyr"
MISTRAL = "mistral"
DOLPHIN = "dolphin"
STABLELM_ZEPHYR = "stablelm-zephyr"
OPENCHAT = "openchat"
NEURAL_MARCORO = "neural-marcoro"
NEURAL_BEAGLE = "neural-beagle"


SUPPORTED_MODELS = {
ModelType.ZEPHYR.value: ZephyrSettings,
ModelType.MISTRAL.value: MistralSettings,
ModelType.DOLPHIN.value: DolphinSettings,
ModelType.STABLELM_ZEPHYR.value: StableLMZephyrSettings,
ModelType.OPENCHAT.value: OpenChatSettings,
ModelType.NEURAL_MARCORO.value: NeuralMarcoroSettings,
ModelType.NEURAL_BEAGLE.value: NeuralBeagleSettings,
}


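A hedged sketch of how the extended registry can be used to resolve a model name into its settings class; `get_model_settings` is a hypothetical helper name and may not match the repository's actual lookup code:

```python
# Hypothetical helper; the repository's actual lookup may differ.
from bot.model.model import Model
from bot.model.model_settings import SUPPORTED_MODELS, ModelType


def get_model_settings(model_name: str) -> type[Model]:
    """Map a model name such as 'dolphin' or 'neural-beagle' to its settings class."""
    try:
        return SUPPORTED_MODELS[model_name]
    except KeyError as exc:
        raise ValueError(
            f"Unsupported model {model_name!r}. Supported models: {list(SUPPORTED_MODELS)}"
        ) from exc


settings = get_model_settings(ModelType.DOLPHIN.value)
print(settings.file_name)  # dolphin-2.6-mistral-7b-dpo-laser.Q4_K_M.gguf
```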
71 changes: 71 additions & 0 deletions chatbot/bot/model/neural_beagle.py
@@ -0,0 +1,71 @@
from bot.client.llm_client import LlmClientType
from bot.model.model import Model


class NeuralBeagleSettings(Model):
url = "https://huggingface.co/TheBloke/NeuralBeagle14-7B-GGUF/resolve/main/neuralbeagle14-7b.Q4_K_M.gguf"
file_name = "neuralbeagle14-7b.Q4_K_M.gguf"
clients = [LlmClientType.LAMA_CPP]
config = {
"n_ctx": 4096, # The max sequence length to use - note that longer sequence lengths require much more resources
"n_threads": 8, # The number of CPU threads to use, tailor to your system and the resulting performance
"n_gpu_layers": -1, # The number of layers to offload to GPU, if you have GPU acceleration available
}
config_answer = {"temperature": 0.7, "stop": ["<|im_end|>"]}
system_template = "You are a helpful, respectful and honest assistant."
qa_prompt_template = """<|im_start|>system
{system}<|im_end|>
<|im_start|>user
{question}<|im_end|>
<|im_start|>assistant"""

ctx_prompt_template = """<|im_start|>system
{system}<|im_end|>
<|im_start|>user
Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the question below:
{question}<|im_end|>
<|im_start|>assistant
"""
refined_ctx_prompt_template = """<|im_start|>system
{system}<|im_end|>
<|im_start|>user
The original query is as follows: {question}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer
(only if needed) with some more context below.
---------------------
{context}
---------------------
Given the new context, refine the original answer to better answer the query.
If the context isn't useful, return the original answer.
Refined Answer:<|im_end|>
<|im_start|>assistant
"""
refined_question_conversation_awareness_prompt_template = """<|im_start|>system
{system}<|im_end|>
<|im_start|>user
Chat History:
---------------------
{chat_history}
---------------------
Follow Up Question: {question}
Given the above conversation and a follow up question, rephrase the follow up question to be a standalone question.
Standalone question:<|im_end|>
<|im_start|>assistant
"""

refined_answer_conversation_awareness_prompt_template = """<|im_start|>system
{system}<|im_end|>
<|im_start|>user
Chat History:
---------------------
{chat_history}
---------------------
Considering the context provided in the Chat History, answer the question below with conversation awareness:
{question}<|im_end|>
<|im_start|>assistant
"""
80 changes: 0 additions & 80 deletions chatbot/bot/model/neural_marcoro.py

This file was deleted.

30 changes: 16 additions & 14 deletions chatbot/bot/model/openchat.py
@@ -3,30 +3,30 @@


class OpenChatSettings(Model):
url = "https://huggingface.co/TheBloke/openchat_3.5-GGUF/resolve/main/openchat_3.5.Q4_K_M.gguf"
file_name = "openchat_3.5.Q4_K_M.gguf"
url = "https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF/resolve/main/openchat-3.5-0106.Q4_K_M.gguf"
file_name = "openchat-3.5-0106.Q4_K_M.gguf"
clients = [LlmClientType.LAMA_CPP]
config = {
"n_ctx": 4096, # The max sequence length to use - note that longer sequence lengths require much more resources
"n_threads": 8, # The number of CPU threads to use, tailor to your system and the resulting performance
"n_gpu_layers": 50, # The number of layers to offload to GPU, if you have GPU acceleration available
}

config_answer = {"temperature": 0.7, "stop": []}
system_template = "You are a helpful, respectful and honest assistant. "
qa_prompt_template = """{system}\n
GPT4 User: Answer the question below:
{question}<|end_of_turn|>GPT4 Assistant:
GPT4 Correct User: Answer the question below:
{question}<|end_of_turn|>GPT4 Correct Assistant:
"""
ctx_prompt_template = """{system}\n
GPT4 User: Context information is below.
GPT4 Correct User: Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the question below:
{question}<|end_of_turn|>GPT4 Assistant:
{question}<|end_of_turn|>GPT4 Correct Assistant:
"""
refined_ctx_prompt_template = """{system}\n
GPT4 User: The original query is as follows: {question}
GPT4 Correct User: The original query is as follows: {question}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer
(only if needed) with some more context below.
@@ -35,21 +35,22 @@ class OpenChatSettings(Model):
---------------------
Given the new context, refine the original answer to better answer the query.
If the context isn't useful, return the original answer.
Refined Answer:<|end_of_turn|>GPT4 Assistant:
Refined Answer:<|end_of_turn|>GPT4 Correct Assistant:
"""
refined_question_conversation_awareness_prompt_template = """{system}\n
GPT4 User: Chat History:
GPT4 Correct User: Chat History:
---------------------
{chat_history}
---------------------
Follow Up Question: {question}
Given the above conversation and a follow up question, rephrase the follow up question to be a standalone question.
Standalone question:<|end_of_turn|>GPT4 Assistant:
Standalone question:<|end_of_turn|>GPT4 Correct Assistant:
"""

refined_answer_conversation_awareness_prompt_template = """
GPT4 User: You are engaging in a conversation with a human participant who is unaware that they might be interacting
with a machine. \nYour goal is to respond in a way that convincingly simulates human-like intelligence and behavior. \n
GPT4 Correct User: You are engaging in a conversation with a human participant who is unaware that they might be
interacting with a machine. \n
Your goal is to respond in a way that convincingly simulates human-like intelligence and behavior. \n
The conversation should be natural, coherent, and contextually relevant. \n
Chat History:
---------------------
@@ -59,5 +60,6 @@ class OpenChatSettings(Model):
Given the context provided in the Chat History and the follow up question, please answer the follow up question above.
If the follow up question isn't correlated to the context provided in the Chat History, please just answer the follow up
question, ignoring the context provided in the Chat History.
Please also don't reformulate the follow up question, and write just a concise answer.<|end_of_turn|>GPT4 Assistant:
Please also don't reformulate the follow up question, and write just a concise answer.
<|end_of_turn|>GPT4 Correct Assistant:
"""