From 2d63efdd86452b91fad4331fb21df61766aa420b Mon Sep 17 00:00:00 2001
From: hahuyhoang411
Date: Fri, 15 Dec 2023 16:35:00 +0700
Subject: [PATCH 1/2] fix(models): update ctxlen + maxlength to 4096

---
 models/capybara-34b/model.json          | 4 ++--
 models/deepseek-coder-1.3b/model.json   | 4 ++--
 models/deepseek-coder-34b/model.json    | 4 ++--
 models/llama2-chat-70b-q4/model.json    | 4 ++--
 models/llama2-chat-7b-q4/model.json     | 4 ++--
 models/lzlv-70b/model.json              | 4 ++--
 models/mistral-ins-7b-q4/model.json     | 4 ++--
 models/mixtral-8x7b-instruct/model.json | 4 ++--
 models/noromaid-20b/model.json          | 4 ++--
 models/openhermes-neural-7b/model.json  | 4 ++--
 models/pandora-10.7b-v1/model.json      | 4 ++--
 models/phind-34b/model.json             | 4 ++--
 models/solar-10.7b-instruct/model.json  | 4 ++--
 models/solar-10.7b-slerp/model.json     | 4 ++--
 models/starling-7b/model.json           | 4 ++--
 models/tinyllama-1.1b/model.json        | 4 ++--
 models/trinity-v1-7b/model.json         | 4 ++--
 models/wizardcoder-13b/model.json       | 4 ++--
 models/yi-34b/model.json                | 4 ++--
 19 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/models/capybara-34b/model.json b/models/capybara-34b/model.json
index e337191a91..366d7e968c 100644
--- a/models/capybara-34b/model.json
+++ b/models/capybara-34b/model.json
@@ -7,11 +7,11 @@
   "description": "Nous Capybara 34B, a variant of the Yi-34B model, is the first Nous model with a 200K context length, trained for three epochs on the innovative Capybara dataset.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 2048,
+    "ctx_len": 4096,
     "prompt_template": "USER:\n{prompt}\nASSISTANT:"
   },
   "parameters": {
-    "max_tokens": 2048
+    "max_tokens": 4096
   },
   "metadata": {
     "author": "NousResearch, The Bloke",
diff --git a/models/deepseek-coder-1.3b/model.json b/models/deepseek-coder-1.3b/model.json
index 77fa2d1f77..5ddd56ebbd 100644
--- a/models/deepseek-coder-1.3b/model.json
+++ b/models/deepseek-coder-1.3b/model.json
@@ -8,11 +8,11 @@
   "description": "Deepseek Coder trained on 2T tokens (87% code, 13% English/Chinese), excelling in project-level code completion with advanced capabilities across multiple programming languages.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 2048,
+    "ctx_len": 4096,
     "prompt_template": "### Instruction:\n{prompt}\n### Response:"
   },
   "parameters": {
-    "max_tokens": 2048
+    "max_tokens": 4096
   },
   "metadata": {
     "author": "Deepseek, The Bloke",
diff --git a/models/deepseek-coder-34b/model.json b/models/deepseek-coder-34b/model.json
index 80b224691b..e68ec03347 100644
--- a/models/deepseek-coder-34b/model.json
+++ b/models/deepseek-coder-34b/model.json
@@ -7,11 +7,11 @@
   "description": "Deepseek Coder trained on 2T tokens (87% code, 13% English/Chinese), excelling in project-level code completion with advanced capabilities across multiple programming languages.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 2048,
+    "ctx_len": 4096,
     "prompt_template": "### Instruction:\n{prompt}\n### Response:"
   },
   "parameters": {
-    "max_tokens": 2048
+    "max_tokens": 4096
   },
   "metadata": {
     "author": "Deepseek, The Bloke",
diff --git a/models/llama2-chat-70b-q4/model.json b/models/llama2-chat-70b-q4/model.json
index abe44ddad5..6fe68f6fd3 100644
--- a/models/llama2-chat-70b-q4/model.json
+++ b/models/llama2-chat-70b-q4/model.json
@@ -7,11 +7,11 @@
   "description": "This is a 4-bit quantized version of Meta AI's Llama 2 Chat 70b model.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 2048,
+    "ctx_len": 4096,
     "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]"
   },
   "parameters": {
-    "max_tokens": 2048
+    "max_tokens": 4096
   },
"metadata": { "author": "MetaAI, The Bloke", diff --git a/models/llama2-chat-7b-q4/model.json b/models/llama2-chat-7b-q4/model.json index 9ec6014296..bf291a3867 100644 --- a/models/llama2-chat-7b-q4/model.json +++ b/models/llama2-chat-7b-q4/model.json @@ -7,11 +7,11 @@ "description": "This is a 4-bit quantized iteration of Meta AI's Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.", "format": "gguf", "settings": { - "ctx_len": 2048, + "ctx_len": 4096, "prompt_template": "[INST] <>\n{system_message}<>\n{prompt}[/INST]" }, "parameters": { - "max_tokens": 2048 + "max_tokens": 4096 }, "metadata": { "author": "MetaAI, The Bloke", diff --git a/models/lzlv-70b/model.json b/models/lzlv-70b/model.json index cb2387432a..bae7373287 100644 --- a/models/lzlv-70b/model.json +++ b/models/lzlv-70b/model.json @@ -7,11 +7,11 @@ "description": "lzlv_70B is a sophisticated AI model designed for roleplaying and creative tasks. This merge aims to combine intelligence with creativity, seemingly outperforming its individual components in complex scenarios and creative outputs.", "format": "gguf", "settings": { - "ctx_len": 2048, + "ctx_len": 4096, "prompt_template": "USER:\n{prompt}\nASSISTANT:" }, "parameters": { - "max_tokens": 2048 + "max_tokens": 4096 }, "metadata": { "author": "Lizpreciatior, The Bloke", diff --git a/models/mistral-ins-7b-q4/model.json b/models/mistral-ins-7b-q4/model.json index 9f2c22a03e..39427152b5 100644 --- a/models/mistral-ins-7b-q4/model.json +++ b/models/mistral-ins-7b-q4/model.json @@ -7,14 +7,14 @@ "description": "This is a 4-bit quantized iteration of MistralAI's Mistral Instruct 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.", "format": "gguf", "settings": { - "ctx_len": 2048, + "ctx_len": 4096, "system_prompt": "", "user_prompt": "[INST]", "ai_prompt": "[/INST]", "prompt_template": "[INST]{prompt}\n[/INST]" }, "parameters": { - "max_tokens": 2048 + "max_tokens": 4096 }, "metadata": { "author": "MistralAI, The Bloke", diff --git a/models/mixtral-8x7b-instruct/model.json b/models/mixtral-8x7b-instruct/model.json index 060692cb8d..e910c94ab7 100644 --- a/models/mixtral-8x7b-instruct/model.json +++ b/models/mixtral-8x7b-instruct/model.json @@ -7,11 +7,11 @@ "description": "The Mixtral-8x7B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts. 
The Mixtral-8x7B outperforms Llama 2 70B on most benchmarks we tested.", "format": "gguf", "settings": { - "ctx_len": 2048, + "ctx_len": 4096, "prompt_template": "[INST] {prompt} [/INST]" }, "parameters": { - "max_tokens": 2048 + "max_tokens": 4096 }, "metadata": { "author": "MistralAI, TheBloke", diff --git a/models/noromaid-20b/model.json b/models/noromaid-20b/model.json index 961c4bd25f..ea33f3ca14 100644 --- a/models/noromaid-20b/model.json +++ b/models/noromaid-20b/model.json @@ -7,11 +7,11 @@ "description": "The Noromaid 20b model is designed for role-playing and general use, featuring a unique touch with the no_robots dataset that enhances human-like behavior.", "format": "gguf", "settings": { - "ctx_len": 2048, + "ctx_len": 4096, "prompt_template": "### Instruction:{prompt}\n### Response:" }, "parameters": { - "max_tokens": 2048 + "max_tokens": 4096 }, "metadata": { "author": "NeverSleep, The Bloke", diff --git a/models/openhermes-neural-7b/model.json b/models/openhermes-neural-7b/model.json index 168df7a891..636916d9e5 100644 --- a/models/openhermes-neural-7b/model.json +++ b/models/openhermes-neural-7b/model.json @@ -7,11 +7,11 @@ "description": "OpenHermes Neural is a merged model using the TIES method.", "format": "gguf", "settings": { - "ctx_len": 2048, + "ctx_len": 4096, "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant" }, "parameters": { - "max_tokens": 2048 + "max_tokens": 4096 }, "metadata": { "author": "Intel, Jan", diff --git a/models/pandora-10.7b-v1/model.json b/models/pandora-10.7b-v1/model.json index 9414144232..411b837f19 100644 --- a/models/pandora-10.7b-v1/model.json +++ b/models/pandora-10.7b-v1/model.json @@ -7,11 +7,11 @@ "description": "Pandora, our research model, employs the Passthrough merging technique to merge 2x7B models into 1.", "format": "gguf", "settings": { - "ctx_len": 2048, + "ctx_len": 4096, "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant" }, "parameters": { - "max_tokens": 2048 + "max_tokens": 4096 }, "metadata": { "author": "Jan", diff --git a/models/phind-34b/model.json b/models/phind-34b/model.json index d3fc6664ab..3d7c326f86 100644 --- a/models/phind-34b/model.json +++ b/models/phind-34b/model.json @@ -7,11 +7,11 @@ "description": "Phind-CodeLlama-34B-v2 is an AI model fine-tuned on 1.5B tokens of high-quality programming data. It's a SOTA open-source model in coding. This multi-lingual model excels in various programming languages, including Python, C/C++, TypeScript, Java, and is designed to be steerable and user-friendly.", "format": "gguf", "settings": { - "ctx_len": 2048, + "ctx_len": 4096, "prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant" }, "parameters": { - "max_tokens": 2048 + "max_tokens": 4096 }, "metadata": { "author": "Phind, The Bloke", diff --git a/models/solar-10.7b-instruct/model.json b/models/solar-10.7b-instruct/model.json index 0fc2e433ae..0416a37c0a 100644 --- a/models/solar-10.7b-instruct/model.json +++ b/models/solar-10.7b-instruct/model.json @@ -7,11 +7,11 @@ "description": "SOLAR-10.7B model built on the Llama2 architecture with Depth Up-Scaling and integrated Mistral 7B weights. 
Its robustness and adaptability make it ideal for fine-tuning applications, significantly enhancing performance with simple instruction-based techniques.", "format": "gguf", "settings": { - "ctx_len": 2048, + "ctx_len": 4096, "prompt_template": "### User: {prompt}\n### Assistant:" }, "parameters": { - "max_tokens": 2048 + "max_tokens": 4096 }, "metadata": { "author": "Upstage, Jan", diff --git a/models/solar-10.7b-slerp/model.json b/models/solar-10.7b-slerp/model.json index 16a91afb54..a3ca7d93f7 100644 --- a/models/solar-10.7b-slerp/model.json +++ b/models/solar-10.7b-slerp/model.json @@ -7,11 +7,11 @@ "description": "This model uses the Slerp merge method from SOLAR Instruct and Pandora-v1", "format": "gguf", "settings": { - "ctx_len": 2048, + "ctx_len": 4096, "prompt_template": "### User: {prompt}\n### Assistant:" }, "parameters": { - "max_tokens": 2048 + "max_tokens": 4096 }, "metadata": { "author": "Jan", diff --git a/models/starling-7b/model.json b/models/starling-7b/model.json index cdb06c2c72..e34b369f2d 100644 --- a/models/starling-7b/model.json +++ b/models/starling-7b/model.json @@ -7,11 +7,11 @@ "description": "Starling-RM-7B-alpha is a language model finetuned with Reinforcement Learning from AI Feedback from Openchat 3.5. It stands out for its impressive performance using GPT-4 as a judge, making it one of the top-performing models in its category.", "format": "gguf", "settings": { - "ctx_len": 2048, + "ctx_len": 4096, "prompt_template": "GPT4 User: {prompt}<|end_of_turn|>GPT4 Assistant:" }, "parameters": { - "max_tokens": 2048 + "max_tokens": 4096 }, "metadata": { "author": "Berkeley-nest, The Bloke", diff --git a/models/tinyllama-1.1b/model.json b/models/tinyllama-1.1b/model.json index d924be0460..3dd5bcecf9 100644 --- a/models/tinyllama-1.1b/model.json +++ b/models/tinyllama-1.1b/model.json @@ -7,11 +7,11 @@ "description": "The TinyLlama project, featuring a 1.1B parameter Llama model, is pretrained on an expansive 3 trillion token dataset. Its design ensures easy integration with various Llama-based open-source projects. Despite its smaller size, it efficiently utilizes lower computational and memory resources, drawing on GPT-4's analytical prowess to enhance its conversational abilities and versatility.", "format": "gguf", "settings": { - "ctx_len": 2048, + "ctx_len": 4096, "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>" }, "parameters": { - "max_tokens": 2048 + "max_tokens": 4096 }, "metadata": { "author": "TinyLlama", diff --git a/models/trinity-v1-7b/model.json b/models/trinity-v1-7b/model.json index 345eff9557..67272df72d 100644 --- a/models/trinity-v1-7b/model.json +++ b/models/trinity-v1-7b/model.json @@ -7,11 +7,11 @@ "description": "Trinity is an experimental model merge of GreenNodeLM & LeoScorpius using the Slerp method. Recommended for daily assistance purposes.", "format": "gguf", "settings": { - "ctx_len": 2048, + "ctx_len": 4096, "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant" }, "parameters": { - "max_tokens": 2048 + "max_tokens": 4096 }, "metadata": { "author": "Jan", diff --git a/models/wizardcoder-13b/model.json b/models/wizardcoder-13b/model.json index 774c1a4241..77bf7050be 100644 --- a/models/wizardcoder-13b/model.json +++ b/models/wizardcoder-13b/model.json @@ -7,11 +7,11 @@ "description": "WizardCoder-Python-13B is a Python coding model major models like ChatGPT-3.5. 
This model based on the Llama2 architecture, demonstrate high proficiency in specific domains like coding and mathematics.", "format": "gguf", "settings": { - "ctx_len": 2048, + "ctx_len": 4096, "prompt_template": "### Instruction:\n{prompt}\n### Response:" }, "parameters": { - "max_tokens": 2048 + "max_tokens": 4096 }, "metadata": { "author": "WizardLM, The Bloke", diff --git a/models/yi-34b/model.json b/models/yi-34b/model.json index 6a570b4de4..3fa06efed0 100644 --- a/models/yi-34b/model.json +++ b/models/yi-34b/model.json @@ -7,11 +7,11 @@ "description": "Yi-34B, a specialized chat model, is known for its diverse and creative responses and excels across various NLP tasks and benchmarks.", "format": "gguf", "settings": { - "ctx_len": 2048, + "ctx_len": 4096, "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant" }, "parameters": { - "max_tokens": 2048 + "max_tokens": 4096 }, "metadata": { "author": "01-ai, The Bloke", From a09a486906931ad477c322aaf7b0cfe09af62d18 Mon Sep 17 00:00:00 2001 From: hahuyhoang411 Date: Fri, 15 Dec 2023 16:36:39 +0700 Subject: [PATCH 2/2] fix(tinyllama): adjust ctx+maxtok --- models/tinyllama-1.1b/model.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/tinyllama-1.1b/model.json b/models/tinyllama-1.1b/model.json index 3dd5bcecf9..d924be0460 100644 --- a/models/tinyllama-1.1b/model.json +++ b/models/tinyllama-1.1b/model.json @@ -7,11 +7,11 @@ "description": "The TinyLlama project, featuring a 1.1B parameter Llama model, is pretrained on an expansive 3 trillion token dataset. Its design ensures easy integration with various Llama-based open-source projects. Despite its smaller size, it efficiently utilizes lower computational and memory resources, drawing on GPT-4's analytical prowess to enhance its conversational abilities and versatility.", "format": "gguf", "settings": { - "ctx_len": 4096, + "ctx_len": 2048, "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>" }, "parameters": { - "max_tokens": 4096 + "max_tokens": 2048 }, "metadata": { "author": "TinyLlama",