Skip to content
This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

Commit

Permalink
[Model Validation] Support Mistral-base-v0.2 (#192)
Browse files Browse the repository at this point in the history
  • Loading branch information
Zhenzhong1 authored Mar 27, 2024
1 parent 1051182 commit 43a87df
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 11 deletions.
13 changes: 6 additions & 7 deletions docs/supported_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ Neural Speed supports the following models:
</tr>
<tr>
<td><a href="https://huggingface.co/mistralai/Mistral-7B-v0.1" target="_blank" rel="noopener noreferrer">Mistral-7B</a>,
<a href="https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2" target="_blank" rel="noopener noreferrer">Mistral-7B-Instruct-v0.2</a>,
<a href="https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1" target="_blank" rel="noopener noreferrer">Mixtral-8x7B</a></td>
<td>✅</td>
<td>✅</td>
Expand Down Expand Up @@ -402,15 +403,15 @@ Neural Speed supports the following models:
<td></td>
</tr>
<tr>
<td><a href="https://huggingface.co/TheBloke/Mistral-7B-v0.1-GGUF" target="_blank" rel="noopener noreferrer">TheBloke/Mistral-7B-v0.1-GGUF</a>,
<td><a href="https://huggingface.co/TheBloke/Mistral-7B-v0.1-GGUF" target="_blank" rel="noopener noreferrer">TheBloke/Mistral-7B-v0.1-GGUF</a>, <a href="https://huggingface.co/TheBloke/Mistral-7B-v0.2-GGUF" target="_blank" rel="noopener noreferrer">TheBloke/Mistral-7B-v0.2-GGUF</a>,
<td>✅</td>
<td>✅</td>
<td>✅</td>
<td>✅</td>
<td></td>
</tr>
<tr>
<td><a href="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF" target="_blank" rel="noopener noreferrer">TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUFF</a>,
<td><a href="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF" target="_blank" rel="noopener noreferrer">TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF</a>
<td>✅</td>
<td>✅</td>
<td>✅</td>
Expand All @@ -425,18 +426,16 @@ Neural Speed supports the following models:
<td>✅</td>
<td></td>
</tr>
</tr>
<tr>
<td><a href="https://huggingface.co/codellama/CodeLlama-7b-hf" target="_blank" rel="noopener noreferrer">TheBloke/CodeLlama-7B-GGUF</a></td>
<td><a href="https://huggingface.co/codellama/CodeLlama-7b-hf" target="_blank" rel="noopener noreferrer">TheBloke/CodeLlama-7B-GGUF</a>, <a href="https://huggingface.co/codellama/CodeLlama-13b-hf" target="_blank" rel="noopener noreferrer">TheBloke/CodeLlama-13B-GGUF</a></td>
<td>✅</td>
<td>✅</td>
<td>✅</td>
<td>✅</td>
<td></td>
</tr>
</tr>
<tr>
<td><a href="https://huggingface.co/codellama/CodeLlama-13b-hf" target="_blank" rel="noopener noreferrer">TheBloke/CodeLlama-13B-GGUF</a></td>
<td><a href="https://huggingface.co/Qwen/Qwen1.5-7B-Chat-GGUF" target="_blank" rel="noopener noreferrer">Qwen1.5-7B-Chat-GGUF</a></td>
<td>✅</td>
<td>✅</td>
<td>✅</td>
Expand Down Expand Up @@ -470,7 +469,7 @@ Neural Speed supports the following models:
</tr>
<tr>
<td><a href="https://huggingface.co/Qwen/Qwen-7B-Chat" target="_blank" rel="noopener noreferrer">Qwen-7B-Chat</a>,
<a href="https://huggingface.co/Qwen/Qwen1.5-7B-Chat-GGUF" target="_blank" rel="noopener noreferrer">Qwen1.5-7B-Chat-GGUF</a></td>
<a href="https://huggingface.co/Qwen/Qwen1.5-7B-Chat" target="_blank" rel="noopener noreferrer">Qwen1.5-7B-Chat</a></td>
<td>✅</td>
<td>✅</td>
<td>✅</td>
Expand Down
6 changes: 3 additions & 3 deletions neural_speed/models/model_utils/model_files.h
Original file line number Diff line number Diff line change
Expand Up @@ -1138,15 +1138,15 @@ struct model_file_loader {
printf("%-16s %d.hparams.original_max_position_embeddings = %-30d\n", __func__, count++,
hparams.original_max_position_embeddings);
printf("%-16s %d.hparams.use_yarn = %-30d\n", __func__, count++, hparams.use_yarn);
unsigned int total = 25;
unsigned int total = 26;
if (count != total) {
fprintf(stderr, "The number of ne_parameters is wrong.\n");
fprintf(stderr, "The number of ne_parameters is wrong, total = %d, count = %d.\n", total, count);
}
}

void load_ne_vocab() {
unsigned int count = 0;
unsigned int ne_hparams_total = 25;
unsigned int ne_hparams_total = 26;
file.read_raw(&vocab.bos_token_id, sizeof(model_vocab::id));
file.read_raw(&vocab.eos_token_id, sizeof(model_vocab::id));
file.read_raw(&vocab.pad_token_id, sizeof(model_vocab::id));
Expand Down
2 changes: 1 addition & 1 deletion scripts/python_api_example_for_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def main(args_in: Optional[List[str]] = None) -> None:

gguf_path = args.model.as_posix()

prompt = "Once upon a time"
prompt = args.prompt
tokenizer = AutoTokenizer.from_pretrained(args.model_path, trust_remote_code=True)
inputs = tokenizer(prompt, return_tensors="pt").input_ids
streamer = TextStreamer(tokenizer)
Expand Down

0 comments on commit 43a87df

Please sign in to comment.