From 21d88dd5a49fd348b6ae85267b01154152efc02c Mon Sep 17 00:00:00 2001 From: rchan Date: Fri, 8 Sep 2023 18:16:15 +0100 Subject: [PATCH 1/4] llama2 chat with context and react demo --- models/llama-index-hack/Untitled.ipynb | 2682 +++++++++++ models/llama-index-hack/Untitled1.ipynb | 33 + models/llama-index-hack/llama2_ccp_chat.ipynb | 3991 +++++++++++------ 3 files changed, 5345 insertions(+), 1361 deletions(-) create mode 100644 models/llama-index-hack/Untitled.ipynb create mode 100644 models/llama-index-hack/Untitled1.ipynb diff --git a/models/llama-index-hack/Untitled.ipynb b/models/llama-index-hack/Untitled.ipynb new file mode 100644 index 00000000..58f1a0ea --- /dev/null +++ b/models/llama-index-hack/Untitled.ipynb @@ -0,0 +1,2682 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "4efa0972", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n", + "\n", + "from llama_cpp import Llama\n", + "\n", + "from llama_index import (\n", + " SimpleDirectoryReader,\n", + " LangchainEmbedding,\n", + " VectorStoreIndex,\n", + " PromptHelper,\n", + " LLMPredictor,\n", + " ServiceContext,\n", + " Document\n", + ")\n", + "from llama_index.llms import LlamaCPP\n", + "from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "39695618", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'0.8.21'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import llama_index\n", + "llama_index.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2138e968", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/Users/rchan/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/llama_index_proper/llama_index/__init__.py'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "llama_index.__file__" + ] + }, + { + "cell_type": "markdown", + "id": "d8b085fc", + "metadata": {}, + "source": [ + "Note: this notebook assumes that there is a `gguf_models/` folder in the reginald directory. Here we've downloaded the quantized 4-bit version of Llama2-13b-chat from [`TheBloke/Llama-2-13B-chat-GGML`](https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML). \n", + "\n", + "Note that we're currently running a version of `llama-cpp-python` which no longer supports `ggmlv3` model formats and has changed to `gguf`. 
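We need to convert the above to `gguf` format using the `convert-llama-ggmlv3-to-gguf.py` script in [`llama.cpp`](https://github.com/ggerganov/llama.cpp). A minimal sketch of that step is below (the `--input`/`--output` flag names are an assumption here; check the script's `--help`):\n", + "\n", + "```\n", + "# hypothetical invocation, run from a llama.cpp checkout; paths follow this notebook's naming\n", + "python convert-llama-ggmlv3-to-gguf.py --input llama-2-13b-chat.ggmlv3.q4_K_S.bin --output llama-2-13b-chat.gguf.q4_K_S.bin\n", + "```\n",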
+ "\n", + "## Quick example with llama-cpp-python" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3044b6b9", + "metadata": {}, + "outputs": [], + "source": [ + "llama_2_13b_chat_path = \"../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin\"" + ] + }, + { + "cell_type": "markdown", + "id": "cc1ad130", + "metadata": {}, + "source": [ + "## Using metal acceleration" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "21bee96c", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_loader: loaded meta data with 18 key-value pairs and 363 tensors from ../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin (version GGUF V2 (latest))\n", + "llama_model_loader: - tensor 0: token_embd.weight q4_K [ 5120, 32000, 1, 1 ]\n", + "llama_model_loader: - tensor 1: output_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 2: output.weight q6_K [ 5120, 32000, 1, 1 ]\n", + "llama_model_loader: - tensor 3: blk.0.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 4: blk.0.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 5: blk.0.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 6: blk.0.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 7: blk.0.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 8: blk.0.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 9: blk.0.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 10: blk.0.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 11: blk.0.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 12: blk.1.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 13: blk.1.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 14: blk.1.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 15: blk.1.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 16: blk.1.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 17: blk.1.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 18: blk.1.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 19: blk.1.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 20: blk.1.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 21: blk.2.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 22: blk.2.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 23: blk.2.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 24: blk.2.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 25: blk.2.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 26: blk.2.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 27: blk.2.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 28: blk.2.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 29: blk.2.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 30: blk.3.attn_q.weight 
q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 31: blk.3.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 32: blk.3.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 33: blk.3.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 34: blk.3.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 35: blk.3.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 36: blk.3.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 37: blk.3.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 38: blk.3.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 39: blk.4.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 40: blk.4.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 41: blk.4.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 42: blk.4.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 43: blk.4.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 44: blk.4.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 45: blk.4.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 46: blk.4.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 47: blk.4.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 48: blk.5.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 49: blk.5.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 50: blk.5.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 51: blk.5.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 52: blk.5.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 53: blk.5.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 54: blk.5.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 55: blk.5.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 56: blk.5.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 57: blk.6.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 58: blk.6.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 59: blk.6.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 60: blk.6.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 61: blk.6.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 62: blk.6.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 63: blk.6.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 64: blk.6.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 65: blk.6.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 66: blk.7.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 67: blk.7.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 68: blk.7.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 69: blk.7.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 70: blk.7.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 71: 
blk.7.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 72: blk.7.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 73: blk.7.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 74: blk.7.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 75: blk.8.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 76: blk.8.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 77: blk.8.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 78: blk.8.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 79: blk.8.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 80: blk.8.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 81: blk.8.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 82: blk.8.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 83: blk.8.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 84: blk.9.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 85: blk.9.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 86: blk.9.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 87: blk.9.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 88: blk.9.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 89: blk.9.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 90: blk.9.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 91: blk.9.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 92: blk.9.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 93: blk.10.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 94: blk.10.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 95: blk.10.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 96: blk.10.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 97: blk.10.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 98: blk.10.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 99: blk.10.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 100: blk.10.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 101: blk.10.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 102: blk.11.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 103: blk.11.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 104: blk.11.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 105: blk.11.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 106: blk.11.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 107: blk.11.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 108: blk.11.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 109: blk.11.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 110: blk.11.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 111: blk.12.attn_q.weight q4_K [ 5120, 5120, 1, 
1 ]\n", + "llama_model_loader: - tensor 112: blk.12.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 113: blk.12.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 114: blk.12.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 115: blk.12.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 116: blk.12.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 117: blk.12.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 118: blk.12.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 119: blk.12.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 120: blk.13.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 121: blk.13.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 122: blk.13.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 123: blk.13.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 124: blk.13.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 125: blk.13.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 126: blk.13.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 127: blk.13.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 128: blk.13.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 129: blk.14.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 130: blk.14.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 131: blk.14.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 132: blk.14.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 133: blk.14.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 134: blk.14.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 135: blk.14.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 136: blk.14.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 137: blk.14.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 138: blk.15.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 139: blk.15.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 140: blk.15.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 141: blk.15.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 142: blk.15.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 143: blk.15.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 144: blk.15.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 145: blk.15.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 146: blk.15.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 147: blk.16.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 148: blk.16.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 149: blk.16.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 150: blk.16.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 151: blk.16.attn_norm.weight f32 [ 5120, 
1, 1, 1 ]\n", + "llama_model_loader: - tensor 152: blk.16.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 153: blk.16.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 154: blk.16.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 155: blk.16.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 156: blk.17.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 157: blk.17.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 158: blk.17.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 159: blk.17.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 160: blk.17.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 161: blk.17.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 162: blk.17.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 163: blk.17.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 164: blk.17.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 165: blk.18.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 166: blk.18.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 167: blk.18.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 168: blk.18.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 169: blk.18.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 170: blk.18.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 171: blk.18.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 172: blk.18.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 173: blk.18.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 174: blk.19.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 175: blk.19.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 176: blk.19.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 177: blk.19.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 178: blk.19.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 179: blk.19.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 180: blk.19.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 181: blk.19.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 182: blk.19.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 183: blk.20.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 184: blk.20.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 185: blk.20.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 186: blk.20.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 187: blk.20.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 188: blk.20.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 189: blk.20.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 190: blk.20.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 191: blk.20.ffn_norm.weight f32 [ 
5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 192: blk.21.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 193: blk.21.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 194: blk.21.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 195: blk.21.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 196: blk.21.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 197: blk.21.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 198: blk.21.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 199: blk.21.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 200: blk.21.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 201: blk.22.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 202: blk.22.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 203: blk.22.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 204: blk.22.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 205: blk.22.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 206: blk.22.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 207: blk.22.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 208: blk.22.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 209: blk.22.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 210: blk.23.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 211: blk.23.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 212: blk.23.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 213: blk.23.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 214: blk.23.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 215: blk.23.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 216: blk.23.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 217: blk.23.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 218: blk.23.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 219: blk.24.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 220: blk.24.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 221: blk.24.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 222: blk.24.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 223: blk.24.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 224: blk.24.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 225: blk.24.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 226: blk.24.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 227: blk.24.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 228: blk.25.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 229: blk.25.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 230: blk.25.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 231: blk.25.attn_output.weight 
q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 232: blk.25.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 233: blk.25.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 234: blk.25.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 235: blk.25.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 236: blk.25.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 237: blk.26.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 238: blk.26.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 239: blk.26.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 240: blk.26.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 241: blk.26.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 242: blk.26.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 243: blk.26.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 244: blk.26.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 245: blk.26.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 246: blk.27.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 247: blk.27.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 248: blk.27.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 249: blk.27.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 250: blk.27.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 251: blk.27.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 252: blk.27.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 253: blk.27.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 254: blk.27.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 255: blk.28.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 256: blk.28.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 257: blk.28.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 258: blk.28.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 259: blk.28.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 260: blk.28.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 261: blk.28.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 262: blk.28.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 263: blk.28.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 264: blk.29.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 265: blk.29.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 266: blk.29.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 267: blk.29.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 268: blk.29.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 269: blk.29.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 270: blk.29.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 271: 
blk.29.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 272: blk.29.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 273: blk.30.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 274: blk.30.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 275: blk.30.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 276: blk.30.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 277: blk.30.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 278: blk.30.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 279: blk.30.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 280: blk.30.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 281: blk.30.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 282: blk.31.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 283: blk.31.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 284: blk.31.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 285: blk.31.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 286: blk.31.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 287: blk.31.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 288: blk.31.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 289: blk.31.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 290: blk.31.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 291: blk.32.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 292: blk.32.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 293: blk.32.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 294: blk.32.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 295: blk.32.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 296: blk.32.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 297: blk.32.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 298: blk.32.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 299: blk.32.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 300: blk.33.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 301: blk.33.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 302: blk.33.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 303: blk.33.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 304: blk.33.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 305: blk.33.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 306: blk.33.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 307: blk.33.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 308: blk.33.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 309: blk.34.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 310: blk.34.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 311: 
blk.34.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 312: blk.34.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 313: blk.34.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 314: blk.34.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 315: blk.34.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 316: blk.34.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 317: blk.34.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 318: blk.35.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 319: blk.35.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 320: blk.35.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 321: blk.35.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 322: blk.35.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 323: blk.35.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 324: blk.35.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 325: blk.35.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 326: blk.35.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 327: blk.36.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 328: blk.36.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 329: blk.36.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 330: blk.36.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 331: blk.36.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 332: blk.36.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 333: blk.36.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 334: blk.36.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 335: blk.36.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 336: blk.37.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 337: blk.37.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 338: blk.37.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 339: blk.37.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 340: blk.37.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 341: blk.37.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 342: blk.37.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 343: blk.37.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 344: blk.37.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 345: blk.38.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 346: blk.38.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 347: blk.38.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 348: blk.38.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 349: blk.38.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 350: blk.38.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - 
tensor 351: blk.38.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 352: blk.38.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 353: blk.38.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 354: blk.39.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 355: blk.39.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 356: blk.39.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 357: blk.39.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 358: blk.39.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 359: blk.39.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 360: blk.39.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 361: blk.39.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 362: blk.39.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - kv 0: general.architecture str \n", + "llama_model_loader: - kv 1: general.name str \n", + "llama_model_loader: - kv 2: general.description str \n", + "llama_model_loader: - kv 3: llama.context_length u32 \n", + "llama_model_loader: - kv 4: llama.embedding_length u32 \n", + "llama_model_loader: - kv 5: llama.block_count u32 \n", + "llama_model_loader: - kv 6: llama.feed_forward_length u32 \n", + "llama_model_loader: - kv 7: llama.rope.dimension_count u32 \n", + "llama_model_loader: - kv 8: llama.attention.head_count u32 \n", + "llama_model_loader: - kv 9: llama.attention.head_count_kv u32 \n", + "llama_model_loader: - kv 10: llama.attention.layer_norm_rms_epsilon f32 \n", + "llama_model_loader: - kv 11: tokenizer.ggml.model str \n", + "llama_model_loader: - kv 12: tokenizer.ggml.tokens arr \n", + "llama_model_loader: - kv 13: tokenizer.ggml.scores arr \n", + "llama_model_loader: - kv 14: tokenizer.ggml.token_type arr \n", + "llama_model_loader: - kv 15: tokenizer.ggml.unknown_token_id u32 \n", + "llama_model_loader: - kv 16: tokenizer.ggml.bos_token_id u32 \n", + "llama_model_loader: - kv 17: tokenizer.ggml.eos_token_id u32 \n", + "llama_model_loader: - type f32: 81 tensors\n", + "llama_model_loader: - type q4_K: 281 tensors\n", + "llama_model_loader: - type q6_K: 1 tensors\n", + "llm_load_print_meta: format = GGUF V2 (latest)\n", + "llm_load_print_meta: arch = llama\n", + "llm_load_print_meta: vocab type = SPM\n", + "llm_load_print_meta: n_vocab = 32000\n", + "llm_load_print_meta: n_merges = 0\n", + "llm_load_print_meta: n_ctx_train = 2048\n", + "llm_load_print_meta: n_ctx = 512\n", + "llm_load_print_meta: n_embd = 5120\n", + "llm_load_print_meta: n_head = 40\n", + "llm_load_print_meta: n_head_kv = 40\n", + "llm_load_print_meta: n_layer = 40\n", + "llm_load_print_meta: n_rot = 128\n", + "llm_load_print_meta: n_gqa = 1\n", + "llm_load_print_meta: f_norm_eps = 1.0e-05\n", + "llm_load_print_meta: f_norm_rms_eps = 5.0e-06\n", + "llm_load_print_meta: n_ff = 13824\n", + "llm_load_print_meta: freq_base = 10000.0\n", + "llm_load_print_meta: freq_scale = 1\n", + "llm_load_print_meta: model type = 13B\n", + "llm_load_print_meta: model ftype = mostly Q4_K - Medium (guessed)\n", + "llm_load_print_meta: model size = 13.02 B\n", + "llm_load_print_meta: general.name = llama-2-13b-chat.ggmlv3.q4_K_S.bin\n", + "llm_load_print_meta: BOS token = 1 '<s>'\n", + "llm_load_print_meta: EOS token = 2 '</s>'\n", + "llm_load_print_meta: UNK token = 0 '<unk>'\n", + 
"llm_load_print_meta: LF token = 13 '<0x0A>'\n", + "llm_load_tensors: ggml ctx size = 0.12 MB\n", + "llm_load_tensors: mem required = 7024.01 MB (+ 400.00 MB per state)\n", + "...................................................................................................\n", + "llama_new_context_with_model: kv self size = 400.00 MB\n", + "ggml_metal_init: allocating\n", + "ggml_metal_init: loading '/Users/rchan/opt/miniconda3/envs/reginald/lib/python3.11/site-packages/llama_cpp/ggml-metal.metal'\n", + "ggml_metal_init: loaded kernel_add 0x11cba37b0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_add_row 0x11cba3a10 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul 0x11cba3c70 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_row 0x11cba3ed0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_scale 0x11cba4130 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_silu 0x11cba4390 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_relu 0x11cba45f0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_gelu 0x11cba4850 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_soft_max 0x11cba4ab0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_diag_mask_inf 0x11cba4d10 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_get_rows_f16 0x11cba4f70 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_get_rows_q4_0 0x11cba51d0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_get_rows_q4_1 0x11cba5430 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_get_rows_q8_0 0x11cba5690 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_get_rows_q2_K 0x11cba58f0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_get_rows_q3_K 0x11cba5b50 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_get_rows_q4_K 0x11cba5db0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_get_rows_q5_K 0x11cba6010 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_get_rows_q6_K 0x11cba6270 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_rms_norm 0x11cba64d0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_norm 0x11cba6730 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x11cba6d20 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x11cba71c0 | th_max = 896 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x11cba7780 | th_max = 896 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mat_q8_0_f32 0x11cba7d40 | th_max = 768 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x11cba8300 | th_max = 640 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x11cba88c0 | th_max = 704 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x11cba9080 | th_max = 576 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x11cba98a0 | th_max = 576 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x11cba9e60 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mm_f16_f32 0x11cbaa460 | th_max = 768 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_0_f32 0x11cbaaa60 | th_max = 768 | th_width = 32\n", + "ggml_metal_init: loaded 
kernel_mul_mm_q8_0_f32 0x11cbab060 | th_max = 768 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_1_f32 0x11cbab660 | th_max = 768 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mm_q2_K_f32 0x11cbabc60 | th_max = 768 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mm_q3_K_f32 0x11cbac260 | th_max = 768 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_K_f32 0x11cbac860 | th_max = 768 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mm_q5_K_f32 0x11cbace60 | th_max = 704 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mm_q6_K_f32 0x11cbad460 | th_max = 704 | th_width = 32\n", + "ggml_metal_init: loaded kernel_rope 0x11cbad7e0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_alibi_f32 0x11cbadf00 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_cpy_f32_f16 0x11cbae5f0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_cpy_f32_f32 0x11cbaece0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_cpy_f16_f16 0x11cbaf3d0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", + "ggml_metal_init: hasUnifiedMemory = true\n", + "ggml_metal_init: maxTransferRate = built-in GPU\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_new_context_with_model: compute buffer total size = 91.47 MB\n", + "llama_new_context_with_model: max tensor size = 128.17 MB\n", + "ggml_metal_add_buffer: allocated 'data ' buffer, size = 7024.61 MB, ( 7025.05 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1.48 MB, ( 7026.53 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'kv ' buffer, size = 402.00 MB, ( 7428.53 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'alloc ' buffer, size = 90.02 MB, ( 7518.55 / 21845.34)\n", + "AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | \n" + ] + } + ], + "source": [ + "llm = Llama(model_path=llama_2_13b_chat_path, n_gpu_layers=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "885156d8", + "metadata": {}, + "outputs": [], + "source": [ + "prompt_example = \"Name all the planets in the solar system and state their distances to the sun\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "4bee457a", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 610.65 ms\n", + "llama_print_timings: sample time = 196.05 ms / 268 runs ( 0.73 ms per token, 1367.01 tokens per second)\n", + "llama_print_timings: prompt eval time = 610.63 ms / 17 tokens ( 35.92 ms per token, 27.84 tokens per second)\n", + "llama_print_timings: eval time = 14795.14 ms / 267 runs ( 55.41 ms per token, 18.05 tokens per second)\n", + "llama_print_timings: total time = 15977.86 ms\n" + ] + } + ], + "source": [ + "output = llm(prompt_example,\n", + " max_tokens=512,\n", + " echo=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "acef5902", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'id': 'cmpl-0026dd42-1853-4a6c-8e46-5a6e71740986', 'object': 'text_completion', 'created': 1694121339, 'model': '../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin', 'choices': [{'text': \"Name all the planets in the solar system 
and state their distances to the sun.\\n\\nThere are eight planets in the solar system: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune. Here is a list of each planet along with its distance from the Sun (in astronomical units or AU):\\n\\n1. Mercury - 0.4 AU (very close to the Sun)\\n2. Venus - 1.0 AU (just inside Earth's orbit)\\n3. Earth - 1.0 AU (the distance from the Earth to the Sun is called an astronomical unit, or AU)\\n4. Mars - 1.6 AU (about 1.5 times the distance from the Earth to the Sun)\\n5. Jupiter - 5.2 AU (about 5 times the distance from the Earth to the Sun)\\n6. Saturn - 9.5 AU (almost twice the distance from the Earth to the Sun)\\n7. Uranus - 19.0 AU (about 4 times the distance from the Earth to the Sun)\\n8. Neptune - 30.1 AU (more than 3 times the distance from the Earth to the Sun)\", 'index': 0, 'logprobs': None, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 17, 'completion_tokens': 267, 'total_tokens': 284}}\n" + ] + } + ], + "source": [ + "print(output)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6f1d16ea", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Name all the planets in the solar system and state their distances to the sun.\n", + "\n", + "There are eight planets in the solar system: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune. Here is a list of each planet along with its distance from the Sun (in astronomical units or AU):\n", + "\n", + "1. Mercury - 0.4 AU (very close to the Sun)\n", + "2. Venus - 1.0 AU (just inside Earth's orbit)\n", + "3. Earth - 1.0 AU (the distance from the Earth to the Sun is called an astronomical unit, or AU)\n", + "4. Mars - 1.6 AU (about 1.5 times the distance from the Earth to the Sun)\n", + "5. Jupiter - 5.2 AU (about 5 times the distance from the Earth to the Sun)\n", + "6. Saturn - 9.5 AU (almost twice the distance from the Earth to the Sun)\n", + "7. Uranus - 19.0 AU (about 4 times the distance from the Earth to the Sun)\n", + "8. 
Neptune - 30.1 AU (more than 3 times the distance from the Earth to the Sun)\n" + ] + } + ], + "source": [ + "print(output[\"choices\"][0][\"text\"])" + ] + }, + { + "cell_type": "markdown", + "id": "865df6bf", + "metadata": {}, + "source": [ + "## Using CPU" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "db096045", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_loader: loaded meta data with 18 key-value pairs and 363 tensors from ../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin (version GGUF V2 (latest))\n", + "llama_model_loader: - tensor 0: token_embd.weight q4_K [ 5120, 32000, 1, 1 ]\n", + "llama_model_loader: - tensor 1: output_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 2: output.weight q6_K [ 5120, 32000, 1, 1 ]\n", + "llama_model_loader: - tensor 3: blk.0.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 4: blk.0.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 5: blk.0.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 6: blk.0.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 7: blk.0.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 8: blk.0.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 9: blk.0.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 10: blk.0.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | \n", + "llama_model_loader: - tensor 11: blk.0.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 12: blk.1.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 13: blk.1.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 14: blk.1.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 15: blk.1.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 16: blk.1.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 17: blk.1.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 18: blk.1.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 19: blk.1.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 20: blk.1.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 21: blk.2.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 22: blk.2.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 23: blk.2.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 24: blk.2.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 25: blk.2.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 26: blk.2.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 27: blk.2.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 28: blk.2.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 29: blk.2.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 30: blk.3.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 31: blk.3.attn_k.weight q4_K [ 5120, 5120, 1, 1 
]\n", + "llama_model_loader: - tensor 32: blk.3.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 33: blk.3.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 34: blk.3.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 35: blk.3.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 36: blk.3.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 37: blk.3.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 38: blk.3.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 39: blk.4.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 40: blk.4.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 41: blk.4.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 42: blk.4.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 43: blk.4.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 44: blk.4.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 45: blk.4.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 46: blk.4.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 47: blk.4.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 48: blk.5.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 49: blk.5.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 50: blk.5.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 51: blk.5.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 52: blk.5.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 53: blk.5.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 54: blk.5.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 55: blk.5.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 56: blk.5.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 57: blk.6.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 58: blk.6.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 59: blk.6.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 60: blk.6.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 61: blk.6.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 62: blk.6.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 63: blk.6.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 64: blk.6.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 65: blk.6.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 66: blk.7.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 67: blk.7.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 68: blk.7.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 69: blk.7.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 70: blk.7.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 71: blk.7.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 72: blk.7.ffn_down.weight q4_K [ 
13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 73: blk.7.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 74: blk.7.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 75: blk.8.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 76: blk.8.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 77: blk.8.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 78: blk.8.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 79: blk.8.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 80: blk.8.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 81: blk.8.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 82: blk.8.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 83: blk.8.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 84: blk.9.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 85: blk.9.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 86: blk.9.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 87: blk.9.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 88: blk.9.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 89: blk.9.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 90: blk.9.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 91: blk.9.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 92: blk.9.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 93: blk.10.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 94: blk.10.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 95: blk.10.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 96: blk.10.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 97: blk.10.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 98: blk.10.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 99: blk.10.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 100: blk.10.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 101: blk.10.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 102: blk.11.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 103: blk.11.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 104: blk.11.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 105: blk.11.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 106: blk.11.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 107: blk.11.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 108: blk.11.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 109: blk.11.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 110: blk.11.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 111: blk.12.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 112: blk.12.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: 
- tensor 113: blk.12.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 114: blk.12.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 115: blk.12.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 116: blk.12.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 117: blk.12.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 118: blk.12.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 119: blk.12.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 120: blk.13.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 121: blk.13.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 122: blk.13.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 123: blk.13.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 124: blk.13.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 125: blk.13.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 126: blk.13.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 127: blk.13.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 128: blk.13.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 129: blk.14.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 130: blk.14.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 131: blk.14.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 132: blk.14.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 133: blk.14.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 134: blk.14.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 135: blk.14.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 136: blk.14.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 137: blk.14.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 138: blk.15.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 139: blk.15.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 140: blk.15.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 141: blk.15.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 142: blk.15.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 143: blk.15.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 144: blk.15.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 145: blk.15.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 146: blk.15.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 147: blk.16.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 148: blk.16.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 149: blk.16.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 150: blk.16.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 151: blk.16.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 152: blk.16.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + 
"llama_model_loader: - tensor 153: blk.16.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 154: blk.16.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 155: blk.16.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 156: blk.17.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 157: blk.17.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 158: blk.17.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 159: blk.17.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 160: blk.17.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 161: blk.17.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 162: blk.17.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 163: blk.17.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 164: blk.17.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 165: blk.18.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 166: blk.18.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 167: blk.18.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 168: blk.18.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 169: blk.18.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 170: blk.18.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 171: blk.18.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 172: blk.18.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 173: blk.18.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 174: blk.19.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 175: blk.19.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 176: blk.19.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 177: blk.19.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 178: blk.19.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 179: blk.19.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 180: blk.19.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 181: blk.19.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 182: blk.19.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 183: blk.20.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 184: blk.20.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 185: blk.20.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 186: blk.20.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 187: blk.20.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 188: blk.20.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 189: blk.20.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 190: blk.20.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 191: blk.20.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 192: blk.21.attn_q.weight q4_K [ 5120, 5120, 1, 1 
]\n", + "llama_model_loader: - tensor 193: blk.21.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 194: blk.21.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 195: blk.21.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 196: blk.21.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 197: blk.21.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 198: blk.21.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 199: blk.21.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 200: blk.21.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 201: blk.22.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 202: blk.22.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 203: blk.22.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 204: blk.22.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 205: blk.22.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 206: blk.22.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 207: blk.22.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 208: blk.22.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 209: blk.22.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 210: blk.23.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 211: blk.23.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 212: blk.23.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 213: blk.23.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 214: blk.23.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 215: blk.23.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 216: blk.23.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 217: blk.23.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 218: blk.23.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 219: blk.24.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 220: blk.24.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 221: blk.24.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 222: blk.24.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 223: blk.24.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 224: blk.24.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 225: blk.24.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 226: blk.24.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 227: blk.24.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 228: blk.25.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 229: blk.25.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 230: blk.25.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 231: blk.25.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 232: blk.25.attn_norm.weight f32 [ 5120, 1, 
1, 1 ]\n", + "llama_model_loader: - tensor 233: blk.25.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 234: blk.25.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 235: blk.25.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 236: blk.25.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 237: blk.26.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 238: blk.26.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 239: blk.26.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 240: blk.26.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 241: blk.26.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 242: blk.26.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 243: blk.26.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 244: blk.26.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 245: blk.26.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 246: blk.27.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 247: blk.27.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 248: blk.27.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 249: blk.27.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 250: blk.27.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 251: blk.27.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 252: blk.27.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 253: blk.27.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 254: blk.27.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 255: blk.28.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 256: blk.28.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 257: blk.28.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 258: blk.28.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 259: blk.28.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 260: blk.28.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 261: blk.28.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 262: blk.28.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 263: blk.28.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 264: blk.29.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 265: blk.29.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 266: blk.29.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 267: blk.29.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 268: blk.29.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 269: blk.29.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 270: blk.29.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 271: blk.29.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 272: blk.29.ffn_norm.weight f32 [ 
5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 273: blk.30.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 274: blk.30.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 275: blk.30.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 276: blk.30.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 277: blk.30.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 278: blk.30.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 279: blk.30.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 280: blk.30.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 281: blk.30.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 282: blk.31.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 283: blk.31.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 284: blk.31.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 285: blk.31.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 286: blk.31.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 287: blk.31.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 288: blk.31.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 289: blk.31.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 290: blk.31.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 291: blk.32.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 292: blk.32.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 293: blk.32.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 294: blk.32.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 295: blk.32.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 296: blk.32.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 297: blk.32.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 298: blk.32.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 299: blk.32.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 300: blk.33.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 301: blk.33.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 302: blk.33.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 303: blk.33.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 304: blk.33.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 305: blk.33.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 306: blk.33.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 307: blk.33.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 308: blk.33.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 309: blk.34.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 310: blk.34.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 311: blk.34.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 312: blk.34.attn_output.weight 
q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 313: blk.34.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 314: blk.34.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 315: blk.34.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 316: blk.34.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 317: blk.34.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 318: blk.35.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 319: blk.35.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 320: blk.35.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 321: blk.35.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 322: blk.35.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 323: blk.35.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 324: blk.35.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 325: blk.35.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 326: blk.35.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 327: blk.36.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 328: blk.36.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 329: blk.36.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 330: blk.36.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 331: blk.36.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 332: blk.36.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 333: blk.36.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 334: blk.36.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 335: blk.36.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 336: blk.37.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 337: blk.37.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 338: blk.37.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 339: blk.37.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 340: blk.37.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 341: blk.37.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 342: blk.37.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 343: blk.37.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 344: blk.37.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 345: blk.38.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 346: blk.38.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 347: blk.38.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 348: blk.38.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 349: blk.38.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 350: blk.38.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 351: blk.38.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 352: 
blk.38.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 353: blk.38.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 354: blk.39.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 355: blk.39.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 356: blk.39.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 357: blk.39.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 358: blk.39.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 359: blk.39.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 360: blk.39.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 361: blk.39.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 362: blk.39.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - kv 0: general.architecture str \n", + "llama_model_loader: - kv 1: general.name str \n", + "llama_model_loader: - kv 2: general.description str \n", + "llama_model_loader: - kv 3: llama.context_length u32 \n", + "llama_model_loader: - kv 4: llama.embedding_length u32 \n", + "llama_model_loader: - kv 5: llama.block_count u32 \n", + "llama_model_loader: - kv 6: llama.feed_forward_length u32 \n", + "llama_model_loader: - kv 7: llama.rope.dimension_count u32 \n", + "llama_model_loader: - kv 8: llama.attention.head_count u32 \n", + "llama_model_loader: - kv 9: llama.attention.head_count_kv u32 \n", + "llama_model_loader: - kv 10: llama.attention.layer_norm_rms_epsilon f32 \n", + "llama_model_loader: - kv 11: tokenizer.ggml.model str \n", + "llama_model_loader: - kv 12: tokenizer.ggml.tokens arr \n", + "llama_model_loader: - kv 13: tokenizer.ggml.scores arr \n", + "llama_model_loader: - kv 14: tokenizer.ggml.token_type arr \n", + "llama_model_loader: - kv 15: tokenizer.ggml.unknown_token_id u32 \n", + "llama_model_loader: - kv 16: tokenizer.ggml.bos_token_id u32 \n", + "llama_model_loader: - kv 17: tokenizer.ggml.eos_token_id u32 \n", + "llama_model_loader: - type f32: 81 tensors\n", + "llama_model_loader: - type q4_K: 281 tensors\n", + "llama_model_loader: - type q6_K: 1 tensors\n", + "llm_load_print_meta: format = GGUF V2 (latest)\n", + "llm_load_print_meta: arch = llama\n", + "llm_load_print_meta: vocab type = SPM\n", + "llm_load_print_meta: n_vocab = 32000\n", + "llm_load_print_meta: n_merges = 0\n", + "llm_load_print_meta: n_ctx_train = 2048\n", + "llm_load_print_meta: n_ctx = 512\n", + "llm_load_print_meta: n_embd = 5120\n", + "llm_load_print_meta: n_head = 40\n", + "llm_load_print_meta: n_head_kv = 40\n", + "llm_load_print_meta: n_layer = 40\n", + "llm_load_print_meta: n_rot = 128\n", + "llm_load_print_meta: n_gqa = 1\n", + "llm_load_print_meta: f_norm_eps = 1.0e-05\n", + "llm_load_print_meta: f_norm_rms_eps = 5.0e-06\n", + "llm_load_print_meta: n_ff = 13824\n", + "llm_load_print_meta: freq_base = 10000.0\n", + "llm_load_print_meta: freq_scale = 1\n", + "llm_load_print_meta: model type = 13B\n", + "llm_load_print_meta: model ftype = mostly Q4_K - Medium (guessed)\n", + "llm_load_print_meta: model size = 13.02 B\n", + "llm_load_print_meta: general.name = llama-2-13b-chat.ggmlv3.q4_K_S.bin\n", + "llm_load_print_meta: BOS token = 1 ''\n", + "llm_load_print_meta: EOS token = 2 ''\n", + "llm_load_print_meta: UNK token = 0 ''\n", + "llm_load_print_meta: LF token = 13 '<0x0A>'\n", + "llm_load_tensors: ggml ctx size = 0.12 MB\n", + "llm_load_tensors: 
mem required  = 7024.01 MB (+  400.00 MB per state)\n", + "...................................................................................................\n", + "llama_new_context_with_model: kv self size  =  400.00 MB\n", + "llama_new_context_with_model: compute buffer total size =   75.47 MB\n", + "ggml_metal_free: deallocating\n" + ] + } + ], + "source": [ + "llm = Llama(model_path=llama_2_13b_chat_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "291a4c26", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 1480.25 ms\n", + "llama_print_timings: sample time = 162.22 ms / 212 runs ( 0.77 ms per token, 1306.87 tokens per second)\n", + "llama_print_timings: prompt eval time = 1480.21 ms / 17 tokens ( 87.07 ms per token, 11.48 tokens per second)\n", + "llama_print_timings: eval time = 20115.90 ms / 211 runs ( 95.34 ms per token, 10.49 tokens per second)\n", + "llama_print_timings: total time = 22063.41 ms\n" + ] + } + ], + "source": [ + "output = llm(prompt_example,\n", + " max_tokens=512,\n", + " echo=True)" + ] + }, + { + "cell_type": "markdown", + "id": "8cdce188", + "metadata": {}, + "source": [ + "Inspecting the timings above, we can see that Metal acceleration is faster, as expected." + ] + },
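+ { + "cell_type": "markdown", + "id": "9f2c1a7b", + "metadata": {}, + "source": [ + "For reference, a minimal sketch (not taken from this notebook) of how the Metal offload can be requested explicitly, assuming `llama-cpp-python` was built with Metal support (e.g. `CMAKE_ARGS=\"-DLLAMA_METAL=on\" pip install llama-cpp-python`):\n", + "\n", + "```python\n", + "# hypothetical explicit-offload variant of the load above:\n", + "# n_gpu_layers=1 is enough for llama.cpp to offload the graph to Metal on Apple silicon\n", + "llm_metal = Llama(model_path=llama_2_13b_chat_path, n_gpu_layers=1)\n", + "```" + ] + },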
+ { + "cell_type": "code", + "execution_count": 12, + "id": "d7b74226", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Name all the planets in the solar system and state their distances to the sun.\n", + "1. Mercury - 58 million kilometers (36 million miles)\n", + "2. Venus - 108 million kilometers (67 million miles)\n", + "3. Earth - 149.6 million kilometers (92.96 million miles)\n", + "4. Mars - 225 million kilometers (140 million miles)\n", + "5. Jupiter - 778.3 million kilometers (483.8 million miles)\n", + "6. Saturn - 1.4 billion kilometers (870 million miles)\n", + "7. Uranus - 2.9 billion kilometers (1.8 billion miles)\n", + "8. Neptune - 4.5 billion kilometers (2.8 billion miles)\n", + "\n", + "Note that the distance of each planet from the Sun is measured in terms of their average distance, as the orbits of the planets are not perfectly circular and the distances vary slightly over the course of a year.\n" + ] + } + ], + "source": [ + "print(output[\"choices\"][0][\"text\"])" + ] + }, + { + "cell_type": "markdown", + "id": "b54b606c", + "metadata": {}, + "source": [ + "## Using Llama2 in `llama-index`" + ] + },
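+ { + "cell_type": "markdown", + "id": "3e8d4a2c", + "metadata": {}, + "source": [ + "The cell below wraps the same GGUF checkpoint in `llama-index`'s `LlamaCPP` class. As a rough sketch of that construction (argument values here are illustrative, not copied from this notebook; `context_window=3900` is inferred from the `n_ctx = 3900` reported in the log):\n", + "\n", + "```python\n", + "llm = LlamaCPP(\n", + "    model_path=llama_2_13b_chat_path,\n", + "    temperature=0.1,  # illustrative sampling temperature\n", + "    max_new_tokens=256,  # illustrative generation budget\n", + "    context_window=3900,  # matches n_ctx = 3900 in the log below\n", + "    # convert chat messages / completions into Llama2's [INST] prompt format\n", + "    messages_to_prompt=messages_to_prompt,\n", + "    completion_to_prompt=completion_to_prompt,\n", + "    verbose=True,\n", + ")\n", + "```" + ] + },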
+ { + "cell_type": "code", + "execution_count": 13, + "id": "1c8f0f7d", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_loader: loaded meta data with 18 key-value pairs and 363 tensors from ../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin (version GGUF V2 (latest))\n", + "[... per-tensor, kv, and type-count listing identical to the first load above; omitted for brevity ...]\n", + "llm_load_print_meta: format = GGUF V2 (latest)\n", + "llm_load_print_meta: arch = llama\n", + "llm_load_print_meta: vocab type = SPM\n", + "llm_load_print_meta: n_vocab = 32000\n", + "llm_load_print_meta: n_merges = 0\n", + "llm_load_print_meta: n_ctx_train = 2048\n", + "llm_load_print_meta: n_ctx = 3900\n", + "llm_load_print_meta: n_embd = 5120\n", + "llm_load_print_meta: n_head = 40\n", + "llm_load_print_meta: n_head_kv = 40\n", + "llm_load_print_meta: n_layer = 40\n", + "llm_load_print_meta: n_rot = 128\n", + "llm_load_print_meta: n_gqa = 1\n", + "llm_load_print_meta: f_norm_eps = 1.0e-05\n", + "llm_load_print_meta: f_norm_rms_eps = 5.0e-06\n", + "llm_load_print_meta: n_ff = 13824\n", + "llm_load_print_meta: freq_base = 10000.0\n", + "llm_load_print_meta: freq_scale = 1\n", + "llm_load_print_meta: model type = 13B\n", + "llm_load_print_meta: model ftype = mostly Q4_K - Medium (guessed)\n", + "llm_load_print_meta: model size = 13.02 B\n", + "llm_load_print_meta: general.name = llama-2-13b-chat.ggmlv3.q4_K_S.bin\n", + "llm_load_print_meta: BOS token = 1 ''\n", + "llm_load_print_meta: EOS token = 2 ''\n", + "llm_load_print_meta: UNK token = 0 ''\n", + "llm_load_print_meta: LF token = 13 '<0x0A>'\n", +
"llm_load_tensors: ggml ctx size = 0.12 MB\n", + "llm_load_tensors: mem required = 7024.01 MB (+ 3046.88 MB per state)\n", + "...................................................................................................\n", + "llama_new_context_with_model: kv self size = 3046.88 MB\n", + "ggml_metal_init: allocating\n", + "ggml_metal_init: loading '/Users/rchan/opt/miniconda3/envs/reginald/lib/python3.11/site-packages/llama_cpp/ggml-metal.metal'\n", + "ggml_metal_init: loaded kernel_add 0x14ca7e860 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_add_row 0x14ca7eac0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul 0x14ca7ed20 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_row 0x14ca7ef80 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_scale 0x14ca7dc60 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_silu 0x14ca7dec0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_relu 0x14ca7fb80 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_gelu 0x14ca7fde0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_soft_max 0x14ca80040 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_diag_mask_inf 0x14ca802a0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_get_rows_f16 0x14ca80500 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_get_rows_q4_0 0x14ca80760 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_get_rows_q4_1 0x14ca809c0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_get_rows_q8_0 0x14ca80c20 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_get_rows_q2_K 0x14ca80e80 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_get_rows_q3_K 0x14ca810e0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_get_rows_q4_K 0x14ca814e0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_get_rows_q5_K 0x14ca81740 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_get_rows_q6_K 0x14ca819a0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_rms_norm 0x14ca81c00 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_norm 0x14ca81e60 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x14ca82450 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x14ca828f0 | th_max = 896 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x14ca82d90 | th_max = 896 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mat_q8_0_f32 0x14ca83230 | th_max = 768 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x14ca836d0 | th_max = 640 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x14ca83b70 | th_max = 704 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x14ca84010 | th_max = 576 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x14ca844b0 | th_max = 576 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x14ca84950 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mm_f16_f32 0x14ca84e30 | th_max = 768 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_0_f32 0x14ca85310 | th_max = 768 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mm_q8_0_f32 0x14ca857f0 | th_max = 768 | th_width = 
32\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_1_f32 0x14ca85cd0 | th_max = 768 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mm_q2_K_f32 0x14ca861b0 | th_max = 768 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mm_q3_K_f32 0x14ca86690 | th_max = 768 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mm_q4_K_f32 0x1488391a0 | th_max = 768 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mm_q5_K_f32 0x148839d60 | th_max = 704 | th_width = 32\n", + "ggml_metal_init: loaded kernel_mul_mm_q6_K_f32 0x14883a240 | th_max = 704 | th_width = 32\n", + "ggml_metal_init: loaded kernel_rope 0x14883a4a0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_alibi_f32 0x14883aaa0 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_cpy_f32_f16 0x14883b190 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_cpy_f32_f32 0x14883b880 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: loaded kernel_cpy_f16_f16 0x14883bf70 | th_max = 1024 | th_width = 32\n", + "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", + "ggml_metal_init: hasUnifiedMemory = true\n", + "ggml_metal_init: maxTransferRate = built-in GPU\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_new_context_with_model: compute buffer total size = 356.16 MB\n", + "llama_new_context_with_model: max tensor size = 128.17 MB\n", + "ggml_metal_add_buffer: allocated 'data ' buffer, size = 7024.61 MB, ( 7025.11 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1.48 MB, ( 7026.59 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'kv ' buffer, size = 3048.88 MB, (10075.47 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'alloc ' buffer, size = 354.70 MBAVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | \n", + ", (10430.17 / 21845.34)\n" + ] + } + ], + "source": [ + "llm = LlamaCPP(\n", + " model_path=\"../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin\",\n", + " temperature=0.1,\n", + " max_new_tokens=1024,\n", + " # llama2 has a context window of 4096 tokens,\n", + " # but we set it lower to allow for some wiggle room\n", + " context_window=3900,\n", + " # kwargs to pass to __call__()\n", + " generate_kwargs={},\n", + " # kwargs to pass to __init__()\n", + " # set to at least 1 to use GPU\n", + " model_kwargs={\"n_gpu_layers\": 1},\n", + " # transform inputs into Llama2 format\n", + " messages_to_prompt=messages_to_prompt,\n", + " completion_to_prompt=completion_to_prompt,\n", + " verbose=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "ad388e17", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LLMMetadata(context_window=3900, num_output=1024, is_chat_model=False, is_function_calling_model=False, model_name='../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin')" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "llm.metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "107331f3", + "metadata": {}, + "outputs": [], + "source": [ + "handbook = pd.read_csv(\"../../data/public/handbook-scraped.csv\")\n", + "wiki = pd.read_csv(\"../../data/turing_internal/wiki-scraped.csv\")\n", + "# turing = pd.read_csv(\"../../data/public/turingacuk-no-boilerplate.csv\")\n", + "\n", + "text_list = 
list(handbook[\"body\"].astype(\"str\")) + list(wiki[\"body\"].astype(\"str\"))\n", + "documents = [Document(text=t) for t in text_list]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "6f0727c3", + "metadata": {}, + "outputs": [], + "source": [ + "hfemb = HuggingFaceEmbeddings()\n", + "embed_model = LangchainEmbedding(hfemb)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "ff676438", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "HuggingFaceEmbeddings(client=SentenceTransformer(\n", + " (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel \n", + " (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})\n", + " (2): Normalize()\n", + "), model_name='sentence-transformers/all-mpnet-base-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hfemb" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "00032b04", + "metadata": {}, + "outputs": [], + "source": [ + "# set number of output tokens\n", + "num_output = 1024\n", + "# set maximum input size\n", + "context_window = 4096\n", + "# set maximum chunk overlap\n", + "chunk_size_limit = 512\n", + "chunk_overlap_ratio = 0\n", + "\n", + "prompt_helper = PromptHelper(\n", + " context_window=context_window,\n", + " num_output=num_output,\n", + " chunk_size_limit=chunk_size_limit,\n", + " chunk_overlap_ratio=chunk_overlap_ratio,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "a4f3d57e", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + " service_context = ServiceContext.from_defaults(\n", + " llm_predictor=LLMPredictor(llm=llm),\n", + " embed_model=embed_model,\n", + " prompt_helper=prompt_helper,\n", + " chunk_size=chunk_size_limit,\n", + ")\n", + "\n", + "index = VectorStoreIndex.from_documents(\n", + " documents, service_context=service_context\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "7ddbe81c", + "metadata": {}, + "outputs": [], + "source": [ + "response_mode = \"simple_summarize\"" + ] + }, + { + "cell_type": "markdown", + "id": "d12e01b1", + "metadata": {}, + "source": [ + "## \"React\" chat engine" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "8a8b7edc", + "metadata": {}, + "outputs": [], + "source": [ + "chat_engine = index.as_chat_engine(chat_mode=\"react\",\n", + " verbose=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "1695904a", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;200m\u001b[1;3mThought: I need to use a tool to help me answer the question.\n", + "Action: query_engine_tool\n", + "Action Input: {'input': 'hello world'}\n", + "\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 7509.25 ms\n", + "llama_print_timings: sample time = 42.94 ms / 59 runs ( 0.73 ms per token, 1374.17 tokens per second)\n", + "llama_print_timings: prompt eval time = 7509.19 ms / 447 tokens ( 16.80 ms per token, 59.53 tokens per second)\n", + "llama_print_timings: eval time = 3475.79 ms / 58 runs ( 59.93 ms per token, 16.69 tokens per second)\n", + 
"llama_print_timings: total time = 11105.13 ms\n", + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36;1m\u001b[1;3mObservation: Hello! As a helpful, respectful, and honest assistant, I'm here to assist you with any questions or requests you may have. Based on the context information provided, it seems like you are looking for information about the Turing Institute and its various communications channels and events.\n", + "\n", + "To start, I can provide you with some general information about the Turing Institute and its activities. The Turing Institute is a research centre based in the UK that focuses on the development of algorithms and computational methods for solving complex problems in various fields, such as computer science, mathematics, and biology. The institute has a strong emphasis on interdisciplinary research and collaboration, and it hosts a variety of events and workshops to facilitate these interactions.\n", + "\n", + "In terms of communications channels, the Turing Institute uses a variety of platforms to keep its members and collaborators informed about its activities and research progress. These include email lists, Slack channels, and a website with information about ongoing projects, events, and research updates.\n", + "\n", + "Regarding events, the Turing Institute hosts a variety of activities throughout the year, including tech talks, workshops, and conferences. These events cover a range of topics related to the institute's research areas, and they are often open to members and non-members alike. You can find information about upcoming events on the Turing Institute's website or by checking the shared REG calendar.\n", + "\n", + "If you have any specific questions or requests, please feel free to ask, and I will do my best to assist you based on the information available to me.\n", + "\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 7509.25 ms\n", + "llama_print_timings: sample time = 244.08 ms / 339 runs ( 0.72 ms per token, 1388.92 tokens per second)\n", + "llama_print_timings: prompt eval time = 8954.56 ms / 537 tokens ( 16.68 ms per token, 59.97 tokens per second)\n", + "llama_print_timings: eval time = 21652.52 ms / 338 runs ( 64.06 ms per token, 15.61 tokens per second)\n", + "llama_print_timings: total time = 31331.83 ms\n", + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;200m\u001b[1;3mThought: I need to use a tool to help me answer the question.\n", + "Action: query_engine_tool\n", + "Action Input: {'input': 'what are the research areas of the Turing Institute?'}\n", + "\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 7509.25 ms\n", + "llama_print_timings: sample time = 32.78 ms / 46 runs ( 0.71 ms per token, 1403.21 tokens per second)\n", + "llama_print_timings: prompt eval time = 15197.21 ms / 832 tokens ( 18.27 ms per token, 54.75 tokens per second)\n", + "llama_print_timings: eval time = 2972.41 ms / 45 runs ( 66.05 ms per token, 15.14 tokens per second)\n", + "llama_print_timings: total time = 18262.37 ms\n", + "Llama.generate: prefix-match hit\n", + "\n", + "llama_print_timings: load time = 7509.25 ms\n", + "llama_print_timings: sample time = 216.46 ms / 309 runs ( 0.70 ms per token, 1427.55 tokens per second)\n", + "llama_print_timings: prompt 
eval time = 11918.74 ms / 689 tokens ( 17.30 ms per token, 57.81 tokens per second)\n", + "llama_print_timings: eval time = 20660.28 ms / 308 runs ( 67.08 ms per token, 14.91 tokens per second)\n", + "llama_print_timings: total time = 33190.48 ms\n", + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36;1m\u001b[1;3mObservation: Based on the updated context, the research areas of the Turing Institute are focused on impact-driven research in data science and AI, with a particular emphasis on diversity and openness. The institute prioritizes projects that have realistic plans for meeting the priority of reproducible research and open source software development. Additionally, the institute values collaboration across academic, social, ethnic, and racial backgrounds, as well as cross-disciplinary and cross-cultural collaborations.\n", + "\n", + "Some specific research areas that may be of interest to the Turing Institute include:\n", + "\n", + "1. Data-driven approaches to addressing societal challenges, such as healthcare, education, and environmental sustainability.\n", + "2. Development of new AI technologies and techniques, such as machine learning, natural language processing, and computer vision.\n", + "3. Applications of data science and AI in various domains, such as finance, transportation, and culture.\n", + "4. Studies on the ethical, social, and economic implications of data science and AI.\n", + "5. Collaborative research projects that bring together diverse perspectives and expertise from academia, industry, and civil society.\n", + "\n", + "The Turing Institute is open to a wide range of research proposals that align with its guiding principles and priorities, including pioneering approaches and innovative methodologies. The institute encourages the sharing of outputs and methodologies as openly as possible to facilitate collaboration and reach a diverse audience.\n", + "\n", + "In addition to these general research areas, the Turing Institute is also interested in exploring new ways of tackling research, talent, and approaches. 
Project briefs will need to specify the extent to which the proposed approaches or methodologies are innovative in the context of similar research being undertaken elsewhere, and a pioneering score will be assigned to each project.\n", + "\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 7509.25 ms\n", + "llama_print_timings: sample time = 277.50 ms / 396 runs ( 0.70 ms per token, 1427.03 tokens per second)\n", + "llama_print_timings: prompt eval time = 11135.08 ms / 627 tokens ( 17.76 ms per token, 56.31 tokens per second)\n", + "llama_print_timings: eval time = 26738.70 ms / 395 runs ( 67.69 ms per token, 14.77 tokens per second)\n", + "llama_print_timings: total time = 38670.27 ms\n", + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;200m\u001b[1;3mResponse: The research areas of the Turing Institute include data-driven approaches to addressing societal challenges, development of new AI technologies and techniques, applications of data science and AI in various domains, studies on the ethical, social, and economic implications of data science and AI, and collaborative research projects that bring together diverse perspectives and expertise from academia, industry, and civil society.\n", + "\u001b[0mThe research areas of the Turing Institute include data-driven approaches to addressing societal challenges, development of new AI technologies and techniques, applications of data science and AI in various domains, studies on the ethical, social, and economic implications of data science and AI, and collaborative research projects that bring together diverse perspectives and expertise from academia, industry, and civil society.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 7509.25 ms\n", + "llama_print_timings: sample time = 72.06 ms / 103 runs ( 0.70 ms per token, 1429.42 tokens per second)\n", + "llama_print_timings: prompt eval time = 27985.35 ms / 1286 tokens ( 21.76 ms per token, 45.95 tokens per second)\n", + "llama_print_timings: eval time = 7628.91 ms / 102 runs ( 74.79 ms per token, 13.37 tokens per second)\n", + "llama_print_timings: total time = 35813.39 ms\n" + ] + } + ], + "source": [ + "response = chat_engine.chat(\n", + " \"hello\"\n", + ")\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "d6fa2d0f", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;200m\u001b[1;3mResponse: As a new starter in the Research Engineering Group (REG) at the Alan Turing Institute, there are several things you can do to get started and make the most of your time here:\n", + "\n", + "1. Familiarize yourself with the institute's research areas and ongoing projects. This will help you understand the scope of the work being done and how you can contribute.\n", + "2. Meet with your supervisor and other members of the REG to discuss your background, interests, and goals. They can provide valuable guidance and help you get settled into your new role.\n", + "3. Review the institute's policies and procedures to ensure you understand the expectations and requirements of your position.\n", + "4. 
Attend orientation sessions and training programs offered by the institute to learn more about the Turing Institute's culture, resources, and research practices.\n", + "5. Start exploring the tools and technologies used in the REG, such as query_engine_tool, to get a sense of the technical capabilities available to you.\n", + "6. Begin identifying potential research projects that align with your interests and skills, and reach out to relevant researchers to discuss possibilities.\n", + "7. Consider joining relevant working groups or workshops to connect with other researchers and stay up-to-date on the latest developments in the field.\n", + "8. Start building your network within the institute by attending social events, seminars, and other activities that promote collaboration and knowledge sharing.\n", + "9. Familiarize yourself with the Turing Institute's publication and dissemination processes to understand how research is shared and recognized within the institution.\n", + "10. Stay open-minded, curious, and willing to learn, as the Turing Institute is a dynamic and interdisciplinary environment that values collaboration and innovation.\n", + "\u001b[0m As a new starter in the Research Engineering Group (REG) at the Alan Turing Institute, there are several things you can do to get started and make the most of your time here:\n", + "\n", + "1. Familiarize yourself with the institute's research areas and ongoing projects. This will help you understand the scope of the work being done and how you can contribute.\n", + "2. Meet with your supervisor and other members of the REG to discuss your background, interests, and goals. They can provide valuable guidance and help you get settled into your new role.\n", + "3. Review the institute's policies and procedures to ensure you understand the expectations and requirements of your position.\n", + "4. Attend orientation sessions and training programs offered by the institute to learn more about the Turing Institute's culture, resources, and research practices.\n", + "5. Start exploring the tools and technologies used in the REG, such as query_engine_tool, to get a sense of the technical capabilities available to you.\n", + "6. Begin identifying potential research projects that align with your interests and skills, and reach out to relevant researchers to discuss possibilities.\n", + "7. Consider joining relevant working groups or workshops to connect with other researchers and stay up-to-date on the latest developments in the field.\n", + "8. Start building your network within the institute by attending social events, seminars, and other activities that promote collaboration and knowledge sharing.\n", + "9. Familiarize yourself with the Turing Institute's publication and dissemination processes to understand how research is shared and recognized within the institution.\n", + "10. 
Stay open-minded, curious, and willing to learn, as the Turing Institute is a dynamic and interdisciplinary environment that values collaboration and innovation.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 7509.25 ms\n", + "llama_print_timings: sample time = 266.23 ms / 380 runs ( 0.70 ms per token, 1427.35 tokens per second)\n", + "llama_print_timings: prompt eval time = 2152.57 ms / 121 tokens ( 17.79 ms per token, 56.21 tokens per second)\n", + "llama_print_timings: eval time = 24885.95 ms / 379 runs ( 65.66 ms per token, 15.23 tokens per second)\n", + "llama_print_timings: total time = 27796.96 ms\n" + ] + } + ], + "source": [ + "response = chat_engine.chat(\"what should a new starter in the research engineering group (REG) at the Alan Turing Institute do?\")\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "3d028880", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;200m\u001b[1;3mResponse: No, I have not used the query engine yet. As a new starter in the Research Engineering Group at the Alan Turing Institute, I am still in the process of familiarizing myself with the tools and technologies available to me. However, I am eager to learn more about the query engine and how it can be used to support my research activities. Can you tell me more about the query engine and its capabilities?\n", + "\u001b[0m No, I have not used the query engine yet. As a new starter in the Research Engineering Group at the Alan Turing Institute, I am still in the process of familiarizing myself with the tools and technologies available to me. However, I am eager to learn more about the query engine and how it can be used to support my research activities. Can you tell me more about the query engine and its capabilities?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 7509.25 ms\n", + "llama_print_timings: sample time = 61.06 ms / 87 runs ( 0.70 ms per token, 1424.85 tokens per second)\n", + "llama_print_timings: prompt eval time = 570.47 ms / 20 tokens ( 28.52 ms per token, 35.06 tokens per second)\n", + "llama_print_timings: eval time = 5989.95 ms / 86 runs ( 69.65 ms per token, 14.36 tokens per second)\n", + "llama_print_timings: total time = 6726.84 ms\n" + ] + } + ], + "source": [ + "response = chat_engine.chat(\"Have you used the query engine yet?\")\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "1a01fe16", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;200m\u001b[1;3mResponse: You have asked me the following questions so far:\n", + "\n", + "1. What should a new starter in the Research Engineering Group (REG) at the Alan Turing Institute do?\n", + "2. Have you used the query engine yet?\n", + "\u001b[0m You have asked me the following questions so far:\n", + "\n", + "1. What should a new starter in the Research Engineering Group (REG) at the Alan Turing Institute do?\n", + "2. 
Have you used the query engine yet?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 7509.25 ms\n", + "llama_print_timings: sample time = 34.37 ms / 49 runs ( 0.70 ms per token, 1425.79 tokens per second)\n", + "llama_print_timings: prompt eval time = 600.34 ms / 20 tokens ( 30.02 ms per token, 33.31 tokens per second)\n", + "llama_print_timings: eval time = 3407.13 ms / 48 runs ( 70.98 ms per token, 14.09 tokens per second)\n", + "llama_print_timings: total time = 4101.47 ms\n" + ] + } + ], + "source": [ + "response = chat_engine.chat(\"What have I asked you so far?\")\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "id": "b9c86b3d", + "metadata": {}, + "source": [ + "Reset chat engine..." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "753b4c72", + "metadata": {}, + "outputs": [], + "source": [ + "chat_engine.reset()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "f7ca01f6", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "********** prompt: **********\n", + " [INST] What did I ask you before? [/INST]\n", + "********** completion_response: **********\n", + " I apologize, but I don't have the ability to remember previous conversations or keep track of what was asked. Each time you interact with me, it is a new and separate conversation. If you would like to ask something again, I will do my best to assist you.\n", + "********** chat: chat_response: **********\n", + " assistant: I apologize, but I don't have the ability to remember previous conversations or keep track of what was asked. Each time you interact with me, it is a new and separate conversation. If you would like to ask something again, I will do my best to assist you.\n", + "\u001b[38;5;200m\u001b[1;3mResponse: I apologize, but I don't have the ability to remember previous conversations or keep track of what was asked. Each time you interact with me, it is a new and separate conversation. If you would like to ask something again, I will do my best to assist you.\n", + "\u001b[0m********** _process_actions: current_reasoning: **********\n", + " [ResponseReasoningStep(thought='I can answer without any tools.', response=\" I apologize, but I don't have the ability to remember previous conversations or keep track of what was asked. Each time you interact with me, it is a new and separate conversation. If you would like to ask something again, I will do my best to assist you.\")]\n", + "********** _process_actions: is_done: **********\n", + " True\n", + "********** chat: reasoning_steps: **********\n", + " [ResponseReasoningStep(thought='I can answer without any tools.', response=\" I apologize, but I don't have the ability to remember previous conversations or keep track of what was asked. Each time you interact with me, it is a new and separate conversation. If you would like to ask something again, I will do my best to assist you.\")]\n", + "********** chat: is_done: **********\n", + " True\n", + " I apologize, but I don't have the ability to remember previous conversations or keep track of what was asked. Each time you interact with me, it is a new and separate conversation. 
If you would like to ask something again, I will do my best to assist you.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 931.62 ms\n", + "llama_print_timings: sample time = 42.12 ms / 60 runs ( 0.70 ms per token, 1424.43 tokens per second)\n", + "llama_print_timings: prompt eval time = 288.19 ms / 14 tokens ( 20.59 ms per token, 48.58 tokens per second)\n", + "llama_print_timings: eval time = 3228.61 ms / 59 runs ( 54.72 ms per token, 18.27 tokens per second)\n", + "llama_print_timings: total time = 3630.86 ms\n" + ] + } + ], + "source": [ + "response = chat_engine.chat(\"What did I ask you before?\")\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "id": "c3941e6f", + "metadata": {}, + "source": [ + "## React engine and asking it to use the query engine\n", + "\n", + "We saw that it didn't use the query engine above, but maybe we can force it to use it by asking explicitly..." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "a5629ffd", + "metadata": {}, + "outputs": [], + "source": [ + "chat_engine = index.as_chat_engine(chat_mode=\"react\",\n", + " response_mode=response_mode,\n", + " verbose=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "ded38211", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "********** prompt: **********\n", + " [INST] Please use the query engine. What should a new starter in the research engineering group do? [/INST]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "********** completion_response: **********\n", + " As a new starter in the research engineering group, there are several tasks and activities that you can focus on to get started with your role:\n", + "\n", + "1. Familiarize yourself with the research environment:\n", + "\t* Learn about the research projects that the group is currently working on, and the technologies and tools being used.\n", + "\t* Understand the goals and objectives of the research projects, and how they align with the overall research strategy of the organization.\n", + "2. Set up your workspace:\n", + "\t* Install any necessary software or tools required for your work.\n", + "\t* Set up your computer and other equipment to ensure that you have a comfortable and productive work environment.\n", + "3. Meet with your supervisor and colleagues:\n", + "\t* Schedule a meeting with your supervisor to discuss your role, responsibilities, and expectations.\n", + "\t* Introduce yourself to your colleagues and learn about their roles and areas of expertise.\n", + "4. Familiarize yourself with the organization's policies and procedures:\n", + "\t* Review the organization's policies and procedures related to research, engineering, and technology transfer.\n", + "\t* Understand the intellectual property policies and how they apply to your work.\n", + "5. Attend training and orientation sessions:\n", + "\t* Attend any training or orientation sessions that are provided by the organization to help you get started with your role.\n", + "6. Start contributing to research projects:\n", + "\t* Begin contributing to ongoing research projects, and start learning about the research process and how to work with the team.\n", + "7. 
Develop a plan for your research project:\n", + "\t* Work with your supervisor to develop a plan for your research project, including specific objectives, methodology, and timelines.\n", + "8. Start building your network:\n", + "\t* Attend relevant conferences, seminars, and workshops to learn about the latest developments in your field and to build relationships with other researchers and industry experts.\n", + "9. Keep up-to-date with relevant literature and trends:\n", + "\t* Regularly review scientific articles, conference proceedings, and other relevant publications to stay current with the latest developments in your field.\n", + "10. Communicate regularly with your supervisor and colleagues:\n", + "\t* Regularly communicate with your supervisor and colleagues to provide updates on your progress, ask questions, and seek feedback.\n", + "\n", + "By following these steps, you can get started with your role as a research engineer in the organization and begin contributing to the success of the research projects.\n", + "********** chat_response: **********\n", + " assistant: As a new starter in the research engineering group, there are several tasks and activities that you can focus on to get started with your role:\n", + "\n", + "1. Familiarize yourself with the research environment:\n", + "\t* Learn about the research projects that the group is currently working on, and the technologies and tools being used.\n", + "\t* Understand the goals and objectives of the research projects, and how they align with the overall research strategy of the organization.\n", + "2. Set up your workspace:\n", + "\t* Install any necessary software or tools required for your work.\n", + "\t* Set up your computer and other equipment to ensure that you have a comfortable and productive work environment.\n", + "3. Meet with your supervisor and colleagues:\n", + "\t* Schedule a meeting with your supervisor to discuss your role, responsibilities, and expectations.\n", + "\t* Introduce yourself to your colleagues and learn about their roles and areas of expertise.\n", + "4. Familiarize yourself with the organization's policies and procedures:\n", + "\t* Review the organization's policies and procedures related to research, engineering, and technology transfer.\n", + "\t* Understand the intellectual property policies and how they apply to your work.\n", + "5. Attend training and orientation sessions:\n", + "\t* Attend any training or orientation sessions that are provided by the organization to help you get started with your role.\n", + "6. Start contributing to research projects:\n", + "\t* Begin contributing to ongoing research projects, and start learning about the research process and how to work with the team.\n", + "7. Develop a plan for your research project:\n", + "\t* Work with your supervisor to develop a plan for your research project, including specific objectives, methodology, and timelines.\n", + "8. Start building your network:\n", + "\t* Attend relevant conferences, seminars, and workshops to learn about the latest developments in your field and to build relationships with other researchers and industry experts.\n", + "9. Keep up-to-date with relevant literature and trends:\n", + "\t* Regularly review scientific articles, conference proceedings, and other relevant publications to stay current with the latest developments in your field.\n", + "10. 
Communicate regularly with your supervisor and colleagues:\n", + "\t* Regularly communicate with your supervisor and colleagues to provide updates on your progress, ask questions, and seek feedback.\n", + "\n", + "By following these steps, you can get started with your role as a research engineer in the organization and begin contributing to the success of the research projects.\n", + "\u001b[38;5;200m\u001b[1;3mResponse: As a new starter in the research engineering group, there are several tasks and activities that you can focus on to get started with your role:\n", + "\n", + "1. Familiarize yourself with the research environment:\n", + "\t* Learn about the research projects that the group is currently working on, and the technologies and tools being used.\n", + "\t* Understand the goals and objectives of the research projects, and how they align with the overall research strategy of the organization.\n", + "2. Set up your workspace:\n", + "\t* Install any necessary software or tools required for your work.\n", + "\t* Set up your computer and other equipment to ensure that you have a comfortable and productive work environment.\n", + "3. Meet with your supervisor and colleagues:\n", + "\t* Schedule a meeting with your supervisor to discuss your role, responsibilities, and expectations.\n", + "\t* Introduce yourself to your colleagues and learn about their roles and areas of expertise.\n", + "4. Familiarize yourself with the organization's policies and procedures:\n", + "\t* Review the organization's policies and procedures related to research, engineering, and technology transfer.\n", + "\t* Understand the intellectual property policies and how they apply to your work.\n", + "5. Attend training and orientation sessions:\n", + "\t* Attend any training or orientation sessions that are provided by the organization to help you get started with your role.\n", + "6. Start contributing to research projects:\n", + "\t* Begin contributing to ongoing research projects, and start learning about the research process and how to work with the team.\n", + "7. Develop a plan for your research project:\n", + "\t* Work with your supervisor to develop a plan for your research project, including specific objectives, methodology, and timelines.\n", + "8. Start building your network:\n", + "\t* Attend relevant conferences, seminars, and workshops to learn about the latest developments in your field and to build relationships with other researchers and industry experts.\n", + "9. Keep up-to-date with relevant literature and trends:\n", + "\t* Regularly review scientific articles, conference proceedings, and other relevant publications to stay current with the latest developments in your field.\n", + "10. Communicate regularly with your supervisor and colleagues:\n", + "\t* Regularly communicate with your supervisor and colleagues to provide updates on your progress, ask questions, and seek feedback.\n", + "\n", + "By following these steps, you can get started with your role as a research engineer in the organization and begin contributing to the success of the research projects.\n", + "\u001b[0m As a new starter in the research engineering group, there are several tasks and activities that you can focus on to get started with your role:\n", + "\n", + "1. 
Familiarize yourself with the research environment:\n", + "\t* Learn about the research projects that the group is currently working on, and the technologies and tools being used.\n", + "\t* Understand the goals and objectives of the research projects, and how they align with the overall research strategy of the organization.\n", + "2. Set up your workspace:\n", + "\t* Install any necessary software or tools required for your work.\n", + "\t* Set up your computer and other equipment to ensure that you have a comfortable and productive work environment.\n", + "3. Meet with your supervisor and colleagues:\n", + "\t* Schedule a meeting with your supervisor to discuss your role, responsibilities, and expectations.\n", + "\t* Introduce yourself to your colleagues and learn about their roles and areas of expertise.\n", + "4. Familiarize yourself with the organization's policies and procedures:\n", + "\t* Review the organization's policies and procedures related to research, engineering, and technology transfer.\n", + "\t* Understand the intellectual property policies and how they apply to your work.\n", + "5. Attend training and orientation sessions:\n", + "\t* Attend any training or orientation sessions that are provided by the organization to help you get started with your role.\n", + "6. Start contributing to research projects:\n", + "\t* Begin contributing to ongoing research projects, and start learning about the research process and how to work with the team.\n", + "7. Develop a plan for your research project:\n", + "\t* Work with your supervisor to develop a plan for your research project, including specific objectives, methodology, and timelines.\n", + "8. Start building your network:\n", + "\t* Attend relevant conferences, seminars, and workshops to learn about the latest developments in your field and to build relationships with other researchers and industry experts.\n", + "9. Keep up-to-date with relevant literature and trends:\n", + "\t* Regularly review scientific articles, conference proceedings, and other relevant publications to stay current with the latest developments in your field.\n", + "10. Communicate regularly with your supervisor and colleagues:\n", + "\t* Regularly communicate with your supervisor and colleagues to provide updates on your progress, ask questions, and seek feedback.\n", + "\n", + "By following these steps, you can get started with your role as a research engineer in the organization and begin contributing to the success of the research projects.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 981.41 ms\n", + "llama_print_timings: sample time = 377.91 ms / 539 runs ( 0.70 ms per token, 1426.28 tokens per second)\n", + "llama_print_timings: prompt eval time = 307.84 ms / 23 tokens ( 13.38 ms per token, 74.72 tokens per second)\n", + "llama_print_timings: eval time = 31503.15 ms / 538 runs ( 58.56 ms per token, 17.08 tokens per second)\n", + "llama_print_timings: total time = 32916.09 ms\n" + ] + } + ], + "source": [ + "response = chat_engine.chat(\n", + " \"Please use the query engine. 
What should a new starter in the research engineering group do?\"\n", + ")\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "098a68a1", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "********** prompt: **********\n", + " [INST] <<SYS>>\n", + "\n", + "You are designed to help with a variety of tasks, from answering questions to providing summaries to other types of analyses.\n", + "\n", + "## Tools\n", + "You have access to a wide variety of tools. You are responsible for using\n", + "the tools in any sequence you deem appropriate to complete the task at hand.\n", + "This may require breaking the task into subtasks and using different tools\n", + "to complete each subtask.\n", + "\n", + "You have access to the following tools:\n", + "> Tool Name: query_engine_tool\n", + "Tool Description: Useful for running a natural language query\n", + "against a knowledge base and get back a natural language response.\n", + "\n", + "Tool Args: {'title': 'DefaultToolFnSchema', 'description': 'Default tool function Schema.', 'type': 'object', 'properties': {'input': {'title': 'Input', 'type': 'string'}}, 'required': ['input']}\n", + "\n", + "\n", + "## Output Format\n", + "To answer the question, please use the following format.\n", + "\n", + "```\n", + "Thought: I need to use a tool to help me answer the question.\n", + "Action: tool name (one of query_engine_tool)\n", + "Action Input: the input to the tool, in a JSON format representing the kwargs (e.g. {\"text\": \"hello world\", \"num_beams\": 5})\n", + "```\n", + "Please use a valid JSON format for the action input. Do NOT do this {'text': 'hello world', 'num_beams': 5}.\n", + "\n", + "If this format is used, the user will respond in the following format:\n", + "\n", + "```\n", + "Observation: tool response\n", + "```\n", + "\n", + "You should keep repeating the above format until you have enough information\n", + "to answer the question without using any more tools. At that point, you MUST respond\n", + "in the following format:\n", + "\n", + "```\n", + "Thought: I can answer without using any more tools.\n", + "Answer: [your answer here]\n", + "```\n", + "\n", + "## Current Conversation\n", + "Below is the current conversation consisting of interleaving human and assistant messages.\n", + "\n", + "\n", + "<</SYS>>\n", + "\n", + "Please use the query engine. What should a new starter in the research engineering group do? [/INST] assistant: As a new starter in the research engineering group, there are several tasks and activities that you can focus on to get started with your role:\n", + "\n", + "1. Familiarize yourself with the research environment:\n", + "\t* Learn about the research projects that the group is currently working on, and the technologies and tools being used.\n", + "\t* Understand the goals and objectives of the research projects, and how they align with the overall research strategy of the organization.\n", + "2. Set up your workspace:\n", + "\t* Install any necessary software or tools required for your work.\n", + "\t* Set up your computer and other equipment to ensure that you have a comfortable and productive work environment.\n", + "3. Meet with your supervisor and colleagues:\n", + "\t* Schedule a meeting with your supervisor to discuss your role, responsibilities, and expectations.\n", + "\t* Introduce yourself to your colleagues and learn about their roles and areas of expertise.\n", + "4. 
Familiarize yourself with the organization's policies and procedures:\n", + "\t* Review the organization's policies and procedures related to research, engineering, and technology transfer.\n", + "\t* Understand the intellectual property policies and how they apply to your work.\n", + "5. Attend training and orientation sessions:\n", + "\t* Attend any training or orientation sessions that are provided by the organization to help you get started with your role.\n", + "6. Start contributing to research projects:\n", + "\t* Begin contributing to ongoing research projects, and start learning about the research process and how to work with the team.\n", + "7. Develop a plan for your research project:\n", + "\t* Work with your supervisor to develop a plan for your research project, including specific objectives, methodology, and timelines.\n", + "8. Start building your network:\n", + "\t* Attend relevant conferences, seminars, and workshops to learn about the latest developments in your field and to build relationships with other researchers and industry experts.\n", + "9. Keep up-to-date with relevant literature and trends:\n", + "\t* Regularly review scientific articles, conference proceedings, and other relevant publications to stay current with the latest developments in your field.\n", + "10. Communicate regularly with your supervisor and colleagues:\n", + "\t* Regularly communicate with your supervisor and colleagues to provide updates on your progress, ask questions, and seek feedback.\n", + "\n", + "By following these steps, you can get started with your role as a research engineer in the organization and begin contributing to the success of the research projects. [INST] I want to specifically know about a new starter in the REG team at the Turing institute [/INST]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "********** completion_response: **********\n", + " As a new starter in the Research Engineering Group (REG) at the Turing Institute, there are several tasks and activities that you can focus on to get started with your role:\n", + "\n", + "1. Familiarize yourself with the Turing Institute's research environment:\n", + "\t* Learn about the research projects that the REG is currently working on, and the technologies and tools being used.\n", + "\t* Understand the goals and objectives of the research projects, and how they align with the overall research strategy of the Turing Institute.\n", + "2. Set up your workspace:\n", + "\t* Install any necessary software or tools required for your work.\n", + "\t* Set up your computer and other equipment to ensure that you have a comfortable and productive work environment.\n", + "3. Meet with your supervisor and REG colleagues:\n", + "\t* Schedule a meeting with your supervisor to discuss your role, responsibilities, and expectations.\n", + "\t* Introduce yourself to your REG colleagues and learn about their roles and areas of expertise.\n", + "4. Familiarize yourself with the Turing Institute's policies and procedures:\n", + "\t* Review the Turing Institute's policies and procedures related to research, engineering, and technology transfer.\n", + "\t* Understand the intellectual property policies and how they apply to your work.\n", + "5. 
Attend training and orientation sessions:\n",
+    "\t* Attend any training or orientation sessions that are provided by the Turing Institute to help you get started with your role.\n",
+    "6. Start contributing to REG projects:\n",
+    "\t* Begin contributing to ongoing REG projects, and start learning about the research process and how to work with the team.\n",
+    "7. Develop a plan for your research project:\n",
+    "\t* Work with your supervisor to develop a plan for your research project, including specific objectives, methodology, and timelines.\n",
+    "8. Start building your network:\n",
+    "\t* Attend relevant conferences, seminars, and workshops to learn about the latest developments in your field and to build relationships with other researchers and industry experts.\n",
+    "9. Keep up-to-date with relevant literature and trends:\n",
+    "\t* Regularly review scientific articles, conference proceedings, and other relevant publications to stay current with the latest developments in your field.\n",
+    "10. Communicate regularly with your supervisor and REG colleagues:\n",
+    "\t* Regularly communicate with your supervisor and REG colleagues to provide updates on your progress, ask questions, and seek feedback.\n",
+    "\n",
+    "By following these steps, you can get started with your role as a research engineer in the REG at the Turing Institute and begin contributing to the success of the research projects.\n",
+    "********** chat_response: **********\n",
+    " assistant: [the same ten-step response repeated verbatim by the chat-engine trace; omitted]\n",
+    "\u001b[38;5;200m\u001b[1;3mResponse: [the same response repeated verbatim by the verbose trace; omitted]\n",
+    "\u001b[0m [the same response repeated verbatim a final time by `print(response)`; omitted]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "llama_print_timings:        load time =   981.41 ms\n",
+      "llama_print_timings:      sample time =   403.86 ms /   577 runs   (    0.70 ms per token,  1428.72 tokens per second)\n",
+      "llama_print_timings: prompt eval time = 21240.71 ms /  1045 tokens (   20.33 ms per token,    49.20 tokens per second)\n",
+      "llama_print_timings:        eval time = 43054.91 ms /   576 runs   (   74.75 ms per token,    13.38 tokens per second)\n",
+      "llama_print_timings:       total time = 65498.11 ms\n"
+     ]
+    }
+   ],
+   "source": [
+    "response = chat_engine.chat(\n",
+    "    \"I want to specifically know about a new starter in the REG team at the Turing institute\"\n",
+    ")\n",
+    "print(response)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "26d96826",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c8643927",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "reginald",
+   "language": "python",
+   "name": "reginald"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/models/llama-index-hack/Untitled1.ipynb b/models/llama-index-hack/Untitled1.ipynb
new file mode 100644
index 00000000..7763533d
--- /dev/null
+++ b/models/llama-index-hack/Untitled1.ipynb
@@ -0,0 +1,33 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "456e1ccd",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "reginald",
+   "language": "python",
+   "name": "reginald"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
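The `chat_engine.chat(...)` call in the notebook above relies on a chat engine built in earlier cells that fall outside this excerpt. Below is a minimal sketch, not the notebook's actual cells, of how such a context chat engine can be assembled with llama-index 0.8.x over a llama.cpp model; the toy `Document`, the embedding choice, and all parameter values are illustrative assumptions (the gguf path matches the one used later in this patch).

```python
# Sketch only: assembling a "context" chat engine with llama-index 0.8.x.
from llama_index import Document, ServiceContext, VectorStoreIndex
from llama_index.llms import LlamaCPP

# Assumed model path, matching the Q6_K gguf referenced in this PR.
llm = LlamaCPP(
    model_path="../../gguf_models/llama-2-13b-chat.Q6_K.gguf",
    max_new_tokens=1024,
    model_kwargs={"n_gpu_layers": 1},  # offload layers to Metal on Apple silicon
)

# "local" resolves to a HuggingFace sentence-transformers embedding model.
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local")

# Placeholder document standing in for the notebook's real corpus.
index = VectorStoreIndex.from_documents(
    [Document(text="REG new starters attend an induction in their first week.")],
    service_context=service_context,
)

# A context chat engine retrieves relevant nodes and injects them into the
# system prompt before each chat turn, which is what produces the verbose
# "chat_response" trace seen in the output above.
chat_engine = index.as_chat_engine(chat_mode="context", verbose=True)
print(chat_engine.chat("What should a REG new starter do first?"))
```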
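The `llama2_ccp_chat.ipynb` diff that follows loads the model under a "Using metal acceleration" heading, producing the `llama_model_loader` log condensed below. A sketch of the kind of call that produces such a load, assuming `llama-cpp-python` was installed with Metal support (e.g. `CMAKE_ARGS="-DLLAMA_METAL=on" pip install llama-cpp-python`); `n_ctx` and the prompt are illustrative:

```python
from llama_cpp import Llama

# n_gpu_layers > 0 is what triggers the Metal offload reported in the log.
llm = Llama(
    model_path="../../gguf_models/llama-2-13b-chat.Q6_K.gguf",
    n_gpu_layers=1,
    n_ctx=4096,  # assumed context window
)

output = llm("Q: What is the capital of the UK? A:", max_tokens=32)
print(output["choices"][0]["text"])
```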
"output_type": "execute_result" } @@ -51,29 +50,27 @@ }, { "cell_type": "markdown", - "id": "131d2c1e", + "id": "bee0bc50", "metadata": {}, "source": [ - "Note: notebook assumes that in the reginald directory, there is a `gguf_models/` folder. Here we've downloaded the quantized 4-bit version of Llama2-13b-chat from [`TheBloke/Llama-2-13B-chat-GGML`](https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML). \n", - "\n", - "Note that we're currently running a version of `llama-cpp-python` which no longer supports `ggmmlv3` model formats and has changed to `gguf`. We need to convert the above to `gguf` format using the `convert-llama-ggmlv3-to-gguf.py` script in [`llama.cpp`](https://github.com/ggerganov/llama.cpp).\n", + "Note: notebook assumes that in the reginald directory, there is a `gguf_models/` folder. Here we've downloaded the quantized 6-bit version of Llama-2-13b-Chat from [`TheBloke/Llama-2-13b-Chat-GGUF`](https://huggingface.co/TheBloke/Llama-2-13b-Chat-GGUF). \n", "\n", "## Quick example with llama-cpp-python" ] }, { "cell_type": "code", - "execution_count": 19, - "id": "dc00ba0c", + "execution_count": 3, + "id": "6ae386f1", "metadata": {}, "outputs": [], "source": [ - "llama_2_13b_chat_path = \"../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin\"" + "llama_2_path = \"../../gguf_models/llama-2-13b-chat.Q6_K.gguf\"" ] }, { "cell_type": "markdown", - "id": "9fbc2dfd", + "id": "17f77bb0", "metadata": {}, "source": [ "## Using metal acceleration" @@ -81,8 +78,8 @@ }, { "cell_type": "code", - "execution_count": 35, - "id": "d4aace92", + "execution_count": 4, + "id": "821f26be", "metadata": { "scrolled": true }, @@ -91,397 +88,397 @@ "name": "stderr", "output_type": "stream", "text": [ - "llama_model_loader: loaded meta data with 18 key-value pairs and 363 tensors from ../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin (version GGUF V2 (latest))\n", - "llama_model_loader: - tensor 0: token_embd.weight q4_K [ 5120, 32000, 1, 1 ]\n", - "llama_model_loader: - tensor 1: output_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 2: output.weight q6_K [ 5120, 32000, 1, 1 ]\n", - "llama_model_loader: - tensor 3: blk.0.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 4: blk.0.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 5: blk.0.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 6: blk.0.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 7: blk.0.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 8: blk.0.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 9: blk.0.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 10: blk.0.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 11: blk.0.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 12: blk.1.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 13: blk.1.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 14: blk.1.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 15: blk.1.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 16: blk.1.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 17: blk.1.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 18: blk.1.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 19: 
blk.1.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 20: blk.1.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 21: blk.2.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 22: blk.2.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 23: blk.2.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 24: blk.2.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 25: blk.2.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 26: blk.2.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 27: blk.2.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 28: blk.2.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 29: blk.2.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 30: blk.3.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 31: blk.3.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 32: blk.3.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 33: blk.3.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 34: blk.3.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 35: blk.3.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 36: blk.3.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 37: blk.3.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 38: blk.3.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 39: blk.4.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 40: blk.4.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 41: blk.4.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 42: blk.4.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 43: blk.4.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 44: blk.4.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 45: blk.4.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 46: blk.4.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 47: blk.4.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 48: blk.5.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 49: blk.5.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 50: blk.5.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 51: blk.5.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 52: blk.5.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 53: blk.5.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 54: blk.5.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 55: blk.5.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 56: blk.5.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 57: blk.6.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 58: blk.6.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 59: blk.6.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - 
tensor 60: blk.6.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 61: blk.6.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 62: blk.6.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 63: blk.6.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 64: blk.6.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 65: blk.6.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 66: blk.7.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 67: blk.7.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 68: blk.7.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 69: blk.7.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 70: blk.7.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 71: blk.7.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 72: blk.7.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 73: blk.7.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 74: blk.7.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 75: blk.8.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 76: blk.8.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 77: blk.8.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 78: blk.8.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 79: blk.8.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 80: blk.8.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 81: blk.8.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 82: blk.8.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 83: blk.8.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 84: blk.9.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 85: blk.9.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 86: blk.9.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 87: blk.9.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 88: blk.9.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 89: blk.9.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 90: blk.9.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 91: blk.9.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 92: blk.9.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 93: blk.10.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 94: blk.10.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 95: blk.10.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 96: blk.10.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 97: blk.10.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 98: blk.10.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 99: blk.10.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 100: blk.10.ffn_up.weight q4_K [ 5120, 13824, 1, 1 
]\n", - "llama_model_loader: - tensor 101: blk.10.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 102: blk.11.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 103: blk.11.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 104: blk.11.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 105: blk.11.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 106: blk.11.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 107: blk.11.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 108: blk.11.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 109: blk.11.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 110: blk.11.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 111: blk.12.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 112: blk.12.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 113: blk.12.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 114: blk.12.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 115: blk.12.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 116: blk.12.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 117: blk.12.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 118: blk.12.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 119: blk.12.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 120: blk.13.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 121: blk.13.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 122: blk.13.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 123: blk.13.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 124: blk.13.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 125: blk.13.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 126: blk.13.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 127: blk.13.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 128: blk.13.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 129: blk.14.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 130: blk.14.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 131: blk.14.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 132: blk.14.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 133: blk.14.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 134: blk.14.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 135: blk.14.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 136: blk.14.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 137: blk.14.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 138: blk.15.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 139: blk.15.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 140: blk.15.attn_v.weight q4_K [ 5120, 5120, 1, 1 
]\n", - "llama_model_loader: - tensor 141: blk.15.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 142: blk.15.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 143: blk.15.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 144: blk.15.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 145: blk.15.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 146: blk.15.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 147: blk.16.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 148: blk.16.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 149: blk.16.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 150: blk.16.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 151: blk.16.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 152: blk.16.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 153: blk.16.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 154: blk.16.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 155: blk.16.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 156: blk.17.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 157: blk.17.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 158: blk.17.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 159: blk.17.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 160: blk.17.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 161: blk.17.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 162: blk.17.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 163: blk.17.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 164: blk.17.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 165: blk.18.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 166: blk.18.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 167: blk.18.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 168: blk.18.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 169: blk.18.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 170: blk.18.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 171: blk.18.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 172: blk.18.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 173: blk.18.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 174: blk.19.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 175: blk.19.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 176: blk.19.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 177: blk.19.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 178: blk.19.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 179: blk.19.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 180: blk.19.ffn_down.weight q4_K [ 13824, 
5120, 1, 1 ]\n", - "llama_model_loader: - tensor 181: blk.19.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 182: blk.19.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 183: blk.20.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 184: blk.20.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 185: blk.20.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 186: blk.20.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 187: blk.20.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 188: blk.20.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 189: blk.20.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 190: blk.20.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 191: blk.20.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 192: blk.21.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 193: blk.21.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 194: blk.21.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 195: blk.21.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 196: blk.21.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 197: blk.21.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 198: blk.21.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 199: blk.21.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 200: blk.21.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 201: blk.22.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 202: blk.22.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 203: blk.22.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 204: blk.22.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 205: blk.22.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 206: blk.22.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 207: blk.22.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 208: blk.22.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 209: blk.22.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 210: blk.23.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 211: blk.23.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 212: blk.23.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 213: blk.23.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 214: blk.23.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 215: blk.23.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 216: blk.23.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 217: blk.23.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 218: blk.23.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 219: blk.24.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 220: blk.24.attn_k.weight q4_K [ 5120, 
5120, 1, 1 ]\n", - "llama_model_loader: - tensor 221: blk.24.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 222: blk.24.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 223: blk.24.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 224: blk.24.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 225: blk.24.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 226: blk.24.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 227: blk.24.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 228: blk.25.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 229: blk.25.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 230: blk.25.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 231: blk.25.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 232: blk.25.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 233: blk.25.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 234: blk.25.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 235: blk.25.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 236: blk.25.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 237: blk.26.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 238: blk.26.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 239: blk.26.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 240: blk.26.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 241: blk.26.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 242: blk.26.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 243: blk.26.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 244: blk.26.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 245: blk.26.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 246: blk.27.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 247: blk.27.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 248: blk.27.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 249: blk.27.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 250: blk.27.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 251: blk.27.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 252: blk.27.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 253: blk.27.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 254: blk.27.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 255: blk.28.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 256: blk.28.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 257: blk.28.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 258: blk.28.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 259: blk.28.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 260: blk.28.ffn_gate.weight q4_K 
[ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 261: blk.28.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 262: blk.28.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 263: blk.28.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 264: blk.29.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 265: blk.29.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 266: blk.29.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 267: blk.29.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 268: blk.29.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 269: blk.29.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 270: blk.29.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 271: blk.29.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 272: blk.29.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 273: blk.30.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 274: blk.30.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 275: blk.30.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 276: blk.30.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 277: blk.30.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 278: blk.30.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 279: blk.30.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 280: blk.30.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 281: blk.30.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 282: blk.31.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 283: blk.31.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 284: blk.31.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 285: blk.31.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 286: blk.31.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 287: blk.31.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 288: blk.31.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 289: blk.31.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 290: blk.31.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 291: blk.32.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 292: blk.32.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 293: blk.32.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 294: blk.32.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 295: blk.32.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 296: blk.32.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 297: blk.32.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 298: blk.32.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 299: blk.32.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 300: blk.33.attn_q.weight 
q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 301: blk.33.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 302: blk.33.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 303: blk.33.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 304: blk.33.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 305: blk.33.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 306: blk.33.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 307: blk.33.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 308: blk.33.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 309: blk.34.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 310: blk.34.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 311: blk.34.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 312: blk.34.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 313: blk.34.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 314: blk.34.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 315: blk.34.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 316: blk.34.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 317: blk.34.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 318: blk.35.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 319: blk.35.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 320: blk.35.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 321: blk.35.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 322: blk.35.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 323: blk.35.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 324: blk.35.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 325: blk.35.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 326: blk.35.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 327: blk.36.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 328: blk.36.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 329: blk.36.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 330: blk.36.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 331: blk.36.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 332: blk.36.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 333: blk.36.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 334: blk.36.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 335: blk.36.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 336: blk.37.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 337: blk.37.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 338: blk.37.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 339: blk.37.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 340: 
blk.37.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 341: blk.37.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 342: blk.37.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 343: blk.37.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 344: blk.37.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 345: blk.38.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 346: blk.38.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 347: blk.38.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 348: blk.38.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 349: blk.38.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 350: blk.38.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 351: blk.38.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 352: blk.38.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 353: blk.38.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 354: blk.39.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 355: blk.39.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 356: blk.39.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 357: blk.39.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 358: blk.39.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 359: blk.39.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 360: blk.39.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 361: blk.39.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 362: blk.39.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: loaded meta data with 19 key-value pairs and 363 tensors from ../../gguf_models/llama-2-13b-chat.Q6_K.gguf (version GGUF V2 (latest))\n", + "llama_model_loader: - tensor 0: token_embd.weight q6_K [ 5120, 32000, 1, 1 ]\n", + "llama_model_loader: - tensor 1: blk.0.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 2: blk.0.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 3: blk.0.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 4: blk.0.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 5: blk.0.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 6: blk.0.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 7: blk.0.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 8: blk.0.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 9: blk.0.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 10: blk.1.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 11: blk.1.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 12: blk.1.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 13: blk.1.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 14: blk.1.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 15: blk.1.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + 
"llama_model_loader: - tensor 16: blk.1.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 17: blk.1.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 18: blk.1.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 19: blk.10.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 20: blk.10.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 21: blk.10.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 22: blk.10.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 23: blk.10.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 24: blk.10.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 25: blk.10.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 26: blk.10.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 27: blk.10.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 28: blk.11.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 29: blk.11.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 30: blk.11.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 31: blk.11.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 32: blk.11.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 33: blk.11.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 34: blk.11.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 35: blk.11.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 36: blk.11.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 37: blk.12.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 38: blk.12.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 39: blk.12.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 40: blk.12.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 41: blk.12.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 42: blk.12.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 43: blk.12.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 44: blk.12.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 45: blk.12.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 46: blk.13.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 47: blk.13.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 48: blk.13.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 49: blk.13.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 50: blk.13.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 51: blk.13.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 52: blk.13.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 53: blk.13.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 54: blk.13.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 55: blk.14.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 56: 
blk.14.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 57: blk.14.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 58: blk.14.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 59: blk.14.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 60: blk.14.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 61: blk.14.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 62: blk.14.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 63: blk.14.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 64: blk.15.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 65: blk.15.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 66: blk.2.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 67: blk.2.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 68: blk.2.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 69: blk.2.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 70: blk.2.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 71: blk.2.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 72: blk.2.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 73: blk.2.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 74: blk.2.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 75: blk.3.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 76: blk.3.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 77: blk.3.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 78: blk.3.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 79: blk.3.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 80: blk.3.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 81: blk.3.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 82: blk.3.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 83: blk.3.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 84: blk.4.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 85: blk.4.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 86: blk.4.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 87: blk.4.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 88: blk.4.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 89: blk.4.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 90: blk.4.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 91: blk.4.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 92: blk.4.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 93: blk.5.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 94: blk.5.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 95: blk.5.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 96: blk.5.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + 
"llama_model_loader: - tensor 97: blk.5.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 98: blk.5.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 99: blk.5.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 100: blk.5.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 101: blk.5.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 102: blk.6.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 103: blk.6.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 104: blk.6.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 105: blk.6.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 106: blk.6.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 107: blk.6.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 108: blk.6.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 109: blk.6.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 110: blk.6.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 111: blk.7.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 112: blk.7.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 113: blk.7.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 114: blk.7.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 115: blk.7.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 116: blk.7.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 117: blk.7.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 118: blk.7.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 119: blk.7.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 120: blk.8.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 121: blk.8.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 122: blk.8.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 123: blk.8.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 124: blk.8.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 125: blk.8.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 126: blk.8.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 127: blk.8.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 128: blk.8.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 129: blk.9.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 130: blk.9.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 131: blk.9.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 132: blk.9.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 133: blk.9.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 134: blk.9.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 135: blk.9.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 136: blk.9.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 137: 
blk.9.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 138: blk.15.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 139: blk.15.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 140: blk.15.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 141: blk.15.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 142: blk.15.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 143: blk.15.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 144: blk.15.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 145: blk.16.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 146: blk.16.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 147: blk.16.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 148: blk.16.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 149: blk.16.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 150: blk.16.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 151: blk.16.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 152: blk.16.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 153: blk.16.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 154: blk.17.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 155: blk.17.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 156: blk.17.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 157: blk.17.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 158: blk.17.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 159: blk.17.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 160: blk.17.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 161: blk.17.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 162: blk.17.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 163: blk.18.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 164: blk.18.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 165: blk.18.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 166: blk.18.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 167: blk.18.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 168: blk.18.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 169: blk.18.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 170: blk.18.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 171: blk.18.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 172: blk.19.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 173: blk.19.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 174: blk.19.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 175: blk.19.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 176: blk.19.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 
177: blk.19.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 178: blk.19.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 179: blk.19.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 180: blk.19.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 181: blk.20.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 182: blk.20.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 183: blk.20.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 184: blk.20.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 185: blk.20.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 186: blk.20.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 187: blk.20.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 188: blk.20.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 189: blk.20.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 190: blk.21.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 191: blk.21.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 192: blk.21.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 193: blk.21.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 194: blk.21.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 195: blk.21.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 196: blk.21.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 197: blk.21.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 198: blk.21.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 199: blk.22.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 200: blk.22.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 201: blk.22.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 202: blk.22.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 203: blk.22.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 204: blk.22.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 205: blk.22.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 206: blk.22.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 207: blk.22.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 208: blk.23.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 209: blk.23.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 210: blk.23.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 211: blk.23.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 212: blk.23.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 213: blk.23.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 214: blk.23.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 215: blk.23.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 216: blk.23.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - 
tensor 217: blk.24.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 218: blk.24.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 219: blk.24.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 220: blk.24.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 221: blk.24.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 222: blk.24.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 223: blk.24.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 224: blk.24.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 225: blk.24.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 226: blk.25.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 227: blk.25.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 228: blk.25.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 229: blk.25.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 230: blk.25.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 231: blk.25.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 232: blk.25.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 233: blk.25.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 234: blk.25.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 235: blk.26.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 236: blk.26.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 237: blk.26.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 238: blk.26.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 239: blk.26.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 240: blk.26.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 241: blk.26.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 242: blk.26.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 243: blk.26.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 244: blk.27.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 245: blk.27.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 246: blk.27.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 247: blk.27.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 248: blk.27.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 249: blk.27.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 250: blk.27.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 251: blk.27.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 252: blk.27.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 253: blk.28.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 254: blk.28.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 255: blk.28.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 256: blk.28.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + 
"llama_model_loader: - tensor 257: blk.28.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 258: blk.28.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 259: blk.28.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 260: blk.28.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 261: blk.28.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 262: blk.29.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 263: blk.29.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 264: blk.29.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 265: blk.29.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 266: blk.29.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 267: blk.29.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 268: blk.29.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 269: blk.29.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 270: blk.29.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 271: blk.30.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 272: blk.30.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 273: blk.30.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 274: blk.30.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 275: blk.30.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 276: blk.30.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 277: output.weight q6_K [ 5120, 32000, 1, 1 ]\n", + "llama_model_loader: - tensor 278: blk.30.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 279: blk.30.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 280: blk.30.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 281: blk.31.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 282: blk.31.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 283: blk.31.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 284: blk.31.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 285: blk.31.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 286: blk.31.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 287: blk.31.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 288: blk.31.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 289: blk.31.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 290: blk.32.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 291: blk.32.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 292: blk.32.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 293: blk.32.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 294: blk.32.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 295: blk.32.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 296: blk.32.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + 
"llama_model_loader: - tensor 297: blk.32.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 298: blk.32.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 299: blk.33.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 300: blk.33.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 301: blk.33.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 302: blk.33.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 303: blk.33.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 304: blk.33.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 305: blk.33.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 306: blk.33.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 307: blk.33.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 308: blk.34.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 309: blk.34.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 310: blk.34.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 311: blk.34.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 312: blk.34.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 313: blk.34.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 314: blk.34.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 315: blk.34.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 316: blk.34.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 317: blk.35.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 318: blk.35.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 319: blk.35.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 320: blk.35.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 321: blk.35.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 322: blk.35.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 323: blk.35.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 324: blk.35.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 325: blk.35.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 326: blk.36.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 327: blk.36.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 328: blk.36.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 329: blk.36.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 330: blk.36.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 331: blk.36.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 332: blk.36.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 333: blk.36.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 334: blk.36.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 335: blk.37.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 336: blk.37.ffn_down.weight q6_K [ 13824, 5120, 1, 1 
]\n", + "llama_model_loader: - tensor 337: blk.37.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 338: blk.37.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 339: blk.37.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 340: blk.37.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 341: blk.37.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 342: blk.37.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 343: blk.37.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 344: blk.38.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 345: blk.38.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 346: blk.38.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 347: blk.38.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 348: blk.38.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 349: blk.38.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 350: blk.38.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 351: blk.38.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 352: blk.38.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 353: blk.39.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 354: blk.39.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 355: blk.39.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 356: blk.39.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 357: blk.39.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 358: blk.39.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 359: blk.39.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 360: blk.39.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 361: blk.39.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 362: output_norm.weight f32 [ 5120, 1, 1, 1 ]\n", "llama_model_loader: - kv 0: general.architecture str \n", "llama_model_loader: - kv 1: general.name str \n", - "llama_model_loader: - kv 2: general.description str \n", - "llama_model_loader: - kv 3: llama.context_length u32 \n", - "llama_model_loader: - kv 4: llama.embedding_length u32 \n", - "llama_model_loader: - kv 5: llama.block_count u32 \n", - "llama_model_loader: - kv 6: llama.feed_forward_length u32 \n", - "llama_model_loader: - kv 7: llama.rope.dimension_count u32 \n", - "llama_model_loader: - kv 8: llama.attention.head_count u32 \n", - "llama_model_loader: - kv 9: llama.attention.head_count_kv u32 \n", - "llama_model_loader: - kv 10: llama.attention.layer_norm_rms_epsilon f32 \n", + "llama_model_loader: - kv 2: llama.context_length u32 \n", + "llama_model_loader: - kv 3: llama.embedding_length u32 \n", + "llama_model_loader: - kv 4: llama.block_count u32 \n", + "llama_model_loader: - kv 5: llama.feed_forward_length u32 \n", + "llama_model_loader: - kv 6: llama.rope.dimension_count u32 \n", + "llama_model_loader: - kv 7: llama.attention.head_count u32 \n", + "llama_model_loader: - kv 8: llama.attention.head_count_kv u32 \n", + "llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 \n", + 
"llama_model_loader: - kv 10: general.file_type u32 \n", "llama_model_loader: - kv 11: tokenizer.ggml.model str \n", "llama_model_loader: - kv 12: tokenizer.ggml.tokens arr \n", "llama_model_loader: - kv 13: tokenizer.ggml.scores arr \n", "llama_model_loader: - kv 14: tokenizer.ggml.token_type arr \n", - "llama_model_loader: - kv 15: tokenizer.ggml.unknown_token_id u32 \n", - "llama_model_loader: - kv 16: tokenizer.ggml.bos_token_id u32 \n", - "llama_model_loader: - kv 17: tokenizer.ggml.eos_token_id u32 \n", + "llama_model_loader: - kv 15: tokenizer.ggml.bos_token_id u32 \n", + "llama_model_loader: - kv 16: tokenizer.ggml.eos_token_id u32 \n", + "llama_model_loader: - kv 17: tokenizer.ggml.unknown_token_id u32 \n", + "llama_model_loader: - kv 18: general.quantization_version u32 \n", "llama_model_loader: - type f32: 81 tensors\n", - "llama_model_loader: - type q4_K: 281 tensors\n", - "llama_model_loader: - type q6_K: 1 tensors\n", + "llama_model_loader: - type q6_K: 282 tensors\n", "llm_load_print_meta: format = GGUF V2 (latest)\n", "llm_load_print_meta: arch = llama\n", "llm_load_print_meta: vocab type = SPM\n", "llm_load_print_meta: n_vocab = 32000\n", "llm_load_print_meta: n_merges = 0\n", - "llm_load_print_meta: n_ctx_train = 2048\n", + "llm_load_print_meta: n_ctx_train = 4096\n", "llm_load_print_meta: n_ctx = 512\n", "llm_load_print_meta: n_embd = 5120\n", "llm_load_print_meta: n_head = 40\n", @@ -490,95 +487,89 @@ "llm_load_print_meta: n_rot = 128\n", "llm_load_print_meta: n_gqa = 1\n", "llm_load_print_meta: f_norm_eps = 1.0e-05\n", - "llm_load_print_meta: f_norm_rms_eps = 5.0e-06\n", + "llm_load_print_meta: f_norm_rms_eps = 1.0e-05\n", "llm_load_print_meta: n_ff = 13824\n", "llm_load_print_meta: freq_base = 10000.0\n", "llm_load_print_meta: freq_scale = 1\n", "llm_load_print_meta: model type = 13B\n", - "llm_load_print_meta: model ftype = mostly Q4_K - Medium (guessed)\n", + "llm_load_print_meta: model ftype = mostly Q6_K\n", "llm_load_print_meta: model size = 13.02 B\n", - "llm_load_print_meta: general.name = llama-2-13b-chat.ggmlv3.q4_K_S.bin\n", + "llm_load_print_meta: general.name = LLaMA v2\n", "llm_load_print_meta: BOS token = 1 ''\n", "llm_load_print_meta: EOS token = 2 ''\n", "llm_load_print_meta: UNK token = 0 ''\n", "llm_load_print_meta: LF token = 13 '<0x0A>'\n", "llm_load_tensors: ggml ctx size = 0.12 MB\n", - "llm_load_tensors: mem required = 7024.01 MB (+ 400.00 MB per state)\n", - "...................................................................................................\n", + "llm_load_tensors: mem required = 10183.83 MB (+ 400.00 MB per state)\n", + "....................................................................................................\n", "llama_new_context_with_model: kv self size = 400.00 MB\n", "ggml_metal_init: allocating\n", "ggml_metal_init: loading '/Users/rchan/opt/miniconda3/envs/reginald/lib/python3.11/site-packages/llama_cpp/ggml-metal.metal'\n", - "ggml_metal_init: loaded kernel_add 0x1779615f0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_add_row 0x177961850 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul 0x177957ac0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_row 0x177957d20 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_scale 0x177957f80 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_silu 0x1779581e0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_relu 0x177955c40 | th_max = 1024 | 
th_width = 32\n",
- "[... removed ggml_metal_init kernel-load log lines (kernel_gelu through kernel_cpy_f16_f16, old buffer addresses) elided for brevity ...]\n",
+ "ggml_metal_init: loaded kernel_add 0x162fd5370 | th_max = 1024 | th_width = 32\n",
+ "[... added ggml_metal_init kernel-load log lines (kernel_add_row through kernel_cpy_f16_f16, new buffer addresses) elided for brevity ...]\n",
"ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n",
"ggml_metal_init: hasUnifiedMemory = true\n",
- "ggml_metal_init: maxTransferRate = built-in GPU\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
+ "ggml_metal_init: maxTransferRate = built-in GPU\n",
"llama_new_context_with_model: compute buffer total size = 91.47 MB\n",
"llama_new_context_with_model: max tensor size = 128.17 MB\n",
- "ggml_metal_add_buffer: allocated 'data ' buffer, size = 7024.61 MB, (14549.28 / 21845.34)\n",
- "ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1.48 MB, (14550.77 / 21845.34)\n",
- "ggml_metal_add_buffer: allocated 'kv ' buffer, size = 402.00 MB, (14952.77 / 21845.34)\n",
+ "ggml_metal_add_buffer: allocated 'data ' buffer, size = 10184.42 MB, (10184.86 / 21845.34)\n",
+ "ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1.48 MB, (10186.34 / 21845.34)\n",
+ "ggml_metal_add_buffer: allocated 'kv ' buffer, size = 402.00 MB, (10588.34 / 21845.34)\n",
"AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | \n",
- "ggml_metal_add_buffer: allocated 'alloc ' buffer, size = 90.02 MB, (15042.78 / 21845.34)\n"
+ "ggml_metal_add_buffer: allocated 'alloc ' buffer, size = 90.02 MB, (10678.36 / 21845.34)\n"
]
}
],
"source": [
- "llm = Llama(model_path=llama_2_13b_chat_path, n_gpu_layers=1)"
+ "llm = Llama(model_path=llama_2_path, n_gpu_layers=1)"
]
},
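+ {
+ "cell_type": "markdown",
+ "id": "3f2a9c41",
+ "metadata": {},
+ "source": [
+ "Optional sanity check (cell added for illustration, not part of the original run): confirm the context window the model was loaded with and round-trip a short string, assuming llama-cpp-python's `n_ctx`/`tokenize`/`detokenize` helpers."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8d1e5b72",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A minimal sketch using llama-cpp-python's tokenizer helpers.\n",
+ "print(llm.n_ctx())  # 512, per the llm_load_print_meta log above\n",
+ "tokens = llm.tokenize(b\"Name all the planets in the solar system.\")  # tokenize takes bytes\n",
+ "print(len(tokens), tokens[:8])\n",
+ "print(llm.detokenize(tokens))  # round-trips back to the original bytes"
+ ]
+ },
{
"cell_type": "code",
- "execution_count": 36,
- "id": "5aade599",
+ "execution_count": 5,
+ "id": "5c87e243",
"metadata": {},
"outputs": [],
"source": [
@@ -587,40 +578,40 @@
},
{
"cell_type": "code",
- "execution_count": 37,
- "id": "4c1cf394",
- "metadata": {},
+ "execution_count": 6,
+ "id": "1d0c196c",
+ "metadata": {
+ "scrolled": true
+ },
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
- "llama_print_timings: load time = 552.96 ms\n",
- "llama_print_timings: sample time = 179.55 ms / 256 runs ( 0.70 ms per token, 1425.80 tokens per second)\n",
- "llama_print_timings: prompt eval time = 552.93 ms / 17 tokens ( 32.53 ms per token, 30.75 tokens per second)\n",
- "llama_print_timings: eval time = 14287.03 ms / 255 runs ( 56.03 ms per token, 17.85 tokens per second)\n",
- "llama_print_timings: total time = 15342.45 ms\n"
+ "llama_print_timings: load time = 567.44 ms\n",
+ "llama_print_timings: sample time = 229.39 ms / 327 runs ( 0.70 ms per token, 1425.54 tokens per second)\n",
+ "llama_print_timings: prompt eval time = 567.41 ms / 17 tokens ( 33.38 ms per token, 29.96 tokens per second)\n",
+ "llama_print_timings: eval time = 30608.18 ms / 326 runs ( 93.89 ms per token, 10.65 tokens per second)\n",
+ "llama_print_timings: total time = 31823.15 ms\n"
]
}
],
"source": [
- "output = llm(prompt_example,\n",
- "             max_tokens=512,\n",
- "             echo=True)"
+ "output = llm(prompt_example, 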
max_tokens=512, echo=True)" ] }, { "cell_type": "code", - "execution_count": 38, - "id": "fc2c20fb", + "execution_count": 7, + "id": "d67c8401", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'id': 'cmpl-618337c4-bc4d-4818-99d4-e87893cf21fb', 'object': 'text_completion', 'created': 1693518842, 'model': '../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin', 'choices': [{'text': \"Name all the planets in the solar system and state their distances to the sun.\\n\\nThere are eight planets in the solar system: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune. Here is a list of each planet along with its distance from the Sun (in astronomical units or AU):\\n\\n1. Mercury - 0.4 AU (very close to the Sun)\\n2. Venus - 1.0 AU (just inside Earth's orbit)\\n3. Earth - 1.0 AU (the distance from the Earth to the Sun is called an astronomical unit, or AU)\\n4. Mars - 1.6 AU (about 1.5 times the distance from the Earth to the Sun)\\n5. Jupiter - 5.2 AU (about 5 times the distance from the Earth to the Sun)\\n6. Saturn - 9.5 AU (almost twice the distance from the Earth to the Sun)\\n7. Uranus - 19.0 AU (about 4 times the distance from the Earth to the Sun)\\n8. Neptune - 30.1 AU (more than \", 'index': 0, 'logprobs': None, 'finish_reason': 'length'}], 'usage': {'prompt_tokens': 17, 'completion_tokens': 256, 'total_tokens': 273}}\n" + "{'id': 'cmpl-35d1cb16-69fa-4ff5-9bf3-aaf53d3e866d', 'object': 'text_completion', 'created': 1694191828, 'model': '../../gguf_models/llama-2-13b-chat.Q6_K.gguf', 'choices': [{'text': 'Name all the planets in the solar system and state their distances to the sun.\\n\\nThere are eight planets in the solar system: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune. Here is a list of the planets in order from closest to farthest from the Sun:\\n\\n1. Mercury - 57,909,227 km (0.387 AU)\\n2. Venus - 108,208,930 km (0.723 AU)\\n3. Earth - 149,597,870 km (1 AU)\\n4. Mars - 225,000,000 km (1.381 AU)\\n5. Jupiter - 778,299,000 km (5.203 AU)\\n6. Saturn - 1,426,666,400 km (8.388 AU)\\n7. Uranus - 2,870,972,200 km (19.218 AU)\\n8. Neptune - 4,497,072,000 km (30.05 AU)\\n\\nNote: One astronomical unit (AU) is the average distance between the Earth and the Sun, which is approximately 93 million miles or 149.6 million kilometers.', 'index': 0, 'logprobs': None, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 17, 'completion_tokens': 326, 'total_tokens': 343}}\n" ] } ], @@ -630,8 +621,8 @@ }, { "cell_type": "code", - "execution_count": 39, - "id": "7cc677a8", + "execution_count": 8, + "id": "336029d1", "metadata": {}, "outputs": [ { @@ -640,16 +631,18 @@ "text": [ "Name all the planets in the solar system and state their distances to the sun.\n", "\n", - "There are eight planets in the solar system: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune. Here is a list of each planet along with its distance from the Sun (in astronomical units or AU):\n", + "There are eight planets in the solar system: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune. Here is a list of the planets in order from closest to farthest from the Sun:\n", + "\n", + "1. Mercury - 57,909,227 km (0.387 AU)\n", + "2. Venus - 108,208,930 km (0.723 AU)\n", + "3. Earth - 149,597,870 km (1 AU)\n", + "4. Mars - 225,000,000 km (1.381 AU)\n", + "5. Jupiter - 778,299,000 km (5.203 AU)\n", + "6. Saturn - 1,426,666,400 km (8.388 AU)\n", + "7. Uranus - 2,870,972,200 km (19.218 AU)\n", + "8. 
Neptune - 4,497,072,000 km (30.05 AU)\n",
"\n",
- "1. Mercury - 0.4 AU (very close to the Sun)\n",
- "2. Venus - 1.0 AU (just inside Earth's orbit)\n",
- "3. Earth - 1.0 AU (the distance from the Earth to the Sun is called an astronomical unit, or AU)\n",
- "4. Mars - 1.6 AU (about 1.5 times the distance from the Earth to the Sun)\n",
- "5. Jupiter - 5.2 AU (about 5 times the distance from the Earth to the Sun)\n",
- "6. Saturn - 9.5 AU (almost twice the distance from the Earth to the Sun)\n",
- "7. Uranus - 19.0 AU (about 4 times the distance from the Earth to the Sun)\n",
- "8. Neptune - 30.1 AU (more than \n"
+ "Note: One astronomical unit (AU) is the average distance between the Earth and the Sun, which is approximately 93 million miles or 149.6 million kilometers.\n"
]
}
],
"source": [
"print(output[\"choices\"][0][\"text\"])"
]
},
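+ {
+ "cell_type": "markdown",
+ "id": "b2c4e6a9",
+ "metadata": {},
+ "source": [
+ "The completion comes back as a plain dict, so the useful fields can be unpacked directly. A minimal sketch (cell added for illustration; key names taken from the printed output above):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d7f1a3c5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Unpack the llama-cpp-python completion dict shown above.\n",
+ "response_text = output[\"choices\"][0][\"text\"]\n",
+ "finish_reason = output[\"choices\"][0][\"finish_reason\"]  # 'stop' or 'length'\n",
+ "usage = output[\"usage\"]\n",
+ "print(finish_reason)\n",
+ "print(usage[\"prompt_tokens\"], usage[\"completion_tokens\"], usage[\"total_tokens\"])"
+ ]
+ },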
"llama_model_loader: - tensor 26: blk.2.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 27: blk.2.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 28: blk.2.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 29: blk.2.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 30: blk.3.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 31: blk.3.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 32: blk.3.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 33: blk.3.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 34: blk.3.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 35: blk.3.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 36: blk.3.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 37: blk.3.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 38: blk.3.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 39: blk.4.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 40: blk.4.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 41: blk.4.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 42: blk.4.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 43: blk.4.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 44: blk.4.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 45: blk.4.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 46: blk.4.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 47: blk.4.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 48: blk.5.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 49: blk.5.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 50: blk.5.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 51: blk.5.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 52: blk.5.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 53: blk.5.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 54: blk.5.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 55: blk.5.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 56: blk.5.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 57: blk.6.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 58: blk.6.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 59: blk.6.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 60: blk.6.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 61: blk.6.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 62: blk.6.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 63: blk.6.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 64: blk.6.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 65: blk.6.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 66: blk.7.attn_q.weight q4_K [ 5120, 5120, 
1, 1 ]\n", - "llama_model_loader: - tensor 67: blk.7.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 68: blk.7.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 69: blk.7.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 70: blk.7.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 71: blk.7.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 72: blk.7.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 73: blk.7.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 74: blk.7.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 75: blk.8.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 76: blk.8.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 77: blk.8.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 78: blk.8.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 79: blk.8.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 80: blk.8.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 81: blk.8.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 82: blk.8.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 83: blk.8.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 84: blk.9.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 85: blk.9.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 86: blk.9.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 87: blk.9.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 88: blk.9.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 89: blk.9.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 90: blk.9.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 91: blk.9.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 92: blk.9.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 93: blk.10.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 94: blk.10.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 95: blk.10.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 96: blk.10.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 97: blk.10.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 98: blk.10.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 99: blk.10.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 100: blk.10.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 101: blk.10.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 102: blk.11.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 103: blk.11.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 104: blk.11.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 105: blk.11.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 106: blk.11.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 107: 
blk.11.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 108: blk.11.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 109: blk.11.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 110: blk.11.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 111: blk.12.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 112: blk.12.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 113: blk.12.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 114: blk.12.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 115: blk.12.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 116: blk.12.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 117: blk.12.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 118: blk.12.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 119: blk.12.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 120: blk.13.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 121: blk.13.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 122: blk.13.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 123: blk.13.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 124: blk.13.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 125: blk.13.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 126: blk.13.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 127: blk.13.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 128: blk.13.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 129: blk.14.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 130: blk.14.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 131: blk.14.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 132: blk.14.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 133: blk.14.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 134: blk.14.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 135: blk.14.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 136: blk.14.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 137: blk.14.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 138: blk.15.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 139: blk.15.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 140: blk.15.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 141: blk.15.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 142: blk.15.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 143: blk.15.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 144: blk.15.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 145: blk.15.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 146: blk.15.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - 
tensor 147: blk.16.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 148: blk.16.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 149: blk.16.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 150: blk.16.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 151: blk.16.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 152: blk.16.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 153: blk.16.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 154: blk.16.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 155: blk.16.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 156: blk.17.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 157: blk.17.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 158: blk.17.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 159: blk.17.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 160: blk.17.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 161: blk.17.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 162: blk.17.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 163: blk.17.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 164: blk.17.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 165: blk.18.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 166: blk.18.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 167: blk.18.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 168: blk.18.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 169: blk.18.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 170: blk.18.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 171: blk.18.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 172: blk.18.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 173: blk.18.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 174: blk.19.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 175: blk.19.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 176: blk.19.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 177: blk.19.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 178: blk.19.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 179: blk.19.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 180: blk.19.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 181: blk.19.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 182: blk.19.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 183: blk.20.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 184: blk.20.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 185: blk.20.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 186: blk.20.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - 
"llama_model_loader: - tensor 187: blk.20.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 188: blk.20.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 189: blk.20.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 190: blk.20.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 191: blk.20.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 192: blk.21.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 193: blk.21.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 194: blk.21.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 195: blk.21.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 196: blk.21.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 197: blk.21.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 198: blk.21.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 199: blk.21.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 200: blk.21.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 201: blk.22.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 202: blk.22.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 203: blk.22.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 204: blk.22.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 205: blk.22.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 206: blk.22.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 207: blk.22.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 208: blk.22.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 209: blk.22.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 210: blk.23.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 211: blk.23.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 212: blk.23.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 213: blk.23.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 214: blk.23.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 215: blk.23.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 216: blk.23.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 217: blk.23.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 218: blk.23.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 219: blk.24.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 220: blk.24.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 221: blk.24.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 222: blk.24.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 223: blk.24.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 224: blk.24.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 225: blk.24.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 226: blk.24.ffn_up.weight q4_K [ 5120, 13824, 1, 1 
]\n", - "llama_model_loader: - tensor 227: blk.24.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 228: blk.25.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 229: blk.25.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 230: blk.25.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 231: blk.25.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 232: blk.25.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 233: blk.25.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 234: blk.25.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 235: blk.25.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 236: blk.25.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 237: blk.26.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 238: blk.26.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 239: blk.26.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 240: blk.26.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 241: blk.26.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 242: blk.26.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 243: blk.26.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 244: blk.26.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 245: blk.26.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 246: blk.27.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 247: blk.27.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 248: blk.27.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 249: blk.27.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 250: blk.27.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 251: blk.27.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 252: blk.27.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 253: blk.27.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 254: blk.27.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 255: blk.28.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 256: blk.28.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 257: blk.28.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 258: blk.28.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 259: blk.28.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 260: blk.28.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 261: blk.28.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 262: blk.28.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 263: blk.28.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 264: blk.29.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 265: blk.29.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 266: blk.29.attn_v.weight q4_K [ 5120, 5120, 1, 1 
]\n", - "llama_model_loader: - tensor 267: blk.29.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 268: blk.29.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 269: blk.29.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 270: blk.29.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 271: blk.29.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 272: blk.29.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 273: blk.30.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 274: blk.30.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 275: blk.30.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 276: blk.30.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 277: blk.30.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 278: blk.30.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 279: blk.30.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 280: blk.30.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 281: blk.30.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 282: blk.31.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 283: blk.31.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 284: blk.31.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 285: blk.31.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 286: blk.31.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 287: blk.31.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llamAVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "a_model_loader: - tensor 288: blk.31.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 289: blk.31.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 290: blk.31.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 291: blk.32.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 292: blk.32.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 293: blk.32.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 294: blk.32.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 295: blk.32.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 296: blk.32.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 297: blk.32.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 298: blk.32.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 299: blk.32.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 300: blk.33.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 301: blk.33.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 302: blk.33.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 303: blk.33.attn_output.weight q4_K [ 5120, 
5120, 1, 1 ]\n", - "llama_model_loader: - tensor 304: blk.33.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 305: blk.33.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 306: blk.33.ffn_down" + "llama_model_loader: loaded meta data with 19 key-value pairs and 363 tensors from ../../gguf_models/llama-2-13b-chat.Q6_K.gguf (version GGUF V2 (latest))\n", + "llama_model_loader: - tensor 0: token_embd.weight q6_K [ 5120, 32000, 1, 1 ]\n", + "llama_model_loader: - tensor 1: blk.0.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 2: blk.0.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 3: blk.0.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 4: blk.0.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 5: blk.0.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 6: blk.0.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 7: blk.0.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 8: blk.0.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 9: blk.0.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 10: blk.1.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 11: blk.1.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 12: blk.1.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 13: blk.1.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 14: blk.1.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 15: blk.1.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 16: blk.1.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 17: blk.1.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 18: blk.1.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 19: blk.10.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 20: blk.10.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 21: blk.10.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 22: blk.10.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 23: blk.10.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 24: blk.10.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 25: blk.10.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 26: blk.10.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 27: blk.10.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 28: blk.11.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 29: blk.11.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 30: blk.11.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 31: blk.11.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 32: blk.11.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 33: blk.11.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 34: blk.11.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 35: blk.11.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + 
"llama_model_loader: - tensor 36: blk.11.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 37: blk.12.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 38: blk.12.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 39: blk.12.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 40: blk.12.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 41: blk.12.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 42: blk.12.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 43: blk.12.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 44: blk.12.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 45: blk.12.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 46: blk.13.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 47: blk.13.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 48: blk.13.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 49: blk.13.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 50: blk.13.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 51: blk.13.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 52: blk.13.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 53: blk.13.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 54: blk.13.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 55: blk.14.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 56: blk.14.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 57: blk.14.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 58: blk.14.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 59: blk.14.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 60: blk.14.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 61: blk.14.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 62: blk.14.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 63: blk.14.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 64: blk.15.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 65: blk.15.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 66: blk.2.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 67: blk.2.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 68: blk.2.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 69: blk.2.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 70: blk.2.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 71: blk.2.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 72: blk.2.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 73: blk.2.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 74: blk.2.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 75: blk.3.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 76: 
blk.3.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 77: blk.3.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 78: blk.3.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 79: blk.3.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 80: blk.3.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 81: blk.3.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 82: blk.3.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 83: blk.3.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 84: blk.4.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 85: blk.4.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 86: blk.4.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 87: blk.4.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 88: blk.4.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 89: blk.4.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 90: blk.4.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 91: blk.4.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 92: blk.4.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 93: blk.5.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 94: blk.5.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 95: blk.5.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 96: blk.5.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 97: blk.5.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 98: blk.5.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 99: blk.5.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 100: blk.5.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 101: blk.5.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 102: blk.6.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 103: blk.6.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 104: blk.6.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 105: blk.6.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 106: blk.6.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 107: blk.6.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 108: blk.6.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 109: blk.6.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 110: blk.6.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 111: blk.7.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 112: blk.7.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 113: blk.7.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 114: blk.7.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 115: blk.7.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 116: blk.7.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + 
"llama_model_loader: - tensor 117: blk.7.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 118: blk.7.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 119: blk.7.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 120: blk.8.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 121: blk.8.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 122: blk.8.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 123: blk.8.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 124: blk.8.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 125: blk.8.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 126: blk.8.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 127: blk.8.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 128: blk.8.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 129: blk.9.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 130: blk.9.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 131: blk.9.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 132: blk.9.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 133: blk.9.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 134: blk.9.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 135: blk.9.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 136: blk.9.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 137: blk.9.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 138: blk.15.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 139: blk.15.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 140: blk.15.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 141: blk.15.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 142: blk.15.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 143: blk.15.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 144: blk.15.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 145: blk.16.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 146: blk.16.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 147: blk.16.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 148: blk.16.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 149: blk.16.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 150: blk.16.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 151: blk.16.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 152: blk.16.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 153: blk.16.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 154: blk.17.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 155: blk.17.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 156: blk.17.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + 
"llama_model_loader: - tensor 157: blk.17.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 158: blk.17.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 159: blk.17.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 160: blk.17.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 161: blk.17.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 162: blk.17.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 163: blk.18.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 164: blk.18.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 165: blk.18.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 166: blk.18.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 167: blk.18.ffn_norm.weight f32 AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | \n", + " [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 168: blk.18.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 169: blk.18.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 170: blk.18.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 171: blk.18.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 172: blk.19.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 173: blk.19.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 174: blk.19.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 175: blk.19.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 176: blk.19.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 177: blk.19.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 178: blk.19.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 179: blk.19.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 180: blk.19.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 181: blk.20.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 182: blk.20.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 183: blk.20.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 184: blk.20.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 185: blk.20.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 186: blk.20.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 187: blk.20.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 188: blk.20.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 189: blk.20.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 190: blk.21.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 191: blk.21.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 192: blk.21.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 193: blk.21.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 194: blk.21.ffn_norm.weight f32 
[ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 195: blk.21.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 196: blk.21.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 197: blk.21.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 198: blk.21.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 199: blk.22.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 200: blk.22.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 201: blk.22.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 202: blk.22.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 203: blk.22.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 204: blk.22.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 205: blk.22.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 206: blk.22.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 207: blk.22.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 208: blk.23.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 209: blk.23.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 210: blk.23.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 211: blk.23.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 212: blk.23.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 213: blk.23.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 214: blk.23.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 215: blk.23.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 216: blk.23.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 217: blk.24.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 218: blk.24.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 219: blk.24.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 220: blk.24.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 221: blk.24.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 222: blk.24.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 223: blk.24.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 224: blk.24.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 225: blk.24.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 226: blk.25.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 227: blk.25.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 228: blk.25.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 229: blk.25.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 230: blk.25.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 231: blk.25.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 232: blk.25.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 233: blk.25.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 234: blk.25.attn_v.weight 
q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 235: blk.26.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 236: blk.26.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 237: blk.26.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 238: blk.26.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 239: blk.26.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 240: blk.26.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 241: blk.26.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 242: blk.26.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 243: blk.26.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 244: blk.27.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 245: blk.27.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 246: blk.27.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 247: blk.27.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 248: blk.27.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 249: blk.27.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 250: blk.27.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 251: blk.27.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 252: blk.27.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 253: blk.28.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 254: blk.28.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 255: blk.28.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 256: blk.28.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 257: blk.28.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 258: blk.28.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 259: blk.28.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 260: blk.28.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 261: blk.28.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 262: blk.29.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 263: blk.29.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 264: blk.29.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 265: blk.29.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 266: blk.29.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 267: blk.29.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 268: blk.29.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 269: blk.29.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 270: blk.29.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 271: blk.30.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 272: blk.30.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 273: blk.30.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 274: 
blk.30.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 275: blk.30.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 276: blk.30.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 277: output.weight q6_K [ 5120, 32000, 1, 1 ]\n", + "llama_model_loader: - tensor 278: blk.30.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 279: blk.30.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 280: blk.30.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 281: blk.31.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 282: blk.31.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 283: blk.31.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 284: blk.31.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 285: blk.31.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 286: blk.31.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 287: blk.31.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 288: blk.31.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 289: blk.31.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 290: blk.32.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 291: blk.32.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 292: blk.32.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 293: blk.32.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 294: blk.32.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 295: blk.32.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 296: blk.32.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 297: blk.32.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 298: blk.32.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 299: blk.33.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 300: blk.33.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 301: blk.33.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 302: blk.33.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 303: blk.33.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 304: blk.33.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 305: blk.33.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 306: blk.33.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 307: blk.33.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 308: blk.34.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 309: blk.34.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 310: blk.34.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 311: blk.34.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 312: blk.34.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 313: blk.34.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 314: 
blk.34.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 315: blk.34.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 316: blk.34.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 317: blk.35.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 318: blk.35.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 319: blk.35.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 320: blk.35.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 321: blk.35.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 322: blk.35.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 323: blk.35.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 324: blk.35.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 325: blk.35.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 326: blk.36.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 327: blk.36.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 328: blk.36.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 329: blk.36.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 330: blk.36.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 331: blk.36.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 332: blk.36.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 333: blk.36.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 334: blk.36.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 335: blk.37.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 336: blk.37.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 337: blk.37.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 338: blk.37.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 339: blk.37.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 340: blk.37.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 341: blk.37.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 342: blk.37.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 343: blk.37.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 344: blk.38.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 345: blk.38.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 346: blk.38.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 347: blk.38.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 348: blk.38.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 349: blk.38.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 350: blk.38.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 351: blk.38.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 352: blk.38.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 353: blk.39.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 
354: blk.39.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 355: blk.39.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 356: blk.39.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 357: blk.39.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 358: blk.39.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 359: blk.39.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 360: blk.39.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 361: blk.39.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 362: output_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - kv 0: general.architecture str \n", + "llama_model_loader: - kv 1: general.name str \n", + "llama_model_loader: - kv 2: llama.context_length u32 \n", + "llama_model_loader: - kv 3: llama.embedding_length u32 \n", + "llama_model_loader: - kv 4: llama.block_count u32 \n", + "llama_model_loader: - kv 5: llama.feed_forward_length u32 \n", + "llama_model_loader: - kv 6: llama.rope.dimension_count u32 \n", + "llama_model_loader: - kv 7: llama.attention.head_count u32 \n", + "llama_model_loader: - kv 8: llama.attention.head_count_kv u32 \n", + "llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 \n", + "llama_model_loader: - kv 10: general.file_type u32 \n", + "llama_model_loader: - kv 11: tokenizer.ggml.model str \n", + "llama_model_loader: - kv 12: tokenizer.ggml.tokens arr \n", + "llama_model_loader: - kv 13: tokenizer.ggml.scores arr \n", + "llama_model_loader: - kv 14: tokenizer.ggml.token_type arr \n", + "llama_model_loader: - kv 15: tokenizer.ggml.bos_token_id u32 \n", + "llama_model_loader: - kv 16: tokenizer.ggml.eos_token_id u32 \n", + "llama_model_loader: - kv 17: tokenizer.ggml.unknown_token_id u32 \n", + "llama_model_loader: - kv 18: general.quantization_version u32 \n", + "llama_model_loader: - type f32: 81 tensors\n", + "llama_model_loader: - type q6_K: 282 tensors\n", + "llm_load_print_meta: format = GGUF V2 (latest)\n", + "llm_load_print_meta: arch = llama\n", + "llm_load_print_meta: vocab type = SPM\n", + "llm_load_print_meta: n_vocab = 32000\n", + "llm_load_print_meta: n_merges = 0\n", + "llm_load_print_meta: n_ctx_train = 4096\n", + "llm_load_print_meta: n_ctx = 512\n", + "llm_load_print_meta: n_embd = 5120\n", + "llm_load_print_meta: n_head = 40\n", + "llm_load_print_meta: n_head_kv = 40\n", + "llm_load_print_meta: n_layer = 40\n", + "llm_load_print_meta: n_rot = 128\n", + "llm_load_print_meta: n_gqa = 1\n", + "llm_load_print_meta: f_norm_eps = 1.0e-05\n", + "llm_load_print_meta: f_norm_rms_eps = 1.0e-05\n", + "llm_load_print_meta: n_ff = 13824\n", + "llm_load_print_meta: freq_base = 10000.0\n", + "llm_load_print_meta: freq_scale = 1\n", + "llm_load_print_meta: model type = 13B\n", + "llm_load_print_meta: model ftype = mostly Q6_K\n", + "llm_load_print_meta: model size = 13.02 B\n", + "llm_load_print_meta: general.name = LLaMA v2\n", + "llm_load_print_meta: BOS token = 1 ''\n", + "llm_load_print_meta: EOS token = 2 ''\n", + "llm_load_print_meta: UNK token = 0 ''\n", + "llm_load_print_meta: LF token = 13 '<0x0A>'\n", + "llm_load_tensors: ggml ctx size = 0.12 MB\n", + "llm_load_tensors: mem required = 10183.83 MB (+ 400.00 MB per state)\n", + "....................................................................................................\n", + "llama_new_context_with_model: kv 
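The `llm_load_print_meta` summary above is worth a glance: the loader reports `n_ctx = 512` (the `llama-cpp-python` default) against `n_ctx_train = 4096`, so longer prompts would be silently truncated. A minimal sketch of raising the context window at construction time (not a cell from this notebook; it assumes `llama_2_path` points at the Q6_K GGUF file shown in the log):

```python
from llama_cpp import Llama

# Raising n_ctx towards the trained context (4096 for Llama2) allows longer
# prompts, at the cost of a larger KV cache (the "kv self size" in the log).
llm = Llama(model_path=llama_2_path, n_ctx=4096)
```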
  {
   "cell_type": "code",
-   "execution_count": 43,
-   "id": "b9a0d5b2",
+   "execution_count": 10,
+   "id": "bcce886c",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-     "Llama.generate: prefix-match hit\n",
      "\n",
-     "llama_print_timings: load time = 1496.01 ms\n",
-     "llama_print_timings: sample time = 182.77 ms / 256 runs ( 0.71 ms per token, 1400.66 tokens per second)\n",
-     "llama_print_timings: prompt eval time = 0.00 ms / 1 tokens ( 0.00 ms per token, inf tokens per second)\n",
-     "llama_print_timings: eval time = 21947.42 ms / 256 runs ( 85.73 ms per token, 11.66 tokens per second)\n",
-     "llama_print_timings: total time = 22482.00 ms\n"
+     "llama_print_timings: load time = 2349.61 ms\n",
+     "llama_print_timings: sample time = 258.02 ms / 329 runs ( 0.78 ms per token, 1275.08 tokens per second)\n",
+     "llama_print_timings: prompt eval time = 2349.57 ms / 17 tokens ( 138.21 ms per token, 7.24 tokens per second)\n",
+     "llama_print_timings: eval time = 44262.75 ms / 328 runs ( 134.95 ms per token, 7.41 tokens per second)\n",
+     "llama_print_timings: total time = 47359.38 ms\n"
     ]
    }
   ],
   "source": [
-    "output = llm(prompt_example,\n",
-    "             max_tokens=512,\n",
-    "             echo=True)"
+    "output = llm(prompt_example, max_tokens=512, echo=True)"
   ]
  },
  {
   "cell_type": "markdown",
-   "id": "55489fed",
+   "id": "80968f48",
   "metadata": {},
   "source": [
    "By inspection, we can see that the Metal-accelerated run is faster, as expected."
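Rather than eyeballing the `llama_print_timings` blocks, the throughput of the two setups can be computed directly. A minimal sketch (not a cell from this notebook; it assumes a Metal-enabled build of `llama-cpp-python`, where `n_gpu_layers=0` keeps inference on the CPU and any positive value offloads layers to the GPU):

```python
import time

from llama_cpp import Llama

prompt = "Name all the planets in the solar system and state their distances to the sun."

# Compare CPU-only inference against Metal offload and report generation
# throughput; out["usage"]["completion_tokens"] counts the generated tokens.
for n_gpu_layers in (0, 1):
    llm = Llama(model_path=llama_2_path, n_gpu_layers=n_gpu_layers, verbose=False)
    start = time.perf_counter()
    out = llm(prompt, max_tokens=128)
    elapsed = time.perf_counter() - start
    tokens_per_sec = out["usage"]["completion_tokens"] / elapsed
    print(f"n_gpu_layers={n_gpu_layers}: {tokens_per_sec:.2f} tokens/sec")
```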
@@ -1033,8 +1126,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 45,
-   "id": "243ff1a4",
+   "execution_count": 11,
+   "id": "fd921ba0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Name all the planets in the solar system and state their distances to the sun.\n",
-     "\n",
-     "There are eight planets in our solar system, which are: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus and Neptune. Here's a list of the planets in order from closest to farthest from the Sun:\n",
-     "\n",
-     "1. Mercury - 57,909,227 km (0.38 AU)\n",
-     "2. Venus - 108,208,930 km (0.72 AU)\n",
-     "3. Earth - 149,597,890 km (1 AU)\n",
-     "4. Mars - 226,650,000 km (1.38 AU)\n",
-     "5. Jupiter - 778,299,000 km (5.2 AU)\n",
-     "6. Saturn - 1,426,666,400 km (9.5 AU)\n",
-     "7. Uranus - 2,870,972,200 km (19.2 AU)\n",
-     "8. Neptune - \n"
+     "The eight planets of our solar system are: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune. The average distance from each planet to the sun is as follows:\n",
+     "\n",
+     "1. Mercury - 57,909,227 km (0.387 AU)\n",
+     "2. Venus - 108,208,930 km (0.723 AU)\n",
+     "3. Earth - 149,597,890 km (1 AU)\n",
+     "4. Mars - 225,000,000 km (1.381 AU)\n",
+     "5. Jupiter - 778,299,000 km (5.203 AU)\n",
+     "6. Saturn - 1,426,666,400 km (8.388 AU)\n",
+     "7. Uranus - 2,870,972,200 km (19.18 AU)\n",
+     "8. Neptune - 4,497,072,000 km (30.05 AU)\n",
+     "\n",
+     "Note: One astronomical unit (AU) is the average distance between the Earth and the sun, which is about 149,600,000 km or 92,955,800 miles.\n"
     ]
    }
   ],
   "source": [
@@ -1062,7 +1156,7 @@
  },
  {
   "cell_type": "markdown",
-   "id": "e0014652",
+   "id": "c6830776",
   "metadata": {},
   "source": [
    "## Using Llama2 in `llama-index`"
   ]
  },
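Before the construction cell below, it may help to see the shape of the wrapper being built. A minimal sketch of exposing the local GGUF model to `llama-index` through its `LlamaCPP` class (0.8.x API; the specific parameter values here are illustrative assumptions, not the notebook's settings):

```python
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import completion_to_prompt, messages_to_prompt

# Wrap the local model so llama-index can drive it like any other LLM.
# model_kwargs are forwarded to llama-cpp-python's Llama constructor, and the
# two llama_utils helpers format chat messages into Llama2's [INST] template.
llm = LlamaCPP(
    model_path=llama_2_path,  # assumed: the same GGUF file as above
    temperature=0.1,
    max_new_tokens=256,
    context_window=3900,  # leave headroom below the 4096 trained context
    model_kwargs={"n_gpu_layers": 1},
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
```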
  {
   "cell_type": "code",
-   "execution_count": 46,
-   "id": "b45709e0",
+   "execution_count": 12,
+   "id": "bfb1fd3b",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-     "llama_model_loader: loaded meta data with 18 key-value pairs and 363 tensors from ../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin (version GGUF V2 (latest))\n",
      [... the removed "-" q4_K tensor listing (tensors 0-311) elided: it is a verbatim repeat of the loader output already shown for this file earlier in the notebook ...]
-     "llama_model_loader: - tensor 312: blk.34.attn_output.weight q4_K [ 
5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 313: blk.34.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 314: blk.34.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 315: blk.34.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 316: blk.34.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 317: blk.34.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 318: blk.35.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 319: blk.35.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 320: blk.35.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 321: blk.35.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 322: blk.35.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 323: blk.35.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 324: blk.35.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 325: blk.35.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 326: blk.35.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 327: blk.36.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 328: blk.36.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 329: blk.36.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 330: blk.36.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 331: blk.36.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 332: blk.36.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 333: blk.36.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 334: blk.36.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 335: blk.36.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 336: blk.37.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 337: blk.37.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 338: blk.37.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 339: blk.37.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 340: blk.37.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 341: blk.37.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 342: blk.37.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 343: blk.37.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 344: blk.37.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 345: blk.38.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 346: blk.38.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 347: blk.38.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 348: blk.38.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 349: blk.38.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 350: blk.38.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 351: blk.38.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 352: blk.38.ffn_up.weight 
q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 353: blk.38.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 354: blk.39.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 355: blk.39.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 356: blk.39.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 357: blk.39.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 358: blk.39.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 359: blk.39.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 360: blk.39.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 361: blk.39.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 362: blk.39.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: loaded meta data with 19 key-value pairs and 363 tensors from ../../gguf_models/llama-2-13b-chat.Q6_K.gguf (version GGUF V2 (latest))\n", + "llama_model_loader: - tensor 0: token_embd.weight q6_K [ 5120, 32000, 1, 1 ]\n", + "llama_model_loader: - tensor 1: blk.0.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 2: blk.0.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 3: blk.0.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 4: blk.0.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 5: blk.0.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 6: blk.0.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 7: blk.0.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 8: blk.0.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 9: blk.0.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 10: blk.1.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 11: blk.1.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 12: blk.1.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 13: blk.1.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 14: blk.1.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 15: blk.1.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 16: blk.1.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 17: blk.1.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 18: blk.1.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 19: blk.10.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 20: blk.10.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 21: blk.10.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 22: blk.10.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 23: blk.10.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 24: blk.10.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 25: blk.10.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 26: blk.10.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 27: blk.10.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 28: 
blk.11.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 29: blk.11.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 30: blk.11.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 31: blk.11.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 32: blk.11.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 33: blk.11.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 34: blk.11.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 35: blk.11.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 36: blk.11.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 37: blk.12.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 38: blk.12.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 39: blk.12.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 40: blk.12.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 41: blk.12.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 42: blk.12.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 43: blk.12.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 44: blk.12.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 45: blk.12.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 46: blk.13.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 47: blk.13.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 48: blk.13.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 49: blk.13.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 50: blk.13.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 51: blk.13.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 52: blk.13.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 53: blk.13.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 54: blk.13.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 55: blk.14.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 56: blk.14.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 57: blk.14.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 58: blk.14.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 59: blk.14.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 60: blk.14.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 61: blk.14.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 62: blk.14.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 63: blk.14.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 64: blk.15.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 65: blk.15.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 66: blk.2.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 67: blk.2.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 68: blk.2.ffn_gate.weight q6_K [ 5120, 
13824, 1, 1 ]\n", + "llama_model_loader: - tensor 69: blk.2.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 70: blk.2.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 71: blk.2.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 72: blk.2.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 73: blk.2.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 74: blk.2.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 75: blk.3.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 76: blk.3.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 77: blk.3.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 78: blk.3.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 79: blk.3.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 80: blk.3.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 81: blk.3.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 82: blk.3.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 83: blk.3.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 84: blk.4.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 85: blk.4.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 86: blk.4.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 87: blk.4.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 88: blk.4.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 89: blk.4.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 90: blk.4.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 91: blk.4.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 92: blk.4.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 93: blk.5.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 94: blk.5.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 95: blk.5.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 96: blk.5.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 97: blk.5.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 98: blk.5.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 99: blk.5.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 100: blk.5.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 101: blk.5.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 102: blk.6.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 103: blk.6.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 104: blk.6.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 105: blk.6.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 106: blk.6.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 107: blk.6.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 108: blk.6.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 109: 
blk.6.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 110: blk.6.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 111: blk.7.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 112: blk.7.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 113: blk.7.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 114: blk.7.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 115: blk.7.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 116: blk.7.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 117: blk.7.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 118: blk.7.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 119: blk.7.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 120: blk.8.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 121: blk.8.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 122: blk.8.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 123: blk.8.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 124: blk.8.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 125: blk.8.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 126: blk.8.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 127: blk.8.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 128: blk.8.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 129: blk.9.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 130: blk.9.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 131: blk.9.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 132: blk.9.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 133: blk.9.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 134: blk.9.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 135: blk.9.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 136: blk.9.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 137: blk.9.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 138: blk.15.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 139: blk.15.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 140: blk.15.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 141: blk.15.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 142: blk.15.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 143: blk.15.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 144: blk.15.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 145: blk.16.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 146: blk.16.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 147: blk.16.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 148: blk.16.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 149: 
blk.16.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 150: blk.16.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 151: blk.16.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 152: blk.16.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 153: blk.16.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 154: blk.17.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 155: blk.17.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 156: blk.17.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 157: blk.17.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 158: blk.17.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 159: blk.17.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 160: blk.17.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 161: blk.17.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 162: blk.17.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 163: blk.18.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 164: blk.18.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 165: blk.18.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 166: blk.18.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 167: blk.18.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 168: blk.18.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 169: blk.18.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 170: blk.18.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 171: blk.18.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 172: blk.19.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 173: blk.19.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 174: blk.19.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 175: blk.19.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 176: blk.19.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 177: blk.19.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 178: blk.19.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 179: blk.19.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 180: blk.19.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 181: blk.20.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 182: blk.20.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 183: blk.20.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 184: blk.20.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 185: blk.20.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 186: blk.20.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 187: blk.20.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 188: blk.20.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 
189: blk.20.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 190: blk.21.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 191: blk.21.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 192: blk.21.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 193: blk.21.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 194: blk.21.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 195: blk.21.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 196: blk.21.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 197: blk.21.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 198: blk.21.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 199: blk.22.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 200: blk.22.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 201: blk.22.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 202: blk.22.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 203: blk.22.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 204: blk.22.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 205: blk.22.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 206: blk.22.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 207: blk.22.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 208: blk.23.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 209: blk.23.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 210: blk.23.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 211: blk.23.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 212: blk.23.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 213: blk.23.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 214: blk.23.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 215: blk.23.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 216: blk.23.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 217: blk.24.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 218: blk.24.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 219: blk.24.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 220: blk.24.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 221: blk.24.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 222: blk.24.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 223: blk.24.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 224: blk.24.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 225: blk.24.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 226: blk.25.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 227: blk.25.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 228: blk.25.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - 
tensor 229: blk.25.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 230: blk.25.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 231: blk.25.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 232: blk.25.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 233: blk.25.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 234: blk.25.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 235: blk.26.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 236: blk.26.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 237: blk.26.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 238: blk.26.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 239: blk.26.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 240: blk.26.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 241: blk.26.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 242: blk.26.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 243: blk.26.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 244: blk.27.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 245: blk.27.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 246: blk.27.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 247: blk.27.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 248: blk.27.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 249: blk.27.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 250: blk.27.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 251: blk.27.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 252: blk.27.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 253: blk.28.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 254: blk.28.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 255: blk.28.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 256: blk.28.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 257: blk.28.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 258: blk.28.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 259: blk.28.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 260: blk.28.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 261: blk.28.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 262: blk.29.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 263: blk.29.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 264: blk.29.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 265: blk.29.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 266: blk.29.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 267: blk.29.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 268: blk.29.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + 
"llama_model_loader: - tensor 269: blk.29.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 270: blk.29.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 271: blk.30.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 272: blk.30.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 273: blk.30.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 274: blk.30.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 275: blk.30.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 276: blk.30.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 277: output.weight q6_K [ 5120, 32000, 1, 1 ]\n", + "llama_model_loader: - tensor 278: blk.30.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 279: blk.30.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 280: blk.30.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 281: blk.31.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 282: blk.31.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 283: blk.31.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 284: blk.31.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 285: blk.31.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 286: blk.31.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 287: blk.31.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 288: blk.31.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 289: blk.31.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 290: blk.32.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 291: blk.32.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 292: blk.32.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 293: blk.32.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 294: blk.32.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 295: blk.32.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 296: blk.32.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 297: blk.32.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 298: blk.32.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 299: blk.33.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 300: blk.33.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 301: blk.33.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 302: blk.33.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 303: blk.33.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 304: blk.33.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 305: blk.33.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 306: blk.33.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 307: blk.33.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 308: blk.34.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + 
"llama_model_loader: - tensor 309: blk.34.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 310: blk.34.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 311: blk.34.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 312: blk.34.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 313: blk.34.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 314: blk.34.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 315: blk.34.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 316: blk.34.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 317: blk.35.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 318: blk.35.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 319: blk.35.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 320: blk.35.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 321: blk.35.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 322: blk.35.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 323: blk.35.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 324: blk.35.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 325: blk.35.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 326: blk.36.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 327: blk.36.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 328: blk.36.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 329: blk.36.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 330: blk.36.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 331: blk.36.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 332: blk.36.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 333: blk.36.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 334: blk.36.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 335: blk.37.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 336: blk.37.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 337: blk.37.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 338: blk.37.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 339: blk.37.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 340: blk.37.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 341: blk.37.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 342: blk.37.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 343: blk.37.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 344: blk.38.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 345: blk.38.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 346: blk.38.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 347: blk.38.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 348: blk.38.ffn_norm.weight f32 [ 5120, 1, 1, 1 
]\n", + "llama_model_loader: - tensor 349: blk.38.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 350: blk.38.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 351: blk.38.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 352: blk.38.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 353: blk.39.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 354: blk.39.ffn_down.weight q6_K [ 13824, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 355: blk.39.ffn_gate.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 356: blk.39.ffn_up.weight q6_K [ 5120, 13824, 1, 1 ]\n", + "llama_model_loader: - tensor 357: blk.39.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 358: blk.39.attn_k.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 359: blk.39.attn_output.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 360: blk.39.attn_q.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 361: blk.39.attn_v.weight q6_K [ 5120, 5120, 1, 1 ]\n", + "llama_model_loader: - tensor 362: output_norm.weight f32 [ 5120, 1, 1, 1 ]\n", "llama_model_loader: - kv 0: general.architecture str \n", "llama_model_loader: - kv 1: general.name str \n", - "llama_model_loader: - kv 2: general.description str \n", - "llama_model_loader: - kv 3: llama.context_length u32 \n", - "llama_model_loader: - kv 4: llama.embedding_length u32 \n", - "llama_model_loader: - kv 5: llama.block_count u32 \n", - "llama_model_loader: - kv 6: llama.feed_forward_length u32 \n", - "llama_model_loader: - kv 7: llama.rope.dimension_count u32 \n", - "llama_model_loader: - kv 8: llama.attention.head_count u32 \n", - "llama_model_loader: - kv 9: llama.attention.head_count_kv u32 \n", - "llama_model_loader: - kv 10: llama.attention.layer_norm_rms_epsilon f32 \n", + "llama_model_loader: - kv 2: llama.context_length u32 \n", + "llama_model_loader: - kv 3: llama.embedding_length u32 \n", + "llama_model_loader: - kv 4: llama.block_count u32 \n", + "llama_model_loader: - kv 5: llama.feed_forward_length u32 \n", + "llama_model_loader: - kv 6: llama.rope.dimension_count u32 \n", + "llama_model_loader: - kv 7: llama.attention.head_count u32 \n", + "llama_model_loader: - kv 8: llama.attention.head_count_kv u32 \n", + "llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 \n", + "llama_model_loader: - kv 10: general.file_type u32 \n", "llama_model_loader: - kv 11: tokenizer.ggml.model str \n", "llama_model_loader: - kv 12: tokenizer.ggml.tokens arr \n", "llama_model_loader: - kv 13: tokenizer.ggml.scores arr \n", "llama_model_loader: - kv 14: tokenizer.ggml.token_type arr \n", - "llama_model_loader: - kv 15: tokenizer.ggml.unknown_token_id u32 \n", - "llama_model_loader: - kv 16: tokenizer.ggml.bos_token_id u32 \n", - "llama_model_loader: - kv 17: tokenizer.ggml.eos_token_id u32 \n", + "llama_model_loader: - kv 15: tokenizer.ggml.bos_token_id u32 \n", + "llama_model_loader: - kv 16: tokenizer.ggml.eos_token_id u32 \n", + "llama_model_loader: - kv 17: tokenizer.ggml.unknown_token_id u32 \n", + "llama_model_loader: - kv 18: general.quantization_version u32 \n", "llama_model_loader: - type f32: 81 tensors\n", - "llama_model_loader: - type q4_K: 281 tensors\n", - "llama_model_loader: - type q6_K: 1 tensors\n", + "llama_model_loader: - type q6_K: 282 tensors\n", "llm_load_print_meta: format = GGUF V2 (latest)\n", "llm_load_print_meta: 
 "llm_load_print_meta: format         = GGUF V2 (latest)\n",
 "llm_load_print_meta: arch           = llama\n",
 "llm_load_print_meta: vocab type     = SPM\n",
 "llm_load_print_meta: n_vocab        = 32000\n",
 "llm_load_print_meta: n_merges       = 0\n",
- "llm_load_print_meta: n_ctx_train    = 2048\n",
+ "llm_load_print_meta: n_ctx_train    = 4096\n",
 "llm_load_print_meta: n_ctx          = 3900\n",
 "llm_load_print_meta: n_embd         = 5120\n",
 "llm_load_print_meta: n_head         = 40\n",
@@ -1477,95 +1571,86 @@
 "llm_load_print_meta: n_rot          = 128\n",
 "llm_load_print_meta: n_gqa          = 1\n",
 "llm_load_print_meta: f_norm_eps     = 1.0e-05\n",
- "llm_load_print_meta: f_norm_rms_eps = 5.0e-06\n",
+ "llm_load_print_meta: f_norm_rms_eps = 1.0e-05\n",
 "llm_load_print_meta: n_ff           = 13824\n",
 "llm_load_print_meta: freq_base      = 10000.0\n",
 "llm_load_print_meta: freq_scale     = 1\n",
 "llm_load_print_meta: model type     = 13B\n",
- "llm_load_print_meta: model ftype    = mostly Q4_K - Medium (guessed)\n",
+ "llm_load_print_meta: model ftype    = mostly Q6_K\n",
 "llm_load_print_meta: model size     = 13.02 B\n",
- "llm_load_print_meta: general.name   = llama-2-13b-chat.ggmlv3.q4_K_S.bin\n",
+ "llm_load_print_meta: general.name   = LLaMA v2\n",
 "llm_load_print_meta: BOS token = 1 ''\n",
 "llm_load_print_meta: EOS token = 2 ''\n",
 "llm_load_print_meta: UNK token = 0 ''\n",
 "llm_load_print_meta: LF token  = 13 '<0x0A>'\n",
 "llm_load_tensors: ggml ctx size = 0.12 MB\n",
- "llm_load_tensors: mem required  = 7024.01 MB (+ 3046.88 MB per state)\n",
- "...................................................................................................\n",
+ "llm_load_tensors: mem required  = 10183.83 MB (+ 3046.88 MB per state)\n",
+ "....................................................................................................\n",
 "llama_new_context_with_model: kv self size  = 3046.88 MB\n",
 "ggml_metal_init: allocating\n",
 "ggml_metal_init: loading '/Users/rchan/opt/miniconda3/envs/reginald/lib/python3.11/site-packages/llama_cpp/ggml-metal.metal'\n",
- "ggml_metal_init: loaded kernel_add 0x17796b1b0 ... kernel_cpy_f16_f16 0x177971710 | th_max = 576-1024 | th_width = 32 [44 repetitive kernel-load lines truncated]\n",
+ "ggml_metal_init: loaded kernel_add 0x1010135f0 ... kernel_cpy_f16_f16 0x101021f00 | th_max = 576-1024 | th_width = 32 [44 repetitive kernel-load lines truncated]\n",
 "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n",
 "ggml_metal_init: hasUnifiedMemory             = true\n",
 "ggml_metal_init: maxTransferRate              = built-in GPU\n" ] }, { "name": "stderr", "output_type": "stream", "text": [
+ "llama_new_context_with_model: compute buffer total size =  356.16 MB\n",
 "llama_new_context_with_model: max tensor size =   128.17 MB\n",
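The buffer allocations just below show why the cell was re-run: in the removed output every ggml_metal_add_buffer call warns that total allocation (roughly 22-25 GB) exceeds recommendedMaxWorkingSetSize (21845.34 MB), which is consistent with an earlier model's Metal buffers still being resident, while the Q6_K run tops out around 13.6 GB with no warnings. A minimal sketch of the usual remedy between runs, assuming the previous model object is still bound to the name llm (an assumption; this step is not shown in the diff):

    import gc

    # Hypothetical cleanup: release the previous llama.cpp model so its
    # Metal buffers are freed before the next model is loaded; otherwise
    # both models count against the GPU working-set budget at once.
    del llm
    gc.collect()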
- "ggml_metal_add_buffer: allocated 'data ' buffer, size =  7024.61 MB, (22067.39 / 21845.34), warning: current allocated size is greater than the recommended max working set size\n",
- "ggml_metal_add_buffer: allocated 'eval ' buffer, size =     1.48 MB, (22068.88 / 21845.34), warning: current allocated size is greater than the recommended max working set size\n",
- "ggml_metal_add_buffer: allocated 'kv   ' buffer, size =  3048.88 MB, (25117.75 / 21845.34), warning: current allocated size is greater than the recommended max working set size\n",
+ "ggml_metal_add_buffer: allocated 'data ' buffer, size = 10184.42 MB, (10184.92 / 21845.34)\n",
+ "ggml_metal_add_buffer: allocated 'eval ' buffer, size =     1.48 MB, (10186.41 / 21845.34)\n",
+ "ggml_metal_add_buffer: allocated 'kv   ' buffer, size =  3048.88 MB, (13235.28 / 21845.34)\n",
 "AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | \n",
- "ggml_metal_add_buffer: allocated 'alloc ' buffer, size =   354.70 MB, (25472.45 / 21845.34), warning: current allocated size is greater than the recommended max working set size\n",
- "ggml_metal_free: deallocating\n"
+ "ggml_metal_add_buffer: allocated 'alloc ' buffer, size =   354.70 MB, (13589.98 / 21845.34)\n"
 ] } ],
 "source": [
 "llm = LlamaCPP(\n",
- "    model_path=\"../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin\",\n",
+ "    model_path=llama_2_path,\n",
 "    temperature=0.1,\n",
- "    max_new_tokens=256,\n",
+ "    max_new_tokens=1024,\n",
- "    # llama2 has a context window of 4096 tokens,\n",
- "    # but we set it lower to allow for some wiggle room\n",
 "    context_window=3900,\n",
 "    # kwargs to pass to __call__()\n",
 "    generate_kwargs={},\n",
@@ -1581,22 +1666,44 @@
 }, { "cell_type": "code",
- "execution_count": 56,
- "id": "d694eda6",
+ "execution_count": 13,
+ "id": "8760d520",
 "metadata": {},
 "outputs": [ { "data": { "text/plain": [
 "LLMMetadata(context_window=3900, num_output=1024, is_chat_model=False, is_function_calling_model=False, model_name='../../gguf_models/llama-2-13b-chat.Q6_K.gguf')"
 ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ],
 "source": [ "llm.metadata" ] },
 { "cell_type": "code", "execution_count": 14, "id": "843abeb0", "metadata": {}, "outputs": [], "source": [
 "handbook = pd.read_csv(\"../../data/public/handbook-scraped.csv\")\n",
- "turing = pd.read_csv(\"../../data/public/turingacuk-no-boilerplate.csv\")\n",
+ "wiki = pd.read_csv(\"../../data/turing_internal/wiki-scraped.csv\")\n",
+ "# turing = pd.read_csv(\"../../data/public/turingacuk-no-boilerplate.csv\")\n",
 "\n",
- "text_list = list(handbook[\"body\"].astype(\"str\")) + list(turing[\"body\"].astype(\"str\"))\n",
+ "text_list = list(handbook[\"body\"].astype(\"str\")) + list(wiki[\"body\"].astype(\"str\"))\n",
 "documents = [Document(text=t) for t in text_list]"
 ] },
 { "cell_type": "code",
- "execution_count": 60,
- "id": "99089231",
+ "execution_count": 15,
+ "id": "518887f1",
 "metadata": {}, "outputs": [], "source": [
@@ -1606,21 +1713,46 @@
 }, { "cell_type": "code",
- "execution_count": 61,
- "id": "42d1da70",
+ "execution_count": 16,
+ "id": "68b0c63e",
 "metadata": {},
 "outputs": [ { "data": { "text/plain": [
 "HuggingFaceEmbeddings(client=SentenceTransformer(\n",
 "  (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel \n",
 "  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})\n",
 "  (2): Normalize()\n",
 "), model_name='sentence-transformers/all-mpnet-base-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False)"
 ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ],
 "source": [ "hfemb" ] },
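The repr above spells out the embedding pipeline: an MPNet encoder with a 384-token window, mean pooling to a 768-dimensional vector, and a final Normalize() stage, so every document chunk is stored as a unit-norm vector. A minimal sketch of the model in isolation (not a cell from this notebook, but the same model name):

    from langchain.embeddings.huggingface import HuggingFaceEmbeddings

    emb = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    vec = emb.embed_query("what should a new starter in REG do?")
    print(len(vec))  # 768 floats; unit norm thanks to the Normalize() stage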
'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})\n", + " (2): Normalize()\n", + "), model_name='sentence-transformers/all-mpnet-base-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hfemb" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "88198eb3", "metadata": {}, "outputs": [], "source": [ "# set number of output tokens\n", - "num_output = 256\n", + "num_output = 1024\n", "# set maximum input size\n", - "max_input_size = 2048\n", + "context_window = 4096\n", "# set maximum chunk overlap\n", - "chunk_size_limit = 1024\n", + "chunk_size_limit = 512\n", "chunk_overlap_ratio = 0.1\n", "\n", "prompt_helper = PromptHelper(\n", - " context_window=max_input_size,\n", + " context_window=context_window,\n", " num_output=num_output,\n", " chunk_size_limit=chunk_size_limit,\n", " chunk_overlap_ratio=chunk_overlap_ratio,\n", @@ -1629,67 +1761,89 @@ }, { "cell_type": "code", - "execution_count": 62, - "id": "da87ad9c", + "execution_count": 18, + "id": "7a2ee69a", "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[nltk_data] Downloading package punkt to\n", - "[nltk_data] /Users/rchan/Library/Caches/llama_index...\n", - "[nltk_data] Unzipping tokenizers/punkt.zip.\n" - ] - } - ], + "outputs": [], "source": [ " service_context = ServiceContext.from_defaults(\n", - " llm_predictor=llm,\n", + " llm_predictor=LLMPredictor(llm=llm),\n", " embed_model=embed_model,\n", " prompt_helper=prompt_helper,\n", - " chunk_size_limit=chunk_size_limit,\n", + " chunk_size=chunk_size_limit,\n", ")\n", "\n", - "index = GPTVectorStoreIndex.from_documents(\n", + "index = VectorStoreIndex.from_documents(\n", " documents, service_context=service_context\n", ")" ] }, { "cell_type": "code", - "execution_count": 63, - "id": "fac67c98", + "execution_count": 19, + "id": "a614a37a", + "metadata": {}, + "outputs": [], + "source": [ + "response_mode = \"simple_summarize\"\n", + "similarity_top_k = 3" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "6e6a29f3", "metadata": {}, "outputs": [], "source": [ - "query_engine = index.as_query_engine()" + "query_engine = index.as_query_engine(response_mode=response_mode,\n", + " similarity_top_k=similarity_top_k)" ] }, { "cell_type": "code", - "execution_count": 65, - "id": "4e603ec1", + "execution_count": 21, + "id": "eecc1b2f", "metadata": {}, "outputs": [ { - "ename": "AttributeError", - "evalue": "'LlamaCPP' object has no attribute 'predict'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[65], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mquery_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwhat should a new starter in REG do?\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(response\u001b[38;5;241m.\u001b[39mresponse)\n", - "File \u001b[0;32m~/opt/miniconda3/envs/reginald/lib/python3.11/site-packages/llama_index/indices/query/base.py:23\u001b[0m, in 
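+  {
+   "cell_type": "markdown",
+   "id": "aa10b0c1",
+   "metadata": {},
+   "source": [
+    "Note: the `PromptHelper` above is built with `context_window=4096`, while the `LlamaCPP` model was loaded with `context_window=3900` (see `llm.metadata`). A minimal sketch, reusing the variables defined above, of deriving the prompt helper's window from the model so the two cannot drift apart:\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aa10b0c2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch (not part of the original run): read the context window the LLM\n",
+    "# actually reports instead of hard-coding 4096, so the prompt helper can\n",
+    "# never pack more tokens than the model accepts.\n",
+    "llm_context_window = llm.metadata.context_window  # 3900 for this model\n",
+    "\n",
+    "prompt_helper = PromptHelper(\n",
+    "    context_window=llm_context_window,\n",
+    "    num_output=num_output,\n",
+    "    chunk_size_limit=chunk_size_limit,\n",
+    "    chunk_overlap_ratio=chunk_overlap_ratio,\n",
+    ")"
+   ]
+  },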
\u001b[0;36mBaseQueryEngine.query\u001b[0;34m(self, str_or_query_bundle)\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(str_or_query_bundle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 22\u001b[0m str_or_query_bundle \u001b[38;5;241m=\u001b[39m QueryBundle(str_or_query_bundle)\n\u001b[0;32m---> 23\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_query\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstr_or_query_bundle\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n", - "File \u001b[0;32m~/opt/miniconda3/envs/reginald/lib/python3.11/site-packages/llama_index/query_engine/retriever_query_engine.py:171\u001b[0m, in \u001b[0;36mRetrieverQueryEngine._query\u001b[0;34m(self, query_bundle)\u001b[0m\n\u001b[1;32m 165\u001b[0m nodes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mretrieve(query_bundle)\n\u001b[1;32m 167\u001b[0m retrieve_event\u001b[38;5;241m.\u001b[39mon_end(\n\u001b[1;32m 168\u001b[0m payload\u001b[38;5;241m=\u001b[39m{EventPayload\u001b[38;5;241m.\u001b[39mNODES: nodes},\n\u001b[1;32m 169\u001b[0m )\n\u001b[0;32m--> 171\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_response_synthesizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msynthesize\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 172\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery_bundle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 173\u001b[0m \u001b[43m \u001b[49m\u001b[43mnodes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnodes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 174\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 176\u001b[0m query_event\u001b[38;5;241m.\u001b[39mon_end(payload\u001b[38;5;241m=\u001b[39m{EventPayload\u001b[38;5;241m.\u001b[39mRESPONSE: response})\n\u001b[1;32m 178\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n", - "File \u001b[0;32m~/opt/miniconda3/envs/reginald/lib/python3.11/site-packages/llama_index/response_synthesizers/base.py:125\u001b[0m, in \u001b[0;36mBaseSynthesizer.synthesize\u001b[0;34m(self, query, nodes, additional_source_nodes)\u001b[0m\n\u001b[1;32m 120\u001b[0m query \u001b[38;5;241m=\u001b[39m QueryBundle(query_str\u001b[38;5;241m=\u001b[39mquery)\n\u001b[1;32m 122\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_callback_manager\u001b[38;5;241m.\u001b[39mevent(\n\u001b[1;32m 123\u001b[0m CBEventType\u001b[38;5;241m.\u001b[39mSYNTHESIZE, payload\u001b[38;5;241m=\u001b[39m{EventPayload\u001b[38;5;241m.\u001b[39mQUERY_STR: query\u001b[38;5;241m.\u001b[39mquery_str}\n\u001b[1;32m 124\u001b[0m ) \u001b[38;5;28;01mas\u001b[39;00m event:\n\u001b[0;32m--> 125\u001b[0m response_str \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_response\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 126\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery_str\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquery_str\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 127\u001b[0m \u001b[43m \u001b[49m\u001b[43mtext_chunks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 128\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnode\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_content\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmetadata_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mMetadataMode\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mLLM\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mn\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mnodes\u001b[49m\n\u001b[1;32m 129\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 130\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 132\u001b[0m additional_source_nodes \u001b[38;5;241m=\u001b[39m additional_source_nodes \u001b[38;5;129;01mor\u001b[39;00m []\n\u001b[1;32m 133\u001b[0m source_nodes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(nodes) \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mlist\u001b[39m(additional_source_nodes)\n", - "File \u001b[0;32m~/opt/miniconda3/envs/reginald/lib/python3.11/site-packages/llama_index/response_synthesizers/compact_and_refine.py:34\u001b[0m, in \u001b[0;36mCompactAndRefine.get_response\u001b[0;34m(self, query_str, text_chunks, **response_kwargs)\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[38;5;66;03m# use prompt helper to fix compact text_chunks under the prompt limitation\u001b[39;00m\n\u001b[1;32m 31\u001b[0m \u001b[38;5;66;03m# TODO: This is a temporary fix - reason it's temporary is that\u001b[39;00m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;66;03m# the refine template does not account for size of previous answer.\u001b[39;00m\n\u001b[1;32m 33\u001b[0m new_texts \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_compact_text_chunks(query_str, text_chunks)\n\u001b[0;32m---> 34\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_response\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 35\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery_str\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery_str\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtext_chunks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnew_texts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kwargs\u001b[49m\n\u001b[1;32m 36\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n", - "File \u001b[0;32m~/opt/miniconda3/envs/reginald/lib/python3.11/site-packages/llama_index/response_synthesizers/refine.py:120\u001b[0m, in \u001b[0;36mRefine.get_response\u001b[0;34m(self, query_str, text_chunks, **response_kwargs)\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m text_chunk \u001b[38;5;129;01min\u001b[39;00m text_chunks:\n\u001b[1;32m 117\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m prev_response_obj \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 118\u001b[0m \u001b[38;5;66;03m# if this is the first chunk, and text chunk already\u001b[39;00m\n\u001b[1;32m 119\u001b[0m \u001b[38;5;66;03m# is an answer, then return it\u001b[39;00m\n\u001b[0;32m--> 120\u001b[0m response \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_give_response_single\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 121\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery_str\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 122\u001b[0m \u001b[43m \u001b[49m\u001b[43mtext_chunk\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 123\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 125\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_refine_response_single(\n\u001b[1;32m 126\u001b[0m prev_response_obj, query_str, text_chunk\n\u001b[1;32m 127\u001b[0m )\n", - "File \u001b[0;32m~/opt/miniconda3/envs/reginald/lib/python3.11/site-packages/llama_index/response_synthesizers/refine.py:177\u001b[0m, in \u001b[0;36mRefine._give_response_single\u001b[0;34m(self, query_str, text_chunk, **response_kwargs)\u001b[0m\n\u001b[1;32m 174\u001b[0m query_satisfied \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_streaming:\n\u001b[1;32m 176\u001b[0m structured_response \u001b[38;5;241m=\u001b[39m cast(\n\u001b[0;32m--> 177\u001b[0m StructuredRefineResponse, \u001b[43mprogram\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcontext_str\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcur_text_chunk\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 178\u001b[0m )\n\u001b[1;32m 179\u001b[0m query_satisfied \u001b[38;5;241m=\u001b[39m structured_response\u001b[38;5;241m.\u001b[39mquery_satisfied\n\u001b[1;32m 180\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m query_satisfied:\n", - "File \u001b[0;32m~/opt/miniconda3/envs/reginald/lib/python3.11/site-packages/llama_index/response_synthesizers/refine.py:60\u001b[0m, in \u001b[0;36mDefaultRefineProgram.__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwds: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m StructuredRefineResponse:\n\u001b[0;32m---> 60\u001b[0m answer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_llm_predictor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpredict\u001b[49m(\n\u001b[1;32m 61\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_prompt,\n\u001b[1;32m 62\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwds,\n\u001b[1;32m 63\u001b[0m )\n\u001b[1;32m 64\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m StructuredRefineResponse(answer\u001b[38;5;241m=\u001b[39manswer, query_satisfied\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", - "\u001b[0;31mAttributeError\u001b[0m: 'LlamaCPP' object has no attribute 'predict'" + "name": "stdout", + "output_type": "stream", + "text": [ + " As a new starter in REG, you should:\n", + "\n", + "1. Attend buddy meetings with your assigned buddies to get familiarized with the team and ask any questions you may have.\n", + "2. Attend HR induction and IT induction meetings to discuss general information such as pay, health, leaves, benefits, and accounts.\n", + "3. 
Meet with your line manager to discuss your role and responsibilities.\n", + "4. Shadow meetings across the group to get a feel for how REG works and meet people.\n", + "5. Complete all necessary forms on Cezanne, including personal details, bank details, NI Health form, Additional characteristics form, and signed and dated scanned contract.\n", + "6. Upload your photo to the Documents section on Cezanne.\n", + "7. Request a British Library pass to access the office.\n", + "8. Complete Agenda screening (if you haven't already done so).\n", + "9. Read about health and dental insurance and decide whether to sign up.\n", + "10. Check the Turing Benefits site for useful discounts.\n", + "11. Provide a description for your public profile on the Turing website.\n", + "12. Verify your MoorePay account for payslips.\n", + "\n", + "It is also recommended that you:\n", + "\n", + "* Join in for welcome coffee(s) to introduce yourself to the whole REG team.\n", + "* Attend 1-on-1 meetings with REG's Director within the first few weeks.\n", + "* Use the time before being assigned to a project to set up your laptop and tools, get to know people, read the handbook and internal wiki, and shadow meetings.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 254.43 ms / 363 runs ( 0.70 ms per token, 1426.70 tokens per second)\n", + "llama_print_timings: prompt eval time = 29778.92 ms / 1296 tokens ( 22.98 ms per token, 43.52 tokens per second)\n", + "llama_print_timings: eval time = 41385.82 ms / 362 runs ( 114.33 ms per token, 8.75 tokens per second)\n", + "llama_print_timings: total time = 71899.77 ms\n" ] } ], @@ -1699,100 +1853,1215 @@ ] }, { - "cell_type": "markdown", - "id": "9e301002", + "cell_type": "code", + "execution_count": 22, + "id": "5a338a7b", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "## Chat engine" + "len(response.source_nodes)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "f4a6b68d", + "execution_count": 23, + "id": "b4b62bea", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Based on the provided context information, the starting salary for a standard RSE in REG would be £40,000. 
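+  {
+   "cell_type": "markdown",
+   "id": "bb20c0d1",
+   "metadata": {},
+   "source": [
+    "The query engine keeps the retrieved chunks on the response object: `response.source_nodes` has three entries here, matching `similarity_top_k=3`. A minimal sketch, assuming the `NodeWithScore` interface from llama_index 0.8.x, of inspecting what was actually handed to the LLM:\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bb20c0d2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Each entry is a NodeWithScore: a similarity score plus the node whose\n",
+    "# text was passed to the LLM (get_content() is the same accessor the\n",
+    "# response synthesizer uses internally).\n",
+    "for i, source in enumerate(response.source_nodes):\n",
+    "    print(f\"--- source {i}, score={source.score:.3f} ---\")\n",
+    "    print(source.node.get_content()[:200])"
+   ]
+  },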
This is the bottom third baseline for the Standard role in the 2023-24 Institute-wide HERA Bands.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 42.72 ms / 61 runs ( 0.70 ms per token, 1427.84 tokens per second)\n", + "llama_print_timings: prompt eval time = 49457.50 ms / 1809 tokens ( 27.34 ms per token, 36.58 tokens per second)\n", + "llama_print_timings: eval time = 7267.23 ms / 60 runs ( 121.12 ms per token, 8.26 tokens per second)\n", + "llama_print_timings: total time = 56845.16 ms\n" + ] + } + ], "source": [ - "chat_engine = index.as_chat_engine(chat_mode=\"react\", verbose=True)" + "response = query_engine.query(\"What is the starting salary for a standard RSE in REG?\")\n", + "print(response.response)" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "5f8ce782", + "cell_type": "markdown", + "id": "d4212724", "metadata": {}, - "outputs": [], "source": [ - "response = chat_engine.chat(\n", - " \"what should a new starter in REG do?\"\n", - ")" + "## Context chat engine" ] }, { "cell_type": "code", - "execution_count": null, - "id": "351013fc", + "execution_count": 24, + "id": "dedcf17d", "metadata": {}, "outputs": [], "source": [ - "print(response)" + "system_prompt=(\n", + " \"You are a helpful assistant, able to have normal interactions, \"\n", + " # \"as well as talk about the Research Engineering Group (REG) \"\n", + " # \"and The Alan Turing Institute based on the context provided. \"\n", + " \"Please answer questions with the context provided if it is relevant. \"\n", + " \"If the context is not related to the question or message, answer normally. \"\n", + " \"Do not speculate or make up information. \"\n", + " \"Do not reference any given instructions or context. \"\n", + " \"Do not thank me for any additional context. \"\n", + ")" ] }, { "cell_type": "code", - "execution_count": null, - "id": "20449087", + "execution_count": 25, + "id": "647ab6db", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'You are a helpful assistant, able to have normal interactions, Please answer questions with the context provided if it is relevant. If the context is not related to the question or message, answer normally. Do not speculate or make up information. Do not reference any given instructions or context. Do not thank me for any additional context. '" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "response = chat_engine.chat(\"What did I ask you before?\")" + "system_prompt" ] }, { "cell_type": "code", - "execution_count": null, - "id": "f1a2bab4", + "execution_count": 26, + "id": "5124242d", "metadata": {}, "outputs": [], "source": [ - "print(response)" + "chat_engine = index.as_chat_engine(\n", + " chat_mode=\"context\",\n", + " response_mode=response_mode,\n", + " similarity_top_k=similarity_top_k,\n", + " system_prompt=system_prompt,\n", + ")" ] }, { - "cell_type": "markdown", - "id": "0327d628", + "cell_type": "code", + "execution_count": 27, + "id": "5732405b", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[ChatMessage(role=, content='You are a helpful assistant, able to have normal interactions, Please answer questions with the context provided if it is relevant. If the context is not related to the question or message, answer normally. Do not speculate or make up information. 
Do not reference any given instructions or context. Do not thank me for any additional context. ', additional_kwargs={})]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "Reset chat engine..." + "chat_engine._prefix_messages" ] }, { "cell_type": "code", - "execution_count": null, - "id": "b055a7ef", + "execution_count": 28, + "id": "81b31c72", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + ".wrap..wrapped_llm_chat of LlamaCPP(callback_manager=, model_url='https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/resolve/main/llama-2-13b-chat.ggmlv3.q4_0.bin', model_path='../../gguf_models/llama-2-13b-chat.Q6_K.gguf', temperature=0.1, max_new_tokens=1024, context_window=3900, messages_to_prompt=, completion_to_prompt=, generate_kwargs={'temperature': 0.1, 'max_tokens': 1024, 'stream': False}, model_kwargs={'n_gpu_layers': 1, 'n_ctx': 3900, 'verbose': True}, verbose=True)>" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "chat_engine.reset()" + "chat_engine._llm.chat" ] }, { "cell_type": "code", - "execution_count": null, - "id": "a86a24cd", + "execution_count": 29, + "id": "10f1a940-38f9-476e-9db9-4a48afd02792", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Hello! I'm here to help with any questions you have. What would you like to know or discuss? Please keep in mind that I can only provide information based on the context provided, so if your question is not related to the context, I may not be able to assist. Go ahead and ask away!\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 46.23 ms / 66 runs ( 0.70 ms per token, 1427.61 tokens per second)\n", + "llama_print_timings: prompt eval time = 9458.20 ms / 536 tokens ( 17.65 ms per token, 56.67 tokens per second)\n", + "llama_print_timings: eval time = 6521.28 ms / 65 runs ( 100.33 ms per token, 9.97 tokens per second)\n", + "llama_print_timings: total time = 16106.92 ms\n" + ] + } + ], "source": [ - "response = chat_engine.chat(\"What did I ask you before?\")" + "response = chat_engine.chat(\n", + " \"hello\"\n", + ")\n", + "print(response)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "d00949a5", + "execution_count": 30, + "id": "f81ee28c", "metadata": {}, - "outputs": [], - "source": [ + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Based on the provided context, here are some tasks that a new starter in REG might consider doing:\n", + "\n", + "1. Meet with their buddies: REG provides two buddies for each new starter to be friendly points of contact and help with any questions or issues.\n", + "2. Attend HR induction: HR will set up a meeting with the new starter to discuss general information such as pay, health, leaves, and benefits.\n", + "3. Attend IT induction: IT will meet the new starter to discuss accounts and Turing wide systems.\n", + "4. Shadow meetings: REG offers new starters the opportunity to shadow meetings across the group to meet people and get a feel for how they work.\n", + "5. 
Complete Agenda screening: HR requires all new starters to complete an Agenda screening report before starting.\n", + "6. Upload personal details and documents to Cezanne: New starters should enter specific personal details, such as DOB, home address, emergency contact details, and bank details, and upload a photo to the Documents section on Cezanne.\n", + "7. Complete and reupload in Cezanne documents area: New starters should complete and reupload in Cezanne documents area \"BL partners - Health, Safety and Security\" form and HMRC new starters form.\n", + "8. Signed and dated scanned contract: New starters should sign and date their scanned contract and upload it to Cezanne.\n", + "9. British Library pass: New starters should complete and reupload in Cezanne documents area \"BL partners - Health, Safety and Security\" form to get a British Library pass.\n", + "10. Read about health and dental insurance: REG provides health and dental insurance options, and new starters should read about them and decide whether to sign up.\n", + "11. Check the Turing Benefits site: The Turing Benefits site offers useful discounts, and new starters should check it to see if there are any discounts they can take advantage of.\n", + "12. Send P45 from previous job to HR contact directly by email: New starters should send their P45 from their previous job to the HR contact directly by email.\n", + "13. Provide a description for the ATI webpage: New starters should provide a description for their public profile on the Turing website.\n", + "14. Verify your MoorePay account: New starters should verify their MoorePay account to receive payslips.\n", + "\n", + "Please note that this list is not exhaustive and may not include all tasks that a new starter in REG might need to do. It's always a good idea to check with the person in charge of onboarding or your line manager for specific instructions tailored to your needs.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 423.75 ms / 603 runs ( 0.70 ms per token, 1423.00 tokens per second)\n", + "llama_print_timings: prompt eval time = 31744.09 ms / 1359 tokens ( 23.36 ms per token, 42.81 tokens per second)\n", + "llama_print_timings: eval time = 70752.81 ms / 602 runs ( 117.53 ms per token, 8.51 tokens per second)\n", + "llama_print_timings: total time = 103788.19 ms\n" + ] + } + ], + "source": [ + "response = chat_engine.chat(\n", + " \"what should a new starter in REG do?\"\n", + ")\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "1fcc61d1", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Great, it sounds like you have already taken care of some of the essential tasks for new starters in REG! Here are a few more things you might consider doing:\n", + "\n", + "1. Familiarize yourself with the REG handbook: The REG handbook provides an overview of how REG operates, including information on projects, service areas, and 22 days time.\n", + "2. Explore the project tracker: The project tracker is a tool used to express preferences on upcoming projects and track progress. You can browse the tracker to get an idea of what REG is working on and express your interests in specific projects.\n", + "3. 
Join the #new-starters Slack channel: This channel is a great place to connect with other new starters and ask questions or share information.\n", + "4. Attend tech talks: REG runs tech talks every Tuesday lunchtime, which cover a range of topics related to research and technology. You can find the upcoming topics on the REG Tech Talks Slack channel.\n", + "5. Check out the Turing Data Stories (TDS) repository: TDS is a separate workspace for small projects that may be of interest to new starters. You can find more information about TDS and how to get involved in the TDS Slack channel.\n", + "6. Consider contributing to service areas: Service areas are REG-internal work, such as looking after the handbook, organizing recruitment, or managing computational resources. You may want to consider contributing to one service area, which should take approximately half a day a week.\n", + "7. Learn about the Turing Way: The Turing Way is a set of principles and practices that guide REG's work. You can find more information about the Turing Way on the Turing Complete website.\n", + "8. Network with other REG members: Connecting with other REG members can be a great way to learn more about the group and find opportunities for collaboration. You can find contact information for other REG members on the REG website or by reaching out to your line manager or buddies.\n", + "\n", + "Remember, these are just suggestions, and you should prioritize tasks based on your own needs and interests. Your line manager or buddies may also have specific tasks or recommendations for you to consider.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 353.78 ms / 505 runs ( 0.70 ms per token, 1427.43 tokens per second)\n", + "llama_print_timings: prompt eval time = 71155.50 ms / 2339 tokens ( 30.42 ms per token, 32.87 tokens per second)\n", + "llama_print_timings: eval time = 66587.39 ms / 504 runs ( 132.12 ms per token, 7.57 tokens per second)\n", + "llama_print_timings: total time = 138794.64 ms\n" + ] + } + ], + "source": [ + "response = chat_engine.chat(\n", + " \"I've already completed my inductions and uploaded my \"\n", + " \"documents for Cezanne and the ATI website, what else is there to do?\"\n", + ")\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "23f92e2c", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " You're welcome! The link to the REG project tracker is:\n", + "\n", + "\n", + "\n", + "This page displays all upcoming, current, and completed projects in REG, along with information about each project's status and the issue number in the Hut23 repository. You can use this tracker to express preferences on upcoming projects and track progress.\n", + "\n", + "Please note that you may need to log in to access some of the links or features on the project tracker page. 
If you have any questions or need help finding something, feel free to reach out to your line manager or buddies for assistance.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 113.80 ms / 162 runs ( 0.70 ms per token, 1423.60 tokens per second)\n", + "llama_print_timings: prompt eval time = 79735.58 ms / 2512 tokens ( 31.74 ms per token, 31.50 tokens per second)\n", + "llama_print_timings: eval time = 21263.09 ms / 161 runs ( 132.07 ms per token, 7.57 tokens per second)\n", + "llama_print_timings: total time = 101326.02 ms\n" + ] + } + ], + "source": [ + "response = chat_engine.chat(\n", + " \"thanks! what is the link to the project tracker?\"\n", + ")\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "b26a7b4f-4bc2-4c09-816d-07cf9bf89c8b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Oh dear! It looks like I made a mistake. Thank you for letting me know.\n", + "\n", + "The REG project tracker is indeed located in the Hut23 GitHub repository, and you can access it by following these steps:\n", + "\n", + "1. Go to the Hut23 GitHub repository: \n", + "2. Click on the \"Projects\" tab in the top navigation menu.\n", + "3. You will see a list of all upcoming, current, and completed projects in REG, along with information about each project's status and the issue number in the Hut23 repository.\n", + "\n", + "You can use this tracker to express preferences on upcoming projects and track progress. If you have any questions or need help finding something, feel free to reach out to your line manager or buddies for assistance.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 133.99 ms / 191 runs ( 0.70 ms per token, 1425.46 tokens per second)\n", + "llama_print_timings: prompt eval time = 92744.62 ms / 2632 tokens ( 35.24 ms per token, 28.38 tokens per second)\n", + "llama_print_timings: eval time = 26045.85 ms / 190 runs ( 137.08 ms per token, 7.29 tokens per second)\n", + "llama_print_timings: total time = 119174.93 ms\n" + ] + } + ], + "source": [ + "response = chat_engine.chat(\n", + " \"that link doesn't seem to be right. the project tracker is in the Hut23 GitHub repo\"\n", + ")\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "id": "052d380f-6219-4756-82ec-8ca9d63fadd1", + "metadata": {}, + "source": [ + "Ran out of context length after this." + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "daff22ab", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[ChatMessage(role=, content='hello', additional_kwargs={}),\n", + " ChatMessage(role=, content=\" Hello! I'm here to help with any questions you have. What would you like to know or discuss? Please keep in mind that I can only provide information based on the context provided, so if your question is not related to the context, I may not be able to assist. 
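+  {
+   "cell_type": "markdown",
+   "id": "cc30d0e1",
+   "metadata": {},
+   "source": [
+    "The context-length failure is expected with these settings: every turn re-sends the system prompt, up to `similarity_top_k=3` chunks of 512 tokens each, and the entire chat history, so the 3900-token window fills after a few long answers. A minimal sketch of capping the history, assuming `ChatMemoryBuffer` from llama_index 0.8.x (the notebook above does not do this):\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cc30d0e2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.memory import ChatMemoryBuffer\n",
+    "\n",
+    "# Rough budget: 3 x 512-token chunks + system prompt + a 1024-token answer\n",
+    "# leaves roughly 1500 tokens for history inside the 3900-token window.\n",
+    "memory = ChatMemoryBuffer.from_defaults(token_limit=1500)\n",
+    "\n",
+    "chat_engine = index.as_chat_engine(\n",
+    "    chat_mode=\"context\",\n",
+    "    response_mode=response_mode,\n",
+    "    similarity_top_k=similarity_top_k,\n",
+    "    system_prompt=system_prompt,\n",
+    "    memory=memory,\n",
+    ")"
+   ]
+  },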
Go ahead and ask away!\", additional_kwargs={}),\n", + " ChatMessage(role=, content='what should a new starter in REG do?', additional_kwargs={}),\n", + " ChatMessage(role=, content=' Based on the provided context, here are some tasks that a new starter in REG might consider doing:\\n\\n1. Meet with their buddies: REG provides two buddies for each new starter to be friendly points of contact and help with any questions or issues.\\n2. Attend HR induction: HR will set up a meeting with the new starter to discuss general information such as pay, health, leaves, and benefits.\\n3. Attend IT induction: IT will meet the new starter to discuss accounts and Turing wide systems.\\n4. Shadow meetings: REG offers new starters the opportunity to shadow meetings across the group to meet people and get a feel for how they work.\\n5. Complete Agenda screening: HR requires all new starters to complete an Agenda screening report before starting.\\n6. Upload personal details and documents to Cezanne: New starters should enter specific personal details, such as DOB, home address, emergency contact details, and bank details, and upload a photo to the Documents section on Cezanne.\\n7. Complete and reupload in Cezanne documents area: New starters should complete and reupload in Cezanne documents area \"BL partners - Health, Safety and Security\" form and HMRC new starters form.\\n8. Signed and dated scanned contract: New starters should sign and date their scanned contract and upload it to Cezanne.\\n9. British Library pass: New starters should complete and reupload in Cezanne documents area \"BL partners - Health, Safety and Security\" form to get a British Library pass.\\n10. Read about health and dental insurance: REG provides health and dental insurance options, and new starters should read about them and decide whether to sign up.\\n11. Check the Turing Benefits site: The Turing Benefits site offers useful discounts, and new starters should check it to see if there are any discounts they can take advantage of.\\n12. Send P45 from previous job to HR contact directly by email: New starters should send their P45 from their previous job to the HR contact directly by email.\\n13. Provide a description for the ATI webpage: New starters should provide a description for their public profile on the Turing website.\\n14. Verify your MoorePay account: New starters should verify their MoorePay account to receive payslips.\\n\\nPlease note that this list is not exhaustive and may not include all tasks that a new starter in REG might need to do. It\\'s always a good idea to check with the person in charge of onboarding or your line manager for specific instructions tailored to your needs.', additional_kwargs={}),\n", + " ChatMessage(role=, content=\"I've already completed my inductions and uploaded my documents for Cezanne and the ATI website, what else is there to do?\", additional_kwargs={}),\n", + " ChatMessage(role=, content=\" Great, it sounds like you have already taken care of some of the essential tasks for new starters in REG! Here are a few more things you might consider doing:\\n\\n1. Familiarize yourself with the REG handbook: The REG handbook provides an overview of how REG operates, including information on projects, service areas, and 22 days time.\\n2. Explore the project tracker: The project tracker is a tool used to express preferences on upcoming projects and track progress. You can browse the tracker to get an idea of what REG is working on and express your interests in specific projects.\\n3. 
Join the #new-starters Slack channel: This channel is a great place to connect with other new starters and ask questions or share information.\\n4. Attend tech talks: REG runs tech talks every Tuesday lunchtime, which cover a range of topics related to research and technology. You can find the upcoming topics on the REG Tech Talks Slack channel.\\n5. Check out the Turing Data Stories (TDS) repository: TDS is a separate workspace for small projects that may be of interest to new starters. You can find more information about TDS and how to get involved in the TDS Slack channel.\\n6. Consider contributing to service areas: Service areas are REG-internal work, such as looking after the handbook, organizing recruitment, or managing computational resources. You may want to consider contributing to one service area, which should take approximately half a day a week.\\n7. Learn about the Turing Way: The Turing Way is a set of principles and practices that guide REG's work. You can find more information about the Turing Way on the Turing Complete website.\\n8. Network with other REG members: Connecting with other REG members can be a great way to learn more about the group and find opportunities for collaboration. You can find contact information for other REG members on the REG website or by reaching out to your line manager or buddies.\\n\\nRemember, these are just suggestions, and you should prioritize tasks based on your own needs and interests. Your line manager or buddies may also have specific tasks or recommendations for you to consider.\", additional_kwargs={}),\n", + " ChatMessage(role=, content='thanks! what is the link to the project tracker?', additional_kwargs={}),\n", + " ChatMessage(role=, content=\" You're welcome! The link to the REG project tracker is:\\n\\n\\n\\nThis page displays all upcoming, current, and completed projects in REG, along with information about each project's status and the issue number in the Hut23 repository. You can use this tracker to express preferences on upcoming projects and track progress.\\n\\nPlease note that you may need to log in to access some of the links or features on the project tracker page. If you have any questions or need help finding something, feel free to reach out to your line manager or buddies for assistance.\", additional_kwargs={}),\n", + " ChatMessage(role=, content=\"that link doesn't seem to be right. the project tracker is in the Hut23 GitHub repo\", additional_kwargs={}),\n", + " ChatMessage(role=, content=' Oh dear! It looks like I made a mistake. Thank you for letting me know.\\n\\nThe REG project tracker is indeed located in the Hut23 GitHub repository, and you can access it by following these steps:\\n\\n1. Go to the Hut23 GitHub repository: \\n2. Click on the \"Projects\" tab in the top navigation menu.\\n3. You will see a list of all upcoming, current, and completed projects in REG, along with information about each project\\'s status and the issue number in the Hut23 repository.\\n\\nYou can use this tracker to express preferences on upcoming projects and track progress. 
If you have any questions or need help finding something, feel free to reach out to your line manager or buddies for assistance.', additional_kwargs={})]" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chat_engine.chat_history" + ] + }, + { + "cell_type": "markdown", + "id": "c9fabedd-5925-420b-8f08-0b795c37de2a", + "metadata": {}, + "source": [ + "## Context chat example for obtaining starting salary" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "68bb3d04-5e75-494f-889e-b58429ef5d0c", + "metadata": {}, + "outputs": [], + "source": [ + "chat_engine = index.as_chat_engine(\n", + " chat_mode=\"context\",\n", + " response_mode=response_mode,\n", + " similarity_top_k=similarity_top_k,\n", + " system_prompt=system_prompt,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "162878b8-c074-4fba-814e-1ef4ee35d37b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Based on the provided context, the starting salary for a Standard RSE in REG is £40,000. This is the bottom third baseline for the Standard band, which is £40,000 - £48,491.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 41.39 ms / 59 runs ( 0.70 ms per token, 1425.47 tokens per second)\n", + "llama_print_timings: prompt eval time = 50427.23 ms / 1866 tokens ( 27.02 ms per token, 37.00 tokens per second)\n", + "llama_print_timings: eval time = 7028.91 ms / 58 runs ( 121.19 ms per token, 8.25 tokens per second)\n", + "llama_print_timings: total time = 57572.15 ms\n" + ] + } + ], + "source": [ + "response = chat_engine.chat(\n", + " \"what is the starting salary for a standard RSE in REG?\"\n", + ")\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "dcf979f0-e91e-4b40-b8d9-82e5836e6d70", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Yes, the starting salary of £40,000 is for the 2023/24 financial year, as mentioned in the context. The cost of living increase for 2023 is 5%, and the cumulative cost of living increase from 2020 to 2023 is 20.74%.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 55.32 ms / 79 runs ( 0.70 ms per token, 1427.95 tokens per second)\n", + "llama_print_timings: prompt eval time = 34095.64 ms / 1427 tokens ( 23.89 ms per token, 41.85 tokens per second)\n", + "llama_print_timings: eval time = 8928.90 ms / 78 runs ( 114.47 ms per token, 8.74 tokens per second)\n", + "llama_print_timings: total time = 43178.32 ms\n" + ] + } + ], + "source": [ + "response = chat_engine.chat(\n", + " \"is that for 2023/24?\"\n", + ")\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "8e723487-eacc-48f6-8643-a67bc5f8fa4c", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Certainly! 
Here's the table for the 2023/24 salaries based on the provided context:\n", + "\n", + "| Role | Band | Role Salary Min | Role Salary Max | Bottom Third Baseline | Middle Third Baseline | Top Third Baseline |\n", + "| --- | --- | --- | --- | --- | --- | --- |\n", + "| Principal | 6 | £73,526 | £84,488 | £73,526 | £77,180 | £80,834 |\n", + "| Lead | 5 | £62,666 | £73,297 | £62,666 | £66,210 | £69,754 |\n", + "| Senior | 4 | £51,476 | £62,108 | £51,476 | £55,020 | £58,564 |\n", + "| Standard | 3b* | £42,000 | £50,916 | £42,000 | £44,972 | £47,944 |\n", + "| Junior | 3a* | £38,048 | £39,900 | £38,048 | £38,665 | £39,283 |\n", + "\n", + "Note that the table only shows the salary ranges for the REG roles, as the other roles are not relevant to the context. Also, the bottom third baseline is the starting salary for a new hire in the role, while the middle and top third baselines represent the salary progression for existing employees based on their performance and experience.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 282.04 ms / 396 runs ( 0.71 ms per token, 1404.07 tokens per second)\n", + "llama_print_timings: prompt eval time = 59088.33 ms / 2057 tokens ( 28.73 ms per token, 34.81 tokens per second)\n", + "llama_print_timings: eval time = 50284.80 ms / 395 runs ( 127.30 ms per token, 7.86 tokens per second)\n", + "llama_print_timings: total time = 110246.08 ms\n" + ] + } + ], + "source": [ + "response = chat_engine.chat(\n", + " \"can you show me the table for the 2023/24 salaries?\"\n", + ")\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "id": "605acfbd", + "metadata": {}, + "source": [ + "## \"React\" chat engine" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "8be62e6f", + "metadata": {}, + "outputs": [], + "source": [ + "chat_engine = index.as_chat_engine(chat_mode=\"react\",\n", + " response_mode=response_mode,\n", + " similarity_top_k=similarity_top_k,\n", + " verbose=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "6f55aa2f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;200m\u001b[1;3mThought: I need to use a tool to help me answer the question.\n", + "Action: query_engine_tool\n", + "Action Input: {'input': 'What should a new starter in REG do?'}\n", + "\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 39.60 ms / 56 runs ( 0.71 ms per token, 1414.21 tokens per second)\n", + "llama_print_timings: prompt eval time = 7363.70 ms / 441 tokens ( 16.70 ms per token, 59.89 tokens per second)\n", + "llama_print_timings: eval time = 5465.05 ms / 55 runs ( 99.36 ms per token, 10.06 tokens per second)\n", + "llama_print_timings: total time = 12942.65 ms\n", + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36;1m\u001b[1;3mObservation: As a new starter in REG, you should:\n", + "\n", + "1. Attend buddy meetings with your assigned buddies to get familiarized with the team and ask any questions you may have.\n", + "2. Attend HR induction and IT induction meetings to complete necessary paperwork and set up accounts.\n", + "3. 
Meet with your line manager to discuss your role, responsibilities, and project assignments.\n", + "4. Shadow meetings across the group to get a feel for how REG works and meet people.\n", + "5. Complete all necessary forms and tasks on Cezanne, including updating personal details, completing health and safety forms, and signing the \"Right to Work\" document.\n", + "6. Request a British Library pass to access the office.\n", + "7. Read about health and dental insurance options and decide whether to sign up.\n", + "8. Check the Turing Benefits site for discounts and benefits.\n", + "9. Provide a description for your profile on the ATI website.\n", + "10. Verify your MoorePay account for payslips.\n", + "\n", + "It is also recommended that you:\n", + "\n", + "* Join in for welcome coffee(s) to introduce yourself to the whole REG team.\n", + "* Attend 1-on-1 meetings with REG's Director within the first few weeks of starting.\n", + "* Use the first few days to set up your laptop and tools, get familiarized with the internal wiki and handbook, and shadow meetings.\n", + "\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 228.08 ms / 320 runs ( 0.71 ms per token, 1403.03 tokens per second)\n", + "llama_print_timings: prompt eval time = 29339.14 ms / 1282 tokens ( 22.89 ms per token, 43.70 tokens per second)\n", + "llama_print_timings: eval time = 36476.99 ms / 319 runs ( 114.35 ms per token, 8.75 tokens per second)\n", + "llama_print_timings: total time = 66512.02 ms\n", + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;200m\u001b[1;3mResponse: As a new starter in REG, you should attend buddy meetings with your assigned buddies to get familiarized with the team and ask any questions you may have. You should also attend HR induction and IT induction meetings, meet with your line manager to discuss your role and project assignments, shadow meetings across the group, complete all necessary forms and tasks on Cezanne, request a British Library pass, read about health and dental insurance options, check the Turing Benefits site for discounts and benefits, provide a description for your profile on the ATI website, and verify your MoorePay account for payslips. Additionally, you should join in for welcome coffee(s) to introduce yourself to the whole REG team, attend 1-on-1 meetings with REG's Director within the first few weeks of starting, and use the first few days to set up your laptop and tools, get familiarized with the internal wiki and handbook, and shadow meetings.\n", + "\u001b[0mAs a new starter in REG, you should attend buddy meetings with your assigned buddies to get familiarized with the team and ask any questions you may have. You should also attend HR induction and IT induction meetings, meet with your line manager to discuss your role and project assignments, shadow meetings across the group, complete all necessary forms and tasks on Cezanne, request a British Library pass, read about health and dental insurance options, check the Turing Benefits site for discounts and benefits, provide a description for your profile on the ATI website, and verify your MoorePay account for payslips. 
Additionally, you should join in for welcome coffee(s) to introduce yourself to the whole REG team, attend 1-on-1 meetings with REG's Director within the first few weeks of starting, and use the first few days to set up your laptop and tools, get familiarized with the internal wiki and handbook, and shadow meetings.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 161.03 ms / 230 runs ( 0.70 ms per token, 1428.29 tokens per second)\n", + "llama_print_timings: prompt eval time = 15762.62 ms / 817 tokens ( 19.29 ms per token, 51.83 tokens per second)\n", + "llama_print_timings: eval time = 24342.82 ms / 229 runs ( 106.30 ms per token, 9.41 tokens per second)\n", + "llama_print_timings: total time = 40567.23 ms\n" + ] + } + ], + "source": [ + "response = chat_engine.chat(\n", + " \"what should a new starter in REG do?\"\n", + ")\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "a82115a1", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;200m\u001b[1;3mResponse: You asked me: what should a new starter in REG do?\n", + "\u001b[0m You asked me: what should a new starter in REG do?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 11.91 ms / 17 runs ( 0.70 ms per token, 1426.89 tokens per second)\n", + "llama_print_timings: prompt eval time = 4737.11 ms / 231 tokens ( 20.51 ms per token, 48.76 tokens per second)\n", + "llama_print_timings: eval time = 1635.97 ms / 16 runs ( 102.25 ms per token, 9.78 tokens per second)\n", + "llama_print_timings: total time = 6406.09 ms\n" + ] + } + ], + "source": [ + "response = chat_engine.chat(\"What did I ask you before?\")\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "f7dd6b2e", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;200m\u001b[1;3mResponse: No, I have not used the query engine yet.\n", + "\u001b[0m No, I have not used the query engine yet.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 9.11 ms / 13 runs ( 0.70 ms per token, 1427.00 tokens per second)\n", + "llama_print_timings: prompt eval time = 980.09 ms / 36 tokens ( 27.22 ms per token, 36.73 tokens per second)\n", + "llama_print_timings: eval time = 1232.60 ms / 12 runs ( 102.72 ms per token, 9.74 tokens per second)\n", + "llama_print_timings: total time = 2237.77 ms\n" + ] + } + ], + "source": [ + "response = chat_engine.chat(\"Have you used the query engine yet?\")\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "7a424919", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;200m\u001b[1;3mResponse: You have asked me:\n", + "\n", + "1. What should a new starter in REG do?\n", + "2. 
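+  {
+   "cell_type": "markdown",
+   "id": "dd40e0f1",
+   "metadata": {},
+   "source": [
+    "Under `chat_mode=\"react\"` the index wraps its own query engine in a tool (the `query_engine_tool` visible in the traces above) and hands it to a ReAct agent, which decides per message whether to call it; memory questions like the ones above are answered directly without a tool call. A roughly equivalent explicit construction, with the tool description being an assumption rather than anything taken from the notebook:\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dd40e0f2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.agent import ReActAgent\n",
+    "from llama_index.tools import QueryEngineTool\n",
+    "\n",
+    "# The description below is illustrative; chat_mode=\"react\" builds an\n",
+    "# equivalent tool automatically.\n",
+    "tool = QueryEngineTool.from_defaults(\n",
+    "    query_engine=query_engine,\n",
+    "    name=\"query_engine_tool\",\n",
+    "    description=\"Answers questions about the REG handbook and wiki.\",\n",
+    ")\n",
+    "agent = ReActAgent.from_tools([tool], llm=llm, verbose=True)"
+   ]
+  },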
Have you used the query engine yet?\n", + "\u001b[0m You have asked me:\n", + "\n", + "1. What should a new starter in REG do?\n", + "2. Have you used the query engine yet?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 23.11 ms / 33 runs ( 0.70 ms per token, 1427.83 tokens per second)\n", + "llama_print_timings: prompt eval time = 694.77 ms / 32 tokens ( 21.71 ms per token, 46.06 tokens per second)\n", + "llama_print_timings: eval time = 3312.30 ms / 32 runs ( 103.51 ms per token, 9.66 tokens per second)\n", + "llama_print_timings: total time = 4071.13 ms\n" + ] + } + ], + "source": [ + "response = chat_engine.chat(\"What have I asked you so far?\")\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "id": "5ebd4646", + "metadata": {}, + "source": [ + "Reset chat engine..." + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "d27686bb", + "metadata": {}, + "outputs": [], + "source": [ + "chat_engine.reset()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "f67d46e6", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;200m\u001b[1;3mThought: I need to use a tool to help me answer the question.\n", + "Action: query_engine_tool\n", + "Action Input: {'input': 'What did I ask you before?'}\n", + "\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 42.02 ms / 60 runs ( 0.70 ms per token, 1427.76 tokens per second)\n", + "llama_print_timings: prompt eval time = 382.90 ms / 11 tokens ( 34.81 ms per token, 28.73 tokens per second)\n", + "llama_print_timings: eval time = 5846.63 ms / 59 runs ( 99.10 ms per token, 10.09 tokens per second)\n", + "llama_print_timings: total time = 6345.72 ms\n", + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36;1m\u001b[1;3mObservation: Based on the current context information provided, you have not asked me any questions before.\n", + "\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 13.35 ms / 19 runs ( 0.70 ms per token, 1423.54 tokens per second)\n", + "llama_print_timings: prompt eval time = 1406.31 ms / 102 tokens ( 13.79 ms per token, 72.53 tokens per second)\n", + "llama_print_timings: eval time = 1687.97 ms / 18 runs ( 93.78 ms per token, 10.66 tokens per second)\n", + "llama_print_timings: total time = 3130.96 ms\n", + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;200m\u001b[1;3mThought: Hmm, that's correct. 
Let me try again.\n", + "Action: query_engine_tool\n", + "Action Input: {'input': 'What is the purpose of this conversation?'}\n", + "\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 30.17 ms / 43 runs ( 0.70 ms per token, 1425.35 tokens per second)\n", + "llama_print_timings: prompt eval time = 8934.27 ms / 508 tokens ( 17.59 ms per token, 56.86 tokens per second)\n", + "llama_print_timings: eval time = 4208.91 ms / 42 runs ( 100.21 ms per token, 9.98 tokens per second)\n", + "llama_print_timings: total time = 13226.36 ms\n", + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36;1m\u001b[1;3mObservation: Based on the context information provided, the purpose of this conversation is to discuss and share information related to technology, specifically about projects, data science, computer science, and software engineering. The conversation may also be used as an opportunity to seek help and input from others.\n", + "\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 38.51 ms / 55 runs ( 0.70 ms per token, 1428.16 tokens per second)\n", + "llama_print_timings: prompt eval time = 3965.45 ms / 274 tokens ( 14.47 ms per token, 69.10 tokens per second)\n", + "llama_print_timings: eval time = 5213.82 ms / 54 runs ( 96.55 ms per token, 10.36 tokens per second)\n", + "llama_print_timings: total time = 9286.15 ms\n", + "Llama.generate: prefix-match hit\n", + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 16.80 ms / 24 runs ( 0.70 ms per token, 1428.74 tokens per second)\n", + "llama_print_timings: prompt eval time = 11186.44 ms / 617 tokens ( 18.13 ms per token, 55.16 tokens per second)\n", + "llama_print_timings: eval time = 2336.65 ms / 23 runs ( 101.59 ms per token, 9.84 tokens per second)\n", + "llama_print_timings: total time = 13570.41 ms\n" + ] + }, + { + "ename": "ValueError", + "evalue": "Could not parse output: Thought: Ah, I see. 
That's helpful to know.\nAction: None (for now)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/llama_index/llama_index/agent/react/base.py:124\u001b[0m, in \u001b[0;36mReActAgent._extract_reasoning_step\u001b[0;34m(self, output)\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 124\u001b[0m reasoning_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_output_parser\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmessage_content\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 125\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n", + "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/llama_index/llama_index/agent/react/output_parser.py:77\u001b[0m, in \u001b[0;36mReActOutputParser.parse\u001b[0;34m(self, output)\u001b[0m\n\u001b[1;32m 76\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAction:\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m output:\n\u001b[0;32m---> 77\u001b[0m thought, action, action_input \u001b[38;5;241m=\u001b[39m \u001b[43mextract_tool_use\u001b[49m\u001b[43m(\u001b[49m\u001b[43moutput\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 78\u001b[0m json_str \u001b[38;5;241m=\u001b[39m extract_json_str(action_input)\n", + "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/llama_index/llama_index/agent/react/output_parser.py:22\u001b[0m, in \u001b[0;36mextract_tool_use\u001b[0;34m(input_text)\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m match:\n\u001b[0;32m---> 22\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 23\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not extract tool use from input text: \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(input_text)\n\u001b[1;32m 24\u001b[0m )\n\u001b[1;32m 26\u001b[0m thought \u001b[38;5;241m=\u001b[39m match\u001b[38;5;241m.\u001b[39mgroup(\u001b[38;5;241m1\u001b[39m)\u001b[38;5;241m.\u001b[39mstrip()\n", + "\u001b[0;31mValueError\u001b[0m: Could not extract tool use from input text: Thought: Ah, I see. 
That's helpful to know.\nAction: None (for now)", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[45], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mchat_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mWhat did I ask you before?\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(response)\n", + "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/llama_index/llama_index/callbacks/utils.py:38\u001b[0m, in \u001b[0;36mtrace_method..decorator..wrapper\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 36\u001b[0m callback_manager \u001b[38;5;241m=\u001b[39m cast(CallbackManager, callback_manager)\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m callback_manager\u001b[38;5;241m.\u001b[39mas_trace(trace_id):\n\u001b[0;32m---> 38\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/llama_index/llama_index/agent/react/base.py:228\u001b[0m, in \u001b[0;36mReActAgent.chat\u001b[0;34m(self, message, chat_history)\u001b[0m\n\u001b[1;32m 226\u001b[0m chat_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_llm\u001b[38;5;241m.\u001b[39mchat(input_chat)\n\u001b[1;32m 227\u001b[0m \u001b[38;5;66;03m# given react prompt outputs, call tools or return response\u001b[39;00m\n\u001b[0;32m--> 228\u001b[0m reasoning_steps, is_done \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_process_actions\u001b[49m\u001b[43m(\u001b[49m\u001b[43moutput\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchat_response\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 229\u001b[0m current_reasoning\u001b[38;5;241m.\u001b[39mextend(reasoning_steps)\n\u001b[1;32m 230\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_done:\n", + "File \u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/llama_index/llama_index/agent/react/base.py:143\u001b[0m, in \u001b[0;36mReActAgent._process_actions\u001b[0;34m(self, output)\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_process_actions\u001b[39m(\n\u001b[1;32m 141\u001b[0m \u001b[38;5;28mself\u001b[39m, output: ChatResponse\n\u001b[1;32m 142\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Tuple[List[BaseReasoningStep], \u001b[38;5;28mbool\u001b[39m]:\n\u001b[0;32m--> 143\u001b[0m _, current_reasoning, is_done \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_extract_reasoning_step\u001b[49m\u001b[43m(\u001b[49m\u001b[43moutput\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_done:\n\u001b[1;32m 146\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m current_reasoning, \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "File 
\u001b[0;32m~/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/llama_index/llama_index/agent/react/base.py:126\u001b[0m, in \u001b[0;36mReActAgent._extract_reasoning_step\u001b[0;34m(self, output)\u001b[0m\n\u001b[1;32m 124\u001b[0m reasoning_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_output_parser\u001b[38;5;241m.\u001b[39mparse(message_content)\n\u001b[1;32m 125\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m--> 126\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not parse output: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage_content\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexc\u001b[39;00m\n\u001b[1;32m 127\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_verbose:\n\u001b[1;32m 128\u001b[0m print_text(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mreasoning_step\u001b[38;5;241m.\u001b[39mget_content()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, color\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpink\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", "\u001b[0;31mValueError\u001b[0m: Could not parse output: Thought: Ah, I see. That's helpful to know.\nAction: None (for now)" ] } ], "source": [ "response = chat_engine.chat(\"What did I ask you before?\")\n", "print(response)" ] }, { "cell_type": "markdown", "id": "f44ac4c6", "metadata": {}, "source": [ "## ReAct engine and asking it to use the query engine\n", "\n", "We saw above that the ReAct engine didn't use the query engine; maybe we can force it to use it by asking explicitly...
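As a minimal sketch of a more direct route (not run in this notebook, and assuming the `index`, `llm` (the `LlamaCPP` instance) and `similarity_top_k` objects from earlier cells; the tool name and description here are illustrative), one could build the ReAct agent explicitly so the query engine is its only tool, and catch the `ValueError` raised when the model emits an unparseable step such as `Action: None`:

```python
# Sketch only, assuming `index`, `llm` and `similarity_top_k` from earlier cells.
from llama_index.agent import ReActAgent
from llama_index.tools import QueryEngineTool, ToolMetadata

# Wrap the index's query engine as an explicit tool for the agent.
query_engine = index.as_query_engine(similarity_top_k=similarity_top_k)
query_tool = QueryEngineTool(
    query_engine=query_engine,
    metadata=ToolMetadata(
        name="query_engine_tool",  # illustrative name, matching the Action logs above
        description="Answers questions about the REG handbook and wiki",
    ),
)

# Build the ReAct agent with the query engine as its only tool.
agent = ReActAgent.from_tools([query_tool], llm=llm, verbose=True)

try:
    response = agent.chat("What should a new starter in REG do?")
except ValueError:
    # The quantised 13B chat model sometimes emits steps the ReAct output
    # parser cannot handle (e.g. "Action: None"), as in the traceback above;
    # fall back to querying the index directly.
    response = query_engine.query("What should a new starter in REG do?")
print(response)
```

Giving the agent a single, explicitly described tool narrows its choices, but as the cells below show, simply asking it to use the query engine can also work.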
+ ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "f6374408", + "metadata": {}, + "outputs": [], + "source": [ + "chat_engine = index.as_chat_engine(chat_mode=\"react\",\n", + " response_mode=response_mode,\n", + " similarity_top_k=similarity_top_k,\n", + " verbose=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "006f178e", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;200m\u001b[1;3mThought: I need to use a tool to help me answer the question.\n", + "Action: query_engine_tool\n", + "Action Input: {'input': 'What should a new starter in the research engineering group do?'}\n", + "\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 43.88 ms / 60 runs ( 0.73 ms per token, 1367.43 tokens per second)\n", + "llama_print_timings: prompt eval time = 866.61 ms / 23 tokens ( 37.68 ms per token, 26.54 tokens per second)\n", + "llama_print_timings: eval time = 5842.61 ms / 59 runs ( 99.03 ms per token, 10.10 tokens per second)\n", + "llama_print_timings: total time = 6827.89 ms\n", + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36;1m\u001b[1;3mObservation: Based on the provided context information, here are some suggestions for what a new starter in the Research Engineering Group (REG) should do:\n", + "\n", + "1. Familiarize yourself with the New Starter page to get an overview of the team's structure, roles, and key contacts.\n", + "2. Meet your buddies, who will provide informal friendly faces for advice, guidance, and encouragement on any aspect of working within REG and ARC. Your buddies should not be assigned to the projects you will be working on, and ideally, they should be at a similarly senior level to you.\n", + "3. Shadow projects for a short while to get an idea of how the team works.\n", + "4. Participate in \"Hacktoberfest\"-style issues to quickly get up to speed with the team's projects and get involved if there are any gaps in allocations.\n", + "5. Attend welcome coffee sessions to meet the team and get familiar with the group's culture and processes.\n", + "6. Check in with your buddies at least once in the first couple of weeks, and again a few weeks after, to discuss any pain points or concerns you may have.\n", + "7. Be open, honest, and respect confidentiality, and feel free to reach out to your buddies or other team members for technical pointers or questions.\n", + "\n", + "Remember that the buddy system is in place to help you get settled into your new role and provide support as needed. 
Don't hesitate to reach out to your buddies or other team members if you have any questions or need assistance.\n", + "\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 239.88 ms / 342 runs ( 0.70 ms per token, 1425.74 tokens per second)\n", + "llama_print_timings: prompt eval time = 31629.28 ms / 1354 tokens ( 23.36 ms per token, 42.81 tokens per second)\n", + "llama_print_timings: eval time = 39397.15 ms / 341 runs ( 115.53 ms per token, 8.66 tokens per second)\n", + "llama_print_timings: total time = 71716.37 ms\n", + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;200m\u001b[1;3mResponse: Based on the provided context information, here are some suggestions for what a new starter in the Research Engineering Group (REG) should do:\n", + "\u001b[0mBased on the provided context information, here are some suggestions for what a new starter in the Research Engineering Group (REG) should do:\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 251.04 ms / 358 runs ( 0.70 ms per token, 1426.08 tokens per second)\n", + "llama_print_timings: prompt eval time = 16593.54 ms / 849 tokens ( 19.54 ms per token, 51.16 tokens per second)\n", + "llama_print_timings: eval time = 38505.53 ms / 357 runs ( 107.86 ms per token, 9.27 tokens per second)\n", + "llama_print_timings: total time = 55835.46 ms\n" + ] + } + ], + "source": [ + "response = chat_engine.chat(\n", + " \"Please use the query engine. What should a new starter in the research engineering group do?\"\n", + ")\n", + "print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "ff81fbc8", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;200m\u001b[1;3mThought: I need to use a tool to help me answer the question.\n", + "Action: query_engine_tool\n", + "Action Input: {'input': 'What should a new starter in the REG team at the Turing Institute do?'}\n", + "\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 52.01 ms / 74 runs ( 0.70 ms per token, 1422.80 tokens per second)\n", + "llama_print_timings: prompt eval time = 1112.21 ms / 61 tokens ( 18.23 ms per token, 54.85 tokens per second)\n", + "llama_print_timings: eval time = 7314.27 ms / 73 runs ( 100.20 ms per token, 9.98 tokens per second)\n", + "llama_print_timings: total time = 8572.35 ms\n", + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36;1m\u001b[1;3mObservation: As a new starter in the REG team at the Turing Institute, you should:\n", + "\n", + "1. Expect to be assigned two buddies who will be friendly points of contact for you. Your buddies will welcome you on your first day and introduce you to the rest of the team.\n", + "2. Attend a welcome coffee on your first day to meet the whole REG team.\n", + "3. Have a 1-on-1 meeting with the REG Director within the first few weeks of starting.\n", + "4. 
Use the time before being assigned to a project to do admin tasks, set up your laptop and tools, get to know people, read the handbook and internal wiki, and shadow meetings.\n", + "5. Sign up for the buddy system to be matched with two REG buddies who can offer informal technical help and social support.\n", + "6. Review the getting started checklist and first few days pages for more information on what to expect and how to prepare.\n", + "7. Familiarize yourself with the REG wiki, which contains a repository of knowledge helpful to the Hut 23 team, including howtos and instructions for new joiners.\n", + "8. Review the salary bands for all REG roles, annual pay increases, and probation review information.\n", + "9. Understand the project process, service areas, and remote working policies.\n", + "10. Familiarize yourself with the equipment and regular events at the Turing Institute.\n", + "11. Take care of your wellbeing and EDI, and participate in team outputs and knowledge sharing.\n", + "\n", + "Remember to update the buddy sign-up sheet if you have any preferences for being a buddy or if you need to change your buddy assignment.\n", + "\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 260.69 ms / 372 runs ( 0.70 ms per token, 1426.95 tokens per second)\n", + "llama_print_timings: prompt eval time = 39191.95 ms / 1566 tokens ( 25.03 ms per token, 39.96 tokens per second)\n", + "llama_print_timings: eval time = 44213.05 ms / 371 runs ( 119.17 ms per token, 8.39 tokens per second)\n", + "llama_print_timings: total time = 84170.13 ms\n", + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;200m\u001b[1;3mResponse: Based on the provided information, a new starter in the REG team at the Turing Institute should take the following actions to prepare for their role and integrate into the team: attend a welcome coffee, schedule a 1-on-1 meeting with the REG Director, complete admin tasks, sign up for the buddy system, review the getting started checklist and first few days pages, familiarize themselves with the REG wiki, and take care of their wellbeing and EDI.\n", + "\u001b[0mBased on the provided information, a new starter in the REG team at the Turing Institute should take the following actions to prepare for their role and integrate into the team: attend a welcome coffee, schedule a 1-on-1 meeting with the REG Director, complete admin tasks, sign up for the buddy system, review the getting started checklist and first few days pages, familiarize themselves with the REG wiki, and take care of their wellbeing and EDI.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 9387.84 ms\n", + "llama_print_timings: sample time = 305.88 ms / 435 runs ( 0.70 ms per token, 1422.12 tokens per second)\n", + "llama_print_timings: prompt eval time = 19322.01 ms / 945 tokens ( 20.45 ms per token, 48.91 tokens per second)\n", + "llama_print_timings: eval time = 47720.26 ms / 434 runs ( 109.95 ms per token, 9.09 tokens per second)\n", + "llama_print_timings: total time = 67951.26 ms\n" + ] + } + ], + "source": [ + "response = chat_engine.chat(\n", + " \"I want to specifically know about a new starter in the REG team at the Turing institute\"\n", + ")\n", "print(response)" ] } From 17d7b1345d000caf3e6fea15784dd6b541abe5b5 Mon Sep 17 00:00:00 
2001 From: rchan Date: Fri, 8 Sep 2023 18:17:33 +0100 Subject: [PATCH 2/4] remove untitled notebooks --- models/llama-index-hack/Untitled.ipynb | 2682 ----------------------- models/llama-index-hack/Untitled1.ipynb | 33 - 2 files changed, 2715 deletions(-) delete mode 100644 models/llama-index-hack/Untitled.ipynb delete mode 100644 models/llama-index-hack/Untitled1.ipynb diff --git a/models/llama-index-hack/Untitled.ipynb b/models/llama-index-hack/Untitled.ipynb deleted file mode 100644 index 58f1a0ea..00000000 --- a/models/llama-index-hack/Untitled.ipynb +++ /dev/null @@ -1,2682 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "4efa0972", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n", - "\n", - "from llama_cpp import Llama\n", - "\n", - "from llama_index import (\n", - " SimpleDirectoryReader,\n", - " LangchainEmbedding,\n", - " VectorStoreIndex,\n", - " PromptHelper,\n", - " LLMPredictor,\n", - " ServiceContext,\n", - " Document\n", - ")\n", - "from llama_index.llms import LlamaCPP\n", - "from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "39695618", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'0.8.21'" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import llama_index\n", - "llama_index.__version__" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "2138e968", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/Users/rchan/Library/CloudStorage/OneDrive-TheAlanTuringInstitute/llama_index_proper/llama_index/__init__.py'" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "llama_index.__file__" - ] - }, - { - "cell_type": "markdown", - "id": "d8b085fc", - "metadata": {}, - "source": [ - "Note: notebook assumes that in the reginald directory, there is a `gguf_models/` folder. Here we've downloaded the quantized 4-bit version of Llama2-13b-chat from [`TheBloke/Llama-2-13B-chat-GGML`](https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML). \n", - "\n", - "Note that we're currently running a version of `llama-cpp-python` which no longer supports `ggmmlv3` model formats and has changed to `gguf`. 
We need to convert the above to `gguf` format using the `convert-llama-ggmlv3-to-gguf.py` script in [`llama.cpp`](https://github.com/ggerganov/llama.cpp).\n", - "\n", - "## Quick example with llama-cpp-python" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "3044b6b9", - "metadata": {}, - "outputs": [], - "source": [ - "llama_2_13b_chat_path = \"../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin\"" - ] - }, - { - "cell_type": "markdown", - "id": "cc1ad130", - "metadata": {}, - "source": [ - "## Using metal acceleration" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "21bee96c", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "llama_model_loader: loaded meta data with 18 key-value pairs and 363 tensors from ../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin (version GGUF V2 (latest))\n", - "llama_model_loader: - tensor 0: token_embd.weight q4_K [ 5120, 32000, 1, 1 ]\n", - "llama_model_loader: - tensor 1: output_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 2: output.weight q6_K [ 5120, 32000, 1, 1 ]\n", - "llama_model_loader: - tensor 3: blk.0.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 4: blk.0.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 5: blk.0.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 6: blk.0.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 7: blk.0.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 8: blk.0.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 9: blk.0.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 10: blk.0.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 11: blk.0.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 12: blk.1.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 13: blk.1.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 14: blk.1.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 15: blk.1.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 16: blk.1.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 17: blk.1.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 18: blk.1.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 19: blk.1.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 20: blk.1.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 21: blk.2.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 22: blk.2.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 23: blk.2.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 24: blk.2.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 25: blk.2.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 26: blk.2.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 27: blk.2.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 28: blk.2.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 29: blk.2.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 30: blk.3.attn_q.weight 
q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 31: blk.3.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 32: blk.3.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 33: blk.3.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 34: blk.3.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 35: blk.3.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 36: blk.3.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 37: blk.3.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 38: blk.3.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 39: blk.4.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 40: blk.4.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 41: blk.4.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 42: blk.4.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 43: blk.4.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 44: blk.4.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 45: blk.4.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 46: blk.4.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 47: blk.4.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 48: blk.5.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 49: blk.5.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 50: blk.5.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 51: blk.5.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 52: blk.5.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 53: blk.5.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 54: blk.5.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 55: blk.5.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 56: blk.5.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 57: blk.6.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 58: blk.6.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 59: blk.6.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 60: blk.6.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 61: blk.6.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 62: blk.6.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 63: blk.6.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 64: blk.6.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 65: blk.6.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 66: blk.7.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 67: blk.7.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 68: blk.7.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 69: blk.7.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 70: blk.7.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 71: 
blk.7.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 72: blk.7.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 73: blk.7.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 74: blk.7.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 75: blk.8.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 76: blk.8.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 77: blk.8.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 78: blk.8.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 79: blk.8.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 80: blk.8.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 81: blk.8.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 82: blk.8.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 83: blk.8.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 84: blk.9.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 85: blk.9.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 86: blk.9.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 87: blk.9.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 88: blk.9.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 89: blk.9.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 90: blk.9.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 91: blk.9.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 92: blk.9.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 93: blk.10.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 94: blk.10.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 95: blk.10.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 96: blk.10.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 97: blk.10.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 98: blk.10.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 99: blk.10.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 100: blk.10.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 101: blk.10.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 102: blk.11.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 103: blk.11.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 104: blk.11.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 105: blk.11.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 106: blk.11.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 107: blk.11.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 108: blk.11.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 109: blk.11.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 110: blk.11.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 111: blk.12.attn_q.weight q4_K [ 5120, 5120, 1, 
1 ]\n", - "llama_model_loader: - tensor 112: blk.12.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 113: blk.12.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 114: blk.12.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 115: blk.12.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 116: blk.12.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 117: blk.12.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 118: blk.12.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 119: blk.12.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 120: blk.13.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 121: blk.13.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 122: blk.13.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 123: blk.13.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 124: blk.13.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 125: blk.13.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 126: blk.13.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 127: blk.13.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 128: blk.13.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 129: blk.14.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 130: blk.14.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 131: blk.14.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 132: blk.14.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 133: blk.14.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 134: blk.14.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 135: blk.14.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 136: blk.14.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 137: blk.14.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 138: blk.15.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 139: blk.15.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 140: blk.15.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 141: blk.15.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 142: blk.15.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 143: blk.15.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 144: blk.15.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 145: blk.15.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 146: blk.15.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 147: blk.16.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 148: blk.16.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 149: blk.16.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 150: blk.16.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 151: blk.16.attn_norm.weight f32 [ 5120, 
1, 1, 1 ]\n", - "llama_model_loader: - tensor 152: blk.16.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 153: blk.16.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 154: blk.16.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 155: blk.16.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 156: blk.17.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 157: blk.17.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 158: blk.17.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 159: blk.17.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 160: blk.17.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 161: blk.17.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 162: blk.17.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 163: blk.17.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 164: blk.17.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 165: blk.18.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 166: blk.18.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 167: blk.18.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 168: blk.18.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 169: blk.18.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 170: blk.18.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 171: blk.18.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 172: blk.18.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 173: blk.18.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 174: blk.19.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 175: blk.19.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 176: blk.19.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 177: blk.19.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 178: blk.19.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 179: blk.19.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 180: blk.19.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 181: blk.19.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 182: blk.19.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 183: blk.20.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 184: blk.20.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 185: blk.20.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 186: blk.20.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 187: blk.20.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 188: blk.20.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 189: blk.20.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 190: blk.20.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 191: blk.20.ffn_norm.weight f32 [ 
5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 192: blk.21.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 193: blk.21.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 194: blk.21.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 195: blk.21.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 196: blk.21.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 197: blk.21.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 198: blk.21.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 199: blk.21.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 200: blk.21.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 201: blk.22.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 202: blk.22.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 203: blk.22.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 204: blk.22.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 205: blk.22.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 206: blk.22.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 207: blk.22.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 208: blk.22.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 209: blk.22.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 210: blk.23.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 211: blk.23.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 212: blk.23.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 213: blk.23.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 214: blk.23.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 215: blk.23.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 216: blk.23.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 217: blk.23.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 218: blk.23.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 219: blk.24.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 220: blk.24.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 221: blk.24.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 222: blk.24.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 223: blk.24.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 224: blk.24.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 225: blk.24.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 226: blk.24.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 227: blk.24.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 228: blk.25.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 229: blk.25.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 230: blk.25.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 231: blk.25.attn_output.weight 
q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 232: blk.25.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 233: blk.25.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 234: blk.25.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 235: blk.25.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 236: blk.25.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 237: blk.26.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 238: blk.26.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 239: blk.26.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 240: blk.26.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 241: blk.26.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 242: blk.26.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 243: blk.26.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 244: blk.26.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 245: blk.26.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 246: blk.27.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 247: blk.27.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 248: blk.27.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 249: blk.27.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 250: blk.27.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 251: blk.27.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 252: blk.27.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 253: blk.27.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 254: blk.27.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 255: blk.28.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 256: blk.28.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 257: blk.28.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 258: blk.28.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 259: blk.28.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 260: blk.28.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 261: blk.28.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 262: blk.28.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 263: blk.28.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 264: blk.29.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 265: blk.29.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 266: blk.29.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 267: blk.29.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 268: blk.29.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 269: blk.29.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 270: blk.29.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 271: 
blk.29.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 272: blk.29.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 273: blk.30.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 274: blk.30.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 275: blk.30.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 276: blk.30.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 277: blk.30.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 278: blk.30.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 279: blk.30.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 280: blk.30.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 281: blk.30.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 282: blk.31.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 283: blk.31.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 284: blk.31.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 285: blk.31.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 286: blk.31.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 287: blk.31.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 288: blk.31.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 289: blk.31.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 290: blk.31.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 291: blk.32.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 292: blk.32.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 293: blk.32.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 294: blk.32.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 295: blk.32.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 296: blk.32.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 297: blk.32.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 298: blk.32.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 299: blk.32.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 300: blk.33.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 301: blk.33.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 302: blk.33.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 303: blk.33.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 304: blk.33.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 305: blk.33.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 306: blk.33.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 307: blk.33.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 308: blk.33.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 309: blk.34.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 310: blk.34.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 311: 
blk.34.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 312: blk.34.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 313: blk.34.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 314: blk.34.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 315: blk.34.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 316: blk.34.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 317: blk.34.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 318: blk.35.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 319: blk.35.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 320: blk.35.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 321: blk.35.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 322: blk.35.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 323: blk.35.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 324: blk.35.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 325: blk.35.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 326: blk.35.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 327: blk.36.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 328: blk.36.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 329: blk.36.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 330: blk.36.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 331: blk.36.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 332: blk.36.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 333: blk.36.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 334: blk.36.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 335: blk.36.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 336: blk.37.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 337: blk.37.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 338: blk.37.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 339: blk.37.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 340: blk.37.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 341: blk.37.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 342: blk.37.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 343: blk.37.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 344: blk.37.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 345: blk.38.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 346: blk.38.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 347: blk.38.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 348: blk.38.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 349: blk.38.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 350: blk.38.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - 
tensor 351: blk.38.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 352: blk.38.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 353: blk.38.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 354: blk.39.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 355: blk.39.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 356: blk.39.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 357: blk.39.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 358: blk.39.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 359: blk.39.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 360: blk.39.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 361: blk.39.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 362: blk.39.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - kv 0: general.architecture str \n", - "llama_model_loader: - kv 1: general.name str \n", - "llama_model_loader: - kv 2: general.description str \n", - "llama_model_loader: - kv 3: llama.context_length u32 \n", - "llama_model_loader: - kv 4: llama.embedding_length u32 \n", - "llama_model_loader: - kv 5: llama.block_count u32 \n", - "llama_model_loader: - kv 6: llama.feed_forward_length u32 \n", - "llama_model_loader: - kv 7: llama.rope.dimension_count u32 \n", - "llama_model_loader: - kv 8: llama.attention.head_count u32 \n", - "llama_model_loader: - kv 9: llama.attention.head_count_kv u32 \n", - "llama_model_loader: - kv 10: llama.attention.layer_norm_rms_epsilon f32 \n", - "llama_model_loader: - kv 11: tokenizer.ggml.model str \n", - "llama_model_loader: - kv 12: tokenizer.ggml.tokens arr \n", - "llama_model_loader: - kv 13: tokenizer.ggml.scores arr \n", - "llama_model_loader: - kv 14: tokenizer.ggml.token_type arr \n", - "llama_model_loader: - kv 15: tokenizer.ggml.unknown_token_id u32 \n", - "llama_model_loader: - kv 16: tokenizer.ggml.bos_token_id u32 \n", - "llama_model_loader: - kv 17: tokenizer.ggml.eos_token_id u32 \n", - "llama_model_loader: - type f32: 81 tensors\n", - "llama_model_loader: - type q4_K: 281 tensors\n", - "llama_model_loader: - type q6_K: 1 tensors\n", - "llm_load_print_meta: format = GGUF V2 (latest)\n", - "llm_load_print_meta: arch = llama\n", - "llm_load_print_meta: vocab type = SPM\n", - "llm_load_print_meta: n_vocab = 32000\n", - "llm_load_print_meta: n_merges = 0\n", - "llm_load_print_meta: n_ctx_train = 2048\n", - "llm_load_print_meta: n_ctx = 512\n", - "llm_load_print_meta: n_embd = 5120\n", - "llm_load_print_meta: n_head = 40\n", - "llm_load_print_meta: n_head_kv = 40\n", - "llm_load_print_meta: n_layer = 40\n", - "llm_load_print_meta: n_rot = 128\n", - "llm_load_print_meta: n_gqa = 1\n", - "llm_load_print_meta: f_norm_eps = 1.0e-05\n", - "llm_load_print_meta: f_norm_rms_eps = 5.0e-06\n", - "llm_load_print_meta: n_ff = 13824\n", - "llm_load_print_meta: freq_base = 10000.0\n", - "llm_load_print_meta: freq_scale = 1\n", - "llm_load_print_meta: model type = 13B\n", - "llm_load_print_meta: model ftype = mostly Q4_K - Medium (guessed)\n", - "llm_load_print_meta: model size = 13.02 B\n", - "llm_load_print_meta: general.name = llama-2-13b-chat.ggmlv3.q4_K_S.bin\n", - "llm_load_print_meta: BOS token = 1 ''\n", - "llm_load_print_meta: EOS token = 2 ''\n", - "llm_load_print_meta: UNK token = 0 ''\n", - 
"llm_load_print_meta: LF token = 13 '<0x0A>'\n", - "llm_load_tensors: ggml ctx size = 0.12 MB\n", - "llm_load_tensors: mem required = 7024.01 MB (+ 400.00 MB per state)\n", - "...................................................................................................\n", - "llama_new_context_with_model: kv self size = 400.00 MB\n", - "ggml_metal_init: allocating\n", - "ggml_metal_init: loading '/Users/rchan/opt/miniconda3/envs/reginald/lib/python3.11/site-packages/llama_cpp/ggml-metal.metal'\n", - "ggml_metal_init: loaded kernel_add 0x11cba37b0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_add_row 0x11cba3a10 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul 0x11cba3c70 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_row 0x11cba3ed0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_scale 0x11cba4130 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_silu 0x11cba4390 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_relu 0x11cba45f0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_gelu 0x11cba4850 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_soft_max 0x11cba4ab0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_diag_mask_inf 0x11cba4d10 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_get_rows_f16 0x11cba4f70 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_get_rows_q4_0 0x11cba51d0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_get_rows_q4_1 0x11cba5430 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_get_rows_q8_0 0x11cba5690 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_get_rows_q2_K 0x11cba58f0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_get_rows_q3_K 0x11cba5b50 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_get_rows_q4_K 0x11cba5db0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_get_rows_q5_K 0x11cba6010 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_get_rows_q6_K 0x11cba6270 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_rms_norm 0x11cba64d0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_norm 0x11cba6730 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x11cba6d20 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x11cba71c0 | th_max = 896 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x11cba7780 | th_max = 896 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mat_q8_0_f32 0x11cba7d40 | th_max = 768 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x11cba8300 | th_max = 640 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x11cba88c0 | th_max = 704 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x11cba9080 | th_max = 576 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x11cba98a0 | th_max = 576 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x11cba9e60 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mm_f16_f32 0x11cbaa460 | th_max = 768 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mm_q4_0_f32 0x11cbaaa60 | th_max = 768 | th_width = 32\n", - "ggml_metal_init: loaded 
kernel_mul_mm_q8_0_f32 0x11cbab060 | th_max = 768 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mm_q4_1_f32 0x11cbab660 | th_max = 768 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mm_q2_K_f32 0x11cbabc60 | th_max = 768 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mm_q3_K_f32 0x11cbac260 | th_max = 768 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mm_q4_K_f32 0x11cbac860 | th_max = 768 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mm_q5_K_f32 0x11cbace60 | th_max = 704 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mm_q6_K_f32 0x11cbad460 | th_max = 704 | th_width = 32\n", - "ggml_metal_init: loaded kernel_rope 0x11cbad7e0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_alibi_f32 0x11cbadf00 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_cpy_f32_f16 0x11cbae5f0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_cpy_f32_f32 0x11cbaece0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_cpy_f16_f16 0x11cbaf3d0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", - "ggml_metal_init: hasUnifiedMemory = true\n", - "ggml_metal_init: maxTransferRate = built-in GPU\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "llama_new_context_with_model: compute buffer total size = 91.47 MB\n", - "llama_new_context_with_model: max tensor size = 128.17 MB\n", - "ggml_metal_add_buffer: allocated 'data ' buffer, size = 7024.61 MB, ( 7025.05 / 21845.34)\n", - "ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1.48 MB, ( 7026.53 / 21845.34)\n", - "ggml_metal_add_buffer: allocated 'kv ' buffer, size = 402.00 MB, ( 7428.53 / 21845.34)\n", - "ggml_metal_add_buffer: allocated 'alloc ' buffer, size = 90.02 MB, ( 7518.55 / 21845.34)\n", - "AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | \n" - ] - } - ], - "source": [ - "llm = Llama(model_path=llama_2_13b_chat_path, n_gpu_layers=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "885156d8", - "metadata": {}, - "outputs": [], - "source": [ - "prompt_example = \"Name all the planets in the solar system and state their distances to the sun\"" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "4bee457a", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "llama_print_timings: load time = 610.65 ms\n", - "llama_print_timings: sample time = 196.05 ms / 268 runs ( 0.73 ms per token, 1367.01 tokens per second)\n", - "llama_print_timings: prompt eval time = 610.63 ms / 17 tokens ( 35.92 ms per token, 27.84 tokens per second)\n", - "llama_print_timings: eval time = 14795.14 ms / 267 runs ( 55.41 ms per token, 18.05 tokens per second)\n", - "llama_print_timings: total time = 15977.86 ms\n" - ] - } - ], - "source": [ - "output = llm(prompt_example,\n", - " max_tokens=512,\n", - " echo=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "acef5902", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'id': 'cmpl-0026dd42-1853-4a6c-8e46-5a6e71740986', 'object': 'text_completion', 'created': 1694121339, 'model': '../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin', 'choices': [{'text': \"Name all the planets in the solar system 
and state their distances to the sun.\\n\\nThere are eight planets in the solar system: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune. Here is a list of each planet along with its distance from the Sun (in astronomical units or AU):\\n\\n1. Mercury - 0.4 AU (very close to the Sun)\\n2. Venus - 1.0 AU (just inside Earth's orbit)\\n3. Earth - 1.0 AU (the distance from the Earth to the Sun is called an astronomical unit, or AU)\\n4. Mars - 1.6 AU (about 1.5 times the distance from the Earth to the Sun)\\n5. Jupiter - 5.2 AU (about 5 times the distance from the Earth to the Sun)\\n6. Saturn - 9.5 AU (almost twice the distance from the Earth to the Sun)\\n7. Uranus - 19.0 AU (about 4 times the distance from the Earth to the Sun)\\n8. Neptune - 30.1 AU (more than 3 times the distance from the Earth to the Sun)\", 'index': 0, 'logprobs': None, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 17, 'completion_tokens': 267, 'total_tokens': 284}}\n" - ] - } - ], - "source": [ - "print(output)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "6f1d16ea", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Name all the planets in the solar system and state their distances to the sun.\n", - "\n", - "There are eight planets in the solar system: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune. Here is a list of each planet along with its distance from the Sun (in astronomical units or AU):\n", - "\n", - "1. Mercury - 0.4 AU (very close to the Sun)\n", - "2. Venus - 1.0 AU (just inside Earth's orbit)\n", - "3. Earth - 1.0 AU (the distance from the Earth to the Sun is called an astronomical unit, or AU)\n", - "4. Mars - 1.6 AU (about 1.5 times the distance from the Earth to the Sun)\n", - "5. Jupiter - 5.2 AU (about 5 times the distance from the Earth to the Sun)\n", - "6. Saturn - 9.5 AU (almost twice the distance from the Earth to the Sun)\n", - "7. Uranus - 19.0 AU (about 4 times the distance from the Earth to the Sun)\n", - "8. 
Neptune - 30.1 AU (more than 3 times the distance from the Earth to the Sun)\n" - ] - } - ], - "source": [ - "print(output[\"choices\"][0][\"text\"])" - ] - }, - { - "cell_type": "markdown", - "id": "865df6bf", - "metadata": {}, - "source": [ - "## Using CPU" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "db096045", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "llama_model_loader: loaded meta data with 18 key-value pairs and 363 tensors from ../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin (version GGUF V2 (latest))\n", - "llama_model_loader: - tensor 0: token_embd.weight q4_K [ 5120, 32000, 1, 1 ]\n", - "llama_model_loader: - tensor 1: output_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 2: output.weight q6_K [ 5120, 32000, 1, 1 ]\n", - "llama_model_loader: - tensor 3: blk.0.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 4: blk.0.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 5: blk.0.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 6: blk.0.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 7: blk.0.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 8: blk.0.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 9: blk.0.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 10: blk.0.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | \n", - "llama_model_loader: - tensor 11: blk.0.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 12: blk.1.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 13: blk.1.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 14: blk.1.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 15: blk.1.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 16: blk.1.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 17: blk.1.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 18: blk.1.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 19: blk.1.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 20: blk.1.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 21: blk.2.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 22: blk.2.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 23: blk.2.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 24: blk.2.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 25: blk.2.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 26: blk.2.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 27: blk.2.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 28: blk.2.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 29: blk.2.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 30: blk.3.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 31: blk.3.attn_k.weight q4_K [ 5120, 5120, 1, 1
]\n", - "llama_model_loader: - tensor 32: blk.3.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 33: blk.3.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 34: blk.3.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 35: blk.3.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 36: blk.3.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 37: blk.3.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 38: blk.3.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 39: blk.4.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 40: blk.4.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 41: blk.4.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 42: blk.4.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 43: blk.4.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 44: blk.4.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 45: blk.4.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 46: blk.4.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 47: blk.4.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 48: blk.5.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 49: blk.5.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 50: blk.5.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 51: blk.5.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 52: blk.5.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 53: blk.5.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 54: blk.5.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 55: blk.5.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 56: blk.5.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 57: blk.6.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 58: blk.6.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 59: blk.6.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 60: blk.6.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 61: blk.6.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 62: blk.6.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 63: blk.6.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 64: blk.6.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 65: blk.6.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 66: blk.7.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 67: blk.7.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 68: blk.7.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 69: blk.7.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 70: blk.7.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 71: blk.7.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 72: blk.7.ffn_down.weight q4_K [ 
13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 73: blk.7.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 74: blk.7.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 75: blk.8.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 76: blk.8.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 77: blk.8.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 78: blk.8.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 79: blk.8.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 80: blk.8.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 81: blk.8.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 82: blk.8.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 83: blk.8.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 84: blk.9.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 85: blk.9.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 86: blk.9.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 87: blk.9.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 88: blk.9.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 89: blk.9.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 90: blk.9.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 91: blk.9.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 92: blk.9.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 93: blk.10.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 94: blk.10.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 95: blk.10.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 96: blk.10.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 97: blk.10.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 98: blk.10.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 99: blk.10.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 100: blk.10.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 101: blk.10.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 102: blk.11.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 103: blk.11.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 104: blk.11.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 105: blk.11.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 106: blk.11.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 107: blk.11.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 108: blk.11.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 109: blk.11.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 110: blk.11.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 111: blk.12.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 112: blk.12.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: 
- tensor 113: blk.12.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 114: blk.12.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 115: blk.12.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 116: blk.12.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 117: blk.12.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 118: blk.12.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 119: blk.12.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 120: blk.13.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 121: blk.13.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 122: blk.13.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 123: blk.13.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 124: blk.13.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 125: blk.13.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 126: blk.13.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 127: blk.13.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 128: blk.13.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 129: blk.14.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 130: blk.14.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 131: blk.14.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 132: blk.14.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 133: blk.14.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 134: blk.14.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 135: blk.14.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 136: blk.14.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 137: blk.14.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 138: blk.15.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 139: blk.15.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 140: blk.15.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 141: blk.15.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 142: blk.15.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 143: blk.15.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 144: blk.15.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 145: blk.15.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 146: blk.15.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 147: blk.16.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 148: blk.16.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 149: blk.16.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 150: blk.16.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 151: blk.16.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 152: blk.16.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - 
"llama_model_loader: - tensor 153: blk.16.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 154: blk.16.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 155: blk.16.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 156: blk.17.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 157: blk.17.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 158: blk.17.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 159: blk.17.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 160: blk.17.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 161: blk.17.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 162: blk.17.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 163: blk.17.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 164: blk.17.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 165: blk.18.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 166: blk.18.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 167: blk.18.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 168: blk.18.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 169: blk.18.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 170: blk.18.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 171: blk.18.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 172: blk.18.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 173: blk.18.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 174: blk.19.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 175: blk.19.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 176: blk.19.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 177: blk.19.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 178: blk.19.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 179: blk.19.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 180: blk.19.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 181: blk.19.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 182: blk.19.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 183: blk.20.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 184: blk.20.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 185: blk.20.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 186: blk.20.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 187: blk.20.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 188: blk.20.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 189: blk.20.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 190: blk.20.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 191: blk.20.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 192: blk.21.attn_q.weight q4_K [ 5120, 5120, 1, 1 
]\n", - "llama_model_loader: - tensor 193: blk.21.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 194: blk.21.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 195: blk.21.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 196: blk.21.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 197: blk.21.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 198: blk.21.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 199: blk.21.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 200: blk.21.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 201: blk.22.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 202: blk.22.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 203: blk.22.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 204: blk.22.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 205: blk.22.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 206: blk.22.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 207: blk.22.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 208: blk.22.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 209: blk.22.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 210: blk.23.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 211: blk.23.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 212: blk.23.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 213: blk.23.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 214: blk.23.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 215: blk.23.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 216: blk.23.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 217: blk.23.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 218: blk.23.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 219: blk.24.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 220: blk.24.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 221: blk.24.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 222: blk.24.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 223: blk.24.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 224: blk.24.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 225: blk.24.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 226: blk.24.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 227: blk.24.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 228: blk.25.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 229: blk.25.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 230: blk.25.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 231: blk.25.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 232: blk.25.attn_norm.weight f32 [ 5120, 1, 
1, 1 ]\n", - "llama_model_loader: - tensor 233: blk.25.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 234: blk.25.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 235: blk.25.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 236: blk.25.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 237: blk.26.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 238: blk.26.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 239: blk.26.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 240: blk.26.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 241: blk.26.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 242: blk.26.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 243: blk.26.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 244: blk.26.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 245: blk.26.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 246: blk.27.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 247: blk.27.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 248: blk.27.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 249: blk.27.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 250: blk.27.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 251: blk.27.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 252: blk.27.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 253: blk.27.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 254: blk.27.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 255: blk.28.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 256: blk.28.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 257: blk.28.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 258: blk.28.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 259: blk.28.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 260: blk.28.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 261: blk.28.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 262: blk.28.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 263: blk.28.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 264: blk.29.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 265: blk.29.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 266: blk.29.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 267: blk.29.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 268: blk.29.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 269: blk.29.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 270: blk.29.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 271: blk.29.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 272: blk.29.ffn_norm.weight f32 [ 
5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 273: blk.30.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 274: blk.30.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 275: blk.30.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 276: blk.30.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 277: blk.30.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 278: blk.30.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 279: blk.30.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 280: blk.30.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 281: blk.30.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 282: blk.31.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 283: blk.31.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 284: blk.31.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 285: blk.31.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 286: blk.31.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 287: blk.31.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 288: blk.31.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 289: blk.31.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 290: blk.31.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 291: blk.32.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 292: blk.32.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 293: blk.32.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 294: blk.32.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 295: blk.32.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 296: blk.32.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 297: blk.32.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 298: blk.32.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 299: blk.32.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 300: blk.33.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 301: blk.33.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 302: blk.33.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 303: blk.33.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 304: blk.33.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 305: blk.33.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 306: blk.33.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 307: blk.33.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 308: blk.33.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 309: blk.34.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 310: blk.34.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 311: blk.34.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 312: blk.34.attn_output.weight 
q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 313: blk.34.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 314: blk.34.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 315: blk.34.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 316: blk.34.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 317: blk.34.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 318: blk.35.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 319: blk.35.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 320: blk.35.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 321: blk.35.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 322: blk.35.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 323: blk.35.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 324: blk.35.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 325: blk.35.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 326: blk.35.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 327: blk.36.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 328: blk.36.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 329: blk.36.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 330: blk.36.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 331: blk.36.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 332: blk.36.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 333: blk.36.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 334: blk.36.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 335: blk.36.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 336: blk.37.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 337: blk.37.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 338: blk.37.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 339: blk.37.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 340: blk.37.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 341: blk.37.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 342: blk.37.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 343: blk.37.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 344: blk.37.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 345: blk.38.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 346: blk.38.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 347: blk.38.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 348: blk.38.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 349: blk.38.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 350: blk.38.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 351: blk.38.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 352: 
blk.38.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 353: blk.38.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 354: blk.39.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 355: blk.39.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 356: blk.39.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 357: blk.39.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 358: blk.39.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 359: blk.39.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 360: blk.39.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 361: blk.39.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 362: blk.39.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - kv 0: general.architecture str \n", - "llama_model_loader: - kv 1: general.name str \n", - "llama_model_loader: - kv 2: general.description str \n", - "llama_model_loader: - kv 3: llama.context_length u32 \n", - "llama_model_loader: - kv 4: llama.embedding_length u32 \n", - "llama_model_loader: - kv 5: llama.block_count u32 \n", - "llama_model_loader: - kv 6: llama.feed_forward_length u32 \n", - "llama_model_loader: - kv 7: llama.rope.dimension_count u32 \n", - "llama_model_loader: - kv 8: llama.attention.head_count u32 \n", - "llama_model_loader: - kv 9: llama.attention.head_count_kv u32 \n", - "llama_model_loader: - kv 10: llama.attention.layer_norm_rms_epsilon f32 \n", - "llama_model_loader: - kv 11: tokenizer.ggml.model str \n", - "llama_model_loader: - kv 12: tokenizer.ggml.tokens arr \n", - "llama_model_loader: - kv 13: tokenizer.ggml.scores arr \n", - "llama_model_loader: - kv 14: tokenizer.ggml.token_type arr \n", - "llama_model_loader: - kv 15: tokenizer.ggml.unknown_token_id u32 \n", - "llama_model_loader: - kv 16: tokenizer.ggml.bos_token_id u32 \n", - "llama_model_loader: - kv 17: tokenizer.ggml.eos_token_id u32 \n", - "llama_model_loader: - type f32: 81 tensors\n", - "llama_model_loader: - type q4_K: 281 tensors\n", - "llama_model_loader: - type q6_K: 1 tensors\n", - "llm_load_print_meta: format = GGUF V2 (latest)\n", - "llm_load_print_meta: arch = llama\n", - "llm_load_print_meta: vocab type = SPM\n", - "llm_load_print_meta: n_vocab = 32000\n", - "llm_load_print_meta: n_merges = 0\n", - "llm_load_print_meta: n_ctx_train = 2048\n", - "llm_load_print_meta: n_ctx = 512\n", - "llm_load_print_meta: n_embd = 5120\n", - "llm_load_print_meta: n_head = 40\n", - "llm_load_print_meta: n_head_kv = 40\n", - "llm_load_print_meta: n_layer = 40\n", - "llm_load_print_meta: n_rot = 128\n", - "llm_load_print_meta: n_gqa = 1\n", - "llm_load_print_meta: f_norm_eps = 1.0e-05\n", - "llm_load_print_meta: f_norm_rms_eps = 5.0e-06\n", - "llm_load_print_meta: n_ff = 13824\n", - "llm_load_print_meta: freq_base = 10000.0\n", - "llm_load_print_meta: freq_scale = 1\n", - "llm_load_print_meta: model type = 13B\n", - "llm_load_print_meta: model ftype = mostly Q4_K - Medium (guessed)\n", - "llm_load_print_meta: model size = 13.02 B\n", - "llm_load_print_meta: general.name = llama-2-13b-chat.ggmlv3.q4_K_S.bin\n", - "llm_load_print_meta: BOS token = 1 '<s>'\n", - "llm_load_print_meta: EOS token = 2 '</s>'\n", - "llm_load_print_meta: UNK token = 0 '<unk>'\n", - "llm_load_print_meta: LF token = 13 '<0x0A>'\n", - "llm_load_tensors: ggml ctx size = 0.12 MB\n", - "llm_load_tensors:
mem required = 7024.01 MB (+ 400.00 MB per state)\n", - "...................................................................................................\n", - "llama_new_context_with_model: kv self size = 400.00 MB\n", - "llama_new_context_with_model: compute buffer total size = 75.47 MB\n", - "ggml_metal_free: deallocating\n" - ] - } - ], - "source": [ - "llm = Llama(model_path=llama_2_13b_chat_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "291a4c26", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "llama_print_timings: load time = 1480.25 ms\n", - "llama_print_timings: sample time = 162.22 ms / 212 runs ( 0.77 ms per token, 1306.87 tokens per second)\n", - "llama_print_timings: prompt eval time = 1480.21 ms / 17 tokens ( 87.07 ms per token, 11.48 tokens per second)\n", - "llama_print_timings: eval time = 20115.90 ms / 211 runs ( 95.34 ms per token, 10.49 tokens per second)\n", - "llama_print_timings: total time = 22063.41 ms\n" - ] - } - ], - "source": [ - "output = llm(prompt_example,\n", - " max_tokens=512,\n", - " echo=True)" - ] - }, - { - "cell_type": "markdown", - "id": "8cdce188", - "metadata": {}, - "source": [ - "Comparing the timings of the two runs, the Metal-accelerated model generates tokens noticeably faster than the CPU-only one, as expected (roughly 18 versus 10.5 tokens per second during generation)." - ] - },
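To make that comparison concrete, the `llama_print_timings` figures above can be converted into throughput. A minimal sketch, with the eval-time values copied by hand from the two runs above (only the arithmetic is new):

```python
# Throughput from the llama_print_timings output above (values copied by hand).
metal_eval_ms, metal_runs = 14795.14, 267  # Metal run (n_gpu_layers=1)
cpu_eval_ms, cpu_runs = 20115.90, 211      # CPU-only run

metal_tps = metal_runs / (metal_eval_ms / 1000)  # ~18.0 tokens per second
cpu_tps = cpu_runs / (cpu_eval_ms / 1000)        # ~10.5 tokens per second
print(f"Metal: {metal_tps:.1f} tok/s vs CPU: {cpu_tps:.1f} tok/s")
```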
- { - "cell_type": "code", - "execution_count": 12, - "id": "d7b74226", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Name all the planets in the solar system and state their distances to the sun.\n", - "1. Mercury - 58 million kilometers (36 million miles)\n", - "2. Venus - 108 million kilometers (67 million miles)\n", - "3. Earth - 149.6 million kilometers (92.96 million miles)\n", - "4. Mars - 225 million kilometers (140 million miles)\n", - "5. Jupiter - 778.3 million kilometers (483.8 million miles)\n", - "6. Saturn - 1.4 billion kilometers (870 million miles)\n", - "7. Uranus - 2.9 billion kilometers (1.8 billion miles)\n", - "8. Neptune - 4.5 billion kilometers (2.8 billion miles)\n", - "\n", - "Note that the distance of each planet from the Sun is measured in terms of their average distance, as the orbits of the planets are not perfectly circular and the distances vary slightly over the course of a year.\n" - ] - } - ], - "source": [ - "print(output[\"choices\"][0][\"text\"])" - ] - }, - { - "cell_type": "markdown", - "id": "b54b606c", - "metadata": {}, - "source": [ - "## Using Llama2 in `llama-index`" - ] - },
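The cell below wraps the same GGUF model for use inside `llama-index` (its loader output follows). As a minimal sketch of what such a call can look like with the `LlamaCPP` wrapper and the Llama-2 prompt helpers; the parameter values here are illustrative, not necessarily the notebook's exact settings:

```python
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt

# Wrap the local GGUF model so llama-index can drive it; the llama_utils
# helpers format chat messages into Llama-2's [INST] <<SYS>> prompt template.
llm = LlamaCPP(
    model_path="../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin",
    max_new_tokens=512,
    context_window=2048,               # matches the n_ctx_train reported above
    model_kwargs={"n_gpu_layers": 1},  # Metal acceleration, as earlier
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
```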
- { - "cell_type": "code", - "execution_count": 13, - "id": "1c8f0f7d", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "llama_model_loader: loaded meta data with 18 key-value pairs and 363 tensors from ../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin (version GGUF V2 (latest))\n", - "llama_model_loader: - tensor 0: token_embd.weight q4_K [ 5120, 32000, 1, 1 ]\n", - "llama_model_loader: - tensor 1: output_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 2: output.weight q6_K [ 5120, 32000, 1, 1 ]\n", - "llama_model_loader: - tensor 3: blk.0.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 4: blk.0.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 5: blk.0.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 6: blk.0.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 7: blk.0.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 8: blk.0.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 9: blk.0.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 10: blk.0.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 11: blk.0.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 12: blk.1.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 13: blk.1.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 14: blk.1.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 15: blk.1.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 16: blk.1.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 17: blk.1.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 18: blk.1.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 19: blk.1.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 20: blk.1.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 21: blk.2.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 22: blk.2.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 23: blk.2.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 24: blk.2.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 25: blk.2.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 26: blk.2.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 27: blk.2.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 28: blk.2.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 29: blk.2.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 30: blk.3.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 31: blk.3.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 32: blk.3.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 33: blk.3.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 34: blk.3.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 35: blk.3.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 36: blk.3.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 37: blk.3.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 38: blk.3.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 39: blk.4.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 40: blk.4.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 41: blk.4.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 42: blk.4.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 43: blk.4.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 44: blk.4.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 45: blk.4.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 46: blk.4.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 47: blk.4.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 48: blk.5.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 49: blk.5.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 50: blk.5.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 51: blk.5.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 52: blk.5.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 53: blk.5.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 54: blk.5.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 55: blk.5.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 56: blk.5.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 57: blk.6.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 58: blk.6.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 59: blk.6.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 60: blk.6.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 61: blk.6.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 62: blk.6.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 63: blk.6.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 64: blk.6.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 65: blk.6.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 66: blk.7.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 67: blk.7.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 68: blk.7.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 69: blk.7.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 70: blk.7.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 71: blk.7.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", -
"llama_model_loader: - tensor 72: blk.7.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 73: blk.7.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 74: blk.7.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 75: blk.8.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 76: blk.8.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 77: blk.8.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 78: blk.8.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 79: blk.8.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 80: blk.8.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 81: blk.8.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 82: blk.8.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 83: blk.8.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 84: blk.9.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 85: blk.9.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 86: blk.9.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 87: blk.9.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 88: blk.9.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 89: blk.9.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 90: blk.9.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 91: blk.9.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 92: blk.9.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 93: blk.10.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 94: blk.10.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 95: blk.10.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 96: blk.10.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 97: blk.10.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 98: blk.10.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 99: blk.10.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 100: blk.10.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 101: blk.10.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 102: blk.11.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 103: blk.11.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 104: blk.11.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 105: blk.11.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 106: blk.11.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 107: blk.11.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 108: blk.11.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 109: blk.11.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 110: blk.11.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 111: blk.12.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 112: 
blk.12.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 113: blk.12.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 114: blk.12.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 115: blk.12.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 116: blk.12.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 117: blk.12.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 118: blk.12.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 119: blk.12.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 120: blk.13.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 121: blk.13.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 122: blk.13.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 123: blk.13.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 124: blk.13.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 125: blk.13.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 126: blk.13.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 127: blk.13.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 128: blk.13.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 129: blk.14.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 130: blk.14.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 131: blk.14.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 132: blk.14.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 133: blk.14.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 134: blk.14.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 135: blk.14.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 136: blk.14.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 137: blk.14.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 138: blk.15.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 139: blk.15.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 140: blk.15.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 141: blk.15.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 142: blk.15.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 143: blk.15.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 144: blk.15.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 145: blk.15.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 146: blk.15.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 147: blk.16.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 148: blk.16.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 149: blk.16.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 150: blk.16.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 151: blk.16.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 
152: blk.16.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 153: blk.16.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 154: blk.16.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 155: blk.16.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 156: blk.17.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 157: blk.17.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 158: blk.17.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 159: blk.17.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 160: blk.17.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 161: blk.17.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 162: blk.17.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 163: blk.17.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 164: blk.17.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 165: blk.18.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 166: blk.18.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 167: blk.18.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 168: blk.18.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 169: blk.18.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 170: blk.18.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 171: blk.18.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 172: blk.18.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 173: blk.18.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 174: blk.19.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 175: blk.19.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 176: blk.19.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 177: blk.19.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 178: blk.19.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 179: blk.19.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 180: blk.19.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 181: blk.19.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 182: blk.19.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 183: blk.20.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 184: blk.20.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 185: blk.20.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 186: blk.20.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 187: blk.20.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 188: blk.20.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 189: blk.20.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 190: blk.20.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 191: blk.20.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - 
tensor 192: blk.21.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 193: blk.21.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 194: blk.21.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 195: blk.21.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 196: blk.21.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 197: blk.21.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 198: blk.21.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 199: blk.21.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 200: blk.21.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 201: blk.22.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 202: blk.22.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 203: blk.22.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 204: blk.22.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 205: blk.22.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 206: blk.22.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 207: blk.22.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 208: blk.22.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 209: blk.22.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 210: blk.23.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 211: blk.23.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 212: blk.23.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 213: blk.23.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 214: blk.23.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 215: blk.23.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 216: blk.23.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 217: blk.23.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 218: blk.23.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 219: blk.24.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 220: blk.24.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 221: blk.24.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 222: blk.24.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 223: blk.24.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 224: blk.24.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 225: blk.24.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 226: blk.24.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 227: blk.24.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 228: blk.25.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 229: blk.25.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 230: blk.25.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 231: blk.25.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - 
"llama_model_loader: - tensor 232: blk.25.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 233: blk.25.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 234: blk.25.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 235: blk.25.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 236: blk.25.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 237: blk.26.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 238: blk.26.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 239: blk.26.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 240: blk.26.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 241: blk.26.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 242: blk.26.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 243: blk.26.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 244: blk.26.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 245: blk.26.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 246: blk.27.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 247: blk.27.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 248: blk.27.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 249: blk.27.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 250: blk.27.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 251: blk.27.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 252: blk.27.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 253: blk.27.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 254: blk.27.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 255: blk.28.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 256: blk.28.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 257: blk.28.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 258: blk.28.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 259: blk.28.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 260: blk.28.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 261: blk.28.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 262: blk.28.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 263: blk.28.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 264: blk.29.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 265: blk.29.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 266: blk.29.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 267: blk.29.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 268: blk.29.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 269: blk.29.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 270: blk.29.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 271: blk.29.ffn_up.weight q4_K [ 5120, 13824, 1, 1 
]\n", - "llama_model_loader: - tensor 272: blk.29.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 273: blk.30.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 274: blk.30.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 275: blk.30.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 276: blk.30.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 277: blk.30.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 278: blk.30.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 279: blk.30.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 280: blk.30.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 281: blk.30.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 282: blk.31.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 283: blk.31.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 284: blk.31.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 285: blk.31.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 286: blk.31.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 287: blk.31.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 288: blk.31.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 289: blk.31.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 290: blk.31.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 291: blk.32.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 292: blk.32.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 293: blk.32.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 294: blk.32.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 295: blk.32.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 296: blk.32.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 297: blk.32.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 298: blk.32.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 299: blk.32.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 300: blk.33.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 301: blk.33.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 302: blk.33.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 303: blk.33.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 304: blk.33.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 305: blk.33.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 306: blk.33.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 307: blk.33.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 308: blk.33.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 309: blk.34.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 310: blk.34.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 311: blk.34.attn_v.weight q4_K [ 5120, 5120, 1, 1 
]\n", - "llama_model_loader: - tensor 312: blk.34.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 313: blk.34.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 314: blk.34.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 315: blk.34.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 316: blk.34.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 317: blk.34.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 318: blk.35.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 319: blk.35.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 320: blk.35.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 321: blk.35.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 322: blk.35.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 323: blk.35.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 324: blk.35.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 325: blk.35.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 326: blk.35.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 327: blk.36.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 328: blk.36.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 329: blk.36.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 330: blk.36.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 331: blk.36.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 332: blk.36.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 333: blk.36.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 334: blk.36.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 335: blk.36.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 336: blk.37.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 337: blk.37.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 338: blk.37.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 339: blk.37.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 340: blk.37.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 341: blk.37.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 342: blk.37.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 343: blk.37.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 344: blk.37.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 345: blk.38.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 346: blk.38.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 347: blk.38.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 348: blk.38.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 349: blk.38.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 350: blk.38.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 351: blk.38.ffn_down.weight q4_K [ 13824, 
5120, 1, 1 ]\n", - "llama_model_loader: - tensor 352: blk.38.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 353: blk.38.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 354: blk.39.attn_q.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 355: blk.39.attn_k.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 356: blk.39.attn_v.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 357: blk.39.attn_output.weight q4_K [ 5120, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 358: blk.39.attn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - tensor 359: blk.39.ffn_gate.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 360: blk.39.ffn_down.weight q4_K [ 13824, 5120, 1, 1 ]\n", - "llama_model_loader: - tensor 361: blk.39.ffn_up.weight q4_K [ 5120, 13824, 1, 1 ]\n", - "llama_model_loader: - tensor 362: blk.39.ffn_norm.weight f32 [ 5120, 1, 1, 1 ]\n", - "llama_model_loader: - kv 0: general.architecture str \n", - "llama_model_loader: - kv 1: general.name str \n", - "llama_model_loader: - kv 2: general.description str \n", - "llama_model_loader: - kv 3: llama.context_length u32 \n", - "llama_model_loader: - kv 4: llama.embedding_length u32 \n", - "llama_model_loader: - kv 5: llama.block_count u32 \n", - "llama_model_loader: - kv 6: llama.feed_forward_length u32 \n", - "llama_model_loader: - kv 7: llama.rope.dimension_count u32 \n", - "llama_model_loader: - kv 8: llama.attention.head_count u32 \n", - "llama_model_loader: - kv 9: llama.attention.head_count_kv u32 \n", - "llama_model_loader: - kv 10: llama.attention.layer_norm_rms_epsilon f32 \n", - "llama_model_loader: - kv 11: tokenizer.ggml.model str \n", - "llama_model_loader: - kv 12: tokenizer.ggml.tokens arr \n", - "llama_model_loader: - kv 13: tokenizer.ggml.scores arr \n", - "llama_model_loader: - kv 14: tokenizer.ggml.token_type arr \n", - "llama_model_loader: - kv 15: tokenizer.ggml.unknown_token_id u32 \n", - "llama_model_loader: - kv 16: tokenizer.ggml.bos_token_id u32 \n", - "llama_model_loader: - kv 17: tokenizer.ggml.eos_token_id u32 \n", - "llama_model_loader: - type f32: 81 tensors\n", - "llama_model_loader: - type q4_K: 281 tensors\n", - "llama_model_loader: - type q6_K: 1 tensors\n", - "llm_load_print_meta: format = GGUF V2 (latest)\n", - "llm_load_print_meta: arch = llama\n", - "llm_load_print_meta: vocab type = SPM\n", - "llm_load_print_meta: n_vocab = 32000\n", - "llm_load_print_meta: n_merges = 0\n", - "llm_load_print_meta: n_ctx_train = 2048\n", - "llm_load_print_meta: n_ctx = 3900\n", - "llm_load_print_meta: n_embd = 5120\n", - "llm_load_print_meta: n_head = 40\n", - "llm_load_print_meta: n_head_kv = 40\n", - "llm_load_print_meta: n_layer = 40\n", - "llm_load_print_meta: n_rot = 128\n", - "llm_load_print_meta: n_gqa = 1\n", - "llm_load_print_meta: f_norm_eps = 1.0e-05\n", - "llm_load_print_meta: f_norm_rms_eps = 5.0e-06\n", - "llm_load_print_meta: n_ff = 13824\n", - "llm_load_print_meta: freq_base = 10000.0\n", - "llm_load_print_meta: freq_scale = 1\n", - "llm_load_print_meta: model type = 13B\n", - "llm_load_print_meta: model ftype = mostly Q4_K - Medium (guessed)\n", - "llm_load_print_meta: model size = 13.02 B\n", - "llm_load_print_meta: general.name = llama-2-13b-chat.ggmlv3.q4_K_S.bin\n", - "llm_load_print_meta: BOS token = 1 ''\n", - "llm_load_print_meta: EOS token = 2 ''\n", - "llm_load_print_meta: UNK token = 0 ''\n", - "llm_load_print_meta: LF token = 13 '<0x0A>'\n", - 
"llm_load_tensors: ggml ctx size = 0.12 MB\n", - "llm_load_tensors: mem required = 7024.01 MB (+ 3046.88 MB per state)\n", - "...................................................................................................\n", - "llama_new_context_with_model: kv self size = 3046.88 MB\n", - "ggml_metal_init: allocating\n", - "ggml_metal_init: loading '/Users/rchan/opt/miniconda3/envs/reginald/lib/python3.11/site-packages/llama_cpp/ggml-metal.metal'\n", - "ggml_metal_init: loaded kernel_add 0x14ca7e860 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_add_row 0x14ca7eac0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul 0x14ca7ed20 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_row 0x14ca7ef80 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_scale 0x14ca7dc60 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_silu 0x14ca7dec0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_relu 0x14ca7fb80 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_gelu 0x14ca7fde0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_soft_max 0x14ca80040 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_diag_mask_inf 0x14ca802a0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_get_rows_f16 0x14ca80500 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_get_rows_q4_0 0x14ca80760 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_get_rows_q4_1 0x14ca809c0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_get_rows_q8_0 0x14ca80c20 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_get_rows_q2_K 0x14ca80e80 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_get_rows_q3_K 0x14ca810e0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_get_rows_q4_K 0x14ca814e0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_get_rows_q5_K 0x14ca81740 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_get_rows_q6_K 0x14ca819a0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_rms_norm 0x14ca81c00 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_norm 0x14ca81e60 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x14ca82450 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x14ca828f0 | th_max = 896 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x14ca82d90 | th_max = 896 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mat_q8_0_f32 0x14ca83230 | th_max = 768 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x14ca836d0 | th_max = 640 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x14ca83b70 | th_max = 704 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x14ca84010 | th_max = 576 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x14ca844b0 | th_max = 576 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x14ca84950 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mm_f16_f32 0x14ca84e30 | th_max = 768 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mm_q4_0_f32 0x14ca85310 | th_max = 768 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mm_q8_0_f32 0x14ca857f0 | th_max = 768 | th_width = 
32\n", - "ggml_metal_init: loaded kernel_mul_mm_q4_1_f32 0x14ca85cd0 | th_max = 768 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mm_q2_K_f32 0x14ca861b0 | th_max = 768 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mm_q3_K_f32 0x14ca86690 | th_max = 768 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mm_q4_K_f32 0x1488391a0 | th_max = 768 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mm_q5_K_f32 0x148839d60 | th_max = 704 | th_width = 32\n", - "ggml_metal_init: loaded kernel_mul_mm_q6_K_f32 0x14883a240 | th_max = 704 | th_width = 32\n", - "ggml_metal_init: loaded kernel_rope 0x14883a4a0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_alibi_f32 0x14883aaa0 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_cpy_f32_f16 0x14883b190 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_cpy_f32_f32 0x14883b880 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: loaded kernel_cpy_f16_f16 0x14883bf70 | th_max = 1024 | th_width = 32\n", - "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", - "ggml_metal_init: hasUnifiedMemory = true\n", - "ggml_metal_init: maxTransferRate = built-in GPU\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "llama_new_context_with_model: compute buffer total size = 356.16 MB\n", - "llama_new_context_with_model: max tensor size = 128.17 MB\n", - "ggml_metal_add_buffer: allocated 'data ' buffer, size = 7024.61 MB, ( 7025.11 / 21845.34)\n", - "ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1.48 MB, ( 7026.59 / 21845.34)\n", - "ggml_metal_add_buffer: allocated 'kv ' buffer, size = 3048.88 MB, (10075.47 / 21845.34)\n", - "ggml_metal_add_buffer: allocated 'alloc ' buffer, size = 354.70 MBAVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | \n", - ", (10430.17 / 21845.34)\n" - ] - } - ], - "source": [ - "llm = LlamaCPP(\n", - " model_path=\"../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin\",\n", - " temperature=0.1,\n", - " max_new_tokens=1024,\n", - " # llama2 has a context window of 4096 tokens,\n", - " # but we set it lower to allow for some wiggle room\n", - " context_window=3900,\n", - " # kwargs to pass to __call__()\n", - " generate_kwargs={},\n", - " # kwargs to pass to __init__()\n", - " # set to at least 1 to use GPU\n", - " model_kwargs={\"n_gpu_layers\": 1},\n", - " # transform inputs into Llama2 format\n", - " messages_to_prompt=messages_to_prompt,\n", - " completion_to_prompt=completion_to_prompt,\n", - " verbose=True,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "ad388e17", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "LLMMetadata(context_window=3900, num_output=1024, is_chat_model=False, is_function_calling_model=False, model_name='../../gguf_models/llama-2-13b-chat.gguf.q4_K_S.bin')" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "llm.metadata" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "107331f3", - "metadata": {}, - "outputs": [], - "source": [ - "handbook = pd.read_csv(\"../../data/public/handbook-scraped.csv\")\n", - "wiki = pd.read_csv(\"../../data/turing_internal/wiki-scraped.csv\")\n", - "# turing = pd.read_csv(\"../../data/public/turingacuk-no-boilerplate.csv\")\n", - "\n", - "text_list = 
list(handbook[\"body\"].astype(\"str\")) + list(wiki[\"body\"].astype(\"str\"))\n", - "documents = [Document(text=t) for t in text_list]" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "6f0727c3", - "metadata": {}, - "outputs": [], - "source": [ - "hfemb = HuggingFaceEmbeddings()\n", - "embed_model = LangchainEmbedding(hfemb)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "ff676438", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "HuggingFaceEmbeddings(client=SentenceTransformer(\n", - " (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel \n", - " (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})\n", - " (2): Normalize()\n", - "), model_name='sentence-transformers/all-mpnet-base-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False)" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hfemb" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "00032b04", - "metadata": {}, - "outputs": [], - "source": [ - "# set number of output tokens\n", - "num_output = 1024\n", - "# set maximum input size\n", - "context_window = 4096\n", - "# set maximum chunk overlap\n", - "chunk_size_limit = 512\n", - "chunk_overlap_ratio = 0\n", - "\n", - "prompt_helper = PromptHelper(\n", - " context_window=context_window,\n", - " num_output=num_output,\n", - " chunk_size_limit=chunk_size_limit,\n", - " chunk_overlap_ratio=chunk_overlap_ratio,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "a4f3d57e", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - " service_context = ServiceContext.from_defaults(\n", - " llm_predictor=LLMPredictor(llm=llm),\n", - " embed_model=embed_model,\n", - " prompt_helper=prompt_helper,\n", - " chunk_size=chunk_size_limit,\n", - ")\n", - "\n", - "index = VectorStoreIndex.from_documents(\n", - " documents, service_context=service_context\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "7ddbe81c", - "metadata": {}, - "outputs": [], - "source": [ - "response_mode = \"simple_summarize\"" - ] - }, - { - "cell_type": "markdown", - "id": "d12e01b1", - "metadata": {}, - "source": [ - "## \"React\" chat engine" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "8a8b7edc", - "metadata": {}, - "outputs": [], - "source": [ - "chat_engine = index.as_chat_engine(chat_mode=\"react\",\n", - " verbose=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "1695904a", - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[38;5;200m\u001b[1;3mThought: I need to use a tool to help me answer the question.\n", - "Action: query_engine_tool\n", - "Action Input: {'input': 'hello world'}\n", - "\u001b[0m" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "llama_print_timings: load time = 7509.25 ms\n", - "llama_print_timings: sample time = 42.94 ms / 59 runs ( 0.73 ms per token, 1374.17 tokens per second)\n", - "llama_print_timings: prompt eval time = 7509.19 ms / 447 tokens ( 16.80 ms per token, 59.53 tokens per second)\n", - "llama_print_timings: eval time = 3475.79 ms / 58 runs ( 59.93 ms per token, 16.69 tokens per second)\n", - 
"llama_print_timings: total time = 11105.13 ms\n", - "Llama.generate: prefix-match hit\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[36;1m\u001b[1;3mObservation: Hello! As a helpful, respectful, and honest assistant, I'm here to assist you with any questions or requests you may have. Based on the context information provided, it seems like you are looking for information about the Turing Institute and its various communications channels and events.\n", - "\n", - "To start, I can provide you with some general information about the Turing Institute and its activities. The Turing Institute is a research centre based in the UK that focuses on the development of algorithms and computational methods for solving complex problems in various fields, such as computer science, mathematics, and biology. The institute has a strong emphasis on interdisciplinary research and collaboration, and it hosts a variety of events and workshops to facilitate these interactions.\n", - "\n", - "In terms of communications channels, the Turing Institute uses a variety of platforms to keep its members and collaborators informed about its activities and research progress. These include email lists, Slack channels, and a website with information about ongoing projects, events, and research updates.\n", - "\n", - "Regarding events, the Turing Institute hosts a variety of activities throughout the year, including tech talks, workshops, and conferences. These events cover a range of topics related to the institute's research areas, and they are often open to members and non-members alike. You can find information about upcoming events on the Turing Institute's website or by checking the shared REG calendar.\n", - "\n", - "If you have any specific questions or requests, please feel free to ask, and I will do my best to assist you based on the information available to me.\n", - "\u001b[0m" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "llama_print_timings: load time = 7509.25 ms\n", - "llama_print_timings: sample time = 244.08 ms / 339 runs ( 0.72 ms per token, 1388.92 tokens per second)\n", - "llama_print_timings: prompt eval time = 8954.56 ms / 537 tokens ( 16.68 ms per token, 59.97 tokens per second)\n", - "llama_print_timings: eval time = 21652.52 ms / 338 runs ( 64.06 ms per token, 15.61 tokens per second)\n", - "llama_print_timings: total time = 31331.83 ms\n", - "Llama.generate: prefix-match hit\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[38;5;200m\u001b[1;3mThought: I need to use a tool to help me answer the question.\n", - "Action: query_engine_tool\n", - "Action Input: {'input': 'what are the research areas of the Turing Institute?'}\n", - "\u001b[0m" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "llama_print_timings: load time = 7509.25 ms\n", - "llama_print_timings: sample time = 32.78 ms / 46 runs ( 0.71 ms per token, 1403.21 tokens per second)\n", - "llama_print_timings: prompt eval time = 15197.21 ms / 832 tokens ( 18.27 ms per token, 54.75 tokens per second)\n", - "llama_print_timings: eval time = 2972.41 ms / 45 runs ( 66.05 ms per token, 15.14 tokens per second)\n", - "llama_print_timings: total time = 18262.37 ms\n", - "Llama.generate: prefix-match hit\n", - "\n", - "llama_print_timings: load time = 7509.25 ms\n", - "llama_print_timings: sample time = 216.46 ms / 309 runs ( 0.70 ms per token, 1427.55 tokens per second)\n", - "llama_print_timings: prompt 
eval time = 11918.74 ms / 689 tokens ( 17.30 ms per token, 57.81 tokens per second)\n", - "llama_print_timings: eval time = 20660.28 ms / 308 runs ( 67.08 ms per token, 14.91 tokens per second)\n", - "llama_print_timings: total time = 33190.48 ms\n", - "Llama.generate: prefix-match hit\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[36;1m\u001b[1;3mObservation: Based on the updated context, the research areas of the Turing Institute are focused on impact-driven research in data science and AI, with a particular emphasis on diversity and openness. The institute prioritizes projects that have realistic plans for meeting the priority of reproducible research and open source software development. Additionally, the institute values collaboration across academic, social, ethnic, and racial backgrounds, as well as cross-disciplinary and cross-cultural collaborations.\n", - "\n", - "Some specific research areas that may be of interest to the Turing Institute include:\n", - "\n", - "1. Data-driven approaches to addressing societal challenges, such as healthcare, education, and environmental sustainability.\n", - "2. Development of new AI technologies and techniques, such as machine learning, natural language processing, and computer vision.\n", - "3. Applications of data science and AI in various domains, such as finance, transportation, and culture.\n", - "4. Studies on the ethical, social, and economic implications of data science and AI.\n", - "5. Collaborative research projects that bring together diverse perspectives and expertise from academia, industry, and civil society.\n", - "\n", - "The Turing Institute is open to a wide range of research proposals that align with its guiding principles and priorities, including pioneering approaches and innovative methodologies. The institute encourages the sharing of outputs and methodologies as openly as possible to facilitate collaboration and reach a diverse audience.\n", - "\n", - "In addition to these general research areas, the Turing Institute is also interested in exploring new ways of tackling research, talent, and approaches. 
Project briefs will need to specify the extent to which the proposed approaches or methodologies are innovative in the context of similar research being undertaken elsewhere, and a pioneering score will be assigned to each project.\n", - "\u001b[0m" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "llama_print_timings: load time = 7509.25 ms\n", - "llama_print_timings: sample time = 277.50 ms / 396 runs ( 0.70 ms per token, 1427.03 tokens per second)\n", - "llama_print_timings: prompt eval time = 11135.08 ms / 627 tokens ( 17.76 ms per token, 56.31 tokens per second)\n", - "llama_print_timings: eval time = 26738.70 ms / 395 runs ( 67.69 ms per token, 14.77 tokens per second)\n", - "llama_print_timings: total time = 38670.27 ms\n", - "Llama.generate: prefix-match hit\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[38;5;200m\u001b[1;3mResponse: The research areas of the Turing Institute include data-driven approaches to addressing societal challenges, development of new AI technologies and techniques, applications of data science and AI in various domains, studies on the ethical, social, and economic implications of data science and AI, and collaborative research projects that bring together diverse perspectives and expertise from academia, industry, and civil society.\n", - "\u001b[0mThe research areas of the Turing Institute include data-driven approaches to addressing societal challenges, development of new AI technologies and techniques, applications of data science and AI in various domains, studies on the ethical, social, and economic implications of data science and AI, and collaborative research projects that bring together diverse perspectives and expertise from academia, industry, and civil society.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "llama_print_timings: load time = 7509.25 ms\n", - "llama_print_timings: sample time = 72.06 ms / 103 runs ( 0.70 ms per token, 1429.42 tokens per second)\n", - "llama_print_timings: prompt eval time = 27985.35 ms / 1286 tokens ( 21.76 ms per token, 45.95 tokens per second)\n", - "llama_print_timings: eval time = 7628.91 ms / 102 runs ( 74.79 ms per token, 13.37 tokens per second)\n", - "llama_print_timings: total time = 35813.39 ms\n" - ] - } - ], - "source": [ - "response = chat_engine.chat(\n", - " \"hello\"\n", - ")\n", - "print(response)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "d6fa2d0f", - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Llama.generate: prefix-match hit\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[38;5;200m\u001b[1;3mResponse: As a new starter in the Research Engineering Group (REG) at the Alan Turing Institute, there are several things you can do to get started and make the most of your time here:\n", - "\n", - "1. Familiarize yourself with the institute's research areas and ongoing projects. This will help you understand the scope of the work being done and how you can contribute.\n", - "2. Meet with your supervisor and other members of the REG to discuss your background, interests, and goals. They can provide valuable guidance and help you get settled into your new role.\n", - "3. Review the institute's policies and procedures to ensure you understand the expectations and requirements of your position.\n", - "4. 
Attend orientation sessions and training programs offered by the institute to learn more about the Turing Institute's culture, resources, and research practices.\n", - "5. Start exploring the tools and technologies used in the REG, such as query_engine_tool, to get a sense of the technical capabilities available to you.\n", - "6. Begin identifying potential research projects that align with your interests and skills, and reach out to relevant researchers to discuss possibilities.\n", - "7. Consider joining relevant working groups or workshops to connect with other researchers and stay up-to-date on the latest developments in the field.\n", - "8. Start building your network within the institute by attending social events, seminars, and other activities that promote collaboration and knowledge sharing.\n", - "9. Familiarize yourself with the Turing Institute's publication and dissemination processes to understand how research is shared and recognized within the institution.\n", - "10. Stay open-minded, curious, and willing to learn, as the Turing Institute is a dynamic and interdisciplinary environment that values collaboration and innovation.\n", - "\u001b[0m As a new starter in the Research Engineering Group (REG) at the Alan Turing Institute, there are several things you can do to get started and make the most of your time here:\n", - "\n", - "1. Familiarize yourself with the institute's research areas and ongoing projects. This will help you understand the scope of the work being done and how you can contribute.\n", - "2. Meet with your supervisor and other members of the REG to discuss your background, interests, and goals. They can provide valuable guidance and help you get settled into your new role.\n", - "3. Review the institute's policies and procedures to ensure you understand the expectations and requirements of your position.\n", - "4. Attend orientation sessions and training programs offered by the institute to learn more about the Turing Institute's culture, resources, and research practices.\n", - "5. Start exploring the tools and technologies used in the REG, such as query_engine_tool, to get a sense of the technical capabilities available to you.\n", - "6. Begin identifying potential research projects that align with your interests and skills, and reach out to relevant researchers to discuss possibilities.\n", - "7. Consider joining relevant working groups or workshops to connect with other researchers and stay up-to-date on the latest developments in the field.\n", - "8. Start building your network within the institute by attending social events, seminars, and other activities that promote collaboration and knowledge sharing.\n", - "9. Familiarize yourself with the Turing Institute's publication and dissemination processes to understand how research is shared and recognized within the institution.\n", - "10. 
Stay open-minded, curious, and willing to learn, as the Turing Institute is a dynamic and interdisciplinary environment that values collaboration and innovation.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "llama_print_timings: load time = 7509.25 ms\n", - "llama_print_timings: sample time = 266.23 ms / 380 runs ( 0.70 ms per token, 1427.35 tokens per second)\n", - "llama_print_timings: prompt eval time = 2152.57 ms / 121 tokens ( 17.79 ms per token, 56.21 tokens per second)\n", - "llama_print_timings: eval time = 24885.95 ms / 379 runs ( 65.66 ms per token, 15.23 tokens per second)\n", - "llama_print_timings: total time = 27796.96 ms\n" - ] - } - ], - "source": [ - "response = chat_engine.chat(\"what should a new starter in the research engineering group (REG) at the Alan Turing Institute do?\")\n", - "print(response)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "3d028880", - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Llama.generate: prefix-match hit\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[38;5;200m\u001b[1;3mResponse: No, I have not used the query engine yet. As a new starter in the Research Engineering Group at the Alan Turing Institute, I am still in the process of familiarizing myself with the tools and technologies available to me. However, I am eager to learn more about the query engine and how it can be used to support my research activities. Can you tell me more about the query engine and its capabilities?\n", - "\u001b[0m No, I have not used the query engine yet. As a new starter in the Research Engineering Group at the Alan Turing Institute, I am still in the process of familiarizing myself with the tools and technologies available to me. However, I am eager to learn more about the query engine and how it can be used to support my research activities. Can you tell me more about the query engine and its capabilities?\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "llama_print_timings: load time = 7509.25 ms\n", - "llama_print_timings: sample time = 61.06 ms / 87 runs ( 0.70 ms per token, 1424.85 tokens per second)\n", - "llama_print_timings: prompt eval time = 570.47 ms / 20 tokens ( 28.52 ms per token, 35.06 tokens per second)\n", - "llama_print_timings: eval time = 5989.95 ms / 86 runs ( 69.65 ms per token, 14.36 tokens per second)\n", - "llama_print_timings: total time = 6726.84 ms\n" - ] - } - ], - "source": [ - "response = chat_engine.chat(\"Have you used the query engine yet?\")\n", - "print(response)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "1a01fe16", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Llama.generate: prefix-match hit\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[38;5;200m\u001b[1;3mResponse: You have asked me the following questions so far:\n", - "\n", - "1. What should a new starter in the Research Engineering Group (REG) at the Alan Turing Institute do?\n", - "2. Have you used the query engine yet?\n", - "\u001b[0m You have asked me the following questions so far:\n", - "\n", - "1. What should a new starter in the Research Engineering Group (REG) at the Alan Turing Institute do?\n", - "2. 
Have you used the query engine yet?\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "llama_print_timings: load time = 7509.25 ms\n", - "llama_print_timings: sample time = 34.37 ms / 49 runs ( 0.70 ms per token, 1425.79 tokens per second)\n", - "llama_print_timings: prompt eval time = 600.34 ms / 20 tokens ( 30.02 ms per token, 33.31 tokens per second)\n", - "llama_print_timings: eval time = 3407.13 ms / 48 runs ( 70.98 ms per token, 14.09 tokens per second)\n", - "llama_print_timings: total time = 4101.47 ms\n" - ] - } - ], - "source": [ - "response = chat_engine.chat(\"What have I asked you so far?\")\n", - "print(response)" - ] - }, - { - "cell_type": "markdown", - "id": "b9c86b3d", - "metadata": {}, - "source": [ - "Reset chat engine..." - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "753b4c72", - "metadata": {}, - "outputs": [], - "source": [ - "chat_engine.reset()" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "f7ca01f6", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Llama.generate: prefix-match hit\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "********** prompt: **********\n", - " [INST] What did I ask you before? [/INST]\n", - "********** completion_response: **********\n", - " I apologize, but I don't have the ability to remember previous conversations or keep track of what was asked. Each time you interact with me, it is a new and separate conversation. If you would like to ask something again, I will do my best to assist you.\n", - "********** chat: chat_response: **********\n", - " assistant: I apologize, but I don't have the ability to remember previous conversations or keep track of what was asked. Each time you interact with me, it is a new and separate conversation. If you would like to ask something again, I will do my best to assist you.\n", - "\u001b[38;5;200m\u001b[1;3mResponse: I apologize, but I don't have the ability to remember previous conversations or keep track of what was asked. Each time you interact with me, it is a new and separate conversation. If you would like to ask something again, I will do my best to assist you.\n", - "\u001b[0m********** _process_actions: current_reasoning: **********\n", - " [ResponseReasoningStep(thought='I can answer without any tools.', response=\" I apologize, but I don't have the ability to remember previous conversations or keep track of what was asked. Each time you interact with me, it is a new and separate conversation. If you would like to ask something again, I will do my best to assist you.\")]\n", - "********** _process_actions: is_done: **********\n", - " True\n", - "********** chat: reasoning_steps: **********\n", - " [ResponseReasoningStep(thought='I can answer without any tools.', response=\" I apologize, but I don't have the ability to remember previous conversations or keep track of what was asked. Each time you interact with me, it is a new and separate conversation. If you would like to ask something again, I will do my best to assist you.\")]\n", - "********** chat: is_done: **********\n", - " True\n", - " I apologize, but I don't have the ability to remember previous conversations or keep track of what was asked. Each time you interact with me, it is a new and separate conversation. 
If you would like to ask something again, I will do my best to assist you.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "llama_print_timings: load time = 931.62 ms\n", - "llama_print_timings: sample time = 42.12 ms / 60 runs ( 0.70 ms per token, 1424.43 tokens per second)\n", - "llama_print_timings: prompt eval time = 288.19 ms / 14 tokens ( 20.59 ms per token, 48.58 tokens per second)\n", - "llama_print_timings: eval time = 3228.61 ms / 59 runs ( 54.72 ms per token, 18.27 tokens per second)\n", - "llama_print_timings: total time = 3630.86 ms\n" - ] - } - ], - "source": [ - "response = chat_engine.chat(\"What did I ask you before?\")\n", - "print(response)" - ] - }, - { - "cell_type": "markdown", - "id": "c3941e6f", - "metadata": {}, - "source": [ - "## React engine and asking it to use query\n", - "\n", - "We saw that it didn't use the query engine in the above, but maybe we could force it to use it..." - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "a5629ffd", - "metadata": {}, - "outputs": [], - "source": [ - "chat_engine = index.as_chat_engine(chat_mode=\"react\",\n", - " response_mode=response_mode,\n", - " verbose=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "ded38211", - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "********** prompt: **********\n", - " [INST] Please use the query engine. What should a new starter in the research engineering group do? [/INST]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Llama.generate: prefix-match hit\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "********** completion_response: **********\n", - " As a new starter in the research engineering group, there are several tasks and activities that you can focus on to get started with your role:\n", - "\n", - "1. Familiarize yourself with the research environment:\n", - "\t* Learn about the research projects that the group is currently working on, and the technologies and tools being used.\n", - "\t* Understand the goals and objectives of the research projects, and how they align with the overall research strategy of the organization.\n", - "2. Set up your workspace:\n", - "\t* Install any necessary software or tools required for your work.\n", - "\t* Set up your computer and other equipment to ensure that you have a comfortable and productive work environment.\n", - "3. Meet with your supervisor and colleagues:\n", - "\t* Schedule a meeting with your supervisor to discuss your role, responsibilities, and expectations.\n", - "\t* Introduce yourself to your colleagues and learn about their roles and areas of expertise.\n", - "4. Familiarize yourself with the organization's policies and procedures:\n", - "\t* Review the organization's policies and procedures related to research, engineering, and technology transfer.\n", - "\t* Understand the intellectual property policies and how they apply to your work.\n", - "5. Attend training and orientation sessions:\n", - "\t* Attend any training or orientation sessions that are provided by the organization to help you get started with your role.\n", - "6. Start contributing to research projects:\n", - "\t* Begin contributing to ongoing research projects, and start learning about the research process and how to work with the team.\n", - "7. 
Develop a plan for your research project:\n", - "\t* Work with your supervisor to develop a plan for your research project, including specific objectives, methodology, and timelines.\n", - "8. Start building your network:\n", - "\t* Attend relevant conferences, seminars, and workshops to learn about the latest developments in your field and to build relationships with other researchers and industry experts.\n", - "9. Keep up-to-date with relevant literature and trends:\n", - "\t* Regularly review scientific articles, conference proceedings, and other relevant publications to stay current with the latest developments in your field.\n", - "10. Communicate regularly with your supervisor and colleagues:\n", - "\t* Regularly communicate with your supervisor and colleagues to provide updates on your progress, ask questions, and seek feedback.\n", - "\n", - "By following these steps, you can get started with your role as a research engineer in the organization and begin contributing to the success of the research projects.\n", - "********** chat_response: **********\n", - " assistant: As a new starter in the research engineering group, there are several tasks and activities that you can focus on to get started with your role:\n", - "\n", - "1. Familiarize yourself with the research environment:\n", - "\t* Learn about the research projects that the group is currently working on, and the technologies and tools being used.\n", - "\t* Understand the goals and objectives of the research projects, and how they align with the overall research strategy of the organization.\n", - "2. Set up your workspace:\n", - "\t* Install any necessary software or tools required for your work.\n", - "\t* Set up your computer and other equipment to ensure that you have a comfortable and productive work environment.\n", - "3. Meet with your supervisor and colleagues:\n", - "\t* Schedule a meeting with your supervisor to discuss your role, responsibilities, and expectations.\n", - "\t* Introduce yourself to your colleagues and learn about their roles and areas of expertise.\n", - "4. Familiarize yourself with the organization's policies and procedures:\n", - "\t* Review the organization's policies and procedures related to research, engineering, and technology transfer.\n", - "\t* Understand the intellectual property policies and how they apply to your work.\n", - "5. Attend training and orientation sessions:\n", - "\t* Attend any training or orientation sessions that are provided by the organization to help you get started with your role.\n", - "6. Start contributing to research projects:\n", - "\t* Begin contributing to ongoing research projects, and start learning about the research process and how to work with the team.\n", - "7. Develop a plan for your research project:\n", - "\t* Work with your supervisor to develop a plan for your research project, including specific objectives, methodology, and timelines.\n", - "8. Start building your network:\n", - "\t* Attend relevant conferences, seminars, and workshops to learn about the latest developments in your field and to build relationships with other researchers and industry experts.\n", - "9. Keep up-to-date with relevant literature and trends:\n", - "\t* Regularly review scientific articles, conference proceedings, and other relevant publications to stay current with the latest developments in your field.\n", - "10. 
Communicate regularly with your supervisor and colleagues:\n", - "\t* Regularly communicate with your supervisor and colleagues to provide updates on your progress, ask questions, and seek feedback.\n", - "\n", - "By following these steps, you can get started with your role as a research engineer in the organization and begin contributing to the success of the research projects.\n", - "\u001b[38;5;200m\u001b[1;3mResponse: As a new starter in the research engineering group, there are several tasks and activities that you can focus on to get started with your role:\n", - "\n", - "1. Familiarize yourself with the research environment:\n", - "\t* Learn about the research projects that the group is currently working on, and the technologies and tools being used.\n", - "\t* Understand the goals and objectives of the research projects, and how they align with the overall research strategy of the organization.\n", - "2. Set up your workspace:\n", - "\t* Install any necessary software or tools required for your work.\n", - "\t* Set up your computer and other equipment to ensure that you have a comfortable and productive work environment.\n", - "3. Meet with your supervisor and colleagues:\n", - "\t* Schedule a meeting with your supervisor to discuss your role, responsibilities, and expectations.\n", - "\t* Introduce yourself to your colleagues and learn about their roles and areas of expertise.\n", - "4. Familiarize yourself with the organization's policies and procedures:\n", - "\t* Review the organization's policies and procedures related to research, engineering, and technology transfer.\n", - "\t* Understand the intellectual property policies and how they apply to your work.\n", - "5. Attend training and orientation sessions:\n", - "\t* Attend any training or orientation sessions that are provided by the organization to help you get started with your role.\n", - "6. Start contributing to research projects:\n", - "\t* Begin contributing to ongoing research projects, and start learning about the research process and how to work with the team.\n", - "7. Develop a plan for your research project:\n", - "\t* Work with your supervisor to develop a plan for your research project, including specific objectives, methodology, and timelines.\n", - "8. Start building your network:\n", - "\t* Attend relevant conferences, seminars, and workshops to learn about the latest developments in your field and to build relationships with other researchers and industry experts.\n", - "9. Keep up-to-date with relevant literature and trends:\n", - "\t* Regularly review scientific articles, conference proceedings, and other relevant publications to stay current with the latest developments in your field.\n", - "10. Communicate regularly with your supervisor and colleagues:\n", - "\t* Regularly communicate with your supervisor and colleagues to provide updates on your progress, ask questions, and seek feedback.\n", - "\n", - "By following these steps, you can get started with your role as a research engineer in the organization and begin contributing to the success of the research projects.\n", - "\u001b[0m As a new starter in the research engineering group, there are several tasks and activities that you can focus on to get started with your role:\n", - "\n", - "1. 
Familiarize yourself with the research environment:\n", - "\t* Learn about the research projects that the group is currently working on, and the technologies and tools being used.\n", - "\t* Understand the goals and objectives of the research projects, and how they align with the overall research strategy of the organization.\n", - "2. Set up your workspace:\n", - "\t* Install any necessary software or tools required for your work.\n", - "\t* Set up your computer and other equipment to ensure that you have a comfortable and productive work environment.\n", - "3. Meet with your supervisor and colleagues:\n", - "\t* Schedule a meeting with your supervisor to discuss your role, responsibilities, and expectations.\n", - "\t* Introduce yourself to your colleagues and learn about their roles and areas of expertise.\n", - "4. Familiarize yourself with the organization's policies and procedures:\n", - "\t* Review the organization's policies and procedures related to research, engineering, and technology transfer.\n", - "\t* Understand the intellectual property policies and how they apply to your work.\n", - "5. Attend training and orientation sessions:\n", - "\t* Attend any training or orientation sessions that are provided by the organization to help you get started with your role.\n", - "6. Start contributing to research projects:\n", - "\t* Begin contributing to ongoing research projects, and start learning about the research process and how to work with the team.\n", - "7. Develop a plan for your research project:\n", - "\t* Work with your supervisor to develop a plan for your research project, including specific objectives, methodology, and timelines.\n", - "8. Start building your network:\n", - "\t* Attend relevant conferences, seminars, and workshops to learn about the latest developments in your field and to build relationships with other researchers and industry experts.\n", - "9. Keep up-to-date with relevant literature and trends:\n", - "\t* Regularly review scientific articles, conference proceedings, and other relevant publications to stay current with the latest developments in your field.\n", - "10. Communicate regularly with your supervisor and colleagues:\n", - "\t* Regularly communicate with your supervisor and colleagues to provide updates on your progress, ask questions, and seek feedback.\n", - "\n", - "By following these steps, you can get started with your role as a research engineer in the organization and begin contributing to the success of the research projects.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "llama_print_timings: load time = 981.41 ms\n", - "llama_print_timings: sample time = 377.91 ms / 539 runs ( 0.70 ms per token, 1426.28 tokens per second)\n", - "llama_print_timings: prompt eval time = 307.84 ms / 23 tokens ( 13.38 ms per token, 74.72 tokens per second)\n", - "llama_print_timings: eval time = 31503.15 ms / 538 runs ( 58.56 ms per token, 17.08 tokens per second)\n", - "llama_print_timings: total time = 32916.09 ms\n" - ] - } - ], - "source": [ - "response = chat_engine.chat(\n", - " \"Please use the query engine. 
What should a new starter in the research engineering group do?\"\n",
-    ")\n",
-    "print(response)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "id": "098a68a1",
-   "metadata": {
-    "scrolled": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "********** prompt: **********\n",
-      " [INST] <<SYS>>\n",
-      "\n",
-      "You are designed to help with a variety of tasks, from answering questions to providing summaries to other types of analyses.\n",
-      "\n",
-      "## Tools\n",
-      "You have access to a wide variety of tools. You are responsible for using\n",
-      "the tools in any sequence you deem appropriate to complete the task at hand.\n",
-      "This may require breaking the task into subtasks and using different tools\n",
-      "to complete each subtask.\n",
-      "\n",
-      "You have access to the following tools:\n",
-      "> Tool Name: query_engine_tool\n",
-      "Tool Description: Useful for running a natural language query\n",
-      "against a knowledge base and get back a natural language response.\n",
-      "\n",
-      "Tool Args: {'title': 'DefaultToolFnSchema', 'description': 'Default tool function Schema.', 'type': 'object', 'properties': {'input': {'title': 'Input', 'type': 'string'}}, 'required': ['input']}\n",
-      "\n",
-      "\n",
-      "## Output Format\n",
-      "To answer the question, please use the following format.\n",
-      "\n",
-      "```\n",
-      "Thought: I need to use a tool to help me answer the question.\n",
-      "Action: tool name (one of query_engine_tool)\n",
-      "Action Input: the input to the tool, in a JSON format representing the kwargs (e.g. {\"text\": \"hello world\", \"num_beams\": 5})\n",
-      "```\n",
-      "Please use a valid JSON format for the action input. Do NOT do this {'text': 'hello world', 'num_beams': 5}.\n",
-      "\n",
-      "If this format is used, the user will respond in the following format:\n",
-      "\n",
-      "```\n",
-      "Observation: tool response\n",
-      "```\n",
-      "\n",
-      "You should keep repeating the above format until you have enough information\n",
-      "to answer the question without using any more tools. At that point, you MUST respond\n",
-      "in the following format:\n",
-      "\n",
-      "```\n",
-      "Thought: I can answer without using any more tools.\n",
-      "Answer: [your answer here]\n",
-      "```\n",
-      "\n",
-      "## Current Conversation\n",
-      "Below is the current conversation consisting of interleaving human and assistant messages.\n",
-      "\n",
-      "\n",
-      "<</SYS>>\n",
-      "\n",
-      "Please use the query engine. What should a new starter in the research engineering group do? [/INST] assistant: As a new starter in the research engineering group, there are several tasks and activities that you can focus on to get started with your role:\n",
-      "\n",
-      "1. Familiarize yourself with the research environment:\n",
-      "\t* Learn about the research projects that the group is currently working on, and the technologies and tools being used.\n",
-      "\t* Understand the goals and objectives of the research projects, and how they align with the overall research strategy of the organization.\n",
-      "2. Set up your workspace:\n",
-      "\t* Install any necessary software or tools required for your work.\n",
-      "\t* Set up your computer and other equipment to ensure that you have a comfortable and productive work environment.\n",
-      "3. Meet with your supervisor and colleagues:\n",
-      "\t* Schedule a meeting with your supervisor to discuss your role, responsibilities, and expectations.\n",
-      "\t* Introduce yourself to your colleagues and learn about their roles and areas of expertise.\n",
-      "4. 
Familiarize yourself with the organization's policies and procedures:\n", - "\t* Review the organization's policies and procedures related to research, engineering, and technology transfer.\n", - "\t* Understand the intellectual property policies and how they apply to your work.\n", - "5. Attend training and orientation sessions:\n", - "\t* Attend any training or orientation sessions that are provided by the organization to help you get started with your role.\n", - "6. Start contributing to research projects:\n", - "\t* Begin contributing to ongoing research projects, and start learning about the research process and how to work with the team.\n", - "7. Develop a plan for your research project:\n", - "\t* Work with your supervisor to develop a plan for your research project, including specific objectives, methodology, and timelines.\n", - "8. Start building your network:\n", - "\t* Attend relevant conferences, seminars, and workshops to learn about the latest developments in your field and to build relationships with other researchers and industry experts.\n", - "9. Keep up-to-date with relevant literature and trends:\n", - "\t* Regularly review scientific articles, conference proceedings, and other relevant publications to stay current with the latest developments in your field.\n", - "10. Communicate regularly with your supervisor and colleagues:\n", - "\t* Regularly communicate with your supervisor and colleagues to provide updates on your progress, ask questions, and seek feedback.\n", - "\n", - "By following these steps, you can get started with your role as a research engineer in the organization and begin contributing to the success of the research projects. [INST] I want to specifically know about a new starter in the REG team at the Turing institute [/INST]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Llama.generate: prefix-match hit\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "********** completion_response: **********\n", - " As a new starter in the Research Engineering Group (REG) at the Turing Institute, there are several tasks and activities that you can focus on to get started with your role:\n", - "\n", - "1. Familiarize yourself with the Turing Institute's research environment:\n", - "\t* Learn about the research projects that the REG is currently working on, and the technologies and tools being used.\n", - "\t* Understand the goals and objectives of the research projects, and how they align with the overall research strategy of the Turing Institute.\n", - "2. Set up your workspace:\n", - "\t* Install any necessary software or tools required for your work.\n", - "\t* Set up your computer and other equipment to ensure that you have a comfortable and productive work environment.\n", - "3. Meet with your supervisor and REG colleagues:\n", - "\t* Schedule a meeting with your supervisor to discuss your role, responsibilities, and expectations.\n", - "\t* Introduce yourself to your REG colleagues and learn about their roles and areas of expertise.\n", - "4. Familiarize yourself with the Turing Institute's policies and procedures:\n", - "\t* Review the Turing Institute's policies and procedures related to research, engineering, and technology transfer.\n", - "\t* Understand the intellectual property policies and how they apply to your work.\n", - "5. 
Attend training and orientation sessions:\n", - "\t* Attend any training or orientation sessions that are provided by the Turing Institute to help you get started with your role.\n", - "6. Start contributing to REG projects:\n", - "\t* Begin contributing to ongoing REG projects, and start learning about the research process and how to work with the team.\n", - "7. Develop a plan for your research project:\n", - "\t* Work with your supervisor to develop a plan for your research project, including specific objectives, methodology, and timelines.\n", - "8. Start building your network:\n", - "\t* Attend relevant conferences, seminars, and workshops to learn about the latest developments in your field and to build relationships with other researchers and industry experts.\n", - "9. Keep up-to-date with relevant literature and trends:\n", - "\t* Regularly review scientific articles, conference proceedings, and other relevant publications to stay current with the latest developments in your field.\n", - "10. Communicate regularly with your supervisor and REG colleagues:\n", - "\t* Regularly communicate with your supervisor and REG colleagues to provide updates on your progress, ask questions, and seek feedback.\n", - "\n", - "By following these steps, you can get started with your role as a research engineer in the REG at the Turing Institute and begin contributing to the success of the research projects.\n", - "********** chat_response: **********\n", - " assistant: As a new starter in the Research Engineering Group (REG) at the Turing Institute, there are several tasks and activities that you can focus on to get started with your role:\n", - "\n", - "1. Familiarize yourself with the Turing Institute's research environment:\n", - "\t* Learn about the research projects that the REG is currently working on, and the technologies and tools being used.\n", - "\t* Understand the goals and objectives of the research projects, and how they align with the overall research strategy of the Turing Institute.\n", - "2. Set up your workspace:\n", - "\t* Install any necessary software or tools required for your work.\n", - "\t* Set up your computer and other equipment to ensure that you have a comfortable and productive work environment.\n", - "3. Meet with your supervisor and REG colleagues:\n", - "\t* Schedule a meeting with your supervisor to discuss your role, responsibilities, and expectations.\n", - "\t* Introduce yourself to your REG colleagues and learn about their roles and areas of expertise.\n", - "4. Familiarize yourself with the Turing Institute's policies and procedures:\n", - "\t* Review the Turing Institute's policies and procedures related to research, engineering, and technology transfer.\n", - "\t* Understand the intellectual property policies and how they apply to your work.\n", - "5. Attend training and orientation sessions:\n", - "\t* Attend any training or orientation sessions that are provided by the Turing Institute to help you get started with your role.\n", - "6. Start contributing to REG projects:\n", - "\t* Begin contributing to ongoing REG projects, and start learning about the research process and how to work with the team.\n", - "7. Develop a plan for your research project:\n", - "\t* Work with your supervisor to develop a plan for your research project, including specific objectives, methodology, and timelines.\n", - "8. 
Start building your network:\n", - "\t* Attend relevant conferences, seminars, and workshops to learn about the latest developments in your field and to build relationships with other researchers and industry experts.\n", - "9. Keep up-to-date with relevant literature and trends:\n", - "\t* Regularly review scientific articles, conference proceedings, and other relevant publications to stay current with the latest developments in your field.\n", - "10. Communicate regularly with your supervisor and REG colleagues:\n", - "\t* Regularly communicate with your supervisor and REG colleagues to provide updates on your progress, ask questions, and seek feedback.\n", - "\n", - "By following these steps, you can get started with your role as a research engineer in the REG at the Turing Institute and begin contributing to the success of the research projects.\n", - "\u001b[38;5;200m\u001b[1;3mResponse: As a new starter in the Research Engineering Group (REG) at the Turing Institute, there are several tasks and activities that you can focus on to get started with your role:\n", - "\n", - "1. Familiarize yourself with the Turing Institute's research environment:\n", - "\t* Learn about the research projects that the REG is currently working on, and the technologies and tools being used.\n", - "\t* Understand the goals and objectives of the research projects, and how they align with the overall research strategy of the Turing Institute.\n", - "2. Set up your workspace:\n", - "\t* Install any necessary software or tools required for your work.\n", - "\t* Set up your computer and other equipment to ensure that you have a comfortable and productive work environment.\n", - "3. Meet with your supervisor and REG colleagues:\n", - "\t* Schedule a meeting with your supervisor to discuss your role, responsibilities, and expectations.\n", - "\t* Introduce yourself to your REG colleagues and learn about their roles and areas of expertise.\n", - "4. Familiarize yourself with the Turing Institute's policies and procedures:\n", - "\t* Review the Turing Institute's policies and procedures related to research, engineering, and technology transfer.\n", - "\t* Understand the intellectual property policies and how they apply to your work.\n", - "5. Attend training and orientation sessions:\n", - "\t* Attend any training or orientation sessions that are provided by the Turing Institute to help you get started with your role.\n", - "6. Start contributing to REG projects:\n", - "\t* Begin contributing to ongoing REG projects, and start learning about the research process and how to work with the team.\n", - "7. Develop a plan for your research project:\n", - "\t* Work with your supervisor to develop a plan for your research project, including specific objectives, methodology, and timelines.\n", - "8. Start building your network:\n", - "\t* Attend relevant conferences, seminars, and workshops to learn about the latest developments in your field and to build relationships with other researchers and industry experts.\n", - "9. Keep up-to-date with relevant literature and trends:\n", - "\t* Regularly review scientific articles, conference proceedings, and other relevant publications to stay current with the latest developments in your field.\n", - "10. 
Communicate regularly with your supervisor and REG colleagues:\n", - "\t* Regularly communicate with your supervisor and REG colleagues to provide updates on your progress, ask questions, and seek feedback.\n", - "\n", - "By following these steps, you can get started with your role as a research engineer in the REG at the Turing Institute and begin contributing to the success of the research projects.\n", - "\u001b[0m As a new starter in the Research Engineering Group (REG) at the Turing Institute, there are several tasks and activities that you can focus on to get started with your role:\n", - "\n", - "1. Familiarize yourself with the Turing Institute's research environment:\n", - "\t* Learn about the research projects that the REG is currently working on, and the technologies and tools being used.\n", - "\t* Understand the goals and objectives of the research projects, and how they align with the overall research strategy of the Turing Institute.\n", - "2. Set up your workspace:\n", - "\t* Install any necessary software or tools required for your work.\n", - "\t* Set up your computer and other equipment to ensure that you have a comfortable and productive work environment.\n", - "3. Meet with your supervisor and REG colleagues:\n", - "\t* Schedule a meeting with your supervisor to discuss your role, responsibilities, and expectations.\n", - "\t* Introduce yourself to your REG colleagues and learn about their roles and areas of expertise.\n", - "4. Familiarize yourself with the Turing Institute's policies and procedures:\n", - "\t* Review the Turing Institute's policies and procedures related to research, engineering, and technology transfer.\n", - "\t* Understand the intellectual property policies and how they apply to your work.\n", - "5. Attend training and orientation sessions:\n", - "\t* Attend any training or orientation sessions that are provided by the Turing Institute to help you get started with your role.\n", - "6. Start contributing to REG projects:\n", - "\t* Begin contributing to ongoing REG projects, and start learning about the research process and how to work with the team.\n", - "7. Develop a plan for your research project:\n", - "\t* Work with your supervisor to develop a plan for your research project, including specific objectives, methodology, and timelines.\n", - "8. Start building your network:\n", - "\t* Attend relevant conferences, seminars, and workshops to learn about the latest developments in your field and to build relationships with other researchers and industry experts.\n", - "9. Keep up-to-date with relevant literature and trends:\n", - "\t* Regularly review scientific articles, conference proceedings, and other relevant publications to stay current with the latest developments in your field.\n", - "10. 
Communicate regularly with your supervisor and REG colleagues:\n", - "\t* Regularly communicate with your supervisor and REG colleagues to provide updates on your progress, ask questions, and seek feedback.\n", - "\n", - "By following these steps, you can get started with your role as a research engineer in the REG at the Turing Institute and begin contributing to the success of the research projects.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "llama_print_timings: load time = 981.41 ms\n", - "llama_print_timings: sample time = 403.86 ms / 577 runs ( 0.70 ms per token, 1428.72 tokens per second)\n", - "llama_print_timings: prompt eval time = 21240.71 ms / 1045 tokens ( 20.33 ms per token, 49.20 tokens per second)\n", - "llama_print_timings: eval time = 43054.91 ms / 576 runs ( 74.75 ms per token, 13.38 tokens per second)\n", - "llama_print_timings: total time = 65498.11 ms\n" - ] - } - ], - "source": [ - "response = chat_engine.chat(\n", - " \"I want to specifically know about a new starter in the REG team at the Turing institute\"\n", - ")\n", - "print(response)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "26d96826", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c8643927", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "reginald", - "language": "python", - "name": "reginald" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/models/llama-index-hack/Untitled1.ipynb b/models/llama-index-hack/Untitled1.ipynb deleted file mode 100644 index 7763533d..00000000 --- a/models/llama-index-hack/Untitled1.ipynb +++ /dev/null @@ -1,33 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "456e1ccd", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "reginald", - "language": "python", - "name": "reginald" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From ed802fe2256b96318c5dc83361eb881a805e4572 Mon Sep 17 00:00:00 2001 From: rchan Date: Fri, 8 Sep 2023 18:18:14 +0100 Subject: [PATCH 3/4] add meeting notes for 08/09 --- meeting_notes/Reginald 08_09_23.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 meeting_notes/Reginald 08_09_23.md diff --git a/meeting_notes/Reginald 08_09_23.md b/meeting_notes/Reginald 08_09_23.md new file mode 100644 index 00000000..b49aecb0 --- /dev/null +++ b/meeting_notes/Reginald 08_09_23.md @@ -0,0 +1,26 @@ +# Reginald 08/09/23 + +## Notes +- Ryan and Rosie almost finishing a PR contribution to llama-index for the following issue: https://github.com/jerryjliu/llama_index/issues/7596 + - ReAct engine seems to be a bit broken after this due to the default system prompt + - Potentially can make another issue and PR to fix +- Other potential llama-index contributions + - Some llama-cpp chat engine example notebooks - there's not 
many (if any) in the repo, and more examples would be very welcome
+  - Ryan to try to fix an old issue posted during Hackweek (https://github.com/jerryjliu/llama_index/issues/6465)
+- Rosie has been working on getting GitHub issues and files from within our Hut23 repo
+  - Will need to think about which personal token we should use
+  - Maybe look at whether an institutional token exists
+  - Will need to upload this to Azure as a secret
+- Idea: look into logging how good the answers are and saving this into a database
+  - In future, we can maybe use this for RLHF fine-tuning of our LLM
+
+## Actions
+- Ryan & Rosie: look at putting this on Azure
+  - Figure out how to have multiple chat instances at the same time
+  - Can start with running the bot locally
+- Rosie to think about using the reader for pure markdown files so there's no need to process them to csv
+  - Alternatively, figure out how the csv files were created from the pure markdown files
+  - Maybe it was from Andy's scripts to pull data from the wiki and handbook
+- Ryan to see if we can update the OpenAI code and compare it with Llama2
+- Ryan & Rosie: try to get a quantized Llama2-70b running (for Tomas)
+- Merge Rosie's current PRs at some point next week

From 3f8f8e59405261bedca5ee28b5ae25747044e0d6 Mon Sep 17 00:00:00 2001
From: rchan
Date: Mon, 11 Sep 2023 18:20:40 +0100
Subject: [PATCH 4/4] update llama-index to latest

---
 poetry.lock    | 49 +++++++++++++++++++++++++++++++++++++++++++------
 pyproject.toml |  9 +++------
 2 files changed, 46 insertions(+), 12 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 23414273..c014c423 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1252,6 +1252,7 @@ files = [
     {file = "greenlet-2.0.2-cp27-cp27m-win32.whl", hash = "sha256:6c3acb79b0bfd4fe733dff8bc62695283b57949ebcca05ae5c129eb606ff2d74"},
     {file = "greenlet-2.0.2-cp27-cp27m-win_amd64.whl", hash = "sha256:283737e0da3f08bd637b5ad058507e578dd462db259f7f6e4c5c365ba4ee9343"},
     {file = "greenlet-2.0.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d27ec7509b9c18b6d73f2f5ede2622441de812e7b1a80bbd446cb0633bd3d5ae"},
+    {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d967650d3f56af314b72df7089d96cda1083a7fc2da05b375d2bc48c82ab3f3c"},
     {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:30bcf80dda7f15ac77ba5af2b961bdd9dbc77fd4ac6105cee85b0d0a5fcf74df"},
     {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26fbfce90728d82bc9e6c38ea4d038cba20b7faf8a0ca53a9c07b67318d46088"},
     {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9190f09060ea4debddd24665d6804b995a9c122ef5917ab26e1566dcc712ceeb"},
@@ -1260,6 +1261,7 @@ files = [
     {file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:76ae285c8104046b3a7f06b42f29c7b73f77683df18c49ab5af7983994c2dd91"},
     {file = "greenlet-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:2d4686f195e32d36b4d7cf2d166857dbd0ee9f3d20ae349b6bf8afc8485b3645"},
     {file = "greenlet-2.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4302695ad8027363e96311df24ee28978162cdcdd2006476c43970b384a244c"},
+    {file = "greenlet-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d4606a527e30548153be1a9f155f4e283d109ffba663a15856089fb55f933e47"},
     {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c48f54ef8e05f04d6eff74b8233f6063cb1ed960243eacc474ee73a2ea8573ca"},
     {file = 
"greenlet-2.0.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1846f1b999e78e13837c93c778dcfc3365902cfb8d1bdb7dd73ead37059f0d0"}, {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a06ad5312349fec0ab944664b01d26f8d1f05009566339ac6f63f56589bc1a2"}, @@ -1289,6 +1291,7 @@ files = [ {file = "greenlet-2.0.2-cp37-cp37m-win32.whl", hash = "sha256:3f6ea9bd35eb450837a3d80e77b517ea5bc56b4647f5502cd28de13675ee12f7"}, {file = "greenlet-2.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:7492e2b7bd7c9b9916388d9df23fa49d9b88ac0640db0a5b4ecc2b653bf451e3"}, {file = "greenlet-2.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b864ba53912b6c3ab6bcb2beb19f19edd01a6bfcbdfe1f37ddd1778abfe75a30"}, + {file = "greenlet-2.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1087300cf9700bbf455b1b97e24db18f2f77b55302a68272c56209d5587c12d1"}, {file = "greenlet-2.0.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:ba2956617f1c42598a308a84c6cf021a90ff3862eddafd20c3333d50f0edb45b"}, {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc3a569657468b6f3fb60587e48356fe512c1754ca05a564f11366ac9e306526"}, {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8eab883b3b2a38cc1e050819ef06a7e6344d4a990d24d45bc6f2cf959045a45b"}, @@ -1297,6 +1300,7 @@ files = [ {file = "greenlet-2.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0ef99cdbe2b682b9ccbb964743a6aca37905fda5e0452e5ee239b1654d37f2a"}, {file = "greenlet-2.0.2-cp38-cp38-win32.whl", hash = "sha256:b80f600eddddce72320dbbc8e3784d16bd3fb7b517e82476d8da921f27d4b249"}, {file = "greenlet-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:4d2e11331fc0c02b6e84b0d28ece3a36e0548ee1a1ce9ddde03752d9b79bba40"}, + {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8512a0c38cfd4e66a858ddd1b17705587900dd760c6003998e9472b77b56d417"}, {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:88d9ab96491d38a5ab7c56dd7a3cc37d83336ecc564e4e8816dbed12e5aaefc8"}, {file = "greenlet-2.0.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:561091a7be172ab497a3527602d467e2b3fbe75f9e783d8b8ce403fa414f71a6"}, {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:971ce5e14dc5e73715755d0ca2975ac88cfdaefcaab078a284fea6cfabf866df"}, @@ -1636,7 +1640,7 @@ i18n = ["Babel (>=2.7)"] name = "joblib" version = "1.3.2" description = "Lightweight pipelining with Python functions" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "joblib-1.3.2-py3-none-any.whl", hash = "sha256:ef4331c65f239985f3f2220ecc87db222f08fd22097a3dd5698f693875f8cbb9"}, @@ -1934,13 +1938,13 @@ retrying = "*" [[package]] name = "llama-index" -version = "0.8.21" +version = "0.8.24.post1" description = "Interface between LLMs and your data" optional = false python-versions = "*" files = [ - {file = "llama_index-0.8.21-py3-none-any.whl", hash = "sha256:41cf1e5f7ff856c08f3ed70435a3e29b96226af43d45115276ae3fff57c5f973"}, - {file = "llama_index-0.8.21.tar.gz", hash = "sha256:1058c0ec574d964f7209cf923687994845aadce5947612cc05121ed16a5f5730"}, + {file = "llama_index-0.8.24.post1-py3-none-any.whl", hash = "sha256:4b7645a445d394640bad8c66a67483df29f7f0af25c53360cb382075be0c6c34"}, + {file = "llama_index-0.8.24.post1.tar.gz", hash = "sha256:7cd47cf6ba64d24dbc6db712bcd4834767e0d35890559feee139bd4fa90ad916"}, ] [package.dependencies] @@ -1949,6 +1953,7 @@ 
dataclasses-json = "*" fsspec = ">=2023.5.0" langchain = ">=0.0.262" nest-asyncio = "*" +nltk = "*" numpy = "*" openai = ">=0.26.4" pandas = "*" @@ -2390,7 +2395,7 @@ test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"] name = "nltk" version = "3.8.1" description = "Natural Language Toolkit" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "nltk-3.8.1-py3-none-any.whl", hash = "sha256:fd5c9109f976fa86bcadba8f91e47f5e9293bd034474752e92a520f81c93dda5"}, @@ -3299,6 +3304,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -3306,8 +3312,15 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = 
"PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -3324,6 +3337,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -3331,6 +3345,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -3725,35 +3740,57 @@ files = [ {file = "safetensors-0.3.3-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:17f41344d9a075f2f21b289a49a62e98baff54b5754240ba896063bce31626bf"}, {file = "safetensors-0.3.3-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:f1045f798e1a16a6ced98d6a42ec72936d367a2eec81dc5fade6ed54638cd7d2"}, {file = "safetensors-0.3.3-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:eaf0e4bc91da13f21ac846a39429eb3f3b7ed06295a32321fa3eb1a59b5c70f3"}, + {file = "safetensors-0.3.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25149180d4dc8ca48bac2ac3852a9424b466e36336a39659b35b21b2116f96fc"}, + {file = "safetensors-0.3.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9e943bf78c39de8865398a71818315e7d5d1af93c7b30d4da3fc852e62ad9bc"}, + {file = "safetensors-0.3.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cccfcac04a010354e87c7a2fe16a1ff004fc4f6e7ef8efc966ed30122ce00bc7"}, {file = "safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a07121f427e646a50d18c1be0fa1a2cbf6398624c31149cd7e6b35486d72189e"}, {file = "safetensors-0.3.3-cp310-cp310-win32.whl", hash = "sha256:a85e29cbfddfea86453cc0f4889b4bcc6b9c155be9a60e27be479a34e199e7ef"}, + {file = "safetensors-0.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:e13adad4a3e591378f71068d14e92343e626cf698ff805f61cdb946e684a218e"}, {file = "safetensors-0.3.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:cbc3312f134baf07334dd517341a4b470b2931f090bd9284888acb7dfaf4606f"}, {file = "safetensors-0.3.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d15030af39d5d30c22bcbc6d180c65405b7ea4c05b7bab14a570eac7d7d43722"}, {file = "safetensors-0.3.3-cp311-cp311-macosx_12_0_universal2.whl", hash = "sha256:f84a74cbe9859b28e3d6d7715ac1dd3097bebf8d772694098f6d42435245860c"}, {file = "safetensors-0.3.3-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:10d637423d98ab2e6a4ad96abf4534eb26fcaf8ca3115623e64c00759374e90d"}, {file = "safetensors-0.3.3-cp311-cp311-macosx_13_0_universal2.whl", hash = "sha256:3b46f5de8b44084aff2e480874c550c399c730c84b2e8ad1bddb062c94aa14e9"}, + {file = "safetensors-0.3.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e76da691a82dfaf752854fa6d17c8eba0c8466370c5ad8cf1bfdf832d3c7ee17"}, + {file = "safetensors-0.3.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4e342fd54e66aa9512dd13e410f791e47aa4feeb5f4c9a20882c72f3d272f29"}, + {file = "safetensors-0.3.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:178fd30b5dc73bce14a39187d948cedd0e5698e2f055b7ea16b5a96c9b17438e"}, {file = "safetensors-0.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e8fdf7407dba44587ed5e79d5de3533d242648e1f2041760b21474bd5ea5c8c"}, {file = "safetensors-0.3.3-cp311-cp311-win32.whl", hash = "sha256:7d3b744cee8d7a46ffa68db1a2ff1a1a432488e3f7a5a97856fe69e22139d50c"}, + {file = "safetensors-0.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f579877d30feec9b6ba409d05fa174633a4fc095675a4a82971d831a8bb60b97"}, {file = "safetensors-0.3.3-cp37-cp37m-macosx_10_11_x86_64.whl", hash = "sha256:2fff5b19a1b462c17322998b2f4b8bce43c16fe208968174d2f3a1446284ceed"}, {file = "safetensors-0.3.3-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:41adb1d39e8aad04b16879e3e0cbcb849315999fad73bc992091a01e379cb058"}, {file = "safetensors-0.3.3-cp37-cp37m-macosx_12_0_x86_64.whl", hash = "sha256:0f2b404250b3b877b11d34afcc30d80e7035714a1116a3df56acaca6b6c00096"}, {file = "safetensors-0.3.3-cp37-cp37m-macosx_13_0_x86_64.whl", hash = "sha256:b43956ef20e9f4f2e648818a9e7b3499edd6b753a0f5526d4f6a6826fbee8446"}, + {file = "safetensors-0.3.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d61a99b34169981f088ccfbb2c91170843efc869a0a0532f422db7211bf4f474"}, + {file = "safetensors-0.3.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c0008aab36cd20e9a051a68563c6f80d40f238c2611811d7faa5a18bf3fd3984"}, + {file = "safetensors-0.3.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:93d54166072b143084fdcd214a080a088050c1bb1651016b55942701b31334e4"}, {file = "safetensors-0.3.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c32ee08f61cea56a5d62bbf94af95df6040c8ab574afffaeb7b44ae5da1e9e3"}, {file = "safetensors-0.3.3-cp37-cp37m-win32.whl", hash = "sha256:351600f367badd59f7bfe86d317bb768dd8c59c1561c6fac43cafbd9c1af7827"}, + {file = "safetensors-0.3.3-cp37-cp37m-win_amd64.whl", hash = 
"sha256:034717e297849dae1af0a7027a14b8647bd2e272c24106dced64d83e10d468d1"}, {file = "safetensors-0.3.3-cp38-cp38-macosx_10_11_x86_64.whl", hash = "sha256:8530399666748634bc0b301a6a5523756931b0c2680d188e743d16304afe917a"}, {file = "safetensors-0.3.3-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:9d741c1f1621e489ba10aa3d135b54202684f6e205df52e219d5eecd673a80c9"}, + {file = "safetensors-0.3.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:0c345fd85b4d2093a5109596ff4cd9dfc2e84992e881b4857fbc4a93a3b89ddb"}, {file = "safetensors-0.3.3-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:69ccee8d05f55cdf76f7e6c87d2bdfb648c16778ef8acfd2ecc495e273e9233e"}, + {file = "safetensors-0.3.3-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:c08a9a4b7a4ca389232fa8d097aebc20bbd4f61e477abc7065b5c18b8202dede"}, {file = "safetensors-0.3.3-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:a002868d2e3f49bbe81bee2655a411c24fa1f8e68b703dec6629cb989d6ae42e"}, + {file = "safetensors-0.3.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3bd2704cb41faa44d3ec23e8b97330346da0395aec87f8eaf9c9e2c086cdbf13"}, + {file = "safetensors-0.3.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b2951bf3f0ad63df5e6a95263652bd6c194a6eb36fd4f2d29421cd63424c883"}, + {file = "safetensors-0.3.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:07114cec116253ca2e7230fdea30acf76828f21614afd596d7b5438a2f719bd8"}, {file = "safetensors-0.3.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ab43aeeb9eadbb6b460df3568a662e6f1911ecc39387f8752afcb6a7d96c087"}, {file = "safetensors-0.3.3-cp38-cp38-win32.whl", hash = "sha256:f2f59fce31dd3429daca7269a6b06f65e6547a0c248f5116976c3f1e9b73f251"}, + {file = "safetensors-0.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:c31ca0d8610f57799925bf08616856b39518ab772c65093ef1516762e796fde4"}, {file = "safetensors-0.3.3-cp39-cp39-macosx_10_11_x86_64.whl", hash = "sha256:59a596b3225c96d59af412385981f17dd95314e3fffdf359c7e3f5bb97730a19"}, {file = "safetensors-0.3.3-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:82a16e92210a6221edd75ab17acdd468dd958ef5023d9c6c1289606cc30d1479"}, {file = "safetensors-0.3.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:98a929e763a581f516373ef31983ed1257d2d0da912a8e05d5cd12e9e441c93a"}, {file = "safetensors-0.3.3-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:12b83f1986cd16ea0454c636c37b11e819d60dd952c26978310a0835133480b7"}, {file = "safetensors-0.3.3-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:f439175c827c2f1bbd54df42789c5204a10983a30bc4242bc7deaf854a24f3f0"}, {file = "safetensors-0.3.3-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:0085be33b8cbcb13079b3a8e131656e05b0bc5e6970530d4c24150f7afd76d70"}, + {file = "safetensors-0.3.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e3ec70c87b1e910769034206ad5efc051069b105aac1687f6edcd02526767f4"}, + {file = "safetensors-0.3.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f490132383e5e490e710608f4acffcb98ed37f91b885c7217d3f9f10aaff9048"}, + {file = "safetensors-0.3.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:79d1b6c7ed5596baf79c80fbce5198c3cdcc521ae6a157699f427aba1a90082d"}, {file = "safetensors-0.3.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad3cc8006e7a86ee7c88bd2813ec59cd7cc75b03e6fa4af89b9c7b235b438d68"}, {file = "safetensors-0.3.3-cp39-cp39-win32.whl", hash = 
"sha256:ab29f54c6b8c301ca05fa014728996bd83aac6e21528f893aaf8945c71f42b6d"}, + {file = "safetensors-0.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:0fa82004eae1a71e2aa29843ef99de9350e459a0fc2f65fc6ee0da9690933d2d"}, {file = "safetensors-0.3.3.tar.gz", hash = "sha256:edb7072d788c4f929d0f5735d3a2fb51e5a27f833587828583b7f5747af1a2b8"}, ] @@ -5016,4 +5053,4 @@ bot = ["adapter-transformers", "datasets", "einops", "faiss-cpu", "gradio", "lan [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "5a508f342d3f4a66cd8f72f34ed1559d05282c270c710aae40fcf94aa945f014" +content-hash = "56c1ceefcb939dd090c641de32bf25cf5ba2718cceb2148bcf2a24a255b2e945" diff --git a/pyproject.toml b/pyproject.toml index c48192d1..031015e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,9 @@ einops = { version="^0.6.1", optional=true } faiss-cpu = { version="^1.7.4", optional=true } gradio = { version="^3.34.0", optional=true } langchain = "^0.0.278" -llama-index = "^0.8.14" +llama-index = "^0.8.24" +llama-cpp-python = "^0.1.83" +llama-hub = "^0.0.26" nbconvert = { version="^7.5.0", optional=true } openai = { version="^0.27.8", optional=true } pandas = "^2.0.2" @@ -38,8 +40,6 @@ torch = [ transformers = "=4.30.2" ipykernel = "^6.23.2" xformers = { version="^0.0.20", optional=true } -llama-cpp-python = "^0.1.83" -llama-hub = "^0.0.26" [tool.poetry.group.dev.dependencies] black = "^23.3.0" @@ -79,9 +79,6 @@ url = "https://download.pytorch.org/whl/cpu/" priority = "explicit" -[tool.poetry.group.bot.dependencies] -llama-cpp-python = {version = "^0.1.83", optional = true} - [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api"