Merge pull request #66 from alan-turing-institute/llama2
Implement llama2 + llama_index
rchan26 authored Sep 8, 2023
2 parents 3556575 + a238f80 commit 7cfeedc
Showing 10 changed files with 9,283 additions and 3,444 deletions.
2,174 changes: 441 additions & 1,733 deletions data/handbook_qa/data/qAndA.jsonl

Large diffs are not rendered by default.

3,090 changes: 3,090 additions & 0 deletions models/llama-index-hack/llama2_ccp_chat.ipynb

Large diffs are not rendered by default.

692 changes: 692 additions & 0 deletions models/llama-index-hack/llama2_eval.ipynb

Large diffs are not rendered by default.

693 changes: 693 additions & 0 deletions models/llama-index-hack/llama2_github_collaborators.ipynb

Large diffs are not rendered by default.

874 changes: 874 additions & 0 deletions models/llama-index-hack/llama2_github_issues.ipynb

Large diffs are not rendered by default.

277 changes: 277 additions & 0 deletions models/llama-index-hack/llama2_qanda.ipynb
@@ -0,0 +1,277 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from llama_index.llms import LlamaCPP\n",
"from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt\n",
"from llama_index import Document\n",
"from llama_index import VectorStoreIndex, StorageContext, load_index_from_storage\n",
"from llama_index import LLMPredictor, PromptHelper, ServiceContext\n",
"\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama.cpp: loading model from ../../../llama/llama-2-7b-chat/ggml-model-q4_0.bin\n",
"llama_model_load_internal: format = ggjt v3 (latest)\n",
"llama_model_load_internal: n_vocab = 32000\n",
"llama_model_load_internal: n_ctx = 3900\n",
"llama_model_load_internal: n_embd = 4096\n",
"llama_model_load_internal: n_mult = 256\n",
"llama_model_load_internal: n_head = 32\n",
"llama_model_load_internal: n_head_kv = 32\n",
"llama_model_load_internal: n_layer = 32\n",
"llama_model_load_internal: n_rot = 128\n",
"llama_model_load_internal: n_gqa = 1\n",
"llama_model_load_internal: rnorm_eps = 1.0e-06\n",
"llama_model_load_internal: n_ff = 11008\n",
"llama_model_load_internal: freq_base = 10000.0\n",
"llama_model_load_internal: freq_scale = 1\n",
"llama_model_load_internal: ftype = 2 (mostly Q4_0)\n",
"llama_model_load_internal: model size = 7B\n",
"llama_model_load_internal: ggml ctx size = 0.08 MB\n",
"llama_model_load_internal: mem required = 4160.96 MB (+ 1950.00 MB per state)\n",
"llama_new_context_with_model: kv self size = 1950.00 MB\n",
"AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n"
]
}
],
"source": [
"llm = LlamaCPP(\n",
" model_path=\"../../../llama/llama-2-7b-chat/gguf-model-q4_0.gguf\",\n",
" temperature=0.1,\n",
" max_new_tokens=256,\n",
" # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room\n",
" context_window=3900,\n",
" # kwargs to pass to __call__()\n",
" generate_kwargs={},\n",
" # kwargs to pass to __init__()\n",
" # set to at least 1 to use GPU\n",
" model_kwargs={\"n_gpu_layers\": 1},\n",
" # transform inputs into Llama2 format\n",
" messages_to_prompt=messages_to_prompt,\n",
" completion_to_prompt=completion_to_prompt,\n",
" verbose=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"def load_documents():\n",
" df = pd.read_json(\"../../data/handbook_qa/data/qAndA.jsonl\", lines=True)\n",
" df[\"prompt\"]=df[\"prompt\"]+\"?\"\n",
" documents = [Document(text=str(i)) for i in df.values]\n",
"\n",
" return documents"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"documents = load_documents()"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'[\\'What are some of the shortcodes in the handbook theme ?\\'\\n \"Some of the shortcodes in the handbook theme include figure, gist, highlight, and KaTeX, as well as custom ones specific to the theme such as hints, expand, and tabs. Shortcodes are templates that can be parametrized and can be included in the content section of a Markdown file to enable more complex features than Markdown\\'s simple syntax allows.\"]'"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"documents[10].text"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"def create_service_context(\n",
" model, \n",
" max_input_size=1024,\n",
" num_output=128,\n",
" chunk_size_lim=256,\n",
" overlap_ratio=0.1\n",
" ):\n",
" llm_predictor=LLMPredictor(llm=model)\n",
" prompt_helper=PromptHelper(max_input_size,num_output,overlap_ratio,chunk_size_lim)\n",
" service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, embed_model=\"local\")\n",
" return service_context"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/rwood/Library/Caches/pypoetry/virtualenvs/reginald-slack-ZVq5BSHv-py3.11/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"service_context = create_service_context(llm)\n",
"index = VectorStoreIndex.from_documents(documents, service_context=service_context)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"query_engine = index.as_query_engine()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Based on the provided context information, I understand that REG refers to the Research and Engineering Group (REG) within the Turing Institute.\n",
"According to the context, the REG newsletter is sent out monthly around a week before the monthly team meeting, and the all-REG meeting is also held monthly where new joiners are welcomed and updates from around REG or the Turing are presented, followed by a discussion on a topic of interest for the wider team.\n",
"Therefore, based on the information provided, REG appears to be a group within the Turing Institute that focuses on research and engineering activities, and has regular meetings to share updates and discuss topics of interest.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"llama_print_timings: load time = 5081.84 ms\n",
"llama_print_timings: sample time = 104.11 ms / 148 runs ( 0.70 ms per token, 1421.53 tokens per second)\n",
"llama_print_timings: prompt eval time = 5081.79 ms / 238 tokens ( 21.35 ms per token, 46.83 tokens per second)\n",
"llama_print_timings: eval time = 7902.76 ms / 147 runs ( 53.76 ms per token, 18.60 tokens per second)\n",
"llama_print_timings: total time = 13274.83 ms\n"
]
}
],
"source": [
"response = query_engine.query(\"Who are REG\")\n",
"print(response.response)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['What is the REG newsletter and how often is it sent out?'\n",
" 'The REG newsletter is a monthly email containing short project updates and other news/updates from around the team and the institute. It is sent around a week before the monthly team meeting and comes to the Hut23 mailing list.']\n"
]
}
],
"source": [
"print(response.source_nodes[0].node.text)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Llama.generate: prefix-match hit\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" Thank you for asking! The Alan Turing Institute is based in London, UK.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"llama_print_timings: load time = 5081.84 ms\n",
"llama_print_timings: sample time = 13.43 ms / 19 runs ( 0.71 ms per token, 1415.27 tokens per second)\n",
"llama_print_timings: prompt eval time = 4391.42 ms / 212 tokens ( 20.71 ms per token, 48.28 tokens per second)\n",
"llama_print_timings: eval time = 957.65 ms / 18 runs ( 53.20 ms per token, 18.80 tokens per second)\n",
"llama_print_timings: total time = 5386.18 ms\n"
]
}
],
"source": [
"response = query_engine.query(\"Where is the Alan Turing Institute based?\")\n",
"print(response.response)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "llama",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
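
For reference, the notebook's end-to-end flow condenses to the short script below. This is a minimal sketch against the llama_index 0.8-era API the notebook uses (LlamaCPP, LLMPredictor, PromptHelper, ServiceContext); the model and data paths are the ones from the notebook and will differ on other machines.

    # Minimal sketch of the notebook's pipeline: load a local Llama 2 model via
    # llama.cpp, index the handbook Q&A pairs, and answer a query.
    import pandas as pd

    from llama_index import Document, ServiceContext, VectorStoreIndex
    from llama_index import LLMPredictor, PromptHelper
    from llama_index.llms import LlamaCPP
    from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt

    llm = LlamaCPP(
        model_path="../../../llama/llama-2-7b-chat/gguf-model-q4_0.gguf",
        temperature=0.1,
        max_new_tokens=256,
        context_window=3900,  # just under Llama 2's 4096-token window
        model_kwargs={"n_gpu_layers": 1},  # set to at least 1 to use the GPU
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=completion_to_prompt,
    )

    # Each JSONL row holds a prompt/completion pair; each row becomes one Document.
    df = pd.read_json("../../data/handbook_qa/data/qAndA.jsonl", lines=True)
    df["prompt"] = df["prompt"] + "?"
    documents = [Document(text=str(row)) for row in df.values]

    prompt_helper = PromptHelper(1024, 128, 0.1, 256)
    service_context = ServiceContext.from_defaults(
        llm_predictor=LLMPredictor(llm=llm),
        prompt_helper=prompt_helper,
        embed_model="local",  # local embeddings rather than a remote API
    )

    index = VectorStoreIndex.from_documents(documents, service_context=service_context)
    query_engine = index.as_query_engine()
    print(query_engine.query("Who are REG").response)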