diff --git a/packages/backend/src/ai-test.json b/packages/backend/src/ai-test.json deleted file mode 100644 index f998a9080..000000000 --- a/packages/backend/src/ai-test.json +++ /dev/null @@ -1,72 +0,0 @@ -{ - "recipes": [ - { - "id": "chatbot", - "description" : "Chat bot application", - "name" : "ChatBot", - "repository": "https://github.com/axel7083/locallm", - "icon": "natural-language-processing", - "categories": [ - "natural-language-processing" - ], - "config": "chatbot/ai-studio.yaml", - "readme": "# Locallm\n\nThis repo contains artifacts that can be used to build and run LLM (Large Language Model) services locally on your Mac using podman. These containerized LLM services can be used to help developers quickly prototype new LLM based applications, without the need for relying on any other externally hosted services. Since they are already containerized, it also helps developers move from their prototype to production quicker. \n\n## Current Locallm Services: \n\n* [Chatbot](#chatbot)\n* [Text Summarization](#text-summarization)\n* [Fine-tuning](#fine-tuning)\n\n### Chatbot\n\nA simple chatbot using the gradio UI. Learn how to build and run this model service here: [Chatbot](/chatbot/).\n\n### Text Summarization\n\nAn LLM app that can summarize arbitrarily long text inputs. Learn how to build and run this model service here: [Text Summarization](/summarizer/).\n\n### Fine Tuning \n\nThis application allows a user to select a model and a data set they'd like to fine-tune that model on. Once the application finishes, it outputs a new fine-tuned model for the user to apply to other LLM services. Learn how to build and run this model training job here: [Fine-tuning](/finetune/).\n\n## Architecture\n![](https://raw.githubusercontent.com/MichaelClifford/locallm/main/assets/arch.jpg)\n\nThe diagram above indicates the general architecture for each of the individual model services contained in this repo. The core code available here is the \"LLM Task Service\" and the \"API Server\", bundled together under `model_services`. With an appropriately chosen model downloaded onto your host, `model_services/builds` contains the Containerfiles required to build an ARM or an x86 (with CUDA) image depending on your need. These model services are intended to be light-weight and run with smaller hardware footprints (given the Locallm name), but they can be run on any hardware that supports containers and scaled up if needed.\n\nWe also provide demo \"AI Applications\" under `ai_applications` for each model service to provide an example of how a developers could interact with the model service for their own needs. ", - "models": [ - "llama-2-7b-chat.Q5_K_S", - "albedobase-xl-1.3", - "sdxl-turbo" - ] - } - ], - "models": [ - { - "id": "llama-2-7b-chat.Q5_K_S", - "name": "Llama-2-7B-Chat-GGUF", - "description": "Llama 2 is a family of state-of-the-art open-access large language models released by Meta today, and we’re excited to fully support the launch with comprehensive integration in Hugging Face. Llama 2 is being released with a very permissive community license and is available for commercial use. 
The code, pretrained models, and fine-tuned models are all being released today 🔥", - "hw": "CPU", - "registry": "Hugging Face", - "license": "?", - "url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf" - }, - { - "id": "albedobase-xl-1.3", - "name": "AlbedoBase XL 1.3", - "description": "Stable Diffusion XL has 6.6 billion parameters, which is about 6.6 times more than the SD v1.5 version. I believe that this is not just a number, but a number that can lead to a significant improvement in performance. It has been a while since we realized that the overall performance of SD v1.5 has improved beyond imagination thanks to the explosive contributions of our community. Therefore, I am working on completing this AlbedoBase XL model in order to optimally reproduce the performance improvement that occurred in v1.5 in this XL version as well. My goal is to directly test the performance of all Checkpoints and LoRAs that are publicly uploaded to Civitai, and merge only the resources that are judged to be optimal after passing through several filters. This will surpass the performance of image-generating AI of companies such as Midjourney. As of now, AlbedoBase XL v0.4 has merged exactly 55 selected checkpoints and 138 LoRAs.", - "hw": "CPU", - "registry": "Civital", - "license": "openrail++", - "url": "" - }, - { - "id": "sdxl-turbo", - "name": "SDXL Turbo", - "description": "SDXL Turbo achieves state-of-the-art performance with a new distillation technology, enabling single-step image generation with unprecedented quality, reducing the required step count from 50 to just one.", - "hw": "CPU", - "registry": "Hugging Face", - "license": "sai-c-community", - "url": "" - } - ], - "categories": [ - { - "id": "natural-language-processing", - "name": "Natural Language Processing", - "description" : "Models that work with text: classify, summarize, translate, or generate text." 
- }, - { - "id": "computer-vision", - "description" : "Process images, from classification to object detection and segmentation.", - "name" : "Computer Vision" - }, - { - "id": "audio", - "description" : "Recognize speech or classify audio with audio models.", - "name" : "Audio" - }, - { - "id": "multimodal", - "description" : "Stuff about multimodal models goes here omg yes amazing.", - "name" : "Multimodal" - } - ] -} diff --git a/packages/backend/src/ai-user-test.json b/packages/backend/src/ai-user-test.json deleted file mode 100644 index 20d8f31f0..000000000 --- a/packages/backend/src/ai-user-test.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "recipes": [ - { - "id": "recipe 1", - "description" : "Recipe 1", - "name" : "Recipe 1", - "repository": "https://recipe1.example.com", - "icon": "natural-language-processing", - "categories": [ - "category1" - ], - "config": "chatbot/ai-studio.yaml", - "readme": "Readme for recipe 1", - "models": [ - "model1", - "model2" - ] - } - ], - "models": [ - { - "id": "model1", - "name": "Model 1", - "description": "Readme for model 1", - "hw": "CPU", - "registry": "Hugging Face", - "license": "?", - "url": "https://model1.example.com" - }, - { - "id": "model2", - "name": "Model 2", - "description": "Readme for model 2", - "hw": "CPU", - "registry": "Civital", - "license": "?", - "url": "" - } - ], - "categories": [ - { - "id": "category1", - "name": "Category 1", - "description" : "Readme for category 1" - } - ] -} diff --git a/packages/backend/src/ai.json b/packages/backend/src/ai.json deleted file mode 100644 index 8dc3375aa..000000000 --- a/packages/backend/src/ai.json +++ /dev/null @@ -1,97 +0,0 @@ -{ - "recipes": [ - { - "id": "chatbot", - "description" : "Chat bot application", - "name" : "ChatBot", - "repository": "https://github.com/redhat-et/locallm", - "ref": "b0b2eca", - "icon": "natural-language-processing", - "categories": [ - "natural-language-processing" - ], - "config": "chatbot-langchain/ai-studio.yaml", - "readme": "# Chat Application\n\nThis model service is intended be used as the basis for a chat application. It is capable of having arbitrarily long conversations\nwith users and retains a history of the conversation until it reaches the maximum context length of the model.\nAt that point, the service will remove the earliest portions of the conversation from its memory.\n\nTo use this model service, please follow the steps below:\n\n* [Download Model](#download-models)\n* [Build Image](#build-the-image)\n* [Run Image](#run-the-image)\n* [Interact with Service](#interact-with-the-app)\n* [Deploy on Openshift](#deploy-on-openshift)\n\n## Build and Deploy Locally\n\n### Download model(s)\n\nThe two models that we have tested and recommend for this example are Llama2 and Mistral. The locations of the GGUF variants\nare listed below:\n\n* Llama2 - https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/tree/main\n* Mistral - https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/tree/main\n\n_For a full list of supported model variants, please see the \"Supported models\" section of the\n[llama.cpp repository](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description)._\n\nThis example assumes that the developer already has a copy of the model that they would like to use downloaded onto their host machine and located in the `/models` directory of this repo. 
\n\nThis can be accomplished with:\n\n```bash\ncd models\nwget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf\ncd ../\n```\n\n## Deploy from Local Container\n\n### Build the image\n\nBuild the `model-service` image.\n\n```bash\ncd chatbot/model_services\npodman build -t chatbot:service -f base/Containerfile .\n```\n\nAfter the image is created it should be run with the model mounted as volume, as shown below.\nThis prevents large model files from being loaded into the container image which can cause a significant slowdown\nwhen transporting the images. If it is required that a model-service image contains the model,\nthe Containerfiles can be modified to copy the model into the image.\n\nWith the model-service image, in addition to a volume mounted model file, an environment variable, $MODEL_PATH,\nshould be set at runtime. If not set, the default location where the service expects a model is at \n`/locallm/models/llama-2-7b-chat.Q5_K_S.gguf` inside the running container. This file can be downloaded from the URL\n`https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf`.\n\n### Run the image\n\nOnce the model service image is built, it can be run with the following:\nBy assuming that we want to mount the model `llama-2-7b-chat.Q5_K_S.gguf`\n\n```bash\nexport MODEL_FILE=llama-2-7b-chat.Q5_K_S.gguf\npodman run --rm -d -it \\n -v /local/path/to/$MODEL_FILE:/locallm/models/$MODEL_FILE:Z \\n --env MODEL_PATH=/locallm/models/$MODEL_FILE \\n -p 7860:7860 \\n chatbot:service\n```\n\n### Interact with the app\n\nNow the service can be interacted with by going to `0.0.0.0:7860` in your browser.\n\n![](https://raw.githubusercontent.com/redhat-et/locallm/main/assets/app.png)\n\n\nYou can also use the example [chatbot/ai_applications/ask.py](ask.py) to interact with the model-service in a terminal.\nIf the `--prompt` argument is left blank, it will default to \"Hello\".\n\n```bash\ncd chatbot/ai_applications\n\npython ask.py --prompt \n```\n\nOr, you can build the `ask.py` into a container image and run it alongside the model-service container, like so:\n\n```bash\ncd chatbot/ai_applications\npodman build -t chatbot -f builds/Containerfile .\npodman run --rm -d -it -p 8080:8080 chatbot # then interact with the application at 0.0.0.0:8080 in your browser\n```\n\n## Deploy on Openshift\n\nNow that we've developed an application locally that leverages an LLM, we'll want to share it with a wider audience.\nLet's get it off our machine and run it on OpenShift.\n\n### Rebuild for x86\n\nIf you are on a Mac, you'll need to rebuild the model-service image for the x86 architecture for most use case outside of Mac.\nSince this is an AI workload, you may also want to take advantage of Nvidia GPU's available outside our local machine.\nIf so, build the model-service with a base image that contains CUDA and builds llama.cpp specifically for a CUDA environment.\n\n```bash\ncd chatbot/model_services/cuda\npodman build --platform linux/amd64 -t chatbot:service-cuda -f cuda/Containerfile .\n```\n\nThe CUDA environment significantly increases the size of the container image.\nIf you are not utilizing a GPU to run this application, you can create an image\nwithout the CUDA layers for an x86 architecture machine with the following:\n\n```bash\ncd chatbot/model_services\npodman build --platform linux/amd64 -t chatbot:service-amd64 -f base/Containerfile .\n```\n\n### Push to Quay\n\nOnce you login to [quay.io](quay.io) you can push your own 
newly built version of this LLM application to your repository\nfor use by others.\n\n```bash\npodman login quay.io\n```\n\n```bash\npodman push localhost/chatbot:service-amd64 quay.io//\n```\n\n### Deploy\n\nNow that your model lives in a remote repository we can deploy it.\nGo to your OpenShift developer dashboard and select \"+Add\" to use the Openshift UI to deploy the application.\n\n![](https://raw.githubusercontent.com/redhat-et/locallm/main/assets/add_image.png)\n\nSelect \"Container images\"\n\n![](https://raw.githubusercontent.com/redhat-et/locallm/main/assets/container_images.png)\n\nThen fill out the form on the Deploy page with your [quay.io](quay.io) image name and make sure to set the \"Target port\" to 7860.\n\n![](https://raw.githubusercontent.com/redhat-et/locallm/main/assets/deploy.png)\n\nHit \"Create\" at the bottom and watch your application start.\n\nOnce the pods are up and the application is working, navigate to the \"Routes\" section and click on the link created for you\nto interact with your app.\n\n![](https://raw.githubusercontent.com/redhat-et/locallm/main/assets/app.png)", - "models": [ - "hf.TheBloke.llama-2-7b-chat.Q5_K_S", - "hf.TheBloke.mistral-7b-instruct-v0.1.Q4_K_M" - ] - }, - { - "id": "summarizer", - "description" : "Summarizer application", - "name" : "Summarizer", - "repository": "https://github.com/redhat-et/locallm", - "ref": "4ac7950", - "icon": "natural-language-processing", - "categories": [ - "natural-language-processing" - ], - "config": "summarizer-langchain/ai-studio.yaml", - "readme": "# Summarizer\n\nThis model service is intended be be used for text summarization tasks. This service can ingest an arbitrarily long text input. If the input length is less than the models maximum context window it will summarize the input directly. If the input is longer than the maximum context window, the input will be divided into appropriately sized chunks. Each chunk will be summarized and a final \"summary of summaries\" will be the services final output. ", - "models": [ - "hf.TheBloke.llama-2-7b-chat.Q5_K_S", - "hf.TheBloke.mistral-7b-instruct-v0.1.Q4_K_M" - ] - }, - { - "id": "codegeneration", - "description" : "Code Generation application", - "name" : "Code Generation", - "repository": "https://github.com/redhat-et/locallm", - "ref": "a1ee3db", - "icon": "natural-language-processing", - "categories": [ - "natural-language-processing" - ], - "config": "code-generation/ai-studio.yaml", - "readme": "# Code Generation\n\nThis example will deploy a local code-gen application using a llama.cpp model server and a python app built with langchain. \n\n### Download Model\n\n- **codellama**\n\n - Download URL: `wget https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf` \n\n```\n\ncd ../models\n\nwget \n\ncd ../\n\n```\n\n### Deploy Model Service\n\nTo start the model service, refer to [the playground model-service document](../playground/README.md). Deploy the LLM server and volumn mount the model of choice.\n\n```\n\npodman run --rm -it -d \\ \n\n -p 8001:8001 \\ \n\n -v Local/path/to/locallm/models:/locallm/models:ro,Z \\ \n\n -e MODEL_PATH=models/ \\ \n\n -e HOST=0.0.0.0 \\ \n\n -e PORT=8001 \\ \n\n playground:image\n\n```\n\n### Build Container Image\n\nOnce the model service is deployed, then follow the instruction below to build your container image and run it locally. 
\n\n- `podman build -t codegen-app code-generation -f code-generation/builds/Containerfile`\n\n- `podman run -it -p 8501:8501 codegen-app -- -m http://10.88.0.1:8001/v1` ", - "models": [ - "hf.TheBloke.llama-2-7b-chat.Q5_K_S", - "hf.TheBloke.mistral-7b-instruct-v0.1.Q4_K_M" - ] - } - ], - "models": [ - { - "id": "hf.TheBloke.llama-2-7b-chat.Q5_K_S", - "name": "TheBloke/Llama-2-7B-Chat-GGUF", - "description": "Llama 2 is a family of state-of-the-art open-access large language models released by Meta today, and we’re excited to fully support the launch with comprehensive integration in Hugging Face. Llama 2 is being released with a very permissive community license and is available for commercial use. The code, pretrained models, and fine-tuned models are all being released today 🔥", - "hw": "CPU", - "registry": "Hugging Face", - "license": "?", - "url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf" - }, - { - "id": "hf.TheBloke.mistral-7b-instruct-v0.1.Q4_K_M", - "name": "TheBloke/Mistral-7B-Instruct-v0.1-GGUF", - "description": "The Mistral-7B-Instruct-v0.1 Large Language Model (LLM) is a instruct fine-tuned version of the [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) generative text model using a variety of publicly available conversation datasets. For full details of this model please read our [release blog post](https://mistral.ai/news/announcing-mistral-7b/)", - "hw": "CPU", - "registry": "Hugging Face", - "license": "Apache-2.0", - "url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf" - } - ], - "categories": [ - { - "id": "natural-language-processing", - "name": "Natural Language Processing", - "description" : "Models that work with text: classify, summarize, translate, or generate text." - }, - { - "id": "computer-vision", - "description" : "Process images, from classification to object detection and segmentation.", - "name" : "Computer Vision" - }, - { - "id": "audio", - "description" : "Recognize speech or classify audio with audio models.", - "name" : "Audio" - }, - { - "id": "multimodal", - "description" : "Stuff about multimodal models goes here omg yes amazing.", - "name" : "Multimodal" - } - ] -} diff --git a/packages/backend/src/assets/applications-catalog.json b/packages/backend/src/assets/applications-catalog.json new file mode 100644 index 000000000..efb8ea4f2 --- /dev/null +++ b/packages/backend/src/assets/applications-catalog.json @@ -0,0 +1,53 @@ +[ + { + "id": "chatbot", + "description" : "Chat bot application", + "name" : "ChatBot", + "repository": "https://github.com/redhat-et/locallm", + "ref": "b0b2eca", + "icon": "natural-language-processing", + "categories": [ + "natural-language-processing" + ], + "config": "chatbot-langchain/ai-studio.yaml", + "readme": "# Chat Application\n\nThis model service is intended be used as the basis for a chat application. 
It is capable of having arbitrarily long conversations\nwith users and retains a history of the conversation until it reaches the maximum context length of the model.\nAt that point, the service will remove the earliest portions of the conversation from its memory.\n\nTo use this model service, please follow the steps below:\n\n* [Download Model](#download-models)\n* [Build Image](#build-the-image)\n* [Run Image](#run-the-image)\n* [Interact with Service](#interact-with-the-app)\n* [Deploy on Openshift](#deploy-on-openshift)\n\n## Build and Deploy Locally\n\n### Download model(s)\n\nThe two models that we have tested and recommend for this example are Llama2 and Mistral. The locations of the GGUF variants\nare listed below:\n\n* Llama2 - https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/tree/main\n* Mistral - https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/tree/main\n\n_For a full list of supported model variants, please see the \"Supported models\" section of the\n[llama.cpp repository](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description)._\n\nThis example assumes that the developer already has a copy of the model that they would like to use downloaded onto their host machine and located in the `/models` directory of this repo. \n\nThis can be accomplished with:\n\n```bash\ncd models\nwget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf\ncd ../\n```\n\n## Deploy from Local Container\n\n### Build the image\n\nBuild the `model-service` image.\n\n```bash\ncd chatbot/model_services\npodman build -t chatbot:service -f base/Containerfile .\n```\n\nAfter the image is created it should be run with the model mounted as volume, as shown below.\nThis prevents large model files from being loaded into the container image which can cause a significant slowdown\nwhen transporting the images. If it is required that a model-service image contains the model,\nthe Containerfiles can be modified to copy the model into the image.\n\nWith the model-service image, in addition to a volume mounted model file, an environment variable, $MODEL_PATH,\nshould be set at runtime. If not set, the default location where the service expects a model is at \n`/locallm/models/llama-2-7b-chat.Q5_K_S.gguf` inside the running container. 
This file can be downloaded from the URL\n`https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf`.\n\n### Run the image\n\nOnce the model service image is built, it can be run with the following:\nBy assuming that we want to mount the model `llama-2-7b-chat.Q5_K_S.gguf`\n\n```bash\nexport MODEL_FILE=llama-2-7b-chat.Q5_K_S.gguf\npodman run --rm -d -it \\n -v /local/path/to/$MODEL_FILE:/locallm/models/$MODEL_FILE:Z \\n --env MODEL_PATH=/locallm/models/$MODEL_FILE \\n -p 7860:7860 \\n chatbot:service\n```\n\n### Interact with the app\n\nNow the service can be interacted with by going to `0.0.0.0:7860` in your browser.\n\n![](https://raw.githubusercontent.com/redhat-et/locallm/main/assets/app.png)\n\n\nYou can also use the example [chatbot/ai_applications/ask.py](ask.py) to interact with the model-service in a terminal.\nIf the `--prompt` argument is left blank, it will default to \"Hello\".\n\n```bash\ncd chatbot/ai_applications\n\npython ask.py --prompt \n```\n\nOr, you can build the `ask.py` into a container image and run it alongside the model-service container, like so:\n\n```bash\ncd chatbot/ai_applications\npodman build -t chatbot -f builds/Containerfile .\npodman run --rm -d -it -p 8080:8080 chatbot # then interact with the application at 0.0.0.0:8080 in your browser\n```\n\n## Deploy on Openshift\n\nNow that we've developed an application locally that leverages an LLM, we'll want to share it with a wider audience.\nLet's get it off our machine and run it on OpenShift.\n\n### Rebuild for x86\n\nIf you are on a Mac, you'll need to rebuild the model-service image for the x86 architecture for most use case outside of Mac.\nSince this is an AI workload, you may also want to take advantage of Nvidia GPU's available outside our local machine.\nIf so, build the model-service with a base image that contains CUDA and builds llama.cpp specifically for a CUDA environment.\n\n```bash\ncd chatbot/model_services/cuda\npodman build --platform linux/amd64 -t chatbot:service-cuda -f cuda/Containerfile .\n```\n\nThe CUDA environment significantly increases the size of the container image.\nIf you are not utilizing a GPU to run this application, you can create an image\nwithout the CUDA layers for an x86 architecture machine with the following:\n\n```bash\ncd chatbot/model_services\npodman build --platform linux/amd64 -t chatbot:service-amd64 -f base/Containerfile .\n```\n\n### Push to Quay\n\nOnce you login to [quay.io](quay.io) you can push your own newly built version of this LLM application to your repository\nfor use by others.\n\n```bash\npodman login quay.io\n```\n\n```bash\npodman push localhost/chatbot:service-amd64 quay.io//\n```\n\n### Deploy\n\nNow that your model lives in a remote repository we can deploy it.\nGo to your OpenShift developer dashboard and select \"+Add\" to use the Openshift UI to deploy the application.\n\n![](https://raw.githubusercontent.com/redhat-et/locallm/main/assets/add_image.png)\n\nSelect \"Container images\"\n\n![](https://raw.githubusercontent.com/redhat-et/locallm/main/assets/container_images.png)\n\nThen fill out the form on the Deploy page with your [quay.io](quay.io) image name and make sure to set the \"Target port\" to 7860.\n\n![](https://raw.githubusercontent.com/redhat-et/locallm/main/assets/deploy.png)\n\nHit \"Create\" at the bottom and watch your application start.\n\nOnce the pods are up and the application is working, navigate to the \"Routes\" section and click on the link created for you\nto interact with 
your app.\n\n![](https://raw.githubusercontent.com/redhat-et/locallm/main/assets/app.png)", + "models": [ + "hf.TheBloke.llama-2-7b-chat.Q5_K_S", + "hf.TheBloke.mistral-7b-instruct-v0.1.Q4_K_M" + ] + }, + { + "id": "summarizer", + "description" : "Summarizer application", + "name" : "Summarizer", + "repository": "https://github.com/redhat-et/locallm", + "ref": "4ac7950", + "icon": "natural-language-processing", + "categories": [ + "natural-language-processing" + ], + "config": "summarizer-langchain/ai-studio.yaml", + "readme": "# Summarizer\n\nThis model service is intended to be used for text summarization tasks. This service can ingest an arbitrarily long text input. If the input length is less than the model's maximum context window it will summarize the input directly. If the input is longer than the maximum context window, the input will be divided into appropriately sized chunks. Each chunk will be summarized and a final \"summary of summaries\" will be the service's final output. ", + "models": [ + "hf.TheBloke.llama-2-7b-chat.Q5_K_S", + "hf.TheBloke.mistral-7b-instruct-v0.1.Q4_K_M" + ] + }, + { + "id": "codegeneration", + "description" : "Code Generation application", + "name" : "Code Generation", + "repository": "https://github.com/redhat-et/locallm", + "ref": "a1ee3db", + "icon": "natural-language-processing", + "categories": [ + "natural-language-processing" + ], + "config": "code-generation/ai-studio.yaml", + "readme": "# Code Generation\n\nThis example will deploy a local code-gen application using a llama.cpp model server and a Python app built with LangChain. \n\n### Download Model\n\n- **codellama**\n\n - Download URL: `wget https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf` \n\n```\n\ncd ../models\n\nwget \n\ncd ../\n\n```\n\n### Deploy Model Service\n\nTo start the model service, refer to [the playground model-service document](../playground/README.md). Deploy the LLM server and volume mount the model of choice.\n\n```\n\npodman run --rm -it -d \\ \n\n -p 8001:8001 \\ \n\n -v Local/path/to/locallm/models:/locallm/models:ro,Z \\ \n\n -e MODEL_PATH=models/ \\ \n\n -e HOST=0.0.0.0 \\ \n\n -e PORT=8001 \\ \n\n playground:image\n\n```\n\n### Build Container Image\n\nOnce the model service is deployed, follow the instructions below to build your container image and run it locally. \n\n- `podman build -t codegen-app code-generation -f code-generation/builds/Containerfile`\n\n- `podman run -it -p 8501:8501 codegen-app -- -m http://10.88.0.1:8001/v1` ", + "models": [ + "hf.TheBloke.llama-2-7b-chat.Q5_K_S", + "hf.TheBloke.mistral-7b-instruct-v0.1.Q4_K_M" + ] + } +] diff --git a/packages/backend/src/assets/categories-catalog.json b/packages/backend/src/assets/categories-catalog.json new file mode 100644 index 000000000..722d6ed20 --- /dev/null +++ b/packages/backend/src/assets/categories-catalog.json @@ -0,0 +1,22 @@ +[ + { + "id": "natural-language-processing", + "name": "Natural Language Processing", + "description" : "Models that work with text: classify, summarize, translate, or generate text."
+ }, + { + "id": "computer-vision", + "description" : "Process images, from classification to object detection and segmentation.", + "name" : "Computer Vision" + }, + { + "id": "audio", + "description" : "Recognize speech or classify audio with audio models.", + "name" : "Audio" + }, + { + "id": "multimodal", + "description" : "Stuff about multimodal models goes here omg yes amazing.", + "name" : "Multimodal" + } +] diff --git a/packages/backend/src/assets/models-catalog.json b/packages/backend/src/assets/models-catalog.json new file mode 100644 index 000000000..d1cd72aa9 --- /dev/null +++ b/packages/backend/src/assets/models-catalog.json @@ -0,0 +1,20 @@ +[ + { + "id": "hf.TheBloke.llama-2-7b-chat.Q5_K_S", + "name": "TheBloke/Llama-2-7B-Chat-GGUF", + "description": "Llama 2 is a family of state-of-the-art open-access large language models released by Meta today, and we’re excited to fully support the launch with comprehensive integration in Hugging Face. Llama 2 is being released with a very permissive community license and is available for commercial use. The code, pretrained models, and fine-tuned models are all being released today 🔥", + "hw": "CPU", + "registry": "Hugging Face", + "license": "?", + "url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf" + }, + { + "id": "hf.TheBloke.mistral-7b-instruct-v0.1.Q4_K_M", + "name": "TheBloke/Mistral-7B-Instruct-v0.1-GGUF", + "description": "The Mistral-7B-Instruct-v0.1 Large Language Model (LLM) is a instruct fine-tuned version of the [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) generative text model using a variety of publicly available conversation datasets. For full details of this model please read our [release blog post](https://mistral.ai/news/announcing-mistral-7b/)", + "hw": "CPU", + "registry": "Hugging Face", + "license": "Apache-2.0", + "url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf" + } +] diff --git a/packages/backend/src/managers/catalogManager.spec.ts b/packages/backend/src/managers/catalogManager.spec.ts deleted file mode 100644 index 5f9e0a0e7..000000000 --- a/packages/backend/src/managers/catalogManager.spec.ts +++ /dev/null @@ -1,142 +0,0 @@ -/********************************************************************** - * Copyright (C) 2024 Red Hat, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - ***********************************************************************/ - -/* eslint-disable @typescript-eslint/no-explicit-any */ - -import { beforeEach, describe, expect, test, vi } from 'vitest'; -import content from '../ai-test.json'; -import userContent from '../ai-user-test.json'; -import { type Webview, EventEmitter } from '@podman-desktop/api'; -import { CatalogManager } from './catalogManager'; - -import * as fs from 'node:fs'; - -vi.mock('./ai.json', () => { - return { - default: content, - }; -}); - -vi.mock('node:fs', () => { - return { - existsSync: vi.fn(), - promises: { - readFile: vi.fn(), - }, - }; -}); - -const mocks = vi.hoisted(() => ({ - withProgressMock: vi.fn(), -})); - -vi.mock('@podman-desktop/api', async () => { - return { - EventEmitter: vi.fn(), - window: { - withProgress: mocks.withProgressMock, - }, - ProgressLocation: { - TASK_WIDGET: 'TASK_WIDGET', - }, - fs: { - createFileSystemWatcher: () => ({ - onDidCreate: vi.fn(), - onDidDelete: vi.fn(), - onDidChange: vi.fn(), - }), - }, - }; -}); - -let catalogManager: CatalogManager; - -beforeEach(async () => { - vi.resetAllMocks(); - - const appUserDirectory = '.'; - // Creating CatalogManager - catalogManager = new CatalogManager( - { - postMessage: vi.fn().mockResolvedValue(undefined), - } as unknown as Webview, - appUserDirectory, - ); - - vi.mock('node:fs'); - - const listeners: ((value: unknown) => void)[] = []; - - vi.mocked(EventEmitter).mockReturnValue({ - event: vi.fn().mockImplementation(callback => { - listeners.push(callback); - }), - fire: vi.fn().mockImplementation((content: unknown) => { - listeners.forEach(listener => listener(content)); - }), - } as unknown as EventEmitter); -}); - -describe('invalid user catalog', () => { - beforeEach(async () => { - vi.spyOn(fs.promises, 'readFile').mockResolvedValue('invalid json'); - catalogManager.init(); - }); - - test('expect correct model is returned with valid id', () => { - const model = catalogManager.getModelById('hf.TheBloke.llama-2-7b-chat.Q5_K_S'); - expect(model).toBeDefined(); - expect(model.name).toEqual('TheBloke/Llama-2-7B-Chat-GGUF'); - expect(model.registry).toEqual('Hugging Face'); - expect(model.url).toEqual( - 'https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf', - ); - }); - - test('expect error if id does not correspond to any model', () => { - expect(() => catalogManager.getModelById('unknown')).toThrowError('No model found having id unknown'); - }); -}); - -test('expect correct model is returned from default catalog with valid id when no user catalog exists', async () => { - vi.spyOn(fs, 'existsSync').mockReturnValue(false); - catalogManager.init(); - await vi.waitUntil(() => catalogManager.getRecipes().length > 0); - - const model = catalogManager.getModelById('hf.TheBloke.llama-2-7b-chat.Q5_K_S'); - expect(model).toBeDefined(); - expect(model.name).toEqual('TheBloke/Llama-2-7B-Chat-GGUF'); - expect(model.registry).toEqual('Hugging Face'); - expect(model.url).toEqual( - 'https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf', - ); -}); - -test('expect correct model is returned with valid id when the user catalog is valid', async () => { - vi.spyOn(fs, 'existsSync').mockReturnValue(true); - vi.spyOn(fs.promises, 'readFile').mockResolvedValue(JSON.stringify(userContent)); - - catalogManager.init(); - await vi.waitUntil(() => catalogManager.getRecipes().length > 0); - - const model = 
catalogManager.getModelById('model1'); - expect(model).toBeDefined(); - expect(model.name).toEqual('Model 1'); - expect(model.registry).toEqual('Hugging Face'); - expect(model.url).toEqual('https://model1.example.com'); -}); diff --git a/packages/backend/src/managers/catalogManager.ts b/packages/backend/src/managers/catalogManager.ts index 80cdf19ac..d12a1b058 100644 --- a/packages/backend/src/managers/catalogManager.ts +++ b/packages/backend/src/managers/catalogManager.ts @@ -17,63 +17,80 @@ ***********************************************************************/ import type { Catalog } from '@shared/src/models/ICatalog'; -import path from 'node:path'; -import defaultCatalog from '../ai.json'; import type { Recipe } from '@shared/src/models/IRecipe'; import type { ModelInfo } from '@shared/src/models/IModelInfo'; -import { MSG_NEW_CATALOG_STATE } from '@shared/Messages'; +import type { Category } from '@shared/src/models/ICategory'; import { type Disposable, type Webview } from '@podman-desktop/api'; -import { JsonWatcher } from '../utils/JsonWatcher'; -import { Publisher } from '../utils/Publisher'; - -export class CatalogManager extends Publisher<Catalog> implements Disposable { - private catalog: Catalog; - #disposables: Disposable[]; - - constructor( - webview: Webview, - private appUserDirectory: string, - ) { - super(webview, MSG_NEW_CATALOG_STATE, () => this.getCatalog()); - // We start with an empty catalog, for the methods to work before the catalog is loaded - this.catalog = { - categories: [], - models: [], - recipes: [], - }; +import { BaseCatalog } from './catalogs/BaseCatalog'; +import { MESSAGES } from '@shared/Messages'; +import path from 'node:path'; +import defaultModels from '../assets/models-catalog.json'; +import defaultApplications from '../assets/applications-catalog.json'; +import defaultCategories from '../assets/categories-catalog.json'; - this.#disposables = []; - } +/** + * @deprecated + */ +export class CatalogManager implements Disposable { + #modelCatalog: BaseCatalog<ModelInfo>; + #applicationCatalog: BaseCatalog<Recipe>; + #categoryCatalog: BaseCatalog<Category>; - init(): void { - // Creating a json watcher - const jsonWatcher: JsonWatcher<Catalog> = new JsonWatcher<Catalog>( - path.resolve(this.appUserDirectory, 'catalog.json'), - defaultCatalog, + constructor(webview: Webview, appUserDirectory: string) { + this.#modelCatalog = new BaseCatalog<ModelInfo>( + webview, + MESSAGES.UPDATE_MODEL_CATALOG, + path.resolve(appUserDirectory, 'models-catalog.json'), + defaultModels, + ); + this.#applicationCatalog = new BaseCatalog<Recipe>( + webview, + MESSAGES.UPDATE_APP_CATALOG, + path.resolve(appUserDirectory, 'applications-catalog.json'), + defaultApplications, + ); + this.#categoryCatalog = new BaseCatalog<Category>( + webview, + MESSAGES.UPDATE_CATEGORY_CATALOG, + path.resolve(appUserDirectory, 'categories-catalog.json'), + defaultCategories, ); - jsonWatcher.onContentUpdated(content => this.onCatalogUpdated(content)); - jsonWatcher.init(); - - this.#disposables.push(jsonWatcher); } - private onCatalogUpdated(content: Catalog): void { - this.catalog = content; - this.notify(); + /** + * @deprecated + */ + init(): void { + this.#modelCatalog.init(); + this.#applicationCatalog.init(); } dispose(): void { - this.#disposables.forEach(watcher => watcher.dispose()); + this.#modelCatalog.dispose(); + this.#applicationCatalog.dispose(); } + /** + * @deprecated + */ public getCatalog(): Catalog { - return this.catalog; + return { + recipes: this.#applicationCatalog.getAll(), + models: this.#modelCatalog.getAll(), + categories:
this.#categoryCatalog.getAll(), + }; } + /** + * @deprecated + */ public getModels(): ModelInfo[] { - return this.catalog.models; + return this.#modelCatalog.getAll(); } + /** + * @deprecated + */ public getModelById(modelId: string): ModelInfo { const model = this.getModels().find(m => modelId === m.id); if (!model) { @@ -82,10 +99,16 @@ export class CatalogManager extends Publisher implements Disposable { return model; } + /** + * @deprecated + */ public getRecipes(): Recipe[] { - return this.catalog.recipes; + return this.#applicationCatalog.getAll(); } + /** + * @deprecated + */ public getRecipeById(recipeId: string): Recipe { const recipe = this.getRecipes().find(r => recipeId === r.id); if (!recipe) { diff --git a/packages/backend/src/managers/catalogs/BaseCatalog.spec.ts b/packages/backend/src/managers/catalogs/BaseCatalog.spec.ts new file mode 100644 index 000000000..f69ecbc74 --- /dev/null +++ b/packages/backend/src/managers/catalogs/BaseCatalog.spec.ts @@ -0,0 +1,81 @@ +import { beforeEach, expect, test, vi } from 'vitest'; +import modelsCatalogTest from '../../tests/models-catalog-test.json'; +import { existsSync, promises } from 'node:fs'; +import { BaseCatalog } from './BaseCatalog'; +import { EventEmitter, type Webview } from '@podman-desktop/api'; +import { MESSAGES } from '@shared/Messages'; + +vi.mock('@podman-desktop/api', async () => { + return { + EventEmitter: vi.fn(), + fs: { + createFileSystemWatcher: () => ({ + onDidCreate: vi.fn(), + onDidDelete: vi.fn(), + onDidChange: vi.fn(), + }), + }, + }; +}); + +vi.mock('node:fs', () => { + return { + existsSync: vi.fn(), + promises: { + readFile: vi.fn(), + }, + }; +}); + +const webviewMock = { + postMessage: vi.fn(), +} as unknown as Webview; + +beforeEach(async () => { + vi.resetAllMocks(); + + vi.mocked(webviewMock.postMessage).mockResolvedValue(undefined); + + // Mock EventEmitter + const listeners: ((value: unknown) => void)[] = []; + vi.mocked(EventEmitter).mockReturnValue({ + event: vi.fn().mockImplementation(callback => { + listeners.push(callback); + }), + fire: vi.fn().mockImplementation((content: unknown) => { + listeners.forEach(listener => listener(content)); + }), + } as unknown as EventEmitter); +}); + +test('models array should be empty when init not called', async () => { + vi.mocked(existsSync).mockReturnValue(false); + const catalog = new BaseCatalog(webviewMock, MESSAGES.UPDATE_MODEL_CATALOG, '.', []); + expect(catalog.getAll().length).toBe(0); +}); + +test('models array should not be empty when init called', async () => { + vi.mocked(existsSync).mockReturnValue(false); + const catalog = new BaseCatalog(webviewMock, MESSAGES.UPDATE_MODEL_CATALOG, '.', [{ + id: 'dummyId', + }]); + catalog.init(); + await vi.waitFor(() => { + expect(catalog.getAll().length).toBeGreaterThan(0); + }); + expect(webviewMock.postMessage).toHaveBeenCalledWith({ + id: MESSAGES.UPDATE_MODEL_CATALOG, + body: [{id: 'dummyId'}], + }); +}); + +test('models should contain test data', async () => { + vi.mocked(existsSync).mockReturnValue(true); + vi.mocked(promises.readFile).mockResolvedValue(JSON.stringify(modelsCatalogTest)); + const catalog = new BaseCatalog(webviewMock, MESSAGES.UPDATE_MODEL_CATALOG, '.', []); + catalog.init(); + await vi.waitFor(() => { + expect(catalog.getAll().length).toBeGreaterThan(0); + }); + expect(catalog.getAll().some(model => model.id === 'test-llama-2-7b-chat.Q5_K_S')).toBeTruthy(); +}); diff --git a/packages/backend/src/managers/catalogs/BaseCatalog.ts 
b/packages/backend/src/managers/catalogs/BaseCatalog.ts new file mode 100644 index 000000000..d22e8bb2b --- /dev/null +++ b/packages/backend/src/managers/catalogs/BaseCatalog.ts @@ -0,0 +1,67 @@ +/********************************************************************** + * Copyright (C) 2024 Red Hat, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ***********************************************************************/ +import { Publisher } from '../../utils/Publisher'; +import type { MESSAGES } from '@shared/Messages'; +import type { Disposable, Webview } from '@podman-desktop/api'; +import { JsonWatcher } from '../../utils/JsonWatcher'; + +export class BaseCatalog<T extends { id: string }> extends Publisher<T[]> implements Disposable { + #items: Map<string, T>; + #disposables: Disposable[]; + + constructor( + webview: Webview, + channel: MESSAGES, + private catalog: string, + private defaultItems: T[], + ) { + super(webview, channel, () => this.getAll()); + this.#items = new Map(); + this.#disposables = []; + } + + init(): void { + // Creating a json watcher + const jsonWatcher: JsonWatcher<T[]> = new JsonWatcher<T[]>( + this.catalog, + this.defaultItems, + ); + jsonWatcher.onContentUpdated(content => this.onCatalogUpdate(content)); + jsonWatcher.init(); + + this.#disposables.push(jsonWatcher); + } + + private onCatalogUpdate(items: T[]): void { + this.#items = new Map(items.map(item => [item.id, item])); + this.notify(); + } + + dispose(): void { + this.#items.clear(); + this.#disposables.forEach(watcher => watcher.dispose()); + } + + get(id: string): T | undefined { + return this.#items.get(id); + } + + getAll(): T[] { + return Array.from(this.#items.values()); + } +} diff --git a/packages/backend/src/models-catalog.json b/packages/backend/src/models-catalog.json new file mode 100644 index 000000000..20e2fa7cf --- /dev/null +++ b/packages/backend/src/models-catalog.json @@ -0,0 +1,672 @@ +[ + { + "_descriptorVersion": "0.0.1", + "datePublished": "2024-02-21T16:54:57.000Z", + "name": "Google's Gemma 2B Instruct", + "description": "** Requires LM Studio 0.2.15 or newer ** Gemma is a family of lightweight LLMs built from the same research and technology Google used to create the Gemini models. Gemma models are available in two sizes, 2 billion and 7 billion parameters. These models are trained on up to 6T tokens of primarily English web documents, mathematics, and code, using a transformer architecture with enhancements like Multi-Query Attention, RoPE Embeddings, GeGLU Activations, and advanced normalization techniques.", + "author": { + "name": "Google DeepMind", + "url": "https://deepmind.google", + "blurb": "We’re a team of scientists, engineers, ethicists and more, working to build the next generation of AI systems safely and responsibly."
+ }, + "numParameters": "2B", + "resources": { + "canonicalUrl": "https://huggingface.co/google/gemma-2b-it", + "paperUrl": "https://blog.google/technology/developers/gemma-open-models/", + "downloadUrl": "https://huggingface.co/lmstudio-ai/gemma-2b-it-GGUF" + }, + "trainedFor": "chat", + "arch": "gemma", + "files": { + "highlighted": { + "economical": { + "name": "gemma-2b-it-q8_0.gguf" + } + }, + "all": [ + { + "name": "gemma-2b-it-q8_0.gguf", + "url": "https://huggingface.co/lmstudio-ai/gemma-2b-it-GGUF/resolve/main/gemma-2b-it-q8_0.gguf", + "sizeBytes": 2669351840, + "quantization": "Q8_0", + "format": "gguf", + "sha256checksum": "ec68b50d23469882716782da8b680402246356c3f984e9a3b9bcc5bc15273140", + "publisher": { + "name": "LM Studio", + "socialUrl": "https://twitter.com/LMStudioAI" + }, + "respository": "lmstudio-ai/gemma-2b-it-GGUF", + "repositoryUrl": "https://huggingface.co/lmstudio-ai/gemma-2b-it-GGUF" + } + ] + } + }, + { + "_descriptorVersion": "0.0.1", + "datePublished": "2023-12-12T10:12:59", + "name": "Mistral 7B Instruct v0.2", + "description": "The Mistral-7B-Instruct-v0.2 Large Language Model (LLM) is an improved instruct fine-tuned version of Mistral-7B-Instruct-v0.1. For full details of this model read MistralAI's blog post and paper.", + "author": { + "name": "Mistral AI", + "url": "https://mistral.ai/", + "blurb": "Mistral AI's mission is to spearhead the revolution of open models." + }, + "numParameters": "7B", + "resources": { + "canonicalUrl": "https://mistral.ai/news/la-plateforme/", + "paperUrl": "https://arxiv.org/abs/2310.06825", + "downloadUrl": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF" + }, + "trainedFor": "chat", + "arch": "mistral", + "files": { + "highlighted": { + "economical": { + "name": "mistral-7b-instruct-v0.2.Q4_K_S.gguf" + }, + "most_capable": { + "name": "mistral-7b-instruct-v0.2.Q6_K.gguf" + } + }, + "all": [ + { + "name": "mistral-7b-instruct-v0.2.Q4_K_S.gguf", + "url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_S.gguf", + "sizeBytes": 4140374304, + "quantization": "Q4_K_S", + "format": "gguf", + "sha256checksum": "1213e19b3e103932fdfdc82e3b6dee765f57ad5756e0f673e7d36514a5b60d0a", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/Mistral-7B-Instruct-v0.2-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF" + }, + { + "name": "mistral-7b-instruct-v0.2.Q6_K.gguf", + "url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q6_K.gguf", + "sizeBytes": 5942065440, + "quantization": "Q6_K", + "format": "gguf", + "sha256checksum": "a4643671c92f47eb6027d0eff50b9875562e8e172128a4b10b2be250bb4264de", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/Mistral-7B-Instruct-v0.2-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF" + } + ] + } + }, + { + "_descriptorVersion": "0.0.1", + "datePublished": "2023-10-29T21:27:30", + "name": "OpenHermes 2.5 Mistral 7B", + "description": "OpenHermes 2.5 Mistral 7B is an advanced iteration of the OpenHermes 2 language model, enhanced by training on a significant proportion of code datasets. This additional training improved performance across several benchmarks, notably TruthfulQA, AGIEval, and the GPT4All suite, while slightly decreasing the BigBench score. 
Notably, the model's ability to handle code-related tasks, measured by the humaneval score, increased from 43% to 50.7%. The training data consisted of one million entries, primarily sourced from GPT-4 outputs and other high-quality open datasets. This data was rigorously filtered and standardized to the ShareGPT format and subsequently processed using ChatML by the axolotl tool.", + "author": { + "name": "Teknium", + "url": "https://twitter.com/Teknium1", + "blurb": "Creator of numerous chart topping fine-tunes and a Co-founder of NousResearch" + }, + "numParameters": "7B", + "resources": { + "canonicalUrl": "https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B", + "downloadUrl": "https://huggingface.co/TheBloke/OpenHermes-2.5-Mistral-7B-GGUF" + }, + "trainedFor": "chat", + "arch": "mistral", + "files": { + "highlighted": { + "economical": { + "name": "openhermes-2.5-mistral-7b.Q4_K_S.gguf" + }, + "most_capable": { + "name": "openhermes-2.5-mistral-7b.Q6_K.gguf" + } + }, + "all": [ + { + "name": "openhermes-2.5-mistral-7b.Q4_K_S.gguf", + "url": "https://huggingface.co/TheBloke/OpenHermes-2.5-Mistral-7B-GGUF/resolve/main/openhermes-2.5-mistral-7b.Q4_K_S.gguf", + "sizeBytes": 4140385024, + "quantization": "Q4_K_S", + "format": "gguf", + "sha256checksum": "5ae9c3c11ce520a2360dcfca1f4e38392dc0b7a49413ce6695857a5148a71d35", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/OpenHermes-2.5-Mistral-7B-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/OpenHermes-2.5-Mistral-7B-GGUF" + }, + { + "name": "openhermes-2.5-mistral-7b.Q6_K.gguf", + "url": "https://huggingface.co/TheBloke/OpenHermes-2.5-Mistral-7B-GGUF/resolve/main/openhermes-2.5-mistral-7b.Q6_K.gguf", + "sizeBytes": 5942078272, + "quantization": "Q6_K", + "format": "gguf", + "sha256checksum": "cd4caa42229e973636e9d4c8db50a89593353c521e0342ca615756ded2b977a2", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/OpenHermes-2.5-Mistral-7B-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/OpenHermes-2.5-Mistral-7B-GGUF" + } + ] + } + }, + { + "_descriptorVersion": "0.0.1", + "datePublished": "2023-08-24T21:39:59", + "name": "CodeLlama 7B Instruct", + "description": "MetaAI has released Code Llama, a comprehensive family of large language models for code. These models are based on Llama 2 and exhibit state-of-the-art performance among openly available models. They offer advanced infilling capabilities, can accommodate large input contexts, and have the ability to follow instructions for programming tasks without prior training. There are various versions available to cater to a wide array of applications: foundation models (Code Llama), Python-specific models (Code Llama - Python), and models for following instructions (Code Llama - Instruct). These versions come with 7B, 13B, and 34B parameters respectively. All models are trained on 16k token sequences and show improvements even on inputs with up to 100k tokens. The 7B and 13B models of Code Llama and Code Llama - Instruct have the ability to infill based on surrounding content. In terms of performance, Code Llama has set new standards among open models on several code benchmarks, achieving scores of up to 53% on HumanEval and 55% on MBPP. Notably, the Python version of Code Llama 7B surpasses the performance of Llama 2 70B on HumanEval and MBPP. All of MetaAI's models outperform every other publicly available model on MultiPL-E. 
Code Llama has been released under a permissive license that enables both research and commercial use.", + "author": { + "name": "Meta AI", + "url": "https://ai.meta.com", + "blurb": "Pushing the boundaries of AI through research, infrastructure and product innovation." + }, + "numParameters": "7B", + "resources": { + "canonicalUrl": "https://ai.meta.com/blog/code-llama-large-language-model-coding/", + "paperUrl": "https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/", + "downloadUrl": "https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF" + }, + "trainedFor": "chat", + "arch": "llama", + "files": { + "highlighted": { + "economical": { + "name": "codellama-7b-instruct.Q4_K_S.gguf" + }, + "most_capable": { + "name": "codellama-7b-instruct.Q6_K.gguf" + } + }, + "all": [ + { + "name": "codellama-7b-instruct.Q4_K_S.gguf", + "url": "https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_S.gguf", + "sizeBytes": 3856831168, + "quantization": "Q4_K_S", + "format": "gguf", + "sha256checksum": "2e44d2b7ae28bbe3a2ed698e259cbd3a6bf7fe8f9d351e14b2be17fb690d7f95", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/CodeLlama-7B-Instruct-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF" + }, + { + "name": "codellama-7b-instruct.Q6_K.gguf", + "url": "https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q6_K.gguf", + "sizeBytes": 5529302208, + "quantization": "Q6_K", + "format": "gguf", + "sha256checksum": "2f516cd9c16181832ffceaf94b13e8600d88c9bc8d7f75717d25d8c9cf9aa973", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/CodeLlama-7B-Instruct-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF" + } + ] + } + }, + { + "_descriptorVersion": "0.0.1", + "datePublished": "2023-10-26T11:25:50", + "name": "Zephyr 7B β", + "description": "The Zephyr-7B-β is the second model in the Zephyr series, designed to function as an assistant. It is a fine-tuned version of the mistralai/Mistral-7B-v0.1 model, leveraging a 7B parameter GPT-like architecture. The model has been trained on a combination of synthetic datasets and publicly available data using Direct Preference Optimization (DPO), a technique that improved its performance on the MT Bench. An important aspect to note is that the in-built alignment of the training datasets was deliberately omitted during the training process, a decision that, while enhancing the model's helpfulness, also makes it prone to generating potentially problematic outputs when prompted. Therefore, it is advised to use the model strictly for research and educational purposes. The model primarily supports the English language and is licensed under the MIT License. 
Additional details can be found in the associated technical report.", + "author": { + "name": "Hugging Face H4", + "url": "https://huggingface.co/HuggingFaceH4", + "blurb": "Hugging Face H4 team, focused on aligning language models to be helpful, honest, harmless, and huggy 🤗" + }, + "numParameters": "7B", + "resources": { + "canonicalUrl": "https://huggingface.co/HuggingFaceH4/zephyr-7b-beta", + "paperUrl": "https://arxiv.org/abs/2310.16944", + "downloadUrl": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF" + }, + "trainedFor": "chat", + "arch": "mistral", + "files": { + "highlighted": { + "economical": { + "name": "zephyr-7b-beta.Q4_K_S.gguf" + }, + "most_capable": { + "name": "zephyr-7b-beta.Q6_K.gguf" + } + }, + "all": [ + { + "name": "zephyr-7b-beta.Q4_K_S.gguf", + "url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q4_K_S.gguf", + "sizeBytes": 4140373696, + "quantization": "Q4_K_S", + "format": "gguf", + "sha256checksum": "cafa0b85b2efc15ca33023f3b87f8d0c44ddcace16b3fb608280e0eb8f425cb1", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/zephyr-7B-beta-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF" + }, + { + "name": "zephyr-7b-beta.Q6_K.gguf", + "url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q6_K.gguf", + "sizeBytes": 5942064832, + "quantization": "Q6_K", + "format": "gguf", + "sha256checksum": "39b52e291eea6040de078283ee5316ff2a317e2b6f59be56724d9b29bada6cfe", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/zephyr-7B-beta-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF" + } + ] + } + }, + { + "_descriptorVersion": "0.0.1", + "datePublished": "2023-11-21T16:28:30", + "name": "StableLM Zephyr 3B", + "description": "StableLM Zephyr 3B is an English-language, auto-regressive language model with 3 billion parameters, developed by Stability AI. It's an instruction-tuned model influenced by HuggingFace's Zephyr 7B training approach and is built on transformer decoder architecture. It was trained using a mix of public and synthetic datasets, including SFT and Preference Datasets from the HuggingFace Hub with Direct Preference Optimization (DPO). Its performance has been evaluated using the MT Bench and Alpaca Benchmark, achieving a score of 6.64 and a win rate of 76% respectively. For fine-tuning, it utilizes the StabilityAI's stablelm-3b-4e1t model and is available under the StabilityAI Non-Commercial Research Community License. Commercial use requires contacting Stability AI for more information. The model was trained on a Stability AI cluster with 8 nodes, each equipped with 8 A100 80GB GPUs, using internal scripts for SFT steps and HuggingFace's Alignment Handbook scripts for DPO training.", + "author": { + "name": "Stability AI", + "url": "https://stability.ai/", + "blurb": "Stability AI is developing cutting-edge open AI models for Image, Language, Audio, Video, 3D and Biology." 
+ }, + "numParameters": "3B", + "resources": { + "canonicalUrl": "https://huggingface.co/stabilityai/stablelm-zephyr-3b", + "downloadUrl": "https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF" + }, + "trainedFor": "chat", + "arch": "stablelm", + "files": { + "highlighted": { + "economical": { + "name": "stablelm-zephyr-3b.Q4_K_S.gguf" + }, + "most_capable": { + "name": "stablelm-zephyr-3b.Q6_K.gguf" + } + }, + "all": [ + { + "name": "stablelm-zephyr-3b.Q4_K_S.gguf", + "url": "https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF/resolve/main/stablelm-zephyr-3b.Q4_K_S.gguf", + "sizeBytes": 1620695488, + "quantization": "Q4_K_S", + "format": "gguf", + "sha256checksum": "748f9fa7b893df8383467c7f28affef3489e20f2da351441b0dd112c43ddb587", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/stablelm-zephyr-3b-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF" + }, + { + "name": "stablelm-zephyr-3b.Q6_K.gguf", + "url": "https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF/resolve/main/stablelm-zephyr-3b.Q6_K.gguf", + "sizeBytes": 2295985088, + "quantization": "Q6_K", + "format": "gguf", + "sha256checksum": "d51685399c77b1dfe2dafa53ac7e6272b414bbc529c0f3bf0bdd15f90559c049", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/stablelm-zephyr-3b-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF" + } + ] + } + }, + { + "_descriptorVersion": "0.0.1", + "datePublished": "2024-02-03T16:59:54.000Z", + "name": "Qwen 1.5", + "description": "Qwen1.5 is the large language model series developed by Qwen Team, Alibaba Group. It is a transformer-based decoder-only language model pretrained on large-scale multilingual data covering a wide range of domains and it is aligned with human preferences.", + "author": { + "name": "Qwen Team, Alibaba Group", + "url": "https://huggingface.co/Qwen", + "blurb": "Qwen (abbr. for Tongyi Qianwen 通义千问) refers to the large language model family built by Alibaba Cloud" + }, + "numParameters": "7B", + "resources": { + "canonicalUrl": "https://github.com/QwenLM/Qwen1.5", + "paperUrl": "https://qwenlm.github.io/blog/qwen1.5/", + "downloadUrl": "https://huggingface.co/Qwen/Qwen1.5-7B-Chat-GGUF" + }, + "trainedFor": "chat", + "arch": "qwen2", + "files": { + "highlighted": { + "most_capable": { + "name": "qwen1_5-7b-chat-q5_k_m.gguf" + } + }, + "all": [ + { + "name": "qwen1_5-7b-chat-q5_k_m.gguf", + "url": "https://huggingface.co/Qwen/Qwen1.5-7B-Chat-GGUF/resolve/main/qwen1_5-7b-chat-q5_k_m.gguf", + "sizeBytes": 5530664160, + "quantization": "Q5_K_M", + "format": "gguf", + "sha256checksum": "06ab8a96c4da98f2e692c8b376cf8e9d34a7365259ae7a78cbc4218b5a5b35ae", + "publisher": { + "name": "Qwen", + "socialUrl": "https://huggingface.co/Qwen" + }, + "respository": "Qwen/Qwen1.5-7B-Chat-GGUF", + "repositoryUrl": "https://huggingface.co/Qwen/Qwen1.5-7B-Chat-GGUF" + } + ] + } + }, + { + "_descriptorVersion": "0.0.1", + "datePublished": "2023-12-13T21:22:37", + "name": "Phi 2", + "description": "Phi-2 is a 2.7 billion parameter Transformer model, an extension of Phi-1.5, with additional training data including synthetic NLP texts and curated web content. It demonstrates near state-of-the-art performance in benchmarks for common sense, language understanding, and logical reasoning within its parameter class. 
Phi-2 has not undergone reinforcement learning fine-tuning and is open-source, aimed at enabling safety research like toxicity reduction and bias understanding. It is designed for QA, chat, and code formats and has a context length of 2048 tokens. The model was trained on 250 billion tokens from a dataset combining AOAI GPT-3.5 synthetic data and filtered web data, using 1.4 trillion training tokens. It utilized 96xA100-80G GPUs over a span of 14 days. Phi-2 is released under the MIT license.", + "author": { + "name": "Microsoft Research", + "url": "https://www.microsoft.com/en-us/research/", + "blurb": "Advancing science and technology to benefit humanity" + }, + "numParameters": "3B", + "resources": { + "canonicalUrl": "https://huggingface.co/microsoft/phi-2", + "paperUrl": "https://arxiv.org/abs/2309.05463", + "downloadUrl": "https://huggingface.co/TheBloke/phi-2-GGUF" + }, + "trainedFor": "chat", + "arch": "phi2", + "files": { + "highlighted": { + "economical": { + "name": "phi-2.Q4_K_S.gguf" + }, + "most_capable": { + "name": "phi-2.Q6_K.gguf" + } + }, + "all": [ + { + "name": "phi-2.Q4_K_S.gguf", + "url": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q4_K_S.gguf", + "sizeBytes": 1615568736, + "quantization": "Q4_K_S", + "format": "gguf", + "sha256checksum": "67df519f789817dee8c9b927227e7795ac07e1b20b58eb21fe109a2af328928a", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/phi-2-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/phi-2-GGUF" + }, + { + "name": "phi-2.Q6_K.gguf", + "url": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q6_K.gguf", + "sizeBytes": 2285059936, + "quantization": "Q6_K", + "format": "gguf", + "sha256checksum": "9a654a17bee234d85b726bbdaec8e9a3365bbc187238998bc4f84c89afb046d6", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/phi-2-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/phi-2-GGUF" + } + ] + } + }, + { + "_descriptorVersion": "0.0.1", + "datePublished": "2023-12-11T06:26:58", + "name": "NexusRaven-V2-13B", + "description": "NexusRaven-V2 accepts a list of python functions. These python functions can do anything (e.g. sending GET/POST requests to external APIs). The two requirements include the python function signature and the appropriate docstring to generate the function call. *** Follow NexusRaven's prompting guide found on the model's Hugging Face page. 
***", + "author": { + "name": "Nexusflow", + "url": "https://nexusflow.ai", + "blurb": "Nexusflow is democratizing Cyber Intelligence with Generative AI, fully on top of open-source large language models (LLMs)" + }, + "numParameters": "13B", + "resources": { + "canonicalUrl": "https://huggingface.co/Nexusflow/NexusRaven-V2-13B", + "downloadUrl": "https://huggingface.co/TheBloke/NexusRaven-V2-13B-GGUF" + }, + "trainedFor": "other", + "arch": "llama", + "files": { + "highlighted": { + "economical": { + "name": "nexusraven-v2-13b.Q4_K_S.gguf" + }, + "most_capable": { + "name": "nexusraven-v2-13b.Q6_K.gguf" + } + }, + "all": [ + { + "name": "nexusraven-v2-13b.Q4_K_S.gguf", + "url": "https://huggingface.co/TheBloke/NexusRaven-V2-13B-GGUF/resolve/main/nexusraven-v2-13b.Q4_K_S.gguf", + "sizeBytes": 7414501952, + "quantization": "Q4_K_S", + "format": "gguf", + "sha256checksum": "bc2e1ce9fa064e675690d4c6f2c441d922f24241764241aa013d0ca8a87ecbfe", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/NexusRaven-V2-13B-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/NexusRaven-V2-13B-GGUF" + }, + { + "name": "nexusraven-v2-13b.Q6_K.gguf", + "url": "https://huggingface.co/TheBloke/NexusRaven-V2-13B-GGUF/resolve/main/nexusraven-v2-13b.Q6_K.gguf", + "sizeBytes": 10679342592, + "quantization": "Q6_K", + "format": "gguf", + "sha256checksum": "556ae244f4c69c603b7cda762d003d09f68058c671f304c2e011214ce754acb4", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/NexusRaven-V2-13B-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/NexusRaven-V2-13B-GGUF" + } + ] + } + }, + { + "_descriptorVersion": "0.0.1", + "datePublished": "2023-10-29T11:30:13", + "name": "Deepseek Coder", + "description": "Deepseek Coder is a collection of code language models with sizes ranging from 1B to 33B parameters, trained on a dataset comprising 2 trillion tokens (87% code, 13% natural language in English and Chinese). It is designed for project-level code completion and infilling, utilizing a 16K token window size and an additional fill-in-the-blank task. The models demonstrate leading performance on several programming benchmarks. The 6.7B parameter variant, deepseek-coder-6.7b-instruct, is fine-tuned on 2 billion tokens of instructional data. 
The code repository is MIT licensed, and the models support commercial use under the Model License.", + "author": { + "name": "DeepSeek", + "url": "https://huggingface.co/deepseek-ai", + "blurb": "DeepSeek (深度求索), founded in 2023, is a Chinese company dedicated to making AGI a reality" + }, + "numParameters": "6.7B", + "resources": { + "canonicalUrl": "https://github.com/deepseek-ai/deepseek-coder", + "downloadUrl": "https://huggingface.co/TheBloke/deepseek-coder-6.7B-instruct-GGUF" + }, + "trainedFor": "chat", + "arch": "llama", + "files": { + "highlighted": { + "economical": { + "name": "deepseek-coder-6.7b-instruct.Q4_K_S.gguf" + }, + "most_capable": { + "name": "deepseek-coder-6.7b-instruct.Q6_K.gguf" + } + }, + "all": [ + { + "name": "deepseek-coder-6.7b-instruct.Q4_K_S.gguf", + "url": "https://huggingface.co/TheBloke/deepseek-coder-6.7B-instruct-GGUF/resolve/main/deepseek-coder-6.7b-instruct.Q4_K_S.gguf", + "sizeBytes": 3858751712, + "quantization": "Q4_K_S", + "format": "gguf", + "sha256checksum": "d5d4b757645ce359a52d25584d29f1ff0d89580075edc35d87a20b89e65a5313", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/deepseek-coder-6.7B-instruct-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/deepseek-coder-6.7B-instruct-GGUF" + }, + { + "name": "deepseek-coder-6.7b-instruct.Q6_K.gguf", + "url": "https://huggingface.co/TheBloke/deepseek-coder-6.7B-instruct-GGUF/resolve/main/deepseek-coder-6.7b-instruct.Q6_K.gguf", + "sizeBytes": 5531476192, + "quantization": "Q6_K", + "format": "gguf", + "sha256checksum": "113fba500e4feb1313ce80d72cf381330b51460d265a7719bba626d6a461f9eb", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/deepseek-coder-6.7B-instruct-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/deepseek-coder-6.7B-instruct-GGUF" + } + ] + } + }, + { + "_descriptorVersion": "0.0.1", + "datePublished": "2023-08-27T18:17:14.000Z", + "name": "WizardCoder-Python-13B-V1.0-GGUF", + "description": "WizardCoder: Empowering Code Large Language Models with Evol-Instruct. To develop our WizardCoder model, we begin by adapting the Evol-Instruct method specifically for coding tasks. This involves tailoring the prompt to the domain of code-related instructions. 
Subsequently, we fine-tune the Code LLM, StarCoder, utilizing the newly created instruction-following training set.", + "author": { + "name": "WizardLM", + "url": "https://huggingface.co/WizardLM", + "blurb": "WizardLM: An Instruction-following LLM Using Evol-Instruct" + }, + "numParameters": "13B", + "resources": { + "canonicalUrl": "https://huggingface.co/WizardLM/WizardCoder-Python-13B-V1.0", + "downloadUrl": "https://huggingface.co/TheBloke/WizardCoder-Python-13B-V1.0-GGUF", + "paperUrl": "https://arxiv.org/abs/2306.08568" + }, + "trainedFor": "instruct", + "arch": "llama", + "files": { + "highlighted": { + "economical": { + "name": "wizardcoder-python-13b-v1.0.Q4_K_S.gguf" + }, + "most_capable": { + "name": "wizardcoder-python-13b-v1.0.Q6_K.gguf" + } + }, + "all": [ + { + "name": "wizardcoder-python-13b-v1.0.Q4_K_S.gguf", + "url": "https://huggingface.co/TheBloke/WizardCoder-Python-13B-V1.0-GGUF/resolve/main/wizardcoder-python-13b-v1.0.Q4_K_S.gguf", + "sizeBytes": 7414338464, + "quantization": "Q4_K_S", + "format": "gguf", + "sha256checksum": "828983ea69d9cb58a63243a803c79402323620b0fc320bf9df4e9be52cbc4a01", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/WizardCoder-Python-13B-V1.0-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/WizardCoder-Python-13B-V1.0-GGUF" + }, + { + "name": "wizardcoder-python-13b-v1.0.Q6_K.gguf", + "url": "https://huggingface.co/TheBloke/WizardCoder-Python-13B-V1.0-GGUF/resolve/main/wizardcoder-python-13b-v1.0.Q6_K.gguf", + "sizeBytes": 10679148768, + "quantization": "Q6_K", + "format": "gguf", + "sha256checksum": "a20f795d17d64e487b6b3446227ba2931bbcb3bc7bb7ebd652b9663efb1f090b", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/WizardCoder-Python-13B-V1.0-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/WizardCoder-Python-13B-V1.0-GGUF" + } + ] + } + }, + { + "_descriptorVersion": "0.0.1", + "datePublished": "2023-09-27T16:12:57", + "name": "Mistral 7B Instruct v0.1", + "description": "The Mistral-7B-Instruct-v0.1 is a Large Language Model (LLM) developed by Mistral AI. This LLM is an instruct fine-tuned version of a generative text model, leveraging a variety of publicly available conversation datasets. The model's architecture is based on a transformer model, featuring Grouped-Query Attention, Sliding-Window Attention, and a Byte-fallback BPE tokenizer. To utilize the instruction fine-tuning capabilities, prompts should be enclosed within [INST] and [/INST] tokens. The initial instruction should commence with a beginning-of-sentence id, whereas subsequent instructions should not. The generation process by the assistant will terminate with the end-of-sentence token id. For detailed information about this model, refer to the release blog posts by Mistral AI.", + "author": { + "name": "Mistral AI", + "url": "https://mistral.ai/", + "blurb": "Mistral AI's mission is to spearhead the revolution of open models." 
+ }, + "numParameters": "7B", + "resources": { + "canonicalUrl": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1", + "paperUrl": "https://mistral.ai/news/announcing-mistral-7b/", + "downloadUrl": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF" + }, + "trainedFor": "chat", + "arch": "mistral", + "files": { + "highlighted": { + "economical": { + "name": "mistral-7b-instruct-v0.1.Q4_K_S.gguf" + }, + "most_capable": { + "name": "mistral-7b-instruct-v0.1.Q6_K.gguf" + } + }, + "all": [ + { + "name": "mistral-7b-instruct-v0.1.Q4_K_S.gguf", + "url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_S.gguf", + "sizeBytes": 4140373664, + "quantization": "Q4_K_S", + "format": "gguf", + "sha256checksum": "f1b7f1885029080be49aff49c83f87333449ef727089546e0d887e2f17f0d02e", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/Mistral-7B-Instruct-v0.1-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF" + }, + { + "name": "mistral-7b-instruct-v0.1.Q6_K.gguf", + "url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q6_K.gguf", + "sizeBytes": 5942064800, + "quantization": "Q6_K", + "format": "gguf", + "sha256checksum": "dfb053cb8d5f56abde8f56899ffe0d23e1285a423df0b65ea3f3adbb263b22c2", + "publisher": { + "name": "TheBloke", + "socialUrl": "https://twitter.com/TheBlokeAI" + }, + "respository": "TheBloke/Mistral-7B-Instruct-v0.1-GGUF", + "repositoryUrl": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF" + } + ] + } + } +] diff --git a/packages/backend/src/tests/models-catalog-test.json b/packages/backend/src/tests/models-catalog-test.json new file mode 100644 index 000000000..430b424d9 --- /dev/null +++ b/packages/backend/src/tests/models-catalog-test.json @@ -0,0 +1,29 @@ +[ + { + "id": "test-llama-2-7b-chat.Q5_K_S", + "name": "Llama-2-7B-Chat-GGUF", + "description": "Llama 2 is a family of state-of-the-art open-access large language models released by Meta today, and we’re excited to fully support the launch with comprehensive integration in Hugging Face. Llama 2 is being released with a very permissive community license and is available for commercial use. The code, pretrained models, and fine-tuned models are all being released today 🔥", + "hw": "CPU", + "registry": "Hugging Face", + "license": "?", + "url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf" + }, + { + "id": "albedobase-xl-1.3", + "name": "AlbedoBase XL 1.3", + "description": "Stable Diffusion XL has 6.6 billion parameters, which is about 6.6 times more than the SD v1.5 version. I believe that this is not just a number, but a number that can lead to a significant improvement in performance. It has been a while since we realized that the overall performance of SD v1.5 has improved beyond imagination thanks to the explosive contributions of our community. Therefore, I am working on completing this AlbedoBase XL model in order to optimally reproduce the performance improvement that occurred in v1.5 in this XL version as well. My goal is to directly test the performance of all Checkpoints and LoRAs that are publicly uploaded to Civitai, and merge only the resources that are judged to be optimal after passing through several filters. This will surpass the performance of image-generating AI of companies such as Midjourney. 
As of now, AlbedoBase XL v0.4 has merged exactly 55 selected checkpoints and 138 LoRAs.", + "hw": "CPU", + "registry": "Civital", + "license": "openrail++", + "url": "" + }, + { + "id": "sdxl-turbo", + "name": "SDXL Turbo", + "description": "SDXL Turbo achieves state-of-the-art performance with a new distillation technology, enabling single-step image generation with unprecedented quality, reducing the required step count from 50 to just one.", + "hw": "CPU", + "registry": "Hugging Face", + "license": "sai-c-community", + "url": "" + } +] diff --git a/packages/frontend/src/stores/catalog.ts b/packages/frontend/src/stores/catalog.ts index b9642815c..4fd787c55 100644 --- a/packages/frontend/src/stores/catalog.ts +++ b/packages/frontend/src/stores/catalog.ts @@ -18,7 +18,7 @@ import type { Readable } from 'svelte/store'; import { readable } from 'svelte/store'; -import { MSG_NEW_CATALOG_STATE } from '@shared/Messages'; +import { MESSAGES } from '@shared/Messages'; import { rpcBrowser, studioClient } from '/@/utils/client'; import type { Catalog } from '@shared/src/models/ICatalog'; @@ -29,14 +29,27 @@ const emptyCatalog = { }; export const catalog: Readable = readable(emptyCatalog, set => { - const sub = rpcBrowser.subscribe(MSG_NEW_CATALOG_STATE, msg => { - set(msg); + const Update = () => { + studioClient.getCatalog().then(state => { + set(state); + }); + }; + const subModels = rpcBrowser.subscribe(MESSAGES.UPDATE_MODEL_CATALOG, () => { + Update(); }); - // Initialize the store manually - studioClient.getCatalog().then(state => { - set(state); + const subApps = rpcBrowser.subscribe(MESSAGES.UPDATE_APP_CATALOG, () => { + Update(); + }); + const subCategories = rpcBrowser.subscribe(MESSAGES.UPDATE_CATEGORY_CATALOG, () => { + Update(); }); + + // Initialize the store manually + Update(); + return () => { - sub.unsubscribe(); + subModels.unsubscribe(); + subApps.unsubscribe(); + subCategories.unsubscribe(); }; }); diff --git a/packages/shared/Messages.ts b/packages/shared/Messages.ts index 0a4b61909..5be53fb4f 100644 --- a/packages/shared/Messages.ts +++ b/packages/shared/Messages.ts @@ -18,10 +18,14 @@ export const MSG_PLAYGROUNDS_STATE_UPDATE = 'playgrounds-state-update'; export const MSG_NEW_PLAYGROUND_QUERIES_STATE = 'new-playground-queries-state'; -export const MSG_NEW_CATALOG_STATE = 'new-catalog-state'; -export const MSG_NEW_RECIPE_STATE = 'new-recipe-state'; export const MSG_TASKS_UPDATE = 'tasks-update'; export const MSG_NEW_MODELS_STATE = 'new-models-state'; export const MSG_APPLICATIONS_STATE_UPDATE = 'applications-state-update'; export const MSG_LOCAL_REPOSITORY_UPDATE = 'local-repository-update'; +export enum MESSAGES { + UPDATE_MODEL_CATALOG = 'update-model-catalog', + UPDATE_CATEGORY_CATALOG = 'update-category-catalog', + UPDATE_APP_CATALOG = 'update-app-catalog', +} + diff --git a/packages/shared/src/models/IRecipe.ts b/packages/shared/src/models/IRecipe.ts index 6bad3848a..29e627d25 100644 --- a/packages/shared/src/models/IRecipe.ts +++ b/packages/shared/src/models/IRecipe.ts @@ -17,7 +17,7 @@ ***********************************************************************/ export interface Recipe { - id?: string; + id: string; name: string; categories: string[]; description: string;
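A note on the reworked catalog store in the `packages/frontend/src/stores/catalog.ts` hunk above: rather than receiving the full catalog over a single `MSG_NEW_CATALOG_STATE` message, the store now subscribes to the three `MESSAGES.UPDATE_*` events and re-fetches state through `studioClient.getCatalog()` whenever any of them fires. The sketch below shows how a consumer might read the resulting readable store; it is a minimal sketch, assuming the `/@/stores/catalog` import alias resolves to the changed file and that the `Catalog` shape exposes `models`, `recipes`, and `categories` arrays (the `emptyCatalog` fields are not visible in this hunk, so treat those property names as assumptions).

```typescript
// Minimal consumer sketch. Assumptions: '/@/stores/catalog' alias and a Catalog
// type with models/recipes/categories arrays — adjust to the real ICatalog shape.
import { get } from 'svelte/store';
import { catalog } from '/@/stores/catalog';

// The readable store re-runs studioClient.getCatalog() whenever the backend
// emits UPDATE_MODEL_CATALOG, UPDATE_APP_CATALOG, or UPDATE_CATEGORY_CATALOG,
// so a one-off read like this reflects the latest published catalog.
const snapshot = get(catalog);
console.log(
  `catalog: ${snapshot.models.length} models, ${snapshot.recipes.length} recipes, ${snapshot.categories.length} categories`,
);
```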
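Each file entry in the model catalog added above carries a `sha256checksum` alongside its `url` and `sizeBytes`, which is enough to verify a download before use. The following is a minimal sketch of how a downloader could use those fields; `verifyCatalogFile` is a hypothetical helper (not part of the changed code) and relies only on Node's built-in `node:crypto` and `node:fs` modules.

```typescript
import { createHash } from 'node:crypto';
import { createReadStream } from 'node:fs';

// Compute the SHA-256 digest of a local file by streaming it through a hash.
function sha256OfFile(path: string): Promise<string> {
  return new Promise((resolve, reject) => {
    const hash = createHash('sha256');
    createReadStream(path)
      .on('data', chunk => hash.update(chunk))
      .on('error', reject)
      .on('end', () => resolve(hash.digest('hex')));
  });
}

// Hypothetical helper: compare a downloaded .gguf file against the catalog's
// sha256checksum field (case-insensitive hex comparison).
export async function verifyCatalogFile(localPath: string, expectedSha256: string): Promise<boolean> {
  const actual = await sha256OfFile(localPath);
  return actual.toLowerCase() === expectedSha256.toLowerCase();
}

// Example usage against the Zephyr Q4_K_S entry from the catalog above
// (the local path is illustrative only):
// await verifyCatalogFile(
//   './zephyr-7b-beta.Q4_K_S.gguf',
//   'cafa0b85b2efc15ca33023f3b87f8d0c44ddcace16b3fb608280e0eb8f425cb1',
// );
```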