Merge pull request #58 from projectatomic/i31
fix: separate models from recipes
lstocchi authored Jan 17, 2024
2 parents 9705b2b + 7f5097e commit 0f1feda
Showing 9 changed files with 229 additions and 56 deletions.
75 changes: 75 additions & 0 deletions packages/backend/src/ai-test.json
@@ -0,0 +1,75 @@
{
"recipes": [
{
"id": "chatbot",
"description" : "Chat bot application",
"name" : "ChatBot",
"repository": "https://github.com/axel7083/locallm",
"icon": "natural-language-processing",
"categories": [
"natural-language-processing"
],
"config": "chatbot/ai-studio.yaml",
"readme": "# Locallm\n\nThis repo contains artifacts that can be used to build and run LLM (Large Language Model) services locally on your Mac using podman. These containerized LLM services can be used to help developers quickly prototype new LLM based applications, without the need for relying on any other externally hosted services. Since they are already containerized, it also helps developers move from their prototype to production quicker. \n\n## Current Locallm Services: \n\n* [Chatbot](#chatbot)\n* [Text Summarization](#text-summarization)\n* [Fine-tuning](#fine-tuning)\n\n### Chatbot\n\nA simple chatbot using the gradio UI. Learn how to build and run this model service here: [Chatbot](/chatbot/).\n\n### Text Summarization\n\nAn LLM app that can summarize arbitrarily long text inputs. Learn how to build and run this model service here: [Text Summarization](/summarizer/).\n\n### Fine Tuning \n\nThis application allows a user to select a model and a data set they'd like to fine-tune that model on. Once the application finishes, it outputs a new fine-tuned model for the user to apply to other LLM services. Learn how to build and run this model training job here: [Fine-tuning](/finetune/).\n\n## Architecture\n![](https://raw.githubusercontent.com/MichaelClifford/locallm/main/assets/arch.jpg)\n\nThe diagram above indicates the general architecture for each of the individual model services contained in this repo. The core code available here is the \"LLM Task Service\" and the \"API Server\", bundled together under `model_services`. With an appropriately chosen model downloaded onto your host,`model_services/builds` contains the Containerfiles required to build an ARM or an x86 (with CUDA) image depending on your need. These model services are intended to be light-weight and run with smaller hardware footprints (given the Locallm name), but they can be run on any hardware that supports containers and scaled up if needed.\n\nWe also provide demo \"AI Applications\" under `ai_applications` for each model service to provide an example of how a developers could interact with the model service for their own needs. ",
"models": [
"llama-2-7b-chat.Q5_K_S",
"albedobase-xl-1.3",
"sdxl-turbo"
]
}
],
"models": [
{
"id": "llama-2-7b-chat.Q5_K_S",
"name": "Llama-2-7B-Chat-GGUF",
"description": "Llama 2 is a family of state-of-the-art open-access large language models released by Meta today, and we’re excited to fully support the launch with comprehensive integration in Hugging Face. Llama 2 is being released with a very permissive community license and is available for commercial use. The code, pretrained models, and fine-tuned models are all being released today 🔥",
"hw": "CPU",
"registry": "Hugging Face",
"popularity": 3,
"license": "?",
"url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf"
},
{
"id": "albedobase-xl-1.3",
"name": "AlbedoBase XL 1.3",
"description": "Stable Diffusion XL has 6.6 billion parameters, which is about 6.6 times more than the SD v1.5 version. I believe that this is not just a number, but a number that can lead to a significant improvement in performance. It has been a while since we realized that the overall performance of SD v1.5 has improved beyond imagination thanks to the explosive contributions of our community. Therefore, I am working on completing this AlbedoBase XL model in order to optimally reproduce the performance improvement that occurred in v1.5 in this XL version as well. My goal is to directly test the performance of all Checkpoints and LoRAs that are publicly uploaded to Civitai, and merge only the resources that are judged to be optimal after passing through several filters. This will surpass the performance of image-generating AI of companies such as Midjourney. As of now, AlbedoBase XL v0.4 has merged exactly 55 selected checkpoints and 138 LoRAs.",
"hw": "CPU",
"registry": "Civital",
"popularity": 3,
"license": "openrail++",
"url": ""
},
{
"id": "sdxl-turbo",
"name": "SDXL Turbo",
"description": "SDXL Turbo achieves state-of-the-art performance with a new distillation technology, enabling single-step image generation with unprecedented quality, reducing the required step count from 50 to just one.",
"hw": "CPU",
"registry": "Hugging Face",
"popularity": 3,
"license": "sai-c-community",
"url": ""
}
],
"categories": [
{
"id": "natural-language-processing",
"name": "Natural Language Processing",
"description" : "Models that work with text: classify, summarize, translate, or generate text."
},
{
"id": "computer-vision",
"description" : "Process images, from classification to object detection and segmentation.",
"name" : "Computer Vision"
},
{
"id": "audio",
"description" : "Recognize speech or classify audio with audio models.",
"name" : "Audio"
},
{
"id": "multimodal",
"description" : "Stuff about multimodal models goes here omg yes amazing.",
"name" : "Multimodal"
}
]
}
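For reference, a minimal TypeScript sketch of the catalog shape this test fixture assumes. Field names mirror the JSON above; the real `Recipe` and `ModelInfo` types live under `@shared/models` and may differ in detail.

```typescript
// Assumed catalog shapes, inferred from ai-test.json (not the actual @shared/models types).
interface ModelInfo {
  id: string;
  name: string;
  description: string;
  hw: string;        // e.g. "CPU"
  registry: string;  // e.g. "Hugging Face"
  popularity: number;
  license: string;
  url: string;       // empty when no direct download URL is published
}

interface Recipe {
  id: string;
  name: string;
  description: string;
  repository: string;
  icon: string;
  categories: string[];
  config: string;
  readme: string;
  models: string[];  // model ids, resolved against the top-level "models" list
}

interface Category {
  id: string;
  name: string;
  description: string;
}

interface Catalog {
  recipes: Recipe[];
  models: ModelInfo[];
  categories: Category[];
}
```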
65 changes: 35 additions & 30 deletions packages/backend/src/ai.json
@@ -12,39 +12,44 @@
"config": "chatbot/ai-studio.yaml",
"readme": "# Locallm\n\nThis repo contains artifacts that can be used to build and run LLM (Large Language Model) services locally on your Mac using podman. These containerized LLM services can be used to help developers quickly prototype new LLM based applications, without the need for relying on any other externally hosted services. Since they are already containerized, it also helps developers move from their prototype to production quicker. \n\n## Current Locallm Services: \n\n* [Chatbot](#chatbot)\n* [Text Summarization](#text-summarization)\n* [Fine-tuning](#fine-tuning)\n\n### Chatbot\n\nA simple chatbot using the gradio UI. Learn how to build and run this model service here: [Chatbot](/chatbot/).\n\n### Text Summarization\n\nAn LLM app that can summarize arbitrarily long text inputs. Learn how to build and run this model service here: [Text Summarization](/summarizer/).\n\n### Fine Tuning \n\nThis application allows a user to select a model and a data set they'd like to fine-tune that model on. Once the application finishes, it outputs a new fine-tuned model for the user to apply to other LLM services. Learn how to build and run this model training job here: [Fine-tuning](/finetune/).\n\n## Architecture\n![](https://raw.githubusercontent.com/MichaelClifford/locallm/main/assets/arch.jpg)\n\nThe diagram above indicates the general architecture for each of the individual model services contained in this repo. The core code available here is the \"LLM Task Service\" and the \"API Server\", bundled together under `model_services`. With an appropriately chosen model downloaded onto your host,`model_services/builds` contains the Containerfiles required to build an ARM or an x86 (with CUDA) image depending on your need. These model services are intended to be light-weight and run with smaller hardware footprints (given the Locallm name), but they can be run on any hardware that supports containers and scaled up if needed.\n\nWe also provide demo \"AI Applications\" under `ai_applications` for each model service to provide an example of how a developers could interact with the model service for their own needs. ",
"models": [
- {
- "id": "llama-2-7b-chat.Q5_K_S",
- "name": "Llama-2-7B-Chat-GGUF",
- "description": "Llama 2 is a family of state-of-the-art open-access large language models released by Meta today, and we’re excited to fully support the launch with comprehensive integration in Hugging Face. Llama 2 is being released with a very permissive community license and is available for commercial use. The code, pretrained models, and fine-tuned models are all being released today 🔥",
- "hw": "CPU",
- "registry": "Hugging Face",
- "popularity": 3,
- "license": "?",
- "url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf"
- },
- {
- "id": "albedobase-xl-1.3",
- "name": "AlbedoBase XL 1.3",
- "description": "Stable Diffusion XL has 6.6 billion parameters, which is about 6.6 times more than the SD v1.5 version. I believe that this is not just a number, but a number that can lead to a significant improvement in performance. It has been a while since we realized that the overall performance of SD v1.5 has improved beyond imagination thanks to the explosive contributions of our community. Therefore, I am working on completing this AlbedoBase XL model in order to optimally reproduce the performance improvement that occurred in v1.5 in this XL version as well. My goal is to directly test the performance of all Checkpoints and LoRAs that are publicly uploaded to Civitai, and merge only the resources that are judged to be optimal after passing through several filters. This will surpass the performance of image-generating AI of companies such as Midjourney. As of now, AlbedoBase XL v0.4 has merged exactly 55 selected checkpoints and 138 LoRAs.",
- "hw": "CPU",
- "registry": "Civitai",
- "popularity": 3,
- "license": "openrail++",
- "url": ""
- },
- {
- "id": "sdxl-turbo",
- "name": "SDXL Turbo",
- "description": "SDXL Turbo achieves state-of-the-art performance with a new distillation technology, enabling single-step image generation with unprecedented quality, reducing the required step count from 50 to just one.",
- "hw": "CPU",
- "registry": "Hugging Face",
- "popularity": 3,
- "license": "sai-c-community",
- "url": ""
- }
"llama-2-7b-chat.Q5_K_S",
"albedobase-xl-1.3",
"sdxl-turbo"
]
}
],
"models": [
{
"id": "llama-2-7b-chat.Q5_K_S",
"name": "Llama-2-7B-Chat-GGUF",
"description": "Llama 2 is a family of state-of-the-art open-access large language models released by Meta today, and we’re excited to fully support the launch with comprehensive integration in Hugging Face. Llama 2 is being released with a very permissive community license and is available for commercial use. The code, pretrained models, and fine-tuned models are all being released today 🔥",
"hw": "CPU",
"registry": "Hugging Face",
"popularity": 3,
"license": "?",
"url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf"
},
{
"id": "albedobase-xl-1.3",
"name": "AlbedoBase XL 1.3",
"description": "Stable Diffusion XL has 6.6 billion parameters, which is about 6.6 times more than the SD v1.5 version. I believe that this is not just a number, but a number that can lead to a significant improvement in performance. It has been a while since we realized that the overall performance of SD v1.5 has improved beyond imagination thanks to the explosive contributions of our community. Therefore, I am working on completing this AlbedoBase XL model in order to optimally reproduce the performance improvement that occurred in v1.5 in this XL version as well. My goal is to directly test the performance of all Checkpoints and LoRAs that are publicly uploaded to Civitai, and merge only the resources that are judged to be optimal after passing through several filters. This will surpass the performance of image-generating AI of companies such as Midjourney. As of now, AlbedoBase XL v0.4 has merged exactly 55 selected checkpoints and 138 LoRAs.",
"hw": "CPU",
"registry": "Civital",
"popularity": 3,
"license": "openrail++",
"url": ""
},
{
"id": "sdxl-turbo",
"name": "SDXL Turbo",
"description": "SDXL Turbo achieves state-of-the-art performance with a new distillation technology, enabling single-step image generation with unprecedented quality, reducing the required step count from 50 to just one.",
"hw": "CPU",
"registry": "Hugging Face",
"popularity": 3,
"license": "sai-c-community",
"url": ""
}
],
"categories": [
{
"id": "natural-language-processing",
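The net effect of this file change: a recipe's `models` array now carries model ids instead of inlined model objects, so model metadata is defined once in the new top-level `models` list. A minimal sketch of resolving those ids, assuming the catalog shape sketched above (the helper itself is hypothetical, not part of this commit):

```typescript
import catalog from './ai.json';

// Hypothetical helper: look up a recipe's models in the shared catalog.
function resolveRecipeModels(recipeId: string): ModelInfo[] {
  const recipe = catalog.recipes.find(r => r.id === recipeId);
  if (!recipe) throw new Error(`No recipe found having id ${recipeId}`);
  // recipe.models is now string[] (ids), not ModelInfo[].
  return catalog.models.filter(m => recipe.models.includes(m.id));
}
```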
27 changes: 13 additions & 14 deletions packages/backend/src/managers/applicationManager.ts
@@ -12,6 +12,7 @@ import { Task } from '@shared/models/ITask';
import { RecipeStatusUtils } from '../utils/recipeStatusUtils';
import { getParentDirectory } from '../utils/pathUtils';
import type { LocalModelInfo } from '@shared/models/ILocalModelInfo';
+ import { ModelInfo } from '@shared/models/IModelInfo';

// TODO: Need to be configured
export const AI_STUDIO_FOLDER = path.join('podman-desktop', 'ai-studio');
@@ -29,7 +30,7 @@ export class ApplicationManager {
this.homeDirectory = os.homedir();
}

- async pullApplication(recipe: Recipe) {
+ async pullApplication(recipe: Recipe, model: ModelInfo) {
// Create a TaskUtils object to help us
const taskUtil = new RecipeStatusUtils(recipe.id, this.recipeStatusRegistry);

@@ -120,20 +121,18 @@
const filteredContainers = aiConfig.application.containers
.filter((container) => container.arch === undefined || container.arch === arch())

- // Download first model available (if exist)
- if(recipe.models && recipe.models.length > 0) {
- const model = recipe.models[0];
- taskUtil.setTask({
- id: model.id,
- state: 'loading',
- name: `Downloading model ${model.name}`,
- labels: {
- "model-pulling": model.id,
- }
- });
+ // Download model
+ taskUtil.setTask({
+ id: model.id,
+ state: 'loading',
+ name: `Downloading model ${model.name}`,
+ labels: {
+ "model-pulling": model.id,
+ }
+ });

- await this.downloadModelMain(model.id, model.url, taskUtil)
- }
+ await this.downloadModelMain(model.id, model.url, taskUtil)


filteredContainers.forEach((container) => {
taskUtil.setTask({
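With this change `pullApplication` no longer picks `recipe.models[0]` itself: the caller chooses the model, and the download task is always scheduled. A sketch of the new call shape (variable names are illustrative; the real wiring is in `studio-api-impl.ts` below):

```typescript
// Illustrative only: the caller resolves the model, then hands it to the manager.
const recipe = await studioApi.getRecipeById('chatbot');
const model = await studioApi.getModelById(recipe.models[0]); // recipe.models now holds ids
await applicationManager.pullApplication(recipe, model);
```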
78 changes: 78 additions & 0 deletions packages/backend/src/studio-api-impl.spec.ts
@@ -0,0 +1,78 @@
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/

/* eslint-disable @typescript-eslint/no-explicit-any */

import { expect, test, vi } from 'vitest';
import content from './ai-test.json';
import { ApplicationManager } from './managers/applicationManager';
import { RecipeStatusRegistry } from './registries/RecipeStatusRegistry';
import { StudioApiImpl } from './studio-api-impl';
import { PlayGroundManager } from './playground';
import { TaskRegistry } from './registries/TaskRegistry';

vi.mock('./ai.json', () => {
return {
default: content
};
});

const studioApiImpl = new StudioApiImpl(
{} as unknown as ApplicationManager,
{} as unknown as RecipeStatusRegistry,
{} as unknown as TaskRegistry,
{} as unknown as PlayGroundManager,
)

test('expect correct model is returned with valid id', async () => {
const model = await studioApiImpl.getModelById('llama-2-7b-chat.Q5_K_S');
expect(model).toBeDefined();
expect(model.name).toEqual('Llama-2-7B-Chat-GGUF');
expect(model.registry).toEqual('Hugging Face');
expect(model.url).toEqual('https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf')
});

test('expect error if id does not correspond to any model', async () => {
await expect(() => studioApiImpl.getModelById('unknown')).rejects.toThrowError(
'No model found having id unknown',
);
})

test('expect array of models based on list of ids', async () => {
const models = await studioApiImpl.getModelsByIds(['llama-2-7b-chat.Q5_K_S', 'albedobase-xl-1.3']);
expect(models).toBeDefined();
expect(models.length).toBe(2);
expect(models[0].name).toEqual('Llama-2-7B-Chat-GGUF');
expect(models[0].registry).toEqual('Hugging Face');
expect(models[0].url).toEqual('https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf');
expect(models[1].name).toEqual('AlbedoBase XL 1.3');
expect(models[1].registry).toEqual('Civitai');
expect(models[1].url).toEqual('');
});

test('expect empty array if input list is empty', async () => {
const models = await studioApiImpl.getModelsByIds([]);
expect(models).toBeDefined();
expect(models.length).toBe(0);
});

test('expect empty array if input list has ids that are not in the catalog', async () => {
const models = await studioApiImpl.getModelsByIds(['1', '2']);
expect(models).toBeDefined();
expect(models.length).toBe(0);
});
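Because `vi.mock('./ai.json', ...)` is hoisted by Vitest, `StudioApiImpl` reads the `ai-test.json` fixture instead of the real catalog. One further case worth covering, sketched under the same setup (not part of this commit): duplicate ids should not yield duplicate results, since `getModelsByIds` filters the catalog rather than mapping the input list.

```typescript
// Hypothetical extra test, reusing the fixture and studioApiImpl from above.
test('expect no duplicates when the same id is passed twice', async () => {
  const models = await studioApiImpl.getModelsByIds(['sdxl-turbo', 'sdxl-turbo']);
  expect(models.length).toBe(1);
  expect(models[0].name).toEqual('SDXL Turbo');
});
```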
22 changes: 15 additions & 7 deletions packages/backend/src/studio-api-impl.ts
@@ -8,7 +8,6 @@ import { RecipeStatus } from '@shared/models/IRecipeStatus';
import { ModelInfo } from '@shared/models/IModelInfo';
import { TaskRegistry } from './registries/TaskRegistry';
import { Task } from '@shared/models/ITask';
- import { Studio } from './studio';
import * as path from 'node:path';
import { ModelResponse } from '@shared/models/IModelResponse';
import { PlayGroundManager } from './playground';
@@ -59,10 +58,15 @@
}

async getModelById(modelId: string): Promise<ModelInfo> {
- const model = content.recipes.flatMap(r => (r.models as ModelInfo[]).filter(m => modelId === m.id));
- if (model.length === 1) return model[0];
- if (model.length === 0) throw new Error('Not found');
- throw new Error('several models with same id');
+ const model = content.models.find(m => modelId === m.id);
+ if (!model) {
+ throw new Error(`No model found having id ${modelId}`);
+ }
+ return model;
}

+ async getModelsByIds(ids: string[]): Promise<ModelInfo[]> {
+ return content.models.filter(m => ids.includes(m.id)) ?? [];
+ }

async searchRecipes(query: string): Promise<Recipe[]> {
@@ -74,9 +78,13 @@
const recipe: Recipe = await this.getRecipeById(recipeId);
console.log('StudioApiImpl recipe', recipe);

+ // the user should have selected one model; we use the first one for the moment
+ const modelId = recipe.models[0];
+ const model = await this.getModelById(modelId);

// Do not wait for the pull application, run it separately
new Promise(() => {
- this.applicationManager.pullApplication(recipe);
+ this.applicationManager.pullApplication(recipe, model);
});

return Promise.resolve(undefined);
@@ -85,7 +93,7 @@
async getLocalModels(): Promise<ModelInfo[]> {
const local = this.applicationManager.getLocalModels();
const localIds = local.map(l => l.id);
- return content.recipes.flatMap(r => r.models.filter(m => localIds.includes(m.id)));
+ return content.models.filter(m => localIds.includes(m.id));
}

async getTasksByLabel(label: string): Promise<Task[]> {
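Two behaviors of the new lookups are worth noting: `getModelById` throws for an unknown id, while `getModelsByIds` silently drops unknown ids and returns matches in catalog order (the trailing `?? []` is defensive; `filter` never returns a nullish value). A short usage sketch under those assumptions:

```typescript
// Illustrative usage of the catalog-backed lookups.
const one = await studioApiImpl.getModelById('sdxl-turbo');               // ModelInfo
const some = await studioApiImpl.getModelsByIds(['sdxl-turbo', 'nope']);  // only sdxl-turbo
await studioApiImpl.getModelById('nope').catch(e => console.error(e.message));
// => "No model found having id nope"
```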
(Diffs for the remaining four changed files are not shown.)