Skip to content

Commit

Permalink
feat: basic vllm support for hf cached models
Browse files Browse the repository at this point in the history
Signed-off-by: axel7083 <[email protected]>
  • Loading branch information
axel7083 committed Dec 13, 2024
1 parent 1b8b482 commit 0af8659
Show file tree
Hide file tree
Showing 8 changed files with 247 additions and 15 deletions.
1 change: 1 addition & 0 deletions packages/backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@
},
"dependencies": {
"@huggingface/gguf": "^0.1.12",
"@huggingface/hub": "^0.21.0",
"express": "^4.21.2",
"express-openapi-validator": "^5.3.9",
"isomorphic-git": "^1.27.2",
Expand Down
3 changes: 3 additions & 0 deletions packages/backend/src/assets/inference-images.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,8 @@
"default": "ghcr.io/containers/podman-desktop-extension-ai-lab-playground-images/ai-lab-playground-chat@sha256:20734e9d60f047d27e4c9cf6a3b663e0627d48bd06d0a73b968f9d81c82de2f1",
"cuda": "ghcr.io/containers/podman-desktop-extension-ai-lab-playground-images/ai-lab-playground-chat-cuda@sha256:798acced911527254601d0e39a90c5a29ecad82755f28594bea9a587ea9e6043",
"vulkan": "ghcr.io/containers/podman-desktop-extension-ai-lab-playground-images/ai-lab-playground-chat-vulkan@sha256:22e11661fe66ace7c30b419703305b803eb937da10e19c23cb6767f03578256c"
},
"vllm": {
"default": "quay.io/rh-ee-astefani/vllm:cpu-1734105797"
}
}
73 changes: 61 additions & 12 deletions packages/backend/src/managers/modelsManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,14 @@ import { gguf } from '@huggingface/gguf';
import type { PodmanConnection } from './podmanConnection';
import { VMType } from '@shared/src/models/IPodman';
import type { ConfigurationRegistry } from '../registries/ConfigurationRegistry';
import { InferenceType } from '@shared/src/models/IInference';
import { scanCacheDir } from '@huggingface/hub';
import { basename, join } from 'node:path';

export class ModelsManager implements Disposable {
#models: Map<string, ModelInfo>;
#hfCache: Map<string, ModelInfo>;

#watcher?: podmanDesktopApi.FileSystemWatcher;
#disposables: Disposable[];

Expand All @@ -58,6 +63,7 @@ export class ModelsManager implements Disposable {
private configurationRegistry: ConfigurationRegistry,
) {
this.#models = new Map();
this.#hfCache = new Map();
this.#disposables = [];
}

Expand All @@ -72,6 +78,44 @@ export class ModelsManager implements Disposable {
this.loadLocalModels().catch((err: unknown) => {
console.error('Something went wrong while trying to load local models', err);
});

scanCacheDir()
.then(results => {
this.#hfCache.clear();
results.repos.forEach(repo => {
if (repo.revisions.length === 0) {
console.warn(`found hugging face cache repository ${repo.id} without any revision`);
return;
}

// ensure at least one safetensor is available
if (!repo.revisions[0].files.some(file => file.path.endsWith('.safetensors'))) {
console.warn(
`hugging face cache repository ${repo.id.name} do not contain any .safetensors file: ignoring`,
);
return;
}

const id = basename(repo.path);
this.#hfCache.set(id, {
id: id,
backend: InferenceType.VLLM,
file: {
file: repo.revisions[0].commitOid,
path: join(repo.path, 'snapshots'),
creation: repo.lastModifiedAt,
size: repo.size,
},
name: repo.id.name,
description: repo.id.name,
properties: {
origin: 'HF_CACHE',
},
});
});
this.notify();
})
.catch(console.error);
}

dispose(): void {
Expand All @@ -85,7 +129,7 @@ export class ModelsManager implements Disposable {
this.catalogManager.getModels().forEach(m => this.#models.set(m.id, m));
const reloadLocalModels = async (): Promise<void> => {
this.getLocalModelsFromDisk();
await this.sendModelsInfo();
this.notify();
};
if (this.#watcher === undefined) {
this.#watcher = apiFs.createFileSystemWatcher(this.modelsDir);
Expand All @@ -99,15 +143,17 @@ export class ModelsManager implements Disposable {
}

getModelsInfo(): ModelInfo[] {
return [...this.#models.values()];
return [...this.#models.values(), ...this.#hfCache.values()];
}

async sendModelsInfo(): Promise<void> {
notify(): void {
const models = this.getModelsInfo();
await this.webview.postMessage({
id: Messages.MSG_NEW_MODELS_STATE,
body: models,
});
this.webview
.postMessage({

Check failure on line 152 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / windows-2022

Unhandled error

TypeError: this.webview.postMessage is not a function ❯ ModelsManager.notify src/managers/modelsManager.ts:152:8 ❯ src/managers/modelsManager.ts:380:14 ❯ ModelsManager.onDownloadUploadEvent src/managers/modelsManager.ts:352:11 ❯ src/managers/modelsManager.ts:462:38 ❯ Timeout._onTimeout src/managers/modelsManager.spec.ts:836:9 ❯ listOnTimeout ../../node:internal/timers:581:17 ❯ processTimers ../../node:internal/timers:519:7 This error originated in "src/managers/modelsManager.spec.ts" test file. It doesn't mean the error was thrown inside the file itself, but while it was running. The latest test that might've caused the error is "multiple download request same model - second call before first completed". It might mean one of the following: - The error was thrown, while Vitest was running this test. - If the error occurred after the test had been completed, this was the last documented test before it was thrown.

Check failure on line 152 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / ubuntu-22.04

Unhandled error

TypeError: this.webview.postMessage is not a function ❯ ModelsManager.notify src/managers/modelsManager.ts:152:8 ❯ src/managers/modelsManager.ts:380:14 ❯ ModelsManager.onDownloadUploadEvent src/managers/modelsManager.ts:352:11 ❯ src/managers/modelsManager.ts:462:38 ❯ Timeout._onTimeout src/managers/modelsManager.spec.ts:836:9 ❯ listOnTimeout ../../node:internal/timers:581:17 ❯ processTimers ../../node:internal/timers:519:7 This error originated in "src/managers/modelsManager.spec.ts" test file. It doesn't mean the error was thrown inside the file itself, but while it was running. The latest test that might've caused the error is "multiple download request same model - second call before first completed". It might mean one of the following: - The error was thrown, while Vitest was running this test. - If the error occurred after the test had been completed, this was the last documented test before it was thrown.

Check failure on line 152 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / macos-14

Unhandled error

TypeError: this.webview.postMessage is not a function ❯ ModelsManager.notify src/managers/modelsManager.ts:152:8 ❯ src/managers/modelsManager.ts:380:14 ❯ ModelsManager.onDownloadUploadEvent src/managers/modelsManager.ts:352:11 ❯ src/managers/modelsManager.ts:462:38 ❯ Timeout._onTimeout src/managers/modelsManager.spec.ts:836:9 ❯ listOnTimeout ../../node:internal/timers:581:17 ❯ processTimers ../../node:internal/timers:519:7 This error originated in "src/managers/modelsManager.spec.ts" test file. It doesn't mean the error was thrown inside the file itself, but while it was running. The latest test that might've caused the error is "multiple download request same model - second call before first completed". It might mean one of the following: - The error was thrown, while Vitest was running this test. - If the error occurred after the test had been completed, this was the last documented test before it was thrown.
id: Messages.MSG_NEW_MODELS_STATE,
body: models,
})

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / windows-2022

src/managers/modelsManager.spec.ts > getModelsInfo should get models in local directory

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:215:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / windows-2022

src/managers/modelsManager.spec.ts > getLocalModelsFromDisk should return undefined Date and size when stat fail

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:308:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / windows-2022

src/managers/modelsManager.spec.ts > getLocalModelsFromDisk should skip folders containing tmp files

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:368:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / windows-2022

src/managers/modelsManager.spec.ts > loadLocalModels should post a message with the message on disk and on catalog

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:410:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / windows-2022

src/managers/modelsManager.spec.ts > deleteModel deletes the model folder

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:462:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / windows-2022

src/managers/modelsManager.spec.ts > deleting models > deleteModel fails to delete the model folder

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:528:19

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / windows-2022

src/managers/modelsManager.spec.ts > deleting models > delete local model should call catalogManager

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:595:19

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / ubuntu-22.04

src/managers/modelsManager.spec.ts > getModelsInfo should get models in local directory

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:215:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / ubuntu-22.04

src/managers/modelsManager.spec.ts > getLocalModelsFromDisk should return undefined Date and size when stat fail

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:308:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / ubuntu-22.04

src/managers/modelsManager.spec.ts > getLocalModelsFromDisk should skip folders containing tmp files

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:368:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / ubuntu-22.04

src/managers/modelsManager.spec.ts > loadLocalModels should post a message with the message on disk and on catalog

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:410:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / ubuntu-22.04

src/managers/modelsManager.spec.ts > deleteModel deletes the model folder

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:462:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / ubuntu-22.04

src/managers/modelsManager.spec.ts > deleting models > deleteModel fails to delete the model folder

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:528:19

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / ubuntu-22.04

src/managers/modelsManager.spec.ts > deleting models > delete local model should call catalogManager

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:595:19

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / macos-14

src/managers/modelsManager.spec.ts > getModelsInfo should get models in local directory

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:215:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / macos-14

src/managers/modelsManager.spec.ts > getLocalModelsFromDisk should return undefined Date and size when stat fail

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:308:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / macos-14

src/managers/modelsManager.spec.ts > getLocalModelsFromDisk should skip folders containing tmp files

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:368:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / macos-14

src/managers/modelsManager.spec.ts > loadLocalModels should post a message with the message on disk and on catalog

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:410:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / macos-14

src/managers/modelsManager.spec.ts > deleteModel deletes the model folder

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:462:17

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / macos-14

src/managers/modelsManager.spec.ts > deleting models > deleteModel fails to delete the model folder

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:528:19

Check failure on line 155 in packages/backend/src/managers/modelsManager.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / macos-14

src/managers/modelsManager.spec.ts > deleting models > delete local model should call catalogManager

TypeError: Cannot read properties of undefined (reading 'catch') ❯ ModelsManager.notify src/managers/modelsManager.ts:155:8 ❯ reloadLocalModels src/managers/modelsManager.ts:132:12 ❯ ModelsManager.loadLocalModels src/managers/modelsManager.ts:142:11 ❯ src/managers/modelsManager.spec.ts:595:19
.catch(console.error);
}

getModelsDirectory(): string {
Expand Down Expand Up @@ -186,7 +232,7 @@ export class ModelsManager implements Disposable {
}

model.state = 'deleting';
await this.sendModelsInfo();
this.notify();
try {
await this.deleteRemoteModel(model);
let modelPath;
Expand Down Expand Up @@ -214,7 +260,7 @@ export class ModelsManager implements Disposable {
model.state = undefined;
this.getLocalModelsFromDisk();
} finally {
await this.sendModelsInfo();
this.notify();
}
}

Expand Down Expand Up @@ -331,9 +377,7 @@ export class ModelsManager implements Disposable {

// refresh model lists on event completion
this.getLocalModelsFromDisk();
this.sendModelsInfo().catch((err: unknown) => {
console.error('Something went wrong while sending models info.', err);
});
this.notify();

// cleanup downloader
this.#downloaders.delete(event.id);
Expand Down Expand Up @@ -433,6 +477,11 @@ export class ModelsManager implements Disposable {
return getLocalModelFile(model);
}

if (model.backend === InferenceType.VLLM) {
console.warn('Model upload for vllm is disabled');
return getLocalModelFile(model);
}

this.taskRegistry.createTask(`Copying model ${model.name} to ${connection.name}`, 'loading', {
...labels,
'model-uploading': model.id,
Expand Down
5 changes: 5 additions & 0 deletions packages/backend/src/studio.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ import { InstructlabApiImpl } from './instructlab-api-impl';
import { NavigationRegistry } from './registries/NavigationRegistry';
import { StudioAPI } from '@shared/src/StudioAPI';
import { InstructlabAPI } from '@shared/src/InstructlabAPI';
import { VLLM } from './workers/provider/VLLM';

export class Studio {
readonly #extensionContext: ExtensionContext;
Expand Down Expand Up @@ -260,6 +261,10 @@ export class Studio {
this.#inferenceProviderRegistry.register(new WhisperCpp(this.#taskRegistry, this.#podmanConnection)),
);

this.#extensionContext.subscriptions.push(
this.#inferenceProviderRegistry.register(new VLLM(this.#taskRegistry, this.#podmanConnection)),
);

/**
* The inference manager create, stop, manage Inference servers
*/
Expand Down
148 changes: 148 additions & 0 deletions packages/backend/src/workers/provider/VLLM.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/

import { InferenceProvider } from './InferenceProvider';
import type { TaskRegistry } from '../../registries/TaskRegistry';
import type { PodmanConnection } from '../../managers/podmanConnection';
import { type InferenceServer, InferenceType } from '@shared/src/models/IInference';
import type { InferenceServerConfig } from '@shared/src/models/InferenceServerConfig';
import type { ContainerProviderConnection, MountConfig } from '@podman-desktop/api';
import * as images from '../../assets/inference-images.json';
import { LABEL_INFERENCE_SERVER } from '../../utils/inferenceUtils';
import { DISABLE_SELINUX_LABEL_SECURITY_OPTION } from '../../utils/utils';
import { basename, dirname } from 'node:path';
import { join as joinposix } from 'node:path/posix';
import { getLocalModelFile } from '../../utils/modelsUtils';

export class VLLM extends InferenceProvider {
  constructor(
    taskRegistry: TaskRegistry,
    private podmanConnection: PodmanConnection,
  ) {
    super(taskRegistry, InferenceType.VLLM, 'vllm');
  }

  dispose(): void {}

  public enabled = (): boolean => true;

  /**
   * Here is an example
   *
   * podman run -it --rm
   * -v C:\Users\axels\.cache\huggingface\hub\models--mistralai--Mistral-7B-v0.1:/cache/models--mistralai--Mistral-7B-v0.1
   * -e HF_HUB_CACHE=/cache
   * localhost/vllm-cpu-env:latest
   * --model=/cache/models--mistralai--Mistral-7B-v0.1/snapshots/7231864981174d9bee8c7687c24c8344414eae6b
   *
   * @param config the inference server configuration; exactly one VLLM-backend model is expected
   */
  override async perform(config: InferenceServerConfig): Promise<InferenceServer> {
    // vLLM serves a single model per container: reject anything else up-front.
    if (config.modelsInfo.length !== 1)
      throw new Error(`only one model is supported, received ${config.modelsInfo.length}`);

    const modelInfo = config.modelsInfo[0];
    if (modelInfo.backend !== InferenceType.VLLM) {
      throw new Error(`VLLM requires models with backend type ${InferenceType.VLLM} got ${modelInfo.backend}.`);
    }

    if (modelInfo.file === undefined) {
      throw new Error('The model info file provided is undefined');
    }

    console.log('[VLLM]', config);
    console.log('[VLLM] modelInfo.file', modelInfo.file);

    const modelFilePath = getLocalModelFile(modelInfo);

    // modelInfo.file.path must be under the form $(HF_HUB_CACHE)/<repo-type>--<repo-id>/snapshots/<commit-hash>
    // so walking up twice from the full path yields the repository cache root.
    const snapshotsDir = dirname(modelFilePath);
    const revision = basename(modelFilePath);
    if (basename(snapshotsDir) !== 'snapshots') throw new Error('you must provide snapshot path for vllm');
    const repoCacheDir = dirname(snapshotsDir);

    // Resolve the container provider: use the one requested by the config,
    // otherwise fall back to any currently running connection.
    const providerConnection: ContainerProviderConnection | undefined = config.connection
      ? this.podmanConnection.getContainerProviderConnection(config.connection)
      : this.podmanConnection.findRunningContainerProviderConnection();

    if (!providerConnection) throw new Error('no running connection could be found');

    // Tag the container so the inference server can be identified later.
    const labels: Record<string, string> = {
      ...config.labels,
      [LABEL_INFERENCE_SERVER]: JSON.stringify(config.modelsInfo.map(model => model.id)),
    };

    const pulledImage = await this.pullImage(providerConnection, config.image ?? images.vllm.default, labels);

    // https://huggingface.co/docs/transformers/main/en/installation#offline-mode
    // HF_HUB_OFFLINE in main
    // TRANSFORMERS_OFFLINE for legacy
    const environment: string[] = [`HF_HUB_CACHE=/cache`, 'TRANSFORMERS_OFFLINE=1', 'HF_HUB_OFFLINE=1'];

    labels['api'] = `http://localhost:${config.port}/inference`;

    // Bind-mount the whole hugging face repository cache directory into the container.
    const bindMounts: MountConfig = [
      {
        Target: `/cache/${modelInfo.id}`,
        Source: repoCacheDir,
        Type: 'bind',
      },
    ];

    const created = await this.createContainer(
      pulledImage.engineId,
      {
        Image: pulledImage.Id,
        Detach: true,
        Labels: labels,
        HostConfig: {
          AutoRemove: false,
          Mounts: bindMounts,
          PortBindings: {
            // vLLM listens on 8000 inside the container; expose it on the configured host port.
            '8000/tcp': [
              {
                HostPort: `${config.port}`,
              },
            ],
          },
          SecurityOpt: [DISABLE_SELINUX_LABEL_SECURITY_OPTION],
        },
        Env: environment,
        // container paths are always posix, regardless of the host platform
        Cmd: [`--model=${joinposix('/cache', modelInfo.id, 'snapshots', revision)}`],
      },
      labels,
    );

    return {
      models: [modelInfo],
      status: 'running',
      connection: {
        port: config.port,
      },
      container: {
        containerId: created.id,
        engineId: created.engineId,
      },
      type: InferenceType.VLLM,
      labels: labels,
    };
  }
}
15 changes: 12 additions & 3 deletions packages/frontend/src/lib/table/model/ModelColumnName.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,20 @@
import type { ModelInfo } from '@shared/src/models/IModelInfo';
import { router } from 'tinro';
export let object: ModelInfo;
interface Props {
object: ModelInfo;
}
let { object }: Props = $props();
let hf: boolean = $state(object.properties?.['origin'] === 'HF_CACHE');
function openDetails(): void {
router.goto(`/model/${object.id}`);
}
</script>

<button class="flex flex-col w-full" title={object.name} on:click={openDetails} aria-label="Open Model Details">
<button class="flex flex-col w-full" title={object.name} onclick={openDetails} aria-label="Open Model Details">
<div
class="text-[var(--pd-table-body-text-highlight)] overflow-hidden text-ellipsis w-full text-left"
aria-label="Model Name">
Expand All @@ -19,7 +25,10 @@ function openDetails(): void {
<span class="text-sm text-[var(--pd-table-body-text)]" aria-label="Model Info"
>{object.registry} - {object.license}</span>
{/if}
{#if !object.registry && !object.license && !object.url}
{#if hf}
<span class="text-sm text-[var(--pd-table-body-text)]" aria-label="Imported Model Info"
>Loaded from hugging face cache</span>
{:else if !object.registry && !object.license && !object.url}
<span class="text-sm text-[var(--pd-table-body-text)]" aria-label="Imported Model Info">Imported by User</span>
{/if}
</button>
1 change: 1 addition & 0 deletions packages/shared/src/models/IInference.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export enum InferenceType {
LLAMA_CPP = 'llama-cpp',
WHISPER_CPP = 'whisper-cpp',
NONE = 'none',
VLLM = 'vllm',
}

export type InferenceServerStatus = 'stopped' | 'running' | 'deleting' | 'stopping' | 'error' | 'starting';
Expand Down
Loading

0 comments on commit 0af8659

Please sign in to comment.