diff --git a/PACKAGING-GUIDE.md b/PACKAGING-GUIDE.md index 296e13e5d..595aa0c4a 100644 --- a/PACKAGING-GUIDE.md +++ b/PACKAGING-GUIDE.md @@ -38,6 +38,7 @@ A model has the following attributes: - ```license```: the license under which the model is available - ```url```: the URL used to download the model - ```memory```: the memory footprint of the model in bytes, as computed by the workflow `.github/workflows/compute-model-sizes.yaml` +- ```sha256```: the SHA-256 checksum to be used to verify the downloaded model is identical to the original. It is optional and it must be HEX encoded #### Recipes diff --git a/packages/backend/src/assets/ai.json b/packages/backend/src/assets/ai.json index 3b1976276..c1696336f 100644 --- a/packages/backend/src/assets/ai.json +++ b/packages/backend/src/assets/ai.json @@ -123,7 +123,8 @@ "memory": 4080218931, "properties": { "chatFormat": "openchat" - } + }, + "sha256": "6adeaad8c048b35ea54562c55e454cc32c63118a32c7b8152cf706b290611487" }, { "id": "hf.instructlab.merlinite-7b-lab-GGUF", @@ -136,7 +137,8 @@ "memory": 4370129224, "properties": { "chatFormat": "openchat" - } + }, + "sha256": "9ca044d727db34750e1aeb04e3b18c3cf4a8c064a9ac96cf00448c506631d16c" }, { "id": "hf.TheBloke.mistral-7b-instruct-v0.2.Q4_K_M", @@ -146,7 +148,8 @@ "registry": "Hugging Face", "license": "Apache-2.0", "url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf", - "memory": 4370129224 + "memory": 4370129224, + "sha256": "3e0039fd0273fcbebb49228943b17831aadd55cbcbf56f0af00499be2040ccf9" }, { "id": "hf.NousResearch.Hermes-2-Pro-Mistral-7B.Q4_K_M", @@ -156,7 +159,8 @@ "registry": "Hugging Face", "license": "Apache-2.0", "url": "https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/resolve/main/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf", - "memory": 4370129224 + "memory": 4370129224, + "sha256": "e1e4253b94e3c04c7b6544250f29ad864a56eb2126e61eb440991a8284453674" }, { "id": 
"hf.ibm.merlinite-7b-Q4_K_M", @@ -169,7 +173,8 @@ "memory": 4370129224, "properties": { "chatFormat": "openchat" - } + }, + "sha256": "94f3a16321c9604ca22e970f3b89931ae5b4bbfd4c5d996e2bb606c506590666" }, { "id": "hf.TheBloke.mistral-7b-codealpaca-lora.Q4_K_M", @@ -179,7 +184,8 @@ "registry": "Hugging Face", "license": "Apache-2.0", "url": "https://huggingface.co/TheBloke/Mistral-7B-codealpaca-lora-GGUF/resolve/main/mistral-7b-codealpaca-lora.Q4_K_M.gguf", - "memory": 4370129224 + "memory": 4370129224, + "sha256": "69c07f27f682ca8da59fcd8a981335876882a2577f0f9df51b49cf6b97fd470f" }, { "id": "hf.TheBloke.mistral-7b-code-16k-qlora.Q4_K_M", @@ -189,7 +195,8 @@ "registry": "Hugging Face", "license": "Apache-2.0", "url": "https://huggingface.co/TheBloke/Mistral-7B-Code-16K-qlora-GGUF/resolve/main/mistral-7b-code-16k-qlora.Q4_K_M.gguf", - "memory": 4370129224 + "memory": 4370129224, + "sha256": "0f3c9aced2de6caad52323fea5a92a22fba0b4efddb564fda7a3071e0614443f" }, { "id": "hf.froggeric.Cerebrum-1.0-7b-Q4_KS", @@ -202,7 +209,8 @@ "memory": 4144643441, "properties": { "chatFormat": "openchat" - } + }, + "sha256": "98861462a0a80e08704631df23ffee860bd5634551c48d069d4daa3c8931bc52" }, { "id": "hf.TheBloke.openchat-3.5-0106.Q4_K_M", @@ -212,7 +220,8 @@ "registry": "Hugging Face", "license": "Apache-2.0", "url": "https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF/resolve/main/openchat-3.5-0106.Q4_K_M.gguf", - "memory": 4370129224 + "memory": 4370129224, + "sha256": "49190d4d039e6dea463e567ebce707eb001648f4ba01e43eb7fa88d9975fc0ce" }, { "id": "hf.TheBloke.mistral-7b-openorca.Q4_K_M", @@ -222,7 +231,8 @@ "registry": "Hugging Face", "license": "Apache-2.0", "url": "https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF/resolve/main/mistral-7b-openorca.Q4_K_M.gguf", - "memory": 4370129224 + "memory": 4370129224, + "sha256": "83967e58c10c25fbe9d358b6d9e9a8212ca8a292061110dcb68511d39133407b" }, { "id": "hf.MaziyarPanahi.phi-2.Q4_K_M", @@ -232,7 +242,8 @@ "registry": "Hugging 
Face", "license": "Apache-2.0", "url": "https://huggingface.co/MaziyarPanahi/phi-2-GGUF/resolve/main/phi-2.Q4_K_M.gguf", - "memory": 1739461755 + "memory": 1739461755, + "sha256": "013e0e421b70dc169adb0c0010171202371e907e5f648084e4ddc8ad9985127a" }, { "id": "hf.llmware.dragon-mistral-7b-q4_k_m", @@ -245,7 +256,8 @@ "memory": 4370129224, "properties": { "chatFormat": "openchat" - } + }, + "sha256": "1d8f463c4917480b770db5d7921f3d144471891c45a0d25ba3ab3dd753ec620f" }, { "id": "hf.MaziyarPanahi.MixTAO-7Bx2-MoE-Instruct-v7.0.Q4_K_M", @@ -255,7 +267,8 @@ "registry": "Hugging Face", "license": "Apache-2.0", "url": "https://huggingface.co/MaziyarPanahi/MixTAO-7Bx2-MoE-Instruct-v7.0-GGUF/resolve/main/MixTAO-7Bx2-MoE-Instruct-v7.0.Q4_K_M.gguf", - "memory": 7784628224 + "memory": 7784628224, + "sha256": "f5fcf04c77a5b69ae37791b48df90daa553e40b5a39efc9068258bedef373182" }, { "id": "hf.ggerganov.whisper.cpp", @@ -265,7 +278,8 @@ "registry": "Hugging Face", "license": "Apache-2.0", "url": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin", - "memory": 487010000 + "memory": 487010000, + "sha256": "1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b" }, { "id": "hf.facebook.detr-resnet-101", @@ -278,7 +292,8 @@ "memory": 242980000, "properties": { "name": "facebook/detr-resnet-101" - } + }, + "sha256": "0943b5a9085a95a0e3ecc1c99a7db0451ecb9d79f4dcb543b0939c1a12481a5d" } ], "categories": [ diff --git a/packages/backend/src/managers/modelsManager.spec.ts b/packages/backend/src/managers/modelsManager.spec.ts index eb566d17b..d7e9b3889 100644 --- a/packages/backend/src/managers/modelsManager.spec.ts +++ b/packages/backend/src/managers/modelsManager.spec.ts @@ -28,6 +28,7 @@ import type { ModelInfo } from '@shared/src/models/IModelInfo'; import * as utils from '../utils/utils'; import { TaskRegistry } from '../registries/TaskRegistry'; import type { CancellationTokenRegistry } from '../registries/CancellationTokenRegistry'; +import * as sha from 
'../utils/sha'; const mocks = vi.hoisted(() => { return { @@ -731,6 +732,34 @@ describe('downloadModel', () => { state: 'success', }); }); + test('fail if model on disk has different sha of the expected value', async () => { + const manager = new ModelsManager( + 'appdir', + {} as Webview, + { + getModels(): ModelInfo[] { + return []; + }, + } as CatalogManager, + telemetryLogger, + taskRegistry, + cancellationTokenRegistryMock, + ); + vi.spyOn(taskRegistry, 'updateTask'); + vi.spyOn(manager, 'isModelOnDisk').mockReturnValue(true); /* pretend the model file is already present locally */ + vi.spyOn(manager, 'getLocalModelPath').mockReturnValue('path'); /* 'path' is interpolated into the expected error message below */ + vi.spyOn(sha, 'hasValidSha').mockResolvedValue(false); /* simulate a checksum mismatch for the file on disk */ + await expect(() => + manager.requestDownloadModel({ + id: 'id', + url: 'url', + name: 'name', + sha256: 'sha', + } as ModelInfo), + ).rejects.toThrowError( + 'Model name is already present on disk at path but its security hash (SHA-256) does not match the expected value. This may indicate the file has been altered or corrupted. Please delete it and try again.', + ); + }); test('multiple download request same model - second call after first completed', async () => { mocks.getDownloaderCompleter.mockReturnValue(true); diff --git a/packages/backend/src/managers/modelsManager.ts b/packages/backend/src/managers/modelsManager.ts index 1f5691582..00ee3ac41 100644 --- a/packages/backend/src/managers/modelsManager.ts +++ b/packages/backend/src/managers/modelsManager.ts @@ -33,6 +33,7 @@ import { Uploader } from '../utils/uploader'; import { deleteRemoteModel, getLocalModelFile, isModelUploaded } from '../utils/modelsUtils'; import { getFirstRunningMachineName } from '../utils/podman'; import type { CancellationTokenRegistry } from '../registries/CancellationTokenRegistry'; +import { hasValidSha } from '../utils/sha'; export class ModelsManager implements Disposable { #modelsDir: string = ''; @@ -348,7 +349,7 @@ export class ModelsManager implements Disposable { const target = path.resolve(destDir, path.basename(model.url));
// Create a downloader - const downloader = new Downloader(model.url, target, abortSignal); + const downloader = new Downloader(model.url, target, model.sha256, abortSignal); this.#downloaders.set(model.id, downloader); @@ -356,6 +357,16 @@ export class ModelsManager implements Disposable { } private createDownloadTask(model: ModelInfo, labels?: { [key: string]: string }): Task { + // it may happen that the taskRegistry contains old entries representing an old failing download, we delete them as we are starting a new download + const failedPullingTaskIds = this.taskRegistry + .getTasksByLabels({ + 'model-pulling': model.id, + }) + .filter(t => t.state === 'error') + .map(t => t.id); + if (failedPullingTaskIds.length > 0) { + this.taskRegistry.deleteAll(failedPullingTaskIds); + } return this.taskRegistry.createTask(`Downloading model ${model.name}`, 'loading', { ...labels, 'model-pulling': model.id, @@ -365,12 +376,26 @@ export class ModelsManager implements Disposable { private async downloadModel(model: ModelInfo, task: Task): Promise { // Check if the model is already on disk. if (this.isModelOnDisk(model.id)) { - task.state = 'success'; task.name = `Model ${model.name} already present on disk`; + + const modelPath = this.getLocalModelPath(model.id); + if (model.sha256) { + const isValid = await hasValidSha(modelPath, model.sha256); + if (!isValid) { + task.state = 'error'; + task.error = `Model ${model.name} is already present on disk at ${modelPath} but its security hash (SHA-256) does not match the expected value. This may indicate the file has been altered or corrupted. Please delete it and try again.`; + this.taskRegistry.updateTask(task); // update task + throw new Error( + `Model ${model.name} is already present on disk at ${modelPath} but its security hash (SHA-256) does not match the expected value. This may indicate the file has been altered or corrupted. 
Please delete it and try again.`, + ); + } + } + + task.state = 'success'; this.taskRegistry.updateTask(task); // update task // return model path - return this.getLocalModelPath(model.id); + return modelPath; } const abortController = new AbortController(); diff --git a/packages/backend/src/utils/downloader.spec.ts b/packages/backend/src/utils/downloader.spec.ts index 2702ed622..146a38f62 100644 --- a/packages/backend/src/utils/downloader.spec.ts +++ b/packages/backend/src/utils/downloader.spec.ts @@ -96,8 +96,10 @@ test('perform download failed', async () => { const listenerMock = vi.fn(); downloader.onEvent(listenerMock); + const rejectSpy = vi.fn(); + // perform download logic (do not wait) - void downloader.perform('followUpId'); + downloader.perform('followUpId').catch((e: unknown) => rejectSpy(e)); // wait for listener to be registered await vi.waitFor(() => { @@ -122,6 +124,8 @@ test('perform download failed', async () => { status: 'error', }); expect(promises.rm).toHaveBeenCalledWith('dummyTarget.tmp'); + + expect(rejectSpy).toHaveBeenCalledWith('dummyError'); }); test('perform download successfully', async () => { diff --git a/packages/backend/src/utils/downloader.ts b/packages/backend/src/utils/downloader.ts index ffceaf85a..c9883a00f 100644 --- a/packages/backend/src/utils/downloader.ts +++ b/packages/backend/src/utils/downloader.ts @@ -18,6 +18,7 @@ import { getDurationSecondsSince } from './utils'; import { createWriteStream, promises } from 'node:fs'; +import crypto from 'node:crypto'; import https from 'node:https'; import { EventEmitter, type Event } from '@podman-desktop/api'; import type { CompletionEvent, ProgressEvent, BaseEvent } from '../models/baseEvent'; @@ -32,6 +33,7 @@ export class Downloader { constructor( private url: string, private target: string, + private sha256?: string, private abortSignal?: AbortSignal, ) {} @@ -39,7 +41,7 @@ export class Downloader { return this.target; } - async perform(id: string) { + async perform(id: 
string): Promise { this.requestedIdentifier = id; const startTime = performance.now(); @@ -66,6 +68,7 @@ export class Downloader { message: `Request cancelled: ${String(err)}.`, }); } + throw err; } finally { this.completed = true; } @@ -90,6 +93,10 @@ export class Downloader { let totalFileSize = 0; let progress = 0; let previousProgressValue = -1; + let checkSum: crypto.Hash; + if (this.sha256) { + checkSum = crypto.createHash('sha256'); + } https.get(url, { signal: this.abortSignal }, resp => { // Determine the total size @@ -113,6 +120,9 @@ export class Downloader { // On data resp.on('data', chunk => { + if (checkSum) { + checkSum.update(chunk); + } progress += chunk.length; const progressValue = (progress * 100) / totalFileSize; @@ -150,6 +160,19 @@ export class Downloader { return; } + if (checkSum) { + const actualSha = checkSum.digest('hex'); + if (this.sha256 !== actualSha) { + callback({ + error: `The file's security hash (SHA-256) does not match the expected value. The file may have been altered or corrupted during the download process`, + }); + promises.rm(tmpFile).catch((err: unknown) => { + console.error(`Something went wrong while trying to delete ${tmpFile}`, err); + }); + return; + } + } + // If everything is fine we simply rename the tmp file to the expected one promises .rename(tmpFile, this.target) diff --git a/packages/backend/src/utils/sha.spec.ts b/packages/backend/src/utils/sha.spec.ts new file mode 100644 index 000000000..a30ef9933 --- /dev/null +++ b/packages/backend/src/utils/sha.spec.ts @@ -0,0 +1,42 @@ +/********************************************************************** + * Copyright (C) 2024 Red Hat, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ***********************************************************************/ +import { afterAll, beforeAll, expect, test } from 'vitest'; +import { promises } from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { hasValidSha } from './sha'; + +let file: string; +beforeAll(async () => { + // hash a real temporary file so the tests do not depend on how hasValidSha reads it + file = path.join(await promises.mkdtemp(path.join(os.tmpdir(), 'sha-')), 'file'); + await promises.writeFile(file, 'test'); +}); +afterAll(() => promises.rm(path.dirname(file), { recursive: true, force: true })); + +test('return true if file has same hash of the expected one', async () => { + // sha of test => 9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08 + const isValid = await hasValidSha(file, '9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08'); + expect(isValid).toBe(true); +}); + +test('return false if file has different hash of the expected one', async () => { + // 'fakeSha' cannot equal the digest of the file, so validation must fail + const isValid = await hasValidSha(file, 'fakeSha'); + expect(isValid).toBe(false); +}); diff --git a/packages/backend/src/utils/sha.ts b/packages/backend/src/utils/sha.ts new file mode 100644 index 000000000..4b574262a --- /dev/null +++ b/packages/backend/src/utils/sha.ts @@ -0,0 +1,35 @@ +/********************************************************************** + * Copyright (C) 2024 Red Hat, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ***********************************************************************/ +import crypto from 'node:crypto'; +import { createReadStream } from 'node:fs'; + +/** + * Check whether the file at `filePath` has the expected SHA-256 digest. + * The file is streamed through the hash chunk by chunk instead of being loaded with `readFile`, because catalog models are several gigabytes large and cannot be buffered in memory as a whole. + * @param filePath path of the file to hash + * @param expectedSha expected digest, HEX encoded; compared verbatim with the output of `digest('hex')` + * @returns true when the computed digest equals `expectedSha`, false otherwise + */ +export async function hasValidSha(filePath: string, expectedSha: string): Promise<boolean> { + const checkSum = crypto.createHash('sha256'); + for await (const chunk of createReadStream(filePath)) { + checkSum.update(chunk); + } + const actualSha = checkSum.digest('hex'); + return actualSha === expectedSha; +} diff --git a/packages/frontend/src/lib/progress/TaskItem.svelte b/packages/frontend/src/lib/progress/TaskItem.svelte index 43ab469c0..0bc1a99b7 100644 --- a/packages/frontend/src/lib/progress/TaskItem.svelte +++ b/packages/frontend/src/lib/progress/TaskItem.svelte @@ -16,7 +16,7 @@ const cancel = () => {
-
+
{#if task.state === 'success'} !model.file); $: importedModels = filteredModels.filter(model => !model.url); function filterModels(): void { - // Let's collect the models we do not want to show (loading, error). + // Let's collect the models we do not want to show (loading). const modelsId: string[] = pullingTasks.reduce((previousValue, currentValue) => { - if (currentValue.labels !== undefined) { + if (currentValue.labels !== undefined && currentValue.state !== 'error') { previousValue.push(currentValue.labels['model-pulling']); } return previousValue; @@ -73,7 +72,7 @@ onMount(() => { const modelIds = new Set(); pullingTasks = value.reduce((filtered: Task[], task: Task) => { if ( - task.state === 'loading' && + (task.state === 'loading' || task.state === 'error') && task.labels !== undefined && 'model-pulling' in task.labels && !modelIds.has(task.labels['model-pulling']) diff --git a/packages/shared/src/models/IModelInfo.ts b/packages/shared/src/models/IModelInfo.ts index ebcb3b09c..aee396ceb 100644 --- a/packages/shared/src/models/IModelInfo.ts +++ b/packages/shared/src/models/IModelInfo.ts @@ -32,6 +32,7 @@ export interface ModelInfo { properties?: { [key: string]: string; }; + sha256?: string; } export type ModelCheckerContext = 'inference' | 'recipe';