Skip to content

Commit

Permalink
feat: add checksum verification for downloaded models (#1078)
Browse files Browse the repository at this point in the history
* feat: add checksum verification for downloaded models

Signed-off-by: lstocchi <[email protected]>

* fix: fix lint

Signed-off-by: lstocchi <[email protected]>

* chore: update packaging-guide and rename sha to sha256

Signed-off-by: lstocchi <[email protected]>

* fix: remove unused property in Tooltip

Signed-off-by: lstocchi <[email protected]>

* chore: show error into downloading top area

Signed-off-by: lstocchi <[email protected]>

* chore: enhance doc

Signed-off-by: lstocchi <[email protected]>

---------

Signed-off-by: lstocchi <[email protected]>
  • Loading branch information
lstocchi authored May 16, 2024
1 parent 0ff14c7 commit e22af21
Show file tree
Hide file tree
Showing 11 changed files with 192 additions and 25 deletions.
1 change: 1 addition & 0 deletions PACKAGING-GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ A model has the following attributes:
- ```license```: the license under which the model is available
- ```url```: the URL used to download the model
- ```memory```: the memory footprint of the model in bytes, as computed by the workflow `.github/workflows/compute-model-sizes.yaml`
- ```sha256```: the SHA-256 checksum used to verify that the downloaded model is identical to the original. This attribute is optional; when provided, it must be hex-encoded

#### Recipes

Expand Down
45 changes: 30 additions & 15 deletions packages/backend/src/assets/ai.json
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@
"memory": 4080218931,
"properties": {
"chatFormat": "openchat"
}
},
"sha256": "6adeaad8c048b35ea54562c55e454cc32c63118a32c7b8152cf706b290611487"
},
{
"id": "hf.instructlab.merlinite-7b-lab-GGUF",
Expand All @@ -136,7 +137,8 @@
"memory": 4370129224,
"properties": {
"chatFormat": "openchat"
}
},
"sha256": "9ca044d727db34750e1aeb04e3b18c3cf4a8c064a9ac96cf00448c506631d16c"
},
{
"id": "hf.TheBloke.mistral-7b-instruct-v0.2.Q4_K_M",
Expand All @@ -146,7 +148,8 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
"memory": 4370129224
"memory": 4370129224,
"sha256": "3e0039fd0273fcbebb49228943b17831aadd55cbcbf56f0af00499be2040ccf9"
},
{
"id": "hf.NousResearch.Hermes-2-Pro-Mistral-7B.Q4_K_M",
Expand All @@ -156,7 +159,8 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/resolve/main/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf",
"memory": 4370129224
"memory": 4370129224,
"sha256": "e1e4253b94e3c04c7b6544250f29ad864a56eb2126e61eb440991a8284453674"
},
{
"id": "hf.ibm.merlinite-7b-Q4_K_M",
Expand All @@ -169,7 +173,8 @@
"memory": 4370129224,
"properties": {
"chatFormat": "openchat"
}
},
"sha256": "94f3a16321c9604ca22e970f3b89931ae5b4bbfd4c5d996e2bb606c506590666"
},
{
"id": "hf.TheBloke.mistral-7b-codealpaca-lora.Q4_K_M",
Expand All @@ -179,7 +184,8 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/TheBloke/Mistral-7B-codealpaca-lora-GGUF/resolve/main/mistral-7b-codealpaca-lora.Q4_K_M.gguf",
"memory": 4370129224
"memory": 4370129224,
"sha256": "69c07f27f682ca8da59fcd8a981335876882a2577f0f9df51b49cf6b97fd470f"
},
{
"id": "hf.TheBloke.mistral-7b-code-16k-qlora.Q4_K_M",
Expand All @@ -189,7 +195,8 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/TheBloke/Mistral-7B-Code-16K-qlora-GGUF/resolve/main/mistral-7b-code-16k-qlora.Q4_K_M.gguf",
"memory": 4370129224
"memory": 4370129224,
"sha256": "0f3c9aced2de6caad52323fea5a92a22fba0b4efddb564fda7a3071e0614443f"
},
{
"id": "hf.froggeric.Cerebrum-1.0-7b-Q4_KS",
Expand All @@ -202,7 +209,8 @@
"memory": 4144643441,
"properties": {
"chatFormat": "openchat"
}
},
"sha256": "98861462a0a80e08704631df23ffee860bd5634551c48d069d4daa3c8931bc52"
},
{
"id": "hf.TheBloke.openchat-3.5-0106.Q4_K_M",
Expand All @@ -212,7 +220,8 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF/resolve/main/openchat-3.5-0106.Q4_K_M.gguf",
"memory": 4370129224
"memory": 4370129224,
"sha256": "49190d4d039e6dea463e567ebce707eb001648f4ba01e43eb7fa88d9975fc0ce"
},
{
"id": "hf.TheBloke.mistral-7b-openorca.Q4_K_M",
Expand All @@ -222,7 +231,8 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF/resolve/main/mistral-7b-openorca.Q4_K_M.gguf",
"memory": 4370129224
"memory": 4370129224,
"sha256": "83967e58c10c25fbe9d358b6d9e9a8212ca8a292061110dcb68511d39133407b"
},
{
"id": "hf.MaziyarPanahi.phi-2.Q4_K_M",
Expand All @@ -232,7 +242,8 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/MaziyarPanahi/phi-2-GGUF/resolve/main/phi-2.Q4_K_M.gguf",
"memory": 1739461755
"memory": 1739461755,
"sha256": "013e0e421b70dc169adb0c0010171202371e907e5f648084e4ddc8ad9985127a"
},
{
"id": "hf.llmware.dragon-mistral-7b-q4_k_m",
Expand All @@ -245,7 +256,8 @@
"memory": 4370129224,
"properties": {
"chatFormat": "openchat"
}
},
"sha256": "1d8f463c4917480b770db5d7921f3d144471891c45a0d25ba3ab3dd753ec620f"
},
{
"id": "hf.MaziyarPanahi.MixTAO-7Bx2-MoE-Instruct-v7.0.Q4_K_M",
Expand All @@ -255,7 +267,8 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/MaziyarPanahi/MixTAO-7Bx2-MoE-Instruct-v7.0-GGUF/resolve/main/MixTAO-7Bx2-MoE-Instruct-v7.0.Q4_K_M.gguf",
"memory": 7784628224
"memory": 7784628224,
"sha256": "f5fcf04c77a5b69ae37791b48df90daa553e40b5a39efc9068258bedef373182"
},
{
"id": "hf.ggerganov.whisper.cpp",
Expand All @@ -265,7 +278,8 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin",
"memory": 487010000
"memory": 487010000,
"sha256": "1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b"
},
{
"id": "hf.facebook.detr-resnet-101",
Expand All @@ -278,7 +292,8 @@
"memory": 242980000,
"properties": {
"name": "facebook/detr-resnet-101"
}
},
"sha256": "0943b5a9085a95a0e3ecc1c99a7db0451ecb9d79f4dcb543b0939c1a12481a5d"
}
],
"categories": [
Expand Down
29 changes: 29 additions & 0 deletions packages/backend/src/managers/modelsManager.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import type { ModelInfo } from '@shared/src/models/IModelInfo';
import * as utils from '../utils/utils';
import { TaskRegistry } from '../registries/TaskRegistry';
import type { CancellationTokenRegistry } from '../registries/CancellationTokenRegistry';
import * as sha from '../utils/sha';

const mocks = vi.hoisted(() => {
return {
Expand Down Expand Up @@ -731,6 +732,34 @@ describe('downloadModel', () => {
state: 'success',
});
});
// Verifies that requestDownloadModel rejects when the model is already on disk
// but the checksum validation reports a mismatch with the model's sha256.
test('fail if model on disk has different sha of the expected value', async () => {
const manager = new ModelsManager(
'appdir',
{} as Webview,
{
getModels(): ModelInfo[] {
return [];
},
} as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
);
vi.spyOn(taskRegistry, 'updateTask');
// Simulate a model file that already exists locally at 'path'...
vi.spyOn(manager, 'isModelOnDisk').mockReturnValue(true);
vi.spyOn(manager, 'getLocalModelPath').mockReturnValue('path');
// ...whose checksum validation fails.
vi.spyOn(sha, 'hasValidSha').mockResolvedValue(false);
// The rejection message is expected to contain both the model name ('name')
// and the mocked on-disk location ('path').
await expect(() =>
manager.requestDownloadModel({
id: 'id',
url: 'url',
name: 'name',
sha256: 'sha',
} as ModelInfo),
).rejects.toThrowError(
'Model name is already present on disk at path but its security hash (SHA-256) does not match the expected value. This may indicate the file has been altered or corrupted. Please delete it and try again.',
);
});
test('multiple download request same model - second call after first completed', async () => {
mocks.getDownloaderCompleter.mockReturnValue(true);

Expand Down
31 changes: 28 additions & 3 deletions packages/backend/src/managers/modelsManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import { Uploader } from '../utils/uploader';
import { deleteRemoteModel, getLocalModelFile, isModelUploaded } from '../utils/modelsUtils';
import { getFirstRunningMachineName } from '../utils/podman';
import type { CancellationTokenRegistry } from '../registries/CancellationTokenRegistry';
import { hasValidSha } from '../utils/sha';

export class ModelsManager implements Disposable {
#modelsDir: string = '';
Expand Down Expand Up @@ -348,14 +349,24 @@ export class ModelsManager implements Disposable {

const target = path.resolve(destDir, path.basename(model.url));
// Create a downloader
const downloader = new Downloader(model.url, target, abortSignal);
const downloader = new Downloader(model.url, target, model.sha256, abortSignal);

this.#downloaders.set(model.id, downloader);

return downloader;
}

private createDownloadTask(model: ModelInfo, labels?: { [key: string]: string }): Task {
// The taskRegistry may still contain tasks left over from a previous failed download of this model; delete them before starting a new download
const failedPullingTaskIds = this.taskRegistry
.getTasksByLabels({
'model-pulling': model.id,
})
.filter(t => t.state === 'error')
.map(t => t.id);
if (failedPullingTaskIds.length > 0) {
this.taskRegistry.deleteAll(failedPullingTaskIds);
}
return this.taskRegistry.createTask(`Downloading model ${model.name}`, 'loading', {
...labels,
'model-pulling': model.id,
Expand All @@ -365,12 +376,26 @@ export class ModelsManager implements Disposable {
private async downloadModel(model: ModelInfo, task: Task): Promise<string> {
// Check if the model is already on disk.
if (this.isModelOnDisk(model.id)) {
task.state = 'success';
task.name = `Model ${model.name} already present on disk`;

const modelPath = this.getLocalModelPath(model.id);
if (model.sha256) {
const isValid = await hasValidSha(modelPath, model.sha256);
if (!isValid) {
task.state = 'error';
task.error = `Model ${model.name} is already present on disk at ${modelPath} but its security hash (SHA-256) does not match the expected value. This may indicate the file has been altered or corrupted. Please delete it and try again.`;
this.taskRegistry.updateTask(task); // update task
throw new Error(
`Model ${model.name} is already present on disk at ${modelPath} but its security hash (SHA-256) does not match the expected value. This may indicate the file has been altered or corrupted. Please delete it and try again.`,
);
}
}

task.state = 'success';
this.taskRegistry.updateTask(task); // update task

// return model path
return this.getLocalModelPath(model.id);
return modelPath;
}

const abortController = new AbortController();
Expand Down
6 changes: 5 additions & 1 deletion packages/backend/src/utils/downloader.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,10 @@ test('perform download failed', async () => {
const listenerMock = vi.fn();
downloader.onEvent(listenerMock);

const rejectSpy = vi.fn();

// perform download logic (do not wait)
void downloader.perform('followUpId');
downloader.perform('followUpId').catch((e: unknown) => rejectSpy(e));

// wait for listener to be registered
await vi.waitFor(() => {
Expand All @@ -122,6 +124,8 @@ test('perform download failed', async () => {
status: 'error',
});
expect(promises.rm).toHaveBeenCalledWith('dummyTarget.tmp');

expect(rejectSpy).toHaveBeenCalledWith('dummyError');
});

test('perform download successfully', async () => {
Expand Down
25 changes: 24 additions & 1 deletion packages/backend/src/utils/downloader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import { getDurationSecondsSince } from './utils';
import { createWriteStream, promises } from 'node:fs';
import crypto from 'node:crypto';
import https from 'node:https';
import { EventEmitter, type Event } from '@podman-desktop/api';
import type { CompletionEvent, ProgressEvent, BaseEvent } from '../models/baseEvent';
Expand All @@ -32,14 +33,15 @@ export class Downloader {
/**
 * @param url address of the file to download (presumably the URL requested by
 * `perform`; the request code is only partly visible here, so confirm)
 * @param target path the temporary download file is renamed to once the
 * download succeeds; also returned by `getTarget`
 * @param sha256 optional expected checksum; when provided, a SHA-256 hash of
 * the received data is computed and its hex digest is compared with this value
 * @param abortSignal optional signal forwarded to the HTTPS request
 */
constructor(
private url: string,
private target: string,
private sha256?: string,
private abortSignal?: AbortSignal,
) {}

/**
 * @returns the target path this downloader was constructed with
 */
getTarget(): string {
return this.target;
}

async perform(id: string) {
async perform(id: string): Promise<void> {
this.requestedIdentifier = id;
const startTime = performance.now();

Expand All @@ -66,6 +68,7 @@ export class Downloader {
message: `Request cancelled: ${String(err)}.`,
});
}
throw err;
} finally {
this.completed = true;
}
Expand All @@ -90,6 +93,10 @@ export class Downloader {
let totalFileSize = 0;
let progress = 0;
let previousProgressValue = -1;
let checkSum: crypto.Hash;
if (this.sha256) {
checkSum = crypto.createHash('sha256');
}

https.get(url, { signal: this.abortSignal }, resp => {
// Determine the total size
Expand All @@ -113,6 +120,9 @@ export class Downloader {

// On data
resp.on('data', chunk => {
if (checkSum) {
checkSum.update(chunk);
}
progress += chunk.length;
const progressValue = (progress * 100) / totalFileSize;

Expand Down Expand Up @@ -150,6 +160,19 @@ export class Downloader {
return;
}

if (checkSum) {
const actualSha = checkSum.digest('hex');
if (this.sha256 !== actualSha) {
callback({
error: `The file's security hash (SHA-256) does not match the expected value. The file may have been altered or corrupted during the download process`,
});
promises.rm(tmpFile).catch((err: unknown) => {
console.error(`Something went wrong while trying to delete ${tmpFile}`, err);
});
return;
}
}

// If everything is fine we simply rename the tmp file to the expected one
promises
.rename(tmpFile, this.target)
Expand Down
Loading

0 comments on commit e22af21

Please sign in to comment.