Skip to content

Commit

Permalink
feat: add checksum verification for downloaded models
Browse files Browse the repository at this point in the history
Signed-off-by: lstocchi <[email protected]>
  • Loading branch information
lstocchi committed May 13, 2024
1 parent f523579 commit b4979a3
Show file tree
Hide file tree
Showing 23 changed files with 322 additions and 104 deletions.
45 changes: 30 additions & 15 deletions packages/backend/src/assets/ai.json
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@
"memory": 4080218931,
"properties": {
"chatFormat": "openchat"
}
},
"sha": "6adeaad8c048b35ea54562c55e454cc32c63118a32c7b8152cf706b290611487"
},
{
"id": "hf.instructlab.merlinite-7b-lab-GGUF",
Expand All @@ -136,7 +137,8 @@
"memory": 4370129224,
"properties": {
"chatFormat": "openchat"
}
},
"sha": "9ca044d727db34750e1aeb04e3b18c3cf4a8c064a9ac96cf00448c506631d16c"
},
{
"id": "hf.TheBloke.mistral-7b-instruct-v0.2.Q4_K_M",
Expand All @@ -146,7 +148,8 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
"memory": 4370129224
"memory": 4370129224,
"sha": "3e0039fd0273fcbebb49228943b17831aadd55cbcbf56f0af00499be2040ccf9"
},
{
"id": "hf.NousResearch.Hermes-2-Pro-Mistral-7B.Q4_K_M",
Expand All @@ -156,7 +159,8 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/resolve/main/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf",
"memory": 4370129224
"memory": 4370129224,
"sha": "e1e4253b94e3c04c7b6544250f29ad864a56eb2126e61eb440991a8284453674"
},
{
"id": "hf.ibm.merlinite-7b-Q4_K_M",
Expand All @@ -169,7 +173,8 @@
"memory": 4370129224,
"properties": {
"chatFormat": "openchat"
}
},
"sha": "94f3a16321c9604ca22e970f3b89931ae5b4bbfd4c5d996e2bb606c506590666"
},
{
"id": "hf.TheBloke.mistral-7b-codealpaca-lora.Q4_K_M",
Expand All @@ -179,7 +184,8 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/TheBloke/Mistral-7B-codealpaca-lora-GGUF/resolve/main/mistral-7b-codealpaca-lora.Q4_K_M.gguf",
"memory": 4370129224
"memory": 4370129224,
"sha": "69c07f27f682ca8da59fcd8a981335876882a2577f0f9df51b49cf6b97fd470f"
},
{
"id": "hf.TheBloke.mistral-7b-code-16k-qlora.Q4_K_M",
Expand All @@ -189,7 +195,8 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/TheBloke/Mistral-7B-Code-16K-qlora-GGUF/resolve/main/mistral-7b-code-16k-qlora.Q4_K_M.gguf",
"memory": 4370129224
"memory": 4370129224,
"sha": "0f3c9aced2de6caad52323fea5a92a22fba0b4efddb564fda7a3071e0614443f"
},
{
"id": "hf.froggeric.Cerebrum-1.0-7b-Q4_KS",
Expand All @@ -202,7 +209,8 @@
"memory": 4144643441,
"properties": {
"chatFormat": "openchat"
}
},
"sha": "98861462a0a80e08704631df23ffee860bd5634551c48d069d4daa3c8931bc52"
},
{
"id": "hf.TheBloke.openchat-3.5-0106.Q4_K_M",
Expand All @@ -212,7 +220,8 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF/resolve/main/openchat-3.5-0106.Q4_K_M.gguf",
"memory": 4370129224
"memory": 4370129224,
"sha": "49190d4d039e6dea463e567ebce707eb001648f4ba01e43eb7fa88d9975fc0ce"
},
{
"id": "hf.TheBloke.mistral-7b-openorca.Q4_K_M",
Expand All @@ -222,7 +231,8 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF/resolve/main/mistral-7b-openorca.Q4_K_M.gguf",
"memory": 4370129224
"memory": 4370129224,
"sha": "83967e58c10c25fbe9d358b6d9e9a8212ca8a292061110dcb68511d39133407b"
},
{
"id": "hf.MaziyarPanahi.phi-2.Q4_K_M",
Expand All @@ -232,7 +242,8 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/MaziyarPanahi/phi-2-GGUF/resolve/main/phi-2.Q4_K_M.gguf",
"memory": 1739461755
"memory": 1739461755,
"sha": "013e0e421b70dc169adb0c0010171202371e907e5f648084e4ddc8ad9985127a"
},
{
"id": "hf.llmware.dragon-mistral-7b-q4_k_m",
Expand All @@ -245,7 +256,8 @@
"memory": 4370129224,
"properties": {
"chatFormat": "openchat"
}
},
"sha": "1d8f463c4917480b770db5d7921f3d144471891c45a0d25ba3ab3dd753ec620f"
},
{
"id": "hf.MaziyarPanahi.MixTAO-7Bx2-MoE-Instruct-v7.0.Q4_K_M",
Expand All @@ -255,7 +267,8 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/MaziyarPanahi/MixTAO-7Bx2-MoE-Instruct-v7.0-GGUF/resolve/main/MixTAO-7Bx2-MoE-Instruct-v7.0.Q4_K_M.gguf",
"memory": 7784628224
"memory": 7784628224,
"sha": "f5fcf04c77a5b69ae37791b48df90daa553e40b5a39efc9068258bedef373182"
},
{
"id": "hf.ggerganov.whisper.cpp",
Expand All @@ -265,7 +278,8 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin",
"memory": 487010000
"memory": 487010000,
"sha": "1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b"
},
{
"id": "hf.facebook.detr-resnet-101",
Expand All @@ -278,7 +292,8 @@
"memory": 242980000,
"properties": {
"name": "facebook/detr-resnet-101"
}
},
"sha": "0943b5a9085a95a0e3ecc1c99a7db0451ecb9d79f4dcb543b0939c1a12481a5d"
}
],
"categories": [
Expand Down
29 changes: 29 additions & 0 deletions packages/backend/src/managers/modelsManager.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import type { ModelInfo } from '@shared/src/models/IModelInfo';
import * as utils from '../utils/utils';
import { TaskRegistry } from '../registries/TaskRegistry';
import type { CancellationTokenRegistry } from '../registries/CancellationTokenRegistry';
import * as sha from '../utils/sha';

const mocks = vi.hoisted(() => {
return {
Expand Down Expand Up @@ -690,6 +691,34 @@ describe('downloadModel', () => {
state: 'success',
});
});
test('fail if model on disk has different sha of the expected value', async () => {
const manager = new ModelsManager(
'appdir',
{} as Webview,
{
getModels(): ModelInfo[] {
return [];
},
} as CatalogManager,
telemetryLogger,
taskRegistry,
cancellationTokenRegistryMock,
);
vi.spyOn(taskRegistry, 'updateTask');
vi.spyOn(manager, 'isModelOnDisk').mockReturnValue(true);
vi.spyOn(manager, 'getLocalModelPath').mockReturnValue('path');
vi.spyOn(sha, 'hasValidSha').mockResolvedValue(false);
await expect(() =>
manager.requestDownloadModel({
id: 'id',
url: 'url',
name: 'name',
sha: 'sha',
} as ModelInfo),
).rejects.toThrowError(
'Model name is already present on disk at path but its security hash (SHA) does not match the expected value. This may indicate the file has been altered or corrupted. Please delete it and try again.',
);
});
test('multiple download request same model - second call after first completed', async () => {
mocks.getDownloaderCompleter.mockReturnValue(true);

Expand Down
21 changes: 18 additions & 3 deletions packages/backend/src/managers/modelsManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import { Uploader } from '../utils/uploader';
import { deleteRemoteModel, getLocalModelFile, isModelUploaded } from '../utils/modelsUtils';
import { getFirstRunningMachineName } from '../utils/podman';
import type { CancellationTokenRegistry } from '../registries/CancellationTokenRegistry';
import { hasValidSha } from '../utils/sha';

export class ModelsManager implements Disposable {
#modelsDir: string;
Expand Down Expand Up @@ -340,7 +341,7 @@ export class ModelsManager implements Disposable {

const target = path.resolve(destDir, path.basename(model.url));
// Create a downloader
const downloader = new Downloader(model.url, target, abortSignal);
const downloader = new Downloader(model.url, target, model.sha, abortSignal);

this.#downloaders.set(model.id, downloader);

Expand All @@ -357,12 +358,26 @@ export class ModelsManager implements Disposable {
private async downloadModel(model: ModelInfo, task: Task): Promise<string> {
// Check if the model is already on disk.
if (this.isModelOnDisk(model.id)) {
task.state = 'success';
task.name = `Model ${model.name} already present on disk`;

const modelPath = this.getLocalModelPath(model.id);
if (model.sha) {
const isValid = await hasValidSha(modelPath, model.sha);
if (!isValid) {
task.state = 'error';
task.error = `Model ${model.name} is already present on disk at ${modelPath} but its security hash (SHA) does not match the expected value. This may indicate the file has been altered or corrupted. Please delete it and try again.`;
this.taskRegistry.updateTask(task); // update task
throw new Error(
`Model ${model.name} is already present on disk at ${modelPath} but its security hash (SHA) does not match the expected value. This may indicate the file has been altered or corrupted. Please delete it and try again.`,
);
}
}

task.state = 'success';
this.taskRegistry.updateTask(task); // update task

// return model path
return this.getLocalModelPath(model.id);
return modelPath;
}

const abortController = new AbortController();
Expand Down
6 changes: 5 additions & 1 deletion packages/backend/src/utils/downloader.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,10 @@ test('perform download failed', async () => {
const listenerMock = vi.fn();
downloader.onEvent(listenerMock);

const rejectSpy = vi.fn();

// perform download logic (do not wait)
void downloader.perform('followUpId');
downloader.perform('followUpId').catch((e: unknown) => rejectSpy(e));

// wait for listener to be registered
await vi.waitFor(() => {
Expand All @@ -122,6 +124,8 @@ test('perform download failed', async () => {
status: 'error',
});
expect(promises.rm).toHaveBeenCalledWith('dummyTarget.tmp');

expect(rejectSpy).toHaveBeenCalledWith('dummyError');
});

test('perform download successfully', async () => {
Expand Down
23 changes: 22 additions & 1 deletion packages/backend/src/utils/downloader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import { getDurationSecondsSince } from './utils';
import { createWriteStream, promises } from 'node:fs';
import crypto from 'node:crypto';
import https from 'node:https';
import { EventEmitter, type Event } from '@podman-desktop/api';
import type { CompletionEvent, ProgressEvent, BaseEvent } from '../models/baseEvent';
Expand All @@ -32,14 +33,15 @@ export class Downloader {
constructor(
private url: string,
private target: string,
private sha?: string,
private abortSignal?: AbortSignal,
) {}

getTarget(): string {
return this.target;
}

async perform(id: string) {
async perform(id: string): Promise<void> {
this.requestedIdentifier = id;
const startTime = performance.now();

Expand All @@ -66,6 +68,7 @@ export class Downloader {
message: `Request cancelled: ${String(err)}.`,
});
}
throw err;
} finally {
this.completed = true;
}
Expand All @@ -90,6 +93,10 @@ export class Downloader {
let totalFileSize = 0;
let progress = 0;
let previousProgressValue = -1;
let checkSum: crypto.Hash;
if (this.sha) {
checkSum = crypto.createHash('sha256');
}

https.get(url, { signal: this.abortSignal }, resp => {
// Determine the total size
Expand All @@ -113,6 +120,9 @@ export class Downloader {

// On data
resp.on('data', chunk => {
if (checkSum) {
checkSum.update(chunk);
}
progress += chunk.length;
const progressValue = (progress * 100) / totalFileSize;

Expand Down Expand Up @@ -150,6 +160,17 @@ export class Downloader {
return;
}

if (checkSum) {
const actualSha = checkSum.digest('hex');
if (this.sha !== actualSha) {
callback({
error:
"The file's security hash (SHA) does not match the expected value. The file may have been altered or corrupted during the download process",

Check failure on line 168 in packages/backend/src/utils/downloader.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / windows-2022

Strings must use singlequote

Check failure on line 168 in packages/backend/src/utils/downloader.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / ubuntu-22.04

Strings must use singlequote

Check failure on line 168 in packages/backend/src/utils/downloader.ts

View workflow job for this annotation

GitHub Actions / linter, formatters and unit tests / macos-12

Strings must use singlequote
});
return;
}
}

// If everything is fine we simply rename the tmp file to the expected one
promises
.rename(tmpFile, this.target)
Expand Down
42 changes: 42 additions & 0 deletions packages/backend/src/utils/sha.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/**********************************************************************
* Copyright (C) 2024 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
***********************************************************************/
import { beforeEach, expect, test, vi } from 'vitest';
import { promises } from 'node:fs';
import { hasValidSha } from './sha';

beforeEach(() => {
vi.resetAllMocks();
});

test('return true if file has same hash of the expected one', () => {
vi.mock('node:fs');
vi.spyOn(promises, 'readFile').mockImplementation(() => Promise.resolve(Buffer.from('test')));

// sha of test => 9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08
const isValid = hasValidSha('file', '9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08');
expect(isValid).toBeTruthy();
});

test('return false if file has different hash of the expected one', () => {
vi.mock('node:fs');
vi.spyOn(promises, 'readFile').mockImplementation(() => Promise.resolve(Buffer.from('test')));

// sha of test => 9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08
const isValid = hasValidSha('file', 'fakeSha');
expect(isValid).toBeTruthy();
});
Loading

0 comments on commit b4979a3

Please sign in to comment.