diff --git a/packages/backend/src/managers/GPUManager.spec.ts b/packages/backend/src/managers/GPUManager.spec.ts
index f87470236..8b5ed423d 100644
--- a/packages/backend/src/managers/GPUManager.spec.ts
+++ b/packages/backend/src/managers/GPUManager.spec.ts
@@ -105,3 +105,25 @@ test('NVIDIA controller should return intel vendor', async () => {
     },
   ]);
 });
+
+test('NVIDIA controller can have vendor "NVIDIA Corporation"', async () => {
+  vi.mocked(graphics).mockResolvedValue({
+    controllers: [
+      {
+        vendor: 'NVIDIA Corporation',
+        model: 'NVIDIA GeForce GTX 1060 6GB',
+        vram: 6144,
+      } as unknown as Systeminformation.GraphicsControllerData,
+    ],
+    displays: [],
+  });
+
+  const manager = new GPUManager(webviewMock);
+  expect(await manager.collectGPUs()).toStrictEqual([
+    {
+      vendor: GPUVendor.NVIDIA,
+      model: 'NVIDIA GeForce GTX 1060 6GB',
+      vram: 6144,
+    },
+  ]);
+});
diff --git a/packages/backend/src/managers/GPUManager.ts b/packages/backend/src/managers/GPUManager.ts
index 1ddd324c2..97d99beea 100644
--- a/packages/backend/src/managers/GPUManager.ts
+++ b/packages/backend/src/managers/GPUManager.ts
@@ -53,6 +53,7 @@ export class GPUManager extends Publisher<IGPUInfo[]> implements Disposable {
       case 'Intel Corporation':
         return GPUVendor.INTEL;
       case 'NVIDIA':
+      case 'NVIDIA Corporation':
         return GPUVendor.NVIDIA;
       case 'Apple':
         return GPUVendor.APPLE;
diff --git a/packages/backend/src/workers/provider/LlamaCppPython.spec.ts b/packages/backend/src/workers/provider/LlamaCppPython.spec.ts
index 921471837..1be3d28a0 100644
--- a/packages/backend/src/workers/provider/LlamaCppPython.spec.ts
+++ b/packages/backend/src/workers/provider/LlamaCppPython.spec.ts
@@ -313,6 +313,100 @@ describe('perform', () => {
     expect(server.labels['gpu']).toBe('nvidia');
   });
 
+  test('gpu experimental should collect GPU data and find first supported gpu - entry 1 supported', async () => {
+    vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
+      experimentalGPU: true,
+      modelsPath: '',
+      apiPort: 10434,
+      experimentalTuning: false,
+      modelUploadDisabled: false,
+      showGPUPromotion: false,
+    });
+
+    vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
+      {
+        vram: 1024,
+        model: 'dummy-model',
+        vendor: GPUVendor.UNKNOWN,
+      },
+      {
+        vram: 1024,
+        model: 'nvidia',
+        vendor: GPUVendor.NVIDIA,
+      },
+    ]);
+
+    const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
+    const server = await provider.perform({
+      port: 8000,
+      image: undefined,
+      labels: {},
+      modelsInfo: [DummyModel],
+      connection: undefined,
+    });
+
+    expect(containerEngine.createContainer).toHaveBeenCalledWith(
+      DummyImageInfo.engineId,
+      expect.objectContaining({
+        Cmd: [
+          '-c',
+          '/usr/bin/ln -sfn /usr/lib/wsl/lib/* /usr/lib64/ && PATH="${PATH}:/usr/lib/wsl/lib/" && chmod 755 ./run.sh && ./run.sh',
+        ],
+      }),
+    );
+    expect(gpuManager.collectGPUs).toHaveBeenCalled();
+    expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.cuda, expect.any(Function));
+    expect('gpu' in server.labels).toBeTruthy();
+    expect(server.labels['gpu']).toBe('nvidia');
+  });
+
+  test('gpu experimental should collect GPU data and find first supported gpu - entry 0 supported', async () => {
+    vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
+      experimentalGPU: true,
+      modelsPath: '',
+      apiPort: 10434,
+      experimentalTuning: false,
+      modelUploadDisabled: false,
+      showGPUPromotion: false,
+    });
+
+    vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
+      {
+        vram: 1024,
+        model: 'nvidia',
+        vendor: GPUVendor.NVIDIA,
+      },
+      {
+        vram: 1024,
+        model: 'dummy-model',
+        vendor: GPUVendor.UNKNOWN,
+      },
+    ]);
+
+    const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
+    const server = await provider.perform({
+      port: 8000,
+      image: undefined,
+      labels: {},
+      modelsInfo: [DummyModel],
+      connection: undefined,
+    });
+
+    expect(containerEngine.createContainer).toHaveBeenCalledWith(
+      DummyImageInfo.engineId,
+      expect.objectContaining({
+        Cmd: [
+          '-c',
+          '/usr/bin/ln -sfn /usr/lib/wsl/lib/* /usr/lib64/ && PATH="${PATH}:/usr/lib/wsl/lib/" && chmod 755 ./run.sh && ./run.sh',
+        ],
+      }),
+    );
+    expect(gpuManager.collectGPUs).toHaveBeenCalled();
+    expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.cuda, expect.any(Function));
+    expect('gpu' in server.labels).toBeTruthy();
+    expect(server.labels['gpu']).toBe('nvidia');
+  });
+
   test('unknown gpu on unsupported vmtype should not provide gpu labels', async () => {
     vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
       experimentalGPU: true,
diff --git a/packages/backend/src/workers/provider/LlamaCppPython.ts b/packages/backend/src/workers/provider/LlamaCppPython.ts
index 9685c2891..209a75ca5 100644
--- a/packages/backend/src/workers/provider/LlamaCppPython.ts
+++ b/packages/backend/src/workers/provider/LlamaCppPython.ts
@@ -197,9 +197,21 @@ export class LlamaCppPython extends InferenceProvider {
     if (this.configurationRegistry.getExtensionConfiguration().experimentalGPU) {
       const gpus: IGPUInfo[] = await this.gpuManager.collectGPUs();
       if (gpus.length === 0) throw new Error('no gpu was found.');
-      if (gpus.length > 1)
-        console.warn(`found ${gpus.length} gpus: using multiple GPUs is not supported. Using ${gpus[0].model}.`);
-      gpu = gpus[0];
+      let selectedGPU = 0;
+      if (gpus.length > 1) {
+        // Look for a GPU that is of a known type, use the first one found.
+        // Fall back to the first one if no GPUs are of known type.
+        for (let i = 0; i < gpus.length; i++) {
+          if (gpus[i].vendor !== GPUVendor.UNKNOWN) {
+            selectedGPU = i;
+            break;
+          }
+        }
+        console.warn(
+          `found ${gpus.length} gpus: using multiple GPUs is not supported. Using ${gpus[selectedGPU].model}.`,
+        );
+      }
+      gpu = gpus[selectedGPU];
     }
 
     let connection: ContainerProviderConnection | undefined = undefined;
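The selection logic in the LlamaCppPython.ts hunk reduces to: pick the first GPU whose vendor is known, otherwise fall back to the first GPU. A minimal standalone sketch of that rule follows; GPUVendor and IGPUInfo here are simplified stand-ins for the extension's types, not the real imports.

// Standalone sketch of the selection rule above, under the assumption that
// only the vendor field matters for the choice (as in the diff).
enum GPUVendor {
  UNKNOWN = 'unknown',
  NVIDIA = 'nvidia',
}

interface IGPUInfo {
  vendor: GPUVendor;
  model: string;
  vram: number;
}

// Index of the first GPU with a known vendor; 0 when every vendor is UNKNOWN.
function selectGPUIndex(gpus: IGPUInfo[]): number {
  for (let i = 0; i < gpus.length; i++) {
    if (gpus[i].vendor !== GPUVendor.UNKNOWN) return i;
  }
  return 0;
}

// Mirrors the two spec cases: the NVIDIA entry wins regardless of position.
const gpus: IGPUInfo[] = [
  { vendor: GPUVendor.UNKNOWN, model: 'dummy-model', vram: 1024 },
  { vendor: GPUVendor.NVIDIA, model: 'nvidia', vram: 1024 },
];
console.log(selectGPUIndex(gpus)); // 1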