feat: improve GPU support when multiple GPUs are present
If there are multiple GPUs, use the first one of a known type
instead of simply the first GPU. If no GPUs are of a known type,
fall back to the first GPU as before.

Add another vendor string that is accepted as an NVIDIA GPU
during GPU detection, based on what was observed on Linux
with an NVIDIA 4070 Ti Super.

Signed-off-by: Michael Dawson <[email protected]>
mhdawson committed Dec 9, 2024
1 parent e9dd107 commit 022cb15
Showing 4 changed files with 132 additions and 3 deletions.
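
Before the diffs: the selection rule this commit introduces is small enough to sketch on its own. The following is a minimal TypeScript illustration of the intended behavior, not the shipped code (the real change is in LlamaCppPython.ts below; IGPUInfo and GPUVendor are the extension's own types, and the import path here is illustrative):

import { GPUVendor, type IGPUInfo } from '../models/IGPUInfo'; // illustrative path

function selectGPU(gpus: IGPUInfo[]): IGPUInfo {
  // Prefer the first GPU whose vendor is recognized; otherwise keep the
  // previous behavior and return the first GPU reported.
  if (gpus.length === 0) throw new Error('no gpu was found.');
  return gpus.find(gpu => gpu.vendor !== GPUVendor.UNKNOWN) ?? gpus[0];
}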
22 changes: 22 additions & 0 deletions packages/backend/src/managers/GPUManager.spec.ts
@@ -105,3 +105,25 @@ test('NVIDIA controller should return intel vendor', async () => {
     },
   ]);
 });
+
+test('NVIDIA controller can have vendor "NVIDIA Corporation"', async () => {
+  vi.mocked(graphics).mockResolvedValue({
+    controllers: [
+      {
+        vendor: 'NVIDIA Corporation',
+        model: 'NVIDIA GeForce GTX 1060 6GB',
+        vram: 6144,
+      } as unknown as Systeminformation.GraphicsControllerData,
+    ],
+    displays: [],
+  });
+
+  const manager = new GPUManager(webviewMock);
+  expect(await manager.collectGPUs()).toStrictEqual([
+    {
+      vendor: GPUVendor.NVIDIA,
+      model: 'NVIDIA GeForce GTX 1060 6GB',
+      vram: 6144,
+    },
+  ]);
+});
1 change: 1 addition & 0 deletions packages/backend/src/managers/GPUManager.ts
@@ -53,6 +53,7 @@ export class GPUManager extends Publisher<IGPUInfo[]> implements Disposable {
       case 'Intel Corporation':
         return GPUVendor.INTEL;
       case 'NVIDIA':
+      case 'NVIDIA Corporation':
         return GPUVendor.NVIDIA;
       case 'Apple':
         return GPUVendor.APPLE;
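
For context, the vendor string matched above is reported by the systeminformation package's graphics() call, which GPUManager wraps (the spec file mocks it). A minimal sketch of that lookup, assuming the systeminformation API; field names follow its GraphicsControllerData type and the function name is illustrative:

import { graphics } from 'systeminformation';

async function listControllers(): Promise<void> {
  const { controllers } = await graphics();
  for (const controller of controllers) {
    // On Linux with a 4070 Ti Super the vendor field was observed as
    // 'NVIDIA Corporation' rather than 'NVIDIA', hence the new case above.
    console.log(controller.vendor, controller.model, controller.vram);
  }
}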
94 changes: 94 additions & 0 deletions packages/backend/src/workers/provider/LlamaCppPython.spec.ts
@@ -313,6 +313,100 @@ describe('perform', () => {
     expect(server.labels['gpu']).toBe('nvidia');
   });
+
+  test('gpu experimental should collect GPU data and find first supported gpu - entry 1 supported', async () => {
+    vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
+      experimentalGPU: true,
+      modelsPath: '',
+      apiPort: 10434,
+      experimentalTuning: false,
+      modelUploadDisabled: false,
+      showGPUPromotion: false,
+    });
+
+    vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
+      {
+        vram: 1024,
+        model: 'dummy-model',
+        vendor: GPUVendor.UNKNOWN,
+      },
+      {
+        vram: 1024,
+        model: 'nvidia',
+        vendor: GPUVendor.NVIDIA,
+      },
+    ]);
+
+    const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
+    const server = await provider.perform({
+      port: 8000,
+      image: undefined,
+      labels: {},
+      modelsInfo: [DummyModel],
+      connection: undefined,
+    });
+
+    expect(containerEngine.createContainer).toHaveBeenCalledWith(
+      DummyImageInfo.engineId,
+      expect.objectContaining({
+        Cmd: [
+          '-c',
+          '/usr/bin/ln -sfn /usr/lib/wsl/lib/* /usr/lib64/ && PATH="${PATH}:/usr/lib/wsl/lib/" && chmod 755 ./run.sh && ./run.sh',
+        ],
+      }),
+    );
+    expect(gpuManager.collectGPUs).toHaveBeenCalled();
+    expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.cuda, expect.any(Function));
+    expect('gpu' in server.labels).toBeTruthy();
+    expect(server.labels['gpu']).toBe('nvidia');
+  });
+
+  test('gpu experimental should collect GPU data and find first supported gpu - entry 0 supported', async () => {
+    vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
+      experimentalGPU: true,
+      modelsPath: '',
+      apiPort: 10434,
+      experimentalTuning: false,
+      modelUploadDisabled: false,
+      showGPUPromotion: false,
+    });
+
+    vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
+      {
+        vram: 1024,
+        model: 'nvidia',
+        vendor: GPUVendor.NVIDIA,
+      },
+      {
+        vram: 1024,
+        model: 'dummy-model',
+        vendor: GPUVendor.UNKNOWN,
+      },
+    ]);
+
+    const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
+    const server = await provider.perform({
+      port: 8000,
+      image: undefined,
+      labels: {},
+      modelsInfo: [DummyModel],
+      connection: undefined,
+    });
+
+    expect(containerEngine.createContainer).toHaveBeenCalledWith(
+      DummyImageInfo.engineId,
+      expect.objectContaining({
+        Cmd: [
+          '-c',
+          '/usr/bin/ln -sfn /usr/lib/wsl/lib/* /usr/lib64/ && PATH="${PATH}:/usr/lib/wsl/lib/" && chmod 755 ./run.sh && ./run.sh',
+        ],
+      }),
+    );
+    expect(gpuManager.collectGPUs).toHaveBeenCalled();
+    expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.cuda, expect.any(Function));
+    expect('gpu' in server.labels).toBeTruthy();
+    expect(server.labels['gpu']).toBe('nvidia');
+  });
 
   test('unknown gpu on unsupported vmtype should not provide gpu labels', async () => {
     vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
       experimentalGPU: true,
18 changes: 15 additions & 3 deletions packages/backend/src/workers/provider/LlamaCppPython.ts
@@ -197,9 +197,21 @@ export class LlamaCppPython extends InferenceProvider {
     if (this.configurationRegistry.getExtensionConfiguration().experimentalGPU) {
       const gpus: IGPUInfo[] = await this.gpuManager.collectGPUs();
       if (gpus.length === 0) throw new Error('no gpu was found.');
-      if (gpus.length > 1)
-        console.warn(`found ${gpus.length} gpus: using multiple GPUs is not supported. Using ${gpus[0].model}.`);
-      gpu = gpus[0];
+      let selectedGPU = 0;
+      if (gpus.length > 1) {
+        // Look for a GPU that is of a known type, use the first one found.
+        // Fall back to the first one if no GPUs are of known type.
+        for (let i = 0; i < gpus.length; i++) {
+          if (gpus[i].vendor !== GPUVendor.UNKNOWN) {
+            selectedGPU = i;
+            break;
+          }
+        }
+        console.warn(
+          `found ${gpus.length} gpus: using multiple GPUs is not supported. Using ${gpus[selectedGPU].model}.`,
+        );
+      }
+      gpu = gpus[selectedGPU];
     }
 
     let connection: ContainerProviderConnection | undefined = undefined;
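
The linear scan in the hunk above could also be written with Array.prototype.findIndex. A compact equivalent, shown only to illustrate the selection logic (not what the commit ships; the import path is illustrative):

import { GPUVendor, type IGPUInfo } from '../models/IGPUInfo'; // illustrative path

function selectedIndex(gpus: IGPUInfo[]): number {
  // First GPU with a recognized vendor; findIndex returns -1 when none qualify.
  const idx = gpus.findIndex(gpu => gpu.vendor !== GPUVendor.UNKNOWN);
  // Fall back to the first GPU when every vendor is UNKNOWN.
  return idx === -1 ? 0 : idx;
}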
