feat: improve GPU support when multiple GPUs are present
If there are multiple GPUs, use the first one of a known type
instead of simply the first GPU. If no GPUs are of a known type,
fall back to the first GPU as before.

Add another vendor string that is accepted as an NVIDIA GPU
during GPU detection, based on what was observed on Linux
with an NVIDIA 4070 Ti Super.

Signed-off-by: Michael Dawson <[email protected]>
mhdawson committed Dec 9, 2024
1 parent e9dd107 commit 022cb15
Showing 4 changed files with 132 additions and 3 deletions.
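
Before the diffs: the selection rule this commit introduces is small enough to sketch on its own. The following is a minimal TypeScript illustration of the intended behavior, not the shipped code (the real change is in LlamaCppPython.ts below; IGPUInfo and GPUVendor are the extension's own types, and the import path here is illustrative):

import { GPUVendor, type IGPUInfo } from '../models/IGPUInfo'; // illustrative path

function selectGPU(gpus: IGPUInfo[]): IGPUInfo {
  // Prefer the first GPU whose vendor is recognized; otherwise keep the
  // previous behavior and return the first GPU reported.
  if (gpus.length === 0) throw new Error('no gpu was found.');
  return gpus.find(gpu => gpu.vendor !== GPUVendor.UNKNOWN) ?? gpus[0];
}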
22 changes: 22 additions & 0 deletions packages/backend/src/managers/GPUManager.spec.ts
@@ -105,3 +105,25 @@ test('NVIDIA controller should return intel vendor', async () => {
     },
   ]);
 });
+
+test('NVIDIA controller can have vendor "NVIDIA Corporation"', async () => {
+  vi.mocked(graphics).mockResolvedValue({
+    controllers: [
+      {
+        vendor: 'NVIDIA Corporation',
+        model: 'NVIDIA GeForce GTX 1060 6GB',
+        vram: 6144,
+      } as unknown as Systeminformation.GraphicsControllerData,
+    ],
+    displays: [],
+  });
+
+  const manager = new GPUManager(webviewMock);
+  expect(await manager.collectGPUs()).toStrictEqual([
+    {
+      vendor: GPUVendor.NVIDIA,
+      model: 'NVIDIA GeForce GTX 1060 6GB',
+      vram: 6144,
+    },
+  ]);
+});
1 change: 1 addition & 0 deletions packages/backend/src/managers/GPUManager.ts
@@ -53,6 +53,7 @@ export class GPUManager extends Publisher<IGPUInfo[]> implements Disposable {
       case 'Intel Corporation':
         return GPUVendor.INTEL;
       case 'NVIDIA':
+      case 'NVIDIA Corporation':
         return GPUVendor.NVIDIA;
       case 'Apple':
         return GPUVendor.APPLE;
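
For context, the vendor string matched above is reported by the systeminformation package's graphics() call, which GPUManager wraps (the spec file mocks it). A minimal sketch of that lookup, assuming the systeminformation API; field names follow its GraphicsControllerData type and the function name is illustrative:

import { graphics } from 'systeminformation';

async function listControllers(): Promise<void> {
  const { controllers } = await graphics();
  for (const controller of controllers) {
    // On Linux with a 4070 Ti Super the vendor field was observed as
    // 'NVIDIA Corporation' rather than 'NVIDIA', hence the new case above.
    console.log(controller.vendor, controller.model, controller.vram);
  }
}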
94 changes: 94 additions & 0 deletions packages/backend/src/workers/provider/LlamaCppPython.spec.ts
@@ -313,6 +313,100 @@ describe('perform', () => {
     expect(server.labels['gpu']).toBe('nvidia');
   });
+
+  test('gpu experimental should collect GPU data and find first supported gpu - entry 1 supported', async () => {
+    vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
+      experimentalGPU: true,
+      modelsPath: '',
+      apiPort: 10434,
+      experimentalTuning: false,
+      modelUploadDisabled: false,
+      showGPUPromotion: false,
+    });
+
+    vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
+      {
+        vram: 1024,
+        model: 'dummy-model',
+        vendor: GPUVendor.UNKNOWN,
+      },
+      {
+        vram: 1024,
+        model: 'nvidia',
+        vendor: GPUVendor.NVIDIA,
+      },
+    ]);
+
+    const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
+    const server = await provider.perform({
+      port: 8000,
+      image: undefined,
+      labels: {},
+      modelsInfo: [DummyModel],
+      connection: undefined,
+    });
+
+    expect(containerEngine.createContainer).toHaveBeenCalledWith(
+      DummyImageInfo.engineId,
+      expect.objectContaining({
+        Cmd: [
+          '-c',
+          '/usr/bin/ln -sfn /usr/lib/wsl/lib/* /usr/lib64/ && PATH="${PATH}:/usr/lib/wsl/lib/" && chmod 755 ./run.sh && ./run.sh',
+        ],
+      }),
+    );
+    expect(gpuManager.collectGPUs).toHaveBeenCalled();
+    expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.cuda, expect.any(Function));
+    expect('gpu' in server.labels).toBeTruthy();
+    expect(server.labels['gpu']).toBe('nvidia');
+  });
+
+  test('gpu experimental should collect GPU data and find first supported gpu - entry 0 supported', async () => {
+    vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
+      experimentalGPU: true,
+      modelsPath: '',
+      apiPort: 10434,
+      experimentalTuning: false,
+      modelUploadDisabled: false,
+      showGPUPromotion: false,
+    });
+
+    vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
+      {
+        vram: 1024,
+        model: 'nvidia',
+        vendor: GPUVendor.NVIDIA,
+      },
+      {
+        vram: 1024,
+        model: 'dummy-model',
+        vendor: GPUVendor.UNKNOWN,
+      },
+    ]);
+
+    const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
+    const server = await provider.perform({
+      port: 8000,
+      image: undefined,
+      labels: {},
+      modelsInfo: [DummyModel],
+      connection: undefined,
+    });
+
+    expect(containerEngine.createContainer).toHaveBeenCalledWith(
+      DummyImageInfo.engineId,
+      expect.objectContaining({
+        Cmd: [
+          '-c',
+          '/usr/bin/ln -sfn /usr/lib/wsl/lib/* /usr/lib64/ && PATH="${PATH}:/usr/lib/wsl/lib/" && chmod 755 ./run.sh && ./run.sh',
+        ],
+      }),
+    );
+    expect(gpuManager.collectGPUs).toHaveBeenCalled();
+    expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.cuda, expect.any(Function));
+    expect('gpu' in server.labels).toBeTruthy();
+    expect(server.labels['gpu']).toBe('nvidia');
+  });
 
   test('unknown gpu on unsupported vmtype should not provide gpu labels', async () => {
     vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
       experimentalGPU: true,
18 changes: 15 additions & 3 deletions packages/backend/src/workers/provider/LlamaCppPython.ts
@@ -197,9 +197,21 @@ export class LlamaCppPython extends InferenceProvider {
     if (this.configurationRegistry.getExtensionConfiguration().experimentalGPU) {
       const gpus: IGPUInfo[] = await this.gpuManager.collectGPUs();
       if (gpus.length === 0) throw new Error('no gpu was found.');
-      if (gpus.length > 1)
-        console.warn(`found ${gpus.length} gpus: using multiple GPUs is not supported. Using ${gpus[0].model}.`);
-      gpu = gpus[0];
+      let selectedGPU = 0;
+      if (gpus.length > 1) {
+        // Look for a GPU that is of a known type, use the first one found.
+        // Fall back to the first one if no GPUs are of known type.
+        for (let i = 0; i < gpus.length; i++) {
+          if (gpus[i].vendor !== GPUVendor.UNKNOWN) {
+            selectedGPU = i;
+            break;
+          }
+        }
+        console.warn(
+          `found ${gpus.length} gpus: using multiple GPUs is not supported. Using ${gpus[selectedGPU].model}.`,
+        );
+      }
+      gpu = gpus[selectedGPU];
     }
 
     let connection: ContainerProviderConnection | undefined = undefined;
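
The linear scan in the hunk above could also be written with Array.prototype.findIndex. A compact equivalent, shown only to illustrate the selection logic (not what the commit ships; the import path is illustrative):

import { GPUVendor, type IGPUInfo } from '../models/IGPUInfo'; // illustrative path

function selectedIndex(gpus: IGPUInfo[]): number {
  // First GPU with a recognized vendor; findIndex returns -1 when none qualify.
  const idx = gpus.findIndex(gpu => gpu.vendor !== GPUVendor.UNKNOWN);
  // Fall back to the first GPU when every vendor is UNKNOWN.
  return idx === -1 ? 0 : idx;
}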
