From 60ab79608ae35bd929d3e1387d226547d18e6bed Mon Sep 17 00:00:00 2001 From: "Azamat H. Hackimov" Date: Tue, 11 Jul 2023 02:24:12 +0300 Subject: [PATCH 01/30] Use libva's driverdir path instead hardcoded In some systems libva may expects driver location in different path. Reusing driverdir variable from pkg-config to provide correct one. --- meson.build | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/meson.build b/meson.build index 6fd72da..990c2b2 100644 --- a/meson.build +++ b/meson.build @@ -18,10 +18,12 @@ deps = [ cc.find_library('dl', required : false), dependency('egl'), dependency('ffnvcodec', version: '>= 11.1.5.1'), - dependency('libva', version: '>= 1.8.0').partial_dependency(compile_args: true), dependency('libdrm', version: '>=2.4.60').partial_dependency(compile_args: true), dependency('threads'), ] +libva_deps = dependency('libva', version: '>= 1.8.0').partial_dependency(compile_args: true) +deps += [libva_deps] + gst_codecs_deps = dependency('gstreamer-codecparsers-1.0', required: false) if cc.get_argument_syntax() == 'gcc' @@ -69,6 +71,7 @@ if gst_codecs_deps.found() endif nvidia_incdir = include_directories('nvidia-include') +nvidia_install_dir = libva_deps.get_variable(pkgconfig: 'driverdir') shared_library( 'nvidia_drv_video', @@ -77,7 +80,7 @@ shared_library( dependencies: deps, include_directories: nvidia_incdir, install: true, - install_dir: get_option('libdir') / 'dri', + install_dir: nvidia_install_dir, gnu_symbol_visibility: 'hidden', ) From 3ff1284038755c6354956610b345f99375326213 Mon Sep 17 00:00:00 2001 From: thesword53 Date: Sun, 23 Jul 2023 23:44:30 +0200 Subject: [PATCH 02/30] Fix yuv444p16: kept only VA_FOURCC_Q416 --- src/direct/direct-export-buf.c | 12 +---------- src/vabackend.c | 38 +++++----------------------------- src/vabackend.h | 2 -- 3 files changed, 6 insertions(+), 46 deletions(-) diff --git a/src/direct/direct-export-buf.c b/src/direct/direct-export-buf.c index abc5e72..636c554 100644 --- 
a/src/direct/direct-export-buf.c +++ b/src/direct/direct-export-buf.c @@ -170,17 +170,7 @@ static BackingImage *direct_allocateBackingImage(NVDriver *drv, const NVSurface break; case cudaVideoSurfaceFormat_YUV444_16Bit: - switch (surface->bitDepth) { - case 10: - backingImage->format = NV_FORMAT_Q410; - break; - case 12: - backingImage->format = NV_FORMAT_Q412; - break; - default: - backingImage->format = NV_FORMAT_Q416; - break; - } + backingImage->format = NV_FORMAT_Q416; break; case cudaVideoSurfaceFormat_YUV444: diff --git a/src/vabackend.c b/src/vabackend.c index 5376a57..9c39c09 100644 --- a/src/vabackend.c +++ b/src/vabackend.c @@ -54,16 +54,10 @@ const NVFormatInfo formatsInfo[] = [NV_FORMAT_P012] = {2, 2, DRM_FORMAT_P012, true, false, {{1, DRM_FORMAT_R16, {0,0}}, {2, DRM_FORMAT_RG1616, {1,1}}}, {VA_FOURCC_P012, VA_LSB_FIRST, 24, 0,0,0,0,0}}, [NV_FORMAT_P016] = {2, 2, DRM_FORMAT_P016, true, false, {{1, DRM_FORMAT_R16, {0,0}}, {2, DRM_FORMAT_RG1616, {1,1}}}, {VA_FOURCC_P016, VA_LSB_FIRST, 24, 0,0,0,0,0}}, [NV_FORMAT_444P] = {1, 3, DRM_FORMAT_YUV444, false, true, {{1, DRM_FORMAT_R8, {0,0}}, {1, DRM_FORMAT_R8, {0,0}}, {1, DRM_FORMAT_R8, {0,0}}}, {VA_FOURCC_444P, VA_LSB_FIRST, 24, 0,0,0,0,0}}, - // Nvidia decoder only supports YUV444 planar formats with 3 planes so we can't use VA_FOURCC_Y410, VA_FOURCC_Y412 and VA_FOURCC_Y416. - // VA_FOURCC_Q410, VA_FOURCC_Q412 and VA_FOURCC_Q416 aren't defined in va.h yet. 
-#if defined(VA_FOURCC_Q410) && defined(DRM_FORMAT_Q410) - [NV_FORMAT_Q410] = {2, 3, DRM_FORMAT_Q410, true, true, {{1, DRM_FORMAT_R16, {0,0}}, {1, DRM_FORMAT_R16, {0,0}}, {1, DRM_FORMAT_R16,{0,0}}}, {VA_FOURCC_Q410, VA_LSB_FIRST, 48, 0,0,0,0,0}}, -#endif -#if defined(VA_FOURCC_Q412) && defined(DRM_FORMAT_Q412) - [NV_FORMAT_Q412] = {2, 3, DRM_FORMAT_Q412, true, true, {{1, DRM_FORMAT_R16, {0,0}}, {1, DRM_FORMAT_R16, {0,0}}, {1, DRM_FORMAT_R16,{0,0}}}, {VA_FOURCC_Q412, VA_LSB_FIRST, 48, 0,0,0,0,0}}, -#endif -#if defined(VA_FOURCC_Q416) && defined(DRM_FORMAT_Q416) - [NV_FORMAT_Q416] = {2, 3, DRM_FORMAT_Q416, true, true, {{1, DRM_FORMAT_R16, {0,0}}, {1, DRM_FORMAT_R16, {0,0}}, {1, DRM_FORMAT_R16,{0,0}}}, {VA_FOURCC_Q416, VA_LSB_FIRST, 48, 0,0,0,0,0}}, + // Nvidia decoder only supports YUV444 planar formats with 3 planes so we can't use VA_FOURCC_Y416. + // VA_FOURCC_Q416 isn't defined in va.h yet. +#if defined(VA_FOURCC_Q416) + [NV_FORMAT_Q416] = {2, 3, DRM_FORMAT_INVALID, true, true, {{1, DRM_FORMAT_R16, {0,0}}, {1, DRM_FORMAT_R16, {0,0}}, {1, DRM_FORMAT_R16,{0,0}}}, {VA_FOURCC_Q416, VA_LSB_FIRST, 48, 0,0,0,0,0}}, #endif }; @@ -506,16 +500,14 @@ static VAStatus nvQueryConfigProfiles( } // Currently VAAPI doesn't support yuv444p10 yuv444p12 and yuv444p16 -#if defined(VA_FOURCC_Q410) && defined(DRM_FORMAT_Q410) +#if defined(VA_FOURCC_Q416) if (drv->supports16BitSurface) { if (doesGPUSupportCodec(cudaVideoCodec_HEVC, 10, cudaVideoChromaFormat_444, NULL, NULL)) { profile_list[profiles++] = VAProfileHEVCMain444_10; } -#if (defined(VA_FOURCC_Q412) && defined(DRM_FORMAT_Q412)) || (defined(VA_FOURCC_Q416) && defined(DRM_FORMAT_Q416)) if (doesGPUSupportCodec(cudaVideoCodec_HEVC, 12, cudaVideoChromaFormat_444, NULL, NULL)) { profile_list[profiles++] = VAProfileHEVCMain444_12; } -#endif if (doesGPUSupportCodec(cudaVideoCodec_VP9, 10, cudaVideoChromaFormat_444, NULL, NULL)) { profile_list[profiles++] = VAProfileVP9Profile3; //color depth: 10–12 bit, 4:2:2, 4:4:0, 4:4:4 } @@ 
-1786,12 +1778,6 @@ static VAStatus nvQuerySurfaceAttributes( int cnt = 4; if (cfg->chromaFormat == cudaVideoChromaFormat_444) { cnt += 1; -#ifdef VA_FOURCC_Q410 - cnt += 1; -#endif -#ifdef VA_FOURCC_Q412 - cnt += 1; -#endif #ifdef VA_FOURCC_Q416 cnt += 1; #endif @@ -1848,20 +1834,6 @@ static VAStatus nvQuerySurfaceAttributes( attrib_list[attrib_idx].value.type = VAGenericValueTypeInteger; attrib_list[attrib_idx].value.value.i = VA_FOURCC_444P; attrib_idx += 1; -#ifdef VA_FOURCC_Q410 - attrib_list[attrib_idx].type = VASurfaceAttribPixelFormat; - attrib_list[attrib_idx].flags = 0; - attrib_list[attrib_idx].value.type = VAGenericValueTypeInteger; - attrib_list[attrib_idx].value.value.i = VA_FOURCC_Q410; - attrib_idx += 1; -#endif -#ifdef VA_FOURCC_Q412 - attrib_list[attrib_idx].type = VASurfaceAttribPixelFormat; - attrib_list[attrib_idx].flags = 0; - attrib_list[attrib_idx].value.type = VAGenericValueTypeInteger; - attrib_list[attrib_idx].value.value.i = VA_FOURCC_Q412; - attrib_idx += 1; -#endif #ifdef VA_FOURCC_Q416 attrib_list[attrib_idx].type = VASurfaceAttribPixelFormat; attrib_list[attrib_idx].flags = 0; diff --git a/src/vabackend.h b/src/vabackend.h index ffe9bc7..7c2f708 100644 --- a/src/vabackend.h +++ b/src/vabackend.h @@ -76,8 +76,6 @@ typedef enum NV_FORMAT_P012, NV_FORMAT_P016, NV_FORMAT_444P, - NV_FORMAT_Q410, - NV_FORMAT_Q412, NV_FORMAT_Q416 } NVFormat; From 08646af24f43977241e08ffc338241e36031bdfc Mon Sep 17 00:00:00 2001 From: Stephen <2325080+elFarto@users.noreply.github.com> Date: Sun, 6 Aug 2023 15:02:28 +0100 Subject: [PATCH 03/30] Update README.md Removed EGL_PLATFORM definition, as it's probably unneeded and it's causing other problems. --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index abd0049..148acfe 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,6 @@ In addition the following environment variables need to be set. 
For permanent co | Variable | Value | Reason | |---|---|---| | MOZ_DISABLE_RDD_SANDBOX | 1 | Disables the sandbox for the RDD process that the decoder runs in. | -| EGL_PLATFORM | wayland | Required on FF98+ when running on Wayland, due to a regression that has been introduced. | | LIBVA_DRIVER_NAME | nvidia | For libva versions prior to 2.15, this forces libva to load the `nvidia` backend. | | __EGL_VENDOR_LIBRARY_FILENAMES | /usr/share/glvnd/egl_vendor.d/10_nvidia.json | Required for the 470 driver series only. It overrides the list of drivers the glvnd library can use to prevent Firefox from using the MESA driver by mistake. | From 0a358b65da60e15b2dd258bd8d27bd8d1fd3491e Mon Sep 17 00:00:00 2001 From: Stephen Date: Sun, 6 Aug 2023 15:18:52 +0100 Subject: [PATCH 04/30] Use 'Big' pages as for some reason Vulkan can't read from the default sized ones --- src/direct/nv-driver.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/direct/nv-driver.c b/src/direct/nv-driver.c index ad6c267..3b65f49 100644 --- a/src/direct/nv-driver.c +++ b/src/direct/nv-driver.c @@ -339,23 +339,14 @@ bool alloc_memory(NVDriverContext *context, uint32_t size, int *fd) { int nvctlFd2 = -1; NvHandle bufferObject = {0}; - //we don't have huge pages available on all hardware - //turns out we don't need to know that anyway, although this will probably result is less optimal page size - /* - NvU32 pageSizeAttr = context->hasHugePage ? DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _HUGE) - : DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _BIG); - NvU32 pageSizeAttr2 = context->hasHugePage ? 
DRF_DEF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _2MB) - : 0;*/ - NV_MEMORY_ALLOCATION_PARAMS memParams = { .owner = context->clientObject, .type = NVOS32_TYPE_IMAGE, .flags = NVOS32_ALLOC_FLAGS_IGNORE_BANK_PLACEMENT | - //NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE | //this doesn't seem to be needed NVOS32_ALLOC_FLAGS_MAP_NOT_REQUIRED | NVOS32_ALLOC_FLAGS_PERSISTENT_VIDMEM, - .attr = //pageSizeAttr | + .attr = DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _BIG) | DRF_DEF(OS32, _ATTR, _DEPTH, _UNKNOWN) | DRF_DEF(OS32, _ATTR, _FORMAT, _BLOCK_LINEAR) | DRF_DEF(OS32, _ATTR, _PHYSICALITY, _CONTIGUOUS), @@ -364,8 +355,7 @@ bool alloc_memory(NVDriverContext *context, uint32_t size, int *fd) { .height = 0, .size = size, .alignment = 0, //see flags above - .attr2 = //pageSizeAttr2 | - DRF_DEF(OS32, _ATTR2, _ZBC, _PREFER_NO_ZBC) | + .attr2 = DRF_DEF(OS32, _ATTR2, _ZBC, _PREFER_NO_ZBC) | DRF_DEF(OS32, _ATTR2, _GPU_CACHEABLE, _YES) }; bool ret = nv_alloc_object(context->nvctlFd, context->driverMajorVersion, context->clientObject, context->deviceObject, &bufferObject, NV01_MEMORY_LOCAL_USER, sizeof(memParams), &memParams); From 036c27a973d13e9ee3ff578dc199991a8af5aa45 Mon Sep 17 00:00:00 2001 From: jrg Date: Mon, 7 Aug 2023 12:03:36 +0800 Subject: [PATCH 05/30] fix wrong slice params struct in func copyHEVCSliceData --- src/hevc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hevc.c b/src/hevc.c index 01fc071..d58c629 100644 --- a/src/hevc.c +++ b/src/hevc.c @@ -252,7 +252,7 @@ static void copyHEVCSliceData(NVContext *ctx, NVBuffer* buf, CUVIDPICPARAMS *pic { static const uint8_t header[] = { 0, 0, 1 }; //1 as a 24-bit Big Endian - VASliceParameterBufferH264 *sliceParams = &((VASliceParameterBufferH264*) ctx->lastSliceParams)[i]; + VASliceParameterBufferHEVC *sliceParams = &((VASliceParameterBufferHEVC*) ctx->lastSliceParams)[i]; uint32_t offset = (uint32_t) ctx->bitstreamBuffer.size; appendBuffer(&ctx->sliceOffsets, &offset, sizeof(offset)); appendBuffer(&ctx->bitstreamBuffer, 
header, sizeof(header)); From dbfd8f2f6e3c82bd508645fcc5977d3afda7d7c2 Mon Sep 17 00:00:00 2001 From: q234rty Date: Thu, 10 Aug 2023 17:45:11 +0800 Subject: [PATCH 06/30] Fix check method for ffmpeg vaapi support --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 148acfe..cb66187 100644 --- a/README.md +++ b/README.md @@ -103,7 +103,7 @@ Environment variables used to control the behavior of this library. ## Firefox -To use the driver with firefox you will need at least Firefox 96, `ffmpeg` compiled with vaapi support (search ffmpeg output for --enable-vaapi), and the following config options need to be set in the `about:config` page: +To use the driver with firefox you will need at least Firefox 96, `ffmpeg` compiled with vaapi support (`ffmpeg -hwaccels` output should include vaapi), and the following config options need to be set in the `about:config` page: | Option | Value | Reason | |---|---|---| From 3d01ff6080e96df70eaf2e043e6f0ff1aff06255 Mon Sep 17 00:00:00 2001 From: begin-theadventure <99835765+begin-theadventure@users.noreply.github.com> Date: Thu, 24 Aug 2023 09:16:41 +0200 Subject: [PATCH 07/30] Packaging status --- README.md | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index cb66187..a5f2f51 100644 --- a/README.md +++ b/README.md @@ -51,16 +51,11 @@ To install and use `nvidia-vaapi-driver`, follow the steps in installation and c * NVIDIA driver series 470 or 500+ -## Package manager +## Packaging status -| Distribution | Package name | -|---|---| -| ArchAUR | [libva-nvidia-driver](https://aur.archlinux.org/packages/libva-nvidia-driver) | -| ArchAUR | [libva-nvidia-driver-git](https://aur.archlinux.org/packages/libva-nvidia-driver-git) | -| Debian,Ubuntu | nvidia-vaapi-driver[debian](https://tracker.debian.org/pkg/nvidia-vaapi-driver) [ubuntu](https://packages.ubuntu.com/kinetic/nvidia-vaapi-driver) | -| Fedora, RHEL and derivates (Rocky, 
Alma, etc).| [nvidia-vaapi-driver](https://github.com/rpmfusion/nvidia-vaapi-driver) | +

repologyrepology

-Feel free to add your distributions package in an issue/PR. +[pkgs.org/nvidia-vaapi-driver](https://pkgs.org/search/?q=nvidia-vaapi-driver) [pkgs.org/libva-nvidia-driver](https://pkgs.org/search/?q=libva-nvidia-driver) ## Building From cce1adc17df8792969becaf1a36f0e6e172d3829 Mon Sep 17 00:00:00 2001 From: begin-theadventure <99835765+begin-theadventure@users.noreply.github.com> Date: Thu, 24 Aug 2023 10:09:19 +0200 Subject: [PATCH 08/30] [README.md] openSUSE + info added back. --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index a5f2f51..6beeba9 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,10 @@ To install and use `nvidia-vaapi-driver`, follow the steps in installation and c [pkgs.org/nvidia-vaapi-driver](https://pkgs.org/search/?q=nvidia-vaapi-driver) [pkgs.org/libva-nvidia-driver](https://pkgs.org/search/?q=libva-nvidia-driver) +[openSUSE](https://software.opensuse.org/package/libva-nvidia-driver) + +Feel free to add your distributions package in an issue/PR, if it isn't on these websites. + ## Building You'll need `meson`, the `gstreamer-plugins-bad` library, and [`nv-codec-headers`](https://git.videolan.org/?p=ffmpeg/nv-codec-headers.git) installed. 
From 0d7324f0e56fa33be93cdc5fdd831dab2b74219d Mon Sep 17 00:00:00 2001 From: begin-theadventure <99835765+begin-theadventure@users.noreply.github.com> Date: Thu, 24 Aug 2023 10:11:20 +0200 Subject: [PATCH 09/30] [README.md] openSUSE --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6beeba9..76b8565 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ To install and use `nvidia-vaapi-driver`, follow the steps in installation and c [pkgs.org/nvidia-vaapi-driver](https://pkgs.org/search/?q=nvidia-vaapi-driver) [pkgs.org/libva-nvidia-driver](https://pkgs.org/search/?q=libva-nvidia-driver) -[openSUSE](https://software.opensuse.org/package/libva-nvidia-driver) +openSUSE: [1](https://software.opensuse.org/package/nvidia-vaapi-driver), [2](https://software.opensuse.org/package/libva-nvidia-driver). Feel free to add your distributions package in an issue/PR, if it isn't on these websites. From 3ed6a6cfb5eb1189e91d2585810b0c1481de7e0c Mon Sep 17 00:00:00 2001 From: begin-theadventure <99835765+begin-theadventure@users.noreply.github.com> Date: Thu, 24 Aug 2023 10:17:07 +0200 Subject: [PATCH 10/30] [README.md] Link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 76b8565..b2d91ee 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ This is an VA-API implementation that uses NVDEC as a backend. 
This implementati - [Table of contents](#table-of-contents) - [Codec Support](#codec-support) - [Installation](#installation) - - [Package manager](#package-manager) + - [Packaging status](#packaging-status) - [Building](#building) - [Removal](#removal) - [Configuration](#configuration) From 489b82f76c9ce11a8f143fb5a8ad4ab14016bdcf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Sep 2023 07:11:31 +0000 Subject: [PATCH 11/30] Bump actions/checkout from 3 to 4 Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/ubuntu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 74d630d..d36146b 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -14,7 +14,7 @@ jobs: DISTRO: ${{ matrix.os }} steps: - name: 'Checkout' - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: 'Install toolchain' if: ${{ (matrix.compiler == 'clang-15') }} run: .github/workflows/install-clang.sh 15 From 2f375efa5f63a5ef8318600ee785600538b1430d Mon Sep 17 00:00:00 2001 From: crimist Date: Mon, 25 Sep 2023 00:32:53 -0700 Subject: [PATCH 12/30] readme: update egl regression info --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b2d91ee..7907b71 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,11 @@ This is an VA-API implementation that uses NVDEC as a backend. 
This implementati - [Building](#building) - [Removal](#removal) - [Configuration](#configuration) + - [Upstream regressions](#upstream-regressions) - [Kernel parameters](#kernel-parameters) - [Environment Variables](#environment-variables) - [Firefox](#firefox) + - [Chrome](#chrome) - [MPV](#mpv) - [Direct Backend](#direct-backend) - [Testing](#testing) @@ -84,7 +86,11 @@ By default the driver installs itself as `/usr/lib64/dri/nvidia_drv_video.so` (t # Configuration -**IMPORTANT**: The [direct backend](#direct-backend) is currently required on NVIDIA driver series 525 due to a regression (see [issue #126](/../../issues/126)). +## Upstream regressions + +The EGL backend is broken on driver versions 525 or later due to a regression. Users running these drivers should use the [direct backend](#direct-backend) instead. + +For more information read the [upstream bug report](https://forums.developer.nvidia.com/t/cueglstreamproducerconnect-returns-error-801-on-525-53-driver/233610) or [issue #126](/../../issues/126). 
## Kernel parameters From d40a44f2cd84132bb1b77dcbaf5b5514c5e8f045 Mon Sep 17 00:00:00 2001 From: thesword53 Date: Mon, 9 Oct 2023 19:47:27 +0200 Subject: [PATCH 13/30] Use VA_CHECK_VERSION for VA_FOURCC_Q416 --- src/vabackend.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/vabackend.c b/src/vabackend.c index 9c39c09..7f08496 100644 --- a/src/vabackend.c +++ b/src/vabackend.c @@ -54,9 +54,7 @@ const NVFormatInfo formatsInfo[] = [NV_FORMAT_P012] = {2, 2, DRM_FORMAT_P012, true, false, {{1, DRM_FORMAT_R16, {0,0}}, {2, DRM_FORMAT_RG1616, {1,1}}}, {VA_FOURCC_P012, VA_LSB_FIRST, 24, 0,0,0,0,0}}, [NV_FORMAT_P016] = {2, 2, DRM_FORMAT_P016, true, false, {{1, DRM_FORMAT_R16, {0,0}}, {2, DRM_FORMAT_RG1616, {1,1}}}, {VA_FOURCC_P016, VA_LSB_FIRST, 24, 0,0,0,0,0}}, [NV_FORMAT_444P] = {1, 3, DRM_FORMAT_YUV444, false, true, {{1, DRM_FORMAT_R8, {0,0}}, {1, DRM_FORMAT_R8, {0,0}}, {1, DRM_FORMAT_R8, {0,0}}}, {VA_FOURCC_444P, VA_LSB_FIRST, 24, 0,0,0,0,0}}, - // Nvidia decoder only supports YUV444 planar formats with 3 planes so we can't use VA_FOURCC_Y416. - // VA_FOURCC_Q416 isn't defined in va.h yet. 
-#if defined(VA_FOURCC_Q416) +#if VA_CHECK_VERSION(1, 20, 0) [NV_FORMAT_Q416] = {2, 3, DRM_FORMAT_INVALID, true, true, {{1, DRM_FORMAT_R16, {0,0}}, {1, DRM_FORMAT_R16, {0,0}}, {1, DRM_FORMAT_R16,{0,0}}}, {VA_FOURCC_Q416, VA_LSB_FIRST, 48, 0,0,0,0,0}}, #endif }; @@ -499,8 +497,7 @@ static VAStatus nvQueryConfigProfiles( profile_list[profiles++] = VAProfileAV1Profile1; } - // Currently VAAPI doesn't support yuv444p10 yuv444p12 and yuv444p16 -#if defined(VA_FOURCC_Q416) +#if VA_CHECK_VERSION(1, 20, 0) if (drv->supports16BitSurface) { if (doesGPUSupportCodec(cudaVideoCodec_HEVC, 10, cudaVideoChromaFormat_444, NULL, NULL)) { profile_list[profiles++] = VAProfileHEVCMain444_10; @@ -1778,7 +1775,7 @@ static VAStatus nvQuerySurfaceAttributes( int cnt = 4; if (cfg->chromaFormat == cudaVideoChromaFormat_444) { cnt += 1; -#ifdef VA_FOURCC_Q416 +#if VA_CHECK_VERSION(1, 20, 0) cnt += 1; #endif } else { @@ -1834,7 +1831,7 @@ static VAStatus nvQuerySurfaceAttributes( attrib_list[attrib_idx].value.type = VAGenericValueTypeInteger; attrib_list[attrib_idx].value.value.i = VA_FOURCC_444P; attrib_idx += 1; -#ifdef VA_FOURCC_Q416 +#if VA_CHECK_VERSION(1, 20, 0) attrib_list[attrib_idx].type = VASurfaceAttribPixelFormat; attrib_list[attrib_idx].flags = 0; attrib_list[attrib_idx].value.type = VAGenericValueTypeInteger; From 41e26cd1564887ba0e766212380648639e0e6ea0 Mon Sep 17 00:00:00 2001 From: Stephen <2325080+elFarto@users.noreply.github.com> Date: Sun, 29 Oct 2023 10:22:42 +0000 Subject: [PATCH 14/30] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 7907b71..f7b6287 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,8 @@ When libva is used it will log out some information, which can be excessive when LIBVA_MESSAGING_LEVEL=1 ``` +If you're using the Snap version of Firefox, it will be unable to access the host version of the driver that is installed. + ## Chrome Chrome is currently unsupported, and will not function. 
From 98887098da50b9acff686a1a0e468df3926b47b2 Mon Sep 17 00:00:00 2001 From: Stephen Date: Sat, 4 Nov 2023 11:55:51 +0000 Subject: [PATCH 15/30] Fix issue with newer 545.29.02 drivers --- nvidia-include/nvidia-drm-ioctl.h | 20 +++++++++++ src/direct/nv-driver.c | 56 ++++++++++++++++++++++--------- src/direct/nv-driver.h | 6 +++- 3 files changed, 65 insertions(+), 17 deletions(-) diff --git a/nvidia-include/nvidia-drm-ioctl.h b/nvidia-include/nvidia-drm-ioctl.h index dcc99c7..d621b1e 100644 --- a/nvidia-include/nvidia-drm-ioctl.h +++ b/nvidia-include/nvidia-drm-ioctl.h @@ -56,6 +56,10 @@ DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_DEV_INFO), \ struct drm_nvidia_get_dev_info_params) +#define DRM_IOCTL_NVIDIA_GET_DEV_INFO_545 \ + DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_DEV_INFO), \ + struct drm_nvidia_get_dev_info_params_545) + /* * XXX Solaris compiler has issues with DRM_IO. None of this is supported on * Solaris anyway, so just skip it. @@ -136,6 +140,22 @@ struct drm_nvidia_get_dev_info_params { uint32_t sector_layout; /* OUT */ }; +struct drm_nvidia_get_dev_info_params_545 { + uint32_t gpu_id; /* OUT */ + uint32_t primary_index; /* OUT; the "card%d" value */ + + uint32_t supports_alloc; /* OUT */ + /* The generic_page_kind, page_kind_generation, and sector_layout + * fields are only valid if supports_alloc is true. + * See DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D definitions of these. 
*/ + uint32_t generic_page_kind; /* OUT */ + uint32_t page_kind_generation; /* OUT */ + uint32_t sector_layout; /* OUT */ + uint32_t supports_sync_fd; /* OUT */ + uint32_t supports_semsurf; /* OUT */ +}; + + struct drm_nvidia_fence_context_create_params { uint32_t handle; /* OUT GEM handle to fence context */ diff --git a/src/direct/nv-driver.c b/src/direct/nv-driver.c index 3b65f49..7724f6c 100644 --- a/src/direct/nv-driver.c +++ b/src/direct/nv-driver.c @@ -190,12 +190,35 @@ static bool nv0_register_fd(int nv0_fd, int nvctl_fd) { return true; } -static bool get_device_info(int fd, struct drm_nvidia_get_dev_info_params *devInfo) { - int ret = ioctl(fd, DRM_IOCTL_NVIDIA_GET_DEV_INFO, devInfo); +static bool get_device_info(int fd, NVDriverContext *context) { + //NVIDIA driver v545.29.02 changed the devInfo struct, and partly broke it in the process + //...who adds a field to the middle of an existing struct.... + if (context->driverMajorVersion >= 545 && context->driverMinorVersion >= 29) { + struct drm_nvidia_get_dev_info_params_545 devInfo545; + int ret = ioctl(fd, DRM_IOCTL_NVIDIA_GET_DEV_INFO_545, &devInfo545); + + if (ret != 0) { + LOG("get_device_info failed: %d %d", ret, errno); + return false; + } - if (ret != 0) { - LOG("get_device_info failed: %d %d", ret, errno); - return false; + context->gpu_id = devInfo545.gpu_id; + context->sector_layout = devInfo545.sector_layout; + context->page_kind_generation = devInfo545.page_kind_generation; + context->generic_page_kind = devInfo545.generic_page_kind; + } else { + struct drm_nvidia_get_dev_info_params devInfo; + int ret = ioctl(fd, DRM_IOCTL_NVIDIA_GET_DEV_INFO, &devInfo); + + if (ret != 0) { + LOG("get_device_info failed: %d %d", ret, errno); + return false; + } + + context->gpu_id = devInfo.gpu_id; + context->sector_layout = devInfo.sector_layout; + context->page_kind_generation = devInfo.page_kind_generation; + context->generic_page_kind = devInfo.generic_page_kind; } return true; @@ -203,7 +226,7 @@ static 
bool get_device_info(int fd, struct drm_nvidia_get_dev_info_params *devIn bool get_device_uuid(NVDriverContext *context, char uuid[16]) { NV0000_CTRL_GPU_GET_UUID_FROM_GPU_ID_PARAMS uuidParams = { - .gpuId = context->devInfo.gpu_id, + .gpuId = context->gpu_id, .flags = NV0000_CTRL_CMD_GPU_GET_UUID_FROM_GPU_ID_FLAGS_FORMAT_BINARY | NV0000_CTRL_CMD_GPU_GET_UUID_FROM_GPU_ID_FLAGS_TYPE_SHA1 }; @@ -221,12 +244,6 @@ bool get_device_uuid(NVDriverContext *context, char uuid[16]) { bool init_nvdriver(NVDriverContext *context, int drmFd) { LOG("Initing nvdriver..."); - if (!get_device_info(drmFd, &context->devInfo)) { - return false; - } - - LOG("Got dev info: %x %x %x %x", context->devInfo.gpu_id, context->devInfo.sector_layout, context->devInfo.page_kind_generation, context->devInfo.generic_page_kind); - int nvctlFd = -1, nv0Fd = -1; nvctlFd = open("/dev/nvidiactl", O_RDWR|O_CLOEXEC); @@ -243,9 +260,16 @@ bool init_nvdriver(NVDriverContext *context, int drmFd) { char *ver = NULL; nv_get_versions(nvctlFd, &ver); context->driverMajorVersion = atoi(ver); - LOG("NVIDIA kernel driver version: %s, major version: %d", ver, context->driverMajorVersion); + context->driverMinorVersion = atoi(ver+4); + LOG("NVIDIA kernel driver version: %s, major version: %d, minor version: %d", ver, context->driverMajorVersion, context->driverMinorVersion); free(ver); + if (!get_device_info(drmFd, context)) { + return false; + } + + LOG("Got dev info: %x %x %x %x", context->gpu_id, context->sector_layout, context->page_kind_generation, context->generic_page_kind); + //allocate the root object bool ret = nv_alloc_object(nvctlFd, context->driverMajorVersion, NULL_OBJECT, NULL_OBJECT, &context->clientObject, NV01_ROOT_CLIENT, 0, (void*)0); if (!ret) { @@ -254,7 +278,7 @@ bool init_nvdriver(NVDriverContext *context, int drmFd) { } //attach the drm fd to this handle - ret = nv_attach_gpus(nvctlFd, context->devInfo.gpu_id); + ret = nv_attach_gpus(nvctlFd, context->gpu_id); if (!ret) { LOG("nv_attach_gpu 
failed"); goto err; @@ -372,7 +396,7 @@ bool alloc_memory(NVDriverContext *context, uint32_t size, int *fd) { } //attach the new fd to the correct gpus - ret = nv_attach_gpus(nvctlFd2, context->devInfo.gpu_id); + ret = nv_attach_gpus(nvctlFd2, context->gpu_id); if (!ret) { LOG("nv_attach_gpus failed"); goto err; @@ -499,7 +523,7 @@ bool alloc_image(NVDriverContext *context, uint32_t width, uint32_t height, uint image->nvFd = memFd; image->nvFd2 = memFd2; //not sure why we can't close this one, we shouldn't need it after importing the image image->drmFd = prime_handle.fd; - image->mods = DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, context->devInfo.sector_layout, context->devInfo.page_kind_generation, context->devInfo.generic_page_kind, log2GobsPerBlockY); + image->mods = DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, context->sector_layout, context->page_kind_generation, context->generic_page_kind, log2GobsPerBlockY); image->offset = 0; image->pitch = widthInBytes; image->memorySize = imageSizeInBytes; diff --git a/src/direct/nv-driver.h b/src/direct/nv-driver.h index eff5ca9..ff62b50 100644 --- a/src/direct/nv-driver.h +++ b/src/direct/nv-driver.h @@ -12,12 +12,16 @@ typedef struct { int nvctlFd; int nv0Fd; int drmFd; - struct drm_nvidia_get_dev_info_params devInfo; uint32_t clientObject; uint32_t deviceObject; uint32_t subdeviceObject; uint32_t driverMajorVersion; + uint32_t driverMinorVersion; //bool hasHugePage; + uint32_t gpu_id; + uint32_t generic_page_kind; + uint32_t page_kind_generation; + uint32_t sector_layout; } NVDriverContext; typedef struct { From ea6d8623d3690cc2efa13e8b5711559226c46ae7 Mon Sep 17 00:00:00 2001 From: Stephen Date: Mon, 6 Nov 2023 19:48:00 +0000 Subject: [PATCH 16/30] Add check for Chromium passing 0 surfaces. 
--- src/vabackend.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/vabackend.c b/src/vabackend.c index 7f08496..ac267a1 100644 --- a/src/vabackend.c +++ b/src/vabackend.c @@ -1006,6 +1006,11 @@ static VAStatus nvCreateContext( cfg->bitDepth = surface->bitDepth; } + if (drv->surfaceCount == 0 && num_render_targets == 0) { + LOG("0 surfaces have been passed to vaCreateContext, this might cause errors. Setting surface count to 16"); + num_render_targets = 16; + } + CUVIDDECODECREATEINFO vdci = { .ulWidth = vdci.ulMaxWidth = vdci.ulTargetWidth = picture_width, .ulHeight = vdci.ulMaxHeight = vdci.ulTargetHeight = picture_height, From f276397c5392039f541ff11d2ea86b7e77631f7a Mon Sep 17 00:00:00 2001 From: Stephen Date: Sat, 11 Nov 2023 19:08:06 +0000 Subject: [PATCH 17/30] first attempt at single buffer export --- src/direct/direct-export-buf.c | 79 +++++++++++++++++++----- src/direct/nv-driver.c | 108 +++++++++++++++++++++++++++++++-- src/direct/nv-driver.h | 2 + src/vabackend.c | 2 +- src/vabackend.h | 2 + 5 files changed, 171 insertions(+), 22 deletions(-) diff --git a/src/direct/direct-export-buf.c b/src/direct/direct-export-buf.c index 636c554..683364b 100644 --- a/src/direct/direct-export-buf.c +++ b/src/direct/direct-export-buf.c @@ -186,25 +186,75 @@ static BackingImage *direct_allocateBackingImage(NVDriver *drv, const NVSurface p = fmtInfo->plane; LOG("Allocating BackingImages: %p %dx%d", backingImage, surface->width, surface->height); + uint32_t totalSize = 0; for (uint32_t i = 0; i < fmtInfo->numPlanes; i++) { - alloc_image(&drv->driverContext, surface->width >> p[i].ss.x, surface->height >> p[i].ss.y, - p[i].channelCount, 8 * fmtInfo->bppc, p[i].fourcc, &driverImages[i]); + driverImages[i].offset = totalSize; + + totalSize += calculate_image_size(surface->width >> p[i].ss.x, surface->height >> p[i].ss.y, + p[i].channelCount, 8 * fmtInfo->bppc, &driverImages[i].pitch); + + totalSize = ROUND_UP(totalSize, 64); } - LOG("Importing images"); + 
backingImage->totalSize = totalSize; + + //alloc memory + // alloc_buffer(NVDriverContext *context, uint32_t size, uint32_t widthInBytes, int *fd1, int *fd2, int *drmFd); + int memFd = 0, memFd2 = 0, drmFd = 0; + bool ret = alloc_buffer(&drv->driverContext, totalSize, driverImages[0].pitch, &memFd, &memFd2, &drmFd); + LOG("Allocate Buffer: %d %d %d %d", ret, memFd, memFd2, drmFd); + + for (uint32_t i = 0; i < fmtInfo->numPlanes; i++) { + driverImages[i].width = surface->width >> p[i].ss.x; + driverImages[i].height = surface->height >> p[i].ss.y; + driverImages[i].drmFd = drmFd; + driverImages[i].mods = DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, drv->driverContext.sector_layout, drv->driverContext.page_kind_generation, drv->driverContext.generic_page_kind, 4); + //driverImages[i].memorySize = calculate_image_size(surface->width >> p[i].ss.x, surface->height >> p[i].ss.y, p[i].channelCount, 8 * fmtInfo->bppc, NULL); + driverImages[i].fourcc = p[i].fourcc; + } + + CUDA_EXTERNAL_MEMORY_HANDLE_DESC extMemDesc = { + .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD, + .handle.fd = memFd, + .flags = 0, + .size = totalSize + }; + + LOG("importing memory to CUDA: %d bytes", totalSize); + CHECK_CUDA_RESULT_RETURN(drv->cu->cuImportExternalMemory(&backingImage->extMem, &extMemDesc), false); + + close(memFd); + close(memFd2); + for (uint32_t i = 0; i < fmtInfo->numPlanes; i++) { - if (!import_to_cuda(drv, &driverImages[i], 8 * fmtInfo->bppc, p[i].channelCount, &backingImage->cudaImages[i], &backingImage->arrays[i])) - goto bail; + CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC mipmapArrayDesc = { + .arrayDesc = { + .Width = driverImages[i].width, + .Height = driverImages[i].height, + .Depth = 0, + .Format = fmtInfo->bppc == 1 ? 
CU_AD_FORMAT_UNSIGNED_INT8 : CU_AD_FORMAT_UNSIGNED_INT16, + .NumChannels = p[i].channelCount, + .Flags = 0 + }, + .numLevels = 1, + .offset = driverImages[i].offset + }; + + //create a mimap array from the imported memory + CHECK_CUDA_RESULT_RETURN(drv->cu->cuExternalMemoryGetMappedMipmappedArray(&backingImage->cudaImages[i].mipmapArray, backingImage->extMem, &mipmapArrayDesc), false); + + //create an array from the mipmap array + CHECK_CUDA_RESULT_RETURN(drv->cu->cuMipmappedArrayGetLevel(&backingImage->arrays[i], backingImage->cudaImages[i].mipmapArray, 0), false); } backingImage->width = surface->width; backingImage->height = surface->height; for (uint32_t i = 0; i < fmtInfo->numPlanes; i++) { - backingImage->fds[i] = driverImages[i].drmFd; + backingImage->fds[i] = drmFd; backingImage->strides[i] = driverImages[i].pitch; backingImage->mods[i] = driverImages[i].mods; - backingImage->size[i] = driverImages[i].memorySize; + backingImage->offsets[i] = driverImages[i].offset; } return backingImage; @@ -247,8 +297,8 @@ static void destroyBackingImage(NVDriver *drv, BackingImage *img) { } CHECK_CUDA_RESULT(drv->cu->cuMipmappedArrayDestroy(img->cudaImages[i].mipmapArray)); - CHECK_CUDA_RESULT(drv->cu->cuDestroyExternalMemory(img->cudaImages[i].extMem)); } + CHECK_CUDA_RESULT(drv->cu->cuDestroyExternalMemory(img->extMem)); memset(img, 0, sizeof(BackingImage)); free(img); @@ -301,7 +351,7 @@ static bool copyFrameToSurface(NVDriver *drv, CUdeviceptr ptr, NVSurface *surfac } else { CHECK_CUDA_RESULT(drv->cu->cuMemcpy2DAsync(&cpy, 0)); } - y += surface->height >> p->ss.y; + y += cpy.Height; } //notify anyone waiting for us to be resolved @@ -357,16 +407,15 @@ static bool direct_fillExportDescriptor(NVDriver *drv, NVSurface *surface, VADRM desc->height = surface->height; desc->num_layers = fmtInfo->numPlanes; - desc->num_objects = fmtInfo->numPlanes; + desc->num_objects = 1; + desc->objects[0].fd = dup(img->fds[0]); + desc->objects[0].size = img->totalSize; + 
desc->objects[0].drm_format_modifier = img->mods[0]; for (uint32_t i = 0; i < fmtInfo->numPlanes; i++) { - desc->objects[i].fd = dup(img->fds[i]); - desc->objects[i].size = img->size[i]; - desc->objects[i].drm_format_modifier = img->mods[i]; - desc->layers[i].drm_format = fmtInfo->plane[i].fourcc; desc->layers[i].num_planes = 1; - desc->layers[i].object_index[0] = i; + desc->layers[i].object_index[0] = 0; desc->layers[i].offset[0] = img->offsets[i]; desc->layers[i].pitch[0] = img->strides[i]; } diff --git a/src/direct/nv-driver.c b/src/direct/nv-driver.c index 7724f6c..bfa488e 100644 --- a/src/direct/nv-driver.c +++ b/src/direct/nv-driver.c @@ -432,7 +432,8 @@ bool alloc_memory(NVDriverContext *context, uint32_t size, int *fd) { return false; } -bool alloc_image(NVDriverContext *context, uint32_t width, uint32_t height, uint8_t channels, uint8_t bitsPerChannel, uint32_t fourcc, NVDriverImage *image) { +uint32_t calculate_image_size(uint32_t width, uint32_t height, uint8_t channels, uint8_t bitsPerChannel, uint32_t* widthInBytesOut) +{ uint32_t gobWidthInBytes = 64; uint32_t gobHeightInBytes = 8; @@ -449,12 +450,107 @@ bool alloc_image(NVDriverContext *context, uint32_t width, uint32_t height, uint //These two seem to be correct, but it was discovered by trial and error so I'm not 100% sure uint32_t widthInBytes = ROUND_UP(width * bytesPerPixel, gobWidthInBytes << log2GobsPerBlockX); uint32_t alignedHeight = ROUND_UP(height, gobHeightInBytes << log2GobsPerBlockY); - uint32_t imageSizeInBytes = widthInBytes * alignedHeight; - uint32_t size = imageSizeInBytes; LOG("Aligned image size: %dx%d = %d", widthInBytes, alignedHeight, imageSizeInBytes); + if (widthInBytesOut != NULL) { + *widthInBytesOut = widthInBytes; + } + return imageSizeInBytes; +} + +bool alloc_buffer(NVDriverContext *context, uint32_t size, uint32_t widthInBytes, int *fd1, int *fd2, int *drmFd) { + int memFd = -1; + bool ret = alloc_memory(context, size, &memFd); + if (!ret) { + LOG("alloc_memory 
failed"); + return false; + } + + //now export the dma-buf + uint32_t pitchInBlocks = widthInBytes / 64; //TODO replace with better constants + + //printf("got gobsPerBlock: %ux%u %u %u %u %d\n", width, height, log2GobsPerBlockX, log2GobsPerBlockY, log2GobsPerBlockZ, pitchInBlocks); + //duplicate the fd so we don't invalidate it by importing it + int memFd2 = dup(memFd); + if (memFd2 == -1) { + LOG("dup failed"); + goto err; + } + + struct NvKmsKapiPrivImportMemoryParams nvkmsParams = { + .memFd = memFd2, + .surfaceParams = { + .layout = NvKmsSurfaceMemoryLayoutBlockLinear, + .blockLinear = { + .genericMemory = 0, + .pitchInBlocks = pitchInBlocks, + .log2GobsPerBlock.x = 0, + .log2GobsPerBlock.y = 4, //TODO replace with better constants + .log2GobsPerBlock.z = 0, + } + } + }; + + struct drm_nvidia_gem_import_nvkms_memory_params params = { + .mem_size = size, + .nvkms_params_ptr = (uint64_t) &nvkmsParams, + .nvkms_params_size = context->driverMajorVersion == 470 ? 0x20 : sizeof(nvkmsParams) //needs to be 0x20 in the 470 series driver + }; + int drmret = ioctl(context->drmFd, DRM_IOCTL_NVIDIA_GEM_IMPORT_NVKMS_MEMORY, ¶ms); + if (drmret != 0) { + LOG("DRM_IOCTL_NVIDIA_GEM_IMPORT_NVKMS_MEMORY failed: %d %d", drmret, errno); + goto err; + } + + //export dma-buf + struct drm_prime_handle prime_handle = { + .handle = params.handle + }; + drmret = ioctl(context->drmFd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &prime_handle); + if (drmret != 0) { + LOG("DRM_IOCTL_PRIME_HANDLE_TO_FD failed: %d %d", drmret, errno); + goto err; + } + + struct drm_gem_close gem_close = { + .handle = params.handle + }; + drmret = ioctl(context->drmFd, DRM_IOCTL_GEM_CLOSE, &gem_close); + if (drmret != 0) { + LOG("DRM_IOCTL_GEM_CLOSE failed: %d %d", drmret, errno); + goto prime_err; + } + + *fd1 = memFd; + *fd2 = memFd2; + *drmFd = prime_handle.fd; + return true; + +prime_err: + if (prime_handle.fd > 0) { + close(prime_handle.fd); + } + +err: + if (memFd > 0) { + close(memFd); + } + + return false; +} + 
+bool alloc_image(NVDriverContext *context, uint32_t width, uint32_t height, uint8_t channels, uint8_t bitsPerChannel, uint32_t fourcc, NVDriverImage *image) { + uint32_t gobWidthInBytes = 64; + + uint32_t log2GobsPerBlockX = 0; //TODO not sure if these are the correct numbers to start with, but they're the largest ones i've seen used + uint32_t log2GobsPerBlockY = height < 88 ? 3 : 4; //TODO 88 is a guess, 80px high needs 3, 112px needs 4, 96px needs 4, 88px needs 4 + uint32_t log2GobsPerBlockZ = 0; + + uint32_t widthInBytes = 0; + uint32_t size = calculate_image_size(width, height, channels, bitsPerChannel, &widthInBytes); + //this gets us some memory, and the fd to import into cuda int memFd = -1; bool ret = alloc_memory(context, size, &memFd); @@ -489,7 +585,7 @@ bool alloc_image(NVDriverContext *context, uint32_t width, uint32_t height, uint }; struct drm_nvidia_gem_import_nvkms_memory_params params = { - .mem_size = imageSizeInBytes, + .mem_size = size, .nvkms_params_ptr = (uint64_t) &nvkmsParams, .nvkms_params_size = context->driverMajorVersion == 470 ? 
0x20 : sizeof(nvkmsParams) //needs to be 0x20 in the 470 series driver }; @@ -526,10 +622,10 @@ bool alloc_image(NVDriverContext *context, uint32_t width, uint32_t height, uint image->mods = DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, context->sector_layout, context->page_kind_generation, context->generic_page_kind, log2GobsPerBlockY); image->offset = 0; image->pitch = widthInBytes; - image->memorySize = imageSizeInBytes; + image->memorySize = size; image->fourcc = fourcc; - LOG("created image: %dx%d %lx %d %x", width, height, image->mods, widthInBytes, imageSizeInBytes); + LOG("created image: %dx%d %lx %d %x", width, height, image->mods, widthInBytes, size); return true; diff --git a/src/direct/nv-driver.h b/src/direct/nv-driver.h index ff62b50..494750a 100644 --- a/src/direct/nv-driver.h +++ b/src/direct/nv-driver.h @@ -42,5 +42,7 @@ bool free_nvdriver(NVDriverContext *context); bool get_device_uuid(NVDriverContext *context, char uuid[16]); bool alloc_memory(NVDriverContext *context, uint32_t size, int *fd); bool alloc_image(NVDriverContext *context, uint32_t width, uint32_t height, uint8_t channels, uint8_t bytesPerChannel, uint32_t fourcc, NVDriverImage *image); +bool alloc_buffer(NVDriverContext *context, uint32_t size, uint32_t widthInBytes, int *fd1, int *fd2, int *drmFd); +uint32_t calculate_image_size(uint32_t width, uint32_t height, uint8_t channels, uint8_t bitsPerChannel, uint32_t* widthInBytesOut); #endif diff --git a/src/vabackend.c b/src/vabackend.c index ac267a1..ce6299c 100644 --- a/src/vabackend.c +++ b/src/vabackend.c @@ -2039,7 +2039,7 @@ static VAStatus nvExportSurfaceHandle( drv->backend->fillExportDescriptor(drv, surface, ptr); - LOG("Exporting with %d %d %d %d %" PRIx64 " %d %d %" PRIx64, ptr->width, ptr->height, ptr->layers[0].offset[0], + LOG("Exporting with w:%d h:%d o:%d p:%d m:%" PRIx64 " o:%d p:%d m:%" PRIx64, ptr->width, ptr->height, ptr->layers[0].offset[0], ptr->layers[0].pitch[0], ptr->objects[0].drm_format_modifier, 
ptr->layers[1].offset[0], ptr->layers[1].pitch[0], ptr->objects[1].drm_format_modifier); diff --git a/src/vabackend.h b/src/vabackend.h index 7c2f708..9b61724 100644 --- a/src/vabackend.h +++ b/src/vabackend.h @@ -107,6 +107,8 @@ typedef struct _BackingImage { //direct backend only NVCudaImage cudaImages[3]; NVFormat format; + uint32_t totalSize; + CUexternalMemory extMem; } BackingImage; struct _NVDriver; From 6d3ba1de028aaa3c01966c6c28db91fafa12ecef Mon Sep 17 00:00:00 2001 From: Stephen Date: Sun, 12 Nov 2023 10:10:52 +0000 Subject: [PATCH 18/30] tidied up code --- src/common.h | 21 +++ src/direct/direct-export-buf.c | 161 ++++++++--------------- src/direct/nv-driver.c | 229 ++++++++++----------------------- src/direct/nv-driver.h | 17 ++- src/vabackend.h | 14 +- 5 files changed, 156 insertions(+), 286 deletions(-) create mode 100644 src/common.h diff --git a/src/common.h b/src/common.h new file mode 100644 index 0000000..3ca0af0 --- /dev/null +++ b/src/common.h @@ -0,0 +1,21 @@ +// +// Created by stephen on 12/11/23. 
+// + +#ifndef COMMON_H +#define COMMON_H + +typedef struct +{ + uint32_t x; + uint32_t y; +} NVSubSampling; + +typedef struct +{ + uint32_t channelCount; + uint32_t fourcc; + NVSubSampling ss; // subsampling +} NVFormatPlane; + +#endif //COMMON_H diff --git a/src/direct/direct-export-buf.c b/src/direct/direct-export-buf.c index 683364b..523bcf1 100644 --- a/src/direct/direct-export-buf.c +++ b/src/direct/direct-export-buf.c @@ -14,6 +14,8 @@ #include #include +static void destroyBackingImage(NVDriver *drv, BackingImage *img); + static void findGPUIndexFromFd(NVDriver *drv) { //find the CUDA device id char drmUuid[16]; @@ -45,7 +47,7 @@ static void debug(EGLenum error,const char *command,EGLint messageType,EGLLabelK static bool direct_initExporter(NVDriver *drv) { //this is only needed to see errors in firefox static const EGLAttrib debugAttribs[] = {EGL_DEBUG_MSG_WARN_KHR, EGL_TRUE, EGL_DEBUG_MSG_INFO_KHR, EGL_TRUE, EGL_NONE}; - PFNEGLDEBUGMESSAGECONTROLKHRPROC eglDebugMessageControlKHR = (PFNEGLDEBUGMESSAGECONTROLKHRPROC) eglGetProcAddress("eglDebugMessageControlKHR"); + const PFNEGLDEBUGMESSAGECONTROLKHRPROC eglDebugMessageControlKHR = (PFNEGLDEBUGMESSAGECONTROLKHRPROC) eglGetProcAddress("eglDebugMessageControlKHR"); eglDebugMessageControlKHR(debug, debugAttribs); //make sure we have a drm fd @@ -56,7 +58,7 @@ static bool direct_initExporter(NVDriver *drv) { nvdGpu = 0; } - int fd = -1; + int fd; int nvIdx = 0; uint8_t drmIdx = 128; char node[20] = {0, }; @@ -79,7 +81,7 @@ static bool direct_initExporter(NVDriver *drv) { continue; } break; - } while (fd != -1); + } while (drmIdx < 16); drv->drmFd = fd; LOG("Found NVIDIA GPU %d at %s", nvdGpu, node); @@ -92,7 +94,7 @@ static bool direct_initExporter(NVDriver *drv) { drv->drmFd = dup(drv->drmFd); } - bool ret = init_nvdriver(&drv->driverContext, drv->drmFd); + const bool ret = init_nvdriver(&drv->driverContext, drv->drmFd); //TODO this isn't really correct as we don't know if the driver version actually supports 
importing them //but we don't have an easy way to find out. @@ -107,50 +109,8 @@ static void direct_releaseExporter(NVDriver *drv) { free_nvdriver(&drv->driverContext); } -static bool import_to_cuda(NVDriver *drv, NVDriverImage *image, int bpc, int channels, NVCudaImage *cudaImage, CUarray *array) { - CUDA_EXTERNAL_MEMORY_HANDLE_DESC extMemDesc = { - .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD, - .handle.fd = image->nvFd, - .flags = 0, - .size = image->memorySize - }; - - LOG("importing memory size: %dx%d = %x", image->width, image->height, image->memorySize); - - CHECK_CUDA_RESULT_RETURN(drv->cu->cuImportExternalMemory(&cudaImage->extMem, &extMemDesc), false); - - //For some reason, this close *must* be *here*, otherwise we will get random visual glitches. - close(image->nvFd); - close(image->nvFd2); - image->nvFd = 0; - image->nvFd2 = 0; - - CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC mipmapArrayDesc = { - .arrayDesc = { - .Width = image->width, - .Height = image->height, - .Depth = 0, - .Format = bpc == 8 ? 
CU_AD_FORMAT_UNSIGNED_INT8 : CU_AD_FORMAT_UNSIGNED_INT16, - .NumChannels = channels, - .Flags = 0 - }, - .numLevels = 1, - .offset = 0 - }; - //create a mimap array from the imported memory - CHECK_CUDA_RESULT_RETURN(drv->cu->cuExternalMemoryGetMappedMipmappedArray(&cudaImage->mipmapArray, cudaImage->extMem, &mipmapArrayDesc), false); - - //create an array from the mipmap array - CHECK_CUDA_RESULT_RETURN(drv->cu->cuMipmappedArrayGetLevel(array, cudaImage->mipmapArray, 0), false); - - return true; -} - -static BackingImage *direct_allocateBackingImage(NVDriver *drv, const NVSurface *surface) { +static BackingImage *direct_allocateBackingImage(NVDriver *drv, NVSurface *surface) { NVDriverImage driverImages[3] = { 0 }; - const NVFormatInfo *fmtInfo; - const NVFormatPlane *p; - BackingImage *backingImage = calloc(1, sizeof(BackingImage)); switch (surface->format) @@ -182,50 +142,39 @@ static BackingImage *direct_allocateBackingImage(NVDriver *drv, const NVSurface break; } - fmtInfo = &formatsInfo[backingImage->format]; - p = fmtInfo->plane; - - LOG("Allocating BackingImages: %p %dx%d", backingImage, surface->width, surface->height); - uint32_t totalSize = 0; - for (uint32_t i = 0; i < fmtInfo->numPlanes; i++) { - driverImages[i].offset = totalSize; + const NVFormatInfo *fmtInfo = &formatsInfo[backingImage->format]; - totalSize += calculate_image_size(surface->width >> p[i].ss.x, surface->height >> p[i].ss.y, - p[i].channelCount, 8 * fmtInfo->bppc, &driverImages[i].pitch); + backingImage->totalSize = calculate_image_size(&drv->driverContext, driverImages, surface->width, surface->height, fmtInfo->bppc, fmtInfo->numPlanes, fmtInfo->plane); + LOG("Allocating BackingImage: %p %ux%u = %u bytes", backingImage, surface->width, surface->height, backingImage->totalSize); - totalSize = ROUND_UP(totalSize, 64); - } - - backingImage->totalSize = totalSize; - - //alloc memory - // alloc_buffer(NVDriverContext *context, uint32_t size, uint32_t widthInBytes, int *fd1, int *fd2, int 
*drmFd); + //alloc memory - Note this requires that all the planes have the same widthInBytes + //otherwise the value passed to the kernel driver won't be correct, luckily all the formats + //we currently support are all the same width int memFd = 0, memFd2 = 0, drmFd = 0; - bool ret = alloc_buffer(&drv->driverContext, totalSize, driverImages[0].pitch, &memFd, &memFd2, &drmFd); - LOG("Allocate Buffer: %d %d %d %d", ret, memFd, memFd2, drmFd); - - for (uint32_t i = 0; i < fmtInfo->numPlanes; i++) { - driverImages[i].width = surface->width >> p[i].ss.x; - driverImages[i].height = surface->height >> p[i].ss.y; - driverImages[i].drmFd = drmFd; - driverImages[i].mods = DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, drv->driverContext.sector_layout, drv->driverContext.page_kind_generation, drv->driverContext.generic_page_kind, 4); - //driverImages[i].memorySize = calculate_image_size(surface->width >> p[i].ss.x, surface->height >> p[i].ss.y, p[i].channelCount, 8 * fmtInfo->bppc, NULL); - driverImages[i].fourcc = p[i].fourcc; + if (!alloc_buffer(&drv->driverContext, backingImage->totalSize, driverImages, &memFd, &memFd2, &drmFd)) { + goto import_fail; } + LOG("Allocate Buffer: %d %d %d", memFd, memFd2, drmFd); - CUDA_EXTERNAL_MEMORY_HANDLE_DESC extMemDesc = { + //import the memory to CUDA + const CUDA_EXTERNAL_MEMORY_HANDLE_DESC extMemDesc = { .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD, .handle.fd = memFd, .flags = 0, - .size = totalSize + .size = backingImage->totalSize }; - LOG("importing memory to CUDA: %d bytes", totalSize); - CHECK_CUDA_RESULT_RETURN(drv->cu->cuImportExternalMemory(&backingImage->extMem, &extMemDesc), false); + LOG("Importing memory to CUDA"); + if (CHECK_CUDA_RESULT(drv->cu->cuImportExternalMemory(&backingImage->extMem, &extMemDesc))) { + goto import_fail; + } close(memFd); close(memFd2); + memFd = -1; + memFd2 = -1; + //now map the arrays for (uint32_t i = 0; i < fmtInfo->numPlanes; i++) { CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC mipmapArrayDesc = 
{ .arrayDesc = { @@ -233,7 +182,7 @@ static BackingImage *direct_allocateBackingImage(NVDriver *drv, const NVSurface .Height = driverImages[i].height, .Depth = 0, .Format = fmtInfo->bppc == 1 ? CU_AD_FORMAT_UNSIGNED_INT8 : CU_AD_FORMAT_UNSIGNED_INT16, - .NumChannels = p[i].channelCount, + .NumChannels = fmtInfo->plane[i].channelCount, .Flags = 0 }, .numLevels = 1, @@ -241,17 +190,20 @@ static BackingImage *direct_allocateBackingImage(NVDriver *drv, const NVSurface }; //create a mimap array from the imported memory - CHECK_CUDA_RESULT_RETURN(drv->cu->cuExternalMemoryGetMappedMipmappedArray(&backingImage->cudaImages[i].mipmapArray, backingImage->extMem, &mipmapArrayDesc), false); + if (CHECK_CUDA_RESULT(drv->cu->cuExternalMemoryGetMappedMipmappedArray(&backingImage->cudaImages[i].mipmapArray, backingImage->extMem, &mipmapArrayDesc))) { + goto bail; + } //create an array from the mipmap array - CHECK_CUDA_RESULT_RETURN(drv->cu->cuMipmappedArrayGetLevel(&backingImage->arrays[i], backingImage->cudaImages[i].mipmapArray, 0), false); + if (CHECK_CUDA_RESULT(drv->cu->cuMipmappedArrayGetLevel(&backingImage->arrays[i], backingImage->cudaImages[i].mipmapArray, 0))) { + goto bail; + } } backingImage->width = surface->width; backingImage->height = surface->height; - + backingImage->fds[0] = drmFd; for (uint32_t i = 0; i < fmtInfo->numPlanes; i++) { - backingImage->fds[i] = drmFd; backingImage->strides[i] = driverImages[i].pitch; backingImage->mods[i] = driverImages[i].mods; backingImage->offsets[i] = driverImages[i].offset; @@ -260,27 +212,24 @@ static BackingImage *direct_allocateBackingImage(NVDriver *drv, const NVSurface return backingImage; bail: - for (uint32_t i = 0; i < fmtInfo->numPlanes; i++) { - if (driverImages[i].nvFd != 0) { - close(driverImages[i].nvFd); - } - if (driverImages[i].nvFd2 != 0) { - close(driverImages[i].nvFd2); - } - if (driverImages[i].drmFd != 0) { - close(driverImages[i].drmFd); - } - } + destroyBackingImage(drv, backingImage); - if (backingImage 
!= NULL) { - free(backingImage); +import_fail: + if (memFd != 0) { + close(memFd); + } + if (memFd != 0) { + close(memFd); } + if (drmFd != 0) { + close(drmFd); + } + free(backingImage); return NULL; } static void destroyBackingImage(NVDriver *drv, BackingImage *img) { const NVFormatInfo *fmtInfo = &formatsInfo[img->format]; - LOG("Destroying BackingImage: %p", img); if (img->surface != NULL) { img->surface->backingImage = NULL; } @@ -296,9 +245,13 @@ static void destroyBackingImage(NVDriver *drv, BackingImage *img) { CHECK_CUDA_RESULT(drv->cu->cuArrayDestroy(img->arrays[i])); } - CHECK_CUDA_RESULT(drv->cu->cuMipmappedArrayDestroy(img->cudaImages[i].mipmapArray)); + if (img->cudaImages[i].mipmapArray != NULL) { + CHECK_CUDA_RESULT(drv->cu->cuMipmappedArrayDestroy(img->cudaImages[i].mipmapArray)); + } + } + if (img->extMem != NULL) { + CHECK_CUDA_RESULT(drv->cu->cuDestroyExternalMemory(img->extMem)); } - CHECK_CUDA_RESULT(drv->cu->cuDestroyExternalMemory(img->extMem)); memset(img, 0, sizeof(BackingImage)); free(img); @@ -330,8 +283,7 @@ static void direct_destroyAllBackingImage(NVDriver *drv) { } static bool copyFrameToSurface(NVDriver *drv, CUdeviceptr ptr, NVSurface *surface, uint32_t pitch) { - BackingImage *img = surface->backingImage; - const NVFormatInfo *fmtInfo = &formatsInfo[img->format]; + const NVFormatInfo *fmtInfo = &formatsInfo[surface->backingImage->format]; uint32_t y = 0; for (uint32_t i = 0; i < fmtInfo->numPlanes; i++) { @@ -388,10 +340,9 @@ static bool direct_exportCudaPtr(NVDriver *drv, CUdeviceptr ptr, NVSurface *surf return false; } - if (ptr != 0 && !copyFrameToSurface(drv, ptr, surface, pitch)) { - LOG("Unable to update surface from frame"); - return false; - } else if (ptr == 0) { + if (ptr != 0) { + copyFrameToSurface(drv, ptr, surface, pitch); + } else { LOG("exporting with null ptr"); } @@ -399,7 +350,7 @@ static bool direct_exportCudaPtr(NVDriver *drv, CUdeviceptr ptr, NVSurface *surf } static bool direct_fillExportDescriptor(NVDriver 
*drv, NVSurface *surface, VADRMPRIMESurfaceDescriptor *desc) { - BackingImage *img = surface->backingImage; + const BackingImage *img = surface->backingImage; const NVFormatInfo *fmtInfo = &formatsInfo[img->format]; desc->fourcc = fmtInfo->fourcc; diff --git a/src/direct/nv-driver.c b/src/direct/nv-driver.c index bfa488e..2b7bddf 100644 --- a/src/direct/nv-driver.c +++ b/src/direct/nv-driver.c @@ -15,10 +15,14 @@ #include #include "../vabackend.h" +//Technically these can vary per architecture, but all the ones we support have the same values +#define GOB_WIDTH_IN_BYTES 64 +#define GOB_HEIGHT_IN_BYTES 8 + static const NvHandle NULL_OBJECT; -static bool nv_alloc_object(int fd, int driverMajorVersion, NvHandle hRoot, NvHandle hObjectParent, NvHandle* hObjectNew, - NvV32 hClass, uint32_t paramSize, void* params) { +static bool nv_alloc_object(const int fd, const int driverMajorVersion, const NvHandle hRoot, const NvHandle hObjectParent, + NvHandle* hObjectNew,const NvV32 hClass, const uint32_t paramSize, void* params) { NVOS64_PARAMETERS alloc = { .hRoot = hRoot, .hObjectParent = hObjectParent, @@ -43,7 +47,7 @@ static bool nv_alloc_object(int fd, int driverMajorVersion, NvHandle hRoot, NvHa size -= 8; } - int ret = ioctl(fd, _IOC(_IOC_READ|_IOC_WRITE, NV_IOCTL_MAGIC, NV_ESC_RM_ALLOC, size), &alloc); + const int ret = ioctl(fd, _IOC(_IOC_READ|_IOC_WRITE, NV_IOCTL_MAGIC, NV_ESC_RM_ALLOC, size), &alloc); //this structure changed over the versions, make sure we read the status from the correct place //luckily the two new fields are the same width as the status field, so we can just read from that directly @@ -66,7 +70,7 @@ static bool nv_alloc_object(int fd, int driverMajorVersion, NvHandle hRoot, NvHa return true; } -static bool nv_free_object(int fd, NvHandle hRoot, NvHandle hObject) { +static bool nv_free_object(const int fd, const NvHandle hRoot, const NvHandle hObject) { if (hObject == 0) { return true; } @@ -77,7 +81,7 @@ static bool nv_free_object(int fd, NvHandle 
hRoot, NvHandle hObject) { .hObjectOld = hObject }; - int ret = ioctl(fd, _IOC(_IOC_READ|_IOC_WRITE, NV_IOCTL_MAGIC, NV_ESC_RM_FREE, sizeof(NVOS00_PARAMETERS)), &freeParams); + const int ret = ioctl(fd, _IOC(_IOC_READ|_IOC_WRITE, NV_IOCTL_MAGIC, NV_ESC_RM_FREE, sizeof(NVOS00_PARAMETERS)), &freeParams); if (ret != 0 || freeParams.status != NV_OK) { LOG("nv_free_object failed: %d %X %d", ret, freeParams.status, errno); @@ -87,7 +91,8 @@ static bool nv_free_object(int fd, NvHandle hRoot, NvHandle hObject) { return true; } -static bool nv_rm_control(int fd, NvHandle hClient, NvHandle hObject, NvV32 cmd, NvU32 flags, int paramSize, void* params) { +static bool nv_rm_control(const int fd, const NvHandle hClient, const NvHandle hObject, const NvV32 cmd, + const NvU32 flags, const int paramSize, void* params) { NVOS54_PARAMETERS control = { .hClient = hClient, .hObject = hObject, @@ -97,7 +102,7 @@ static bool nv_rm_control(int fd, NvHandle hClient, NvHandle hObject, NvV32 cmd, .paramsSize = paramSize }; - int ret = ioctl(fd, _IOC(_IOC_READ|_IOC_WRITE, NV_IOCTL_MAGIC, NV_ESC_RM_CONTROL, sizeof(NVOS54_PARAMETERS)), &control); + const int ret = ioctl(fd, _IOC(_IOC_READ|_IOC_WRITE, NV_IOCTL_MAGIC, NV_ESC_RM_CONTROL, sizeof(NVOS54_PARAMETERS)), &control); if (ret != 0 || control.status != NV_OK) { LOG("nv_rm_control failed: %d %X %d", ret, control.status, errno); @@ -134,18 +139,19 @@ static bool nv_card_info(int fd, nv_ioctl_card_info_t (*card_info)[32]) { } #endif -static bool nv_attach_gpus(int fd, int gpu) { - int ret = ioctl(fd, _IOC(_IOC_READ|_IOC_WRITE, NV_IOCTL_MAGIC, NV_ESC_ATTACH_GPUS_TO_FD, sizeof(gpu)), &gpu); +static bool nv_attach_gpus(const int fd, int gpu) { + const int ret = ioctl(fd, _IOC(_IOC_READ|_IOC_WRITE, NV_IOCTL_MAGIC, NV_ESC_ATTACH_GPUS_TO_FD, sizeof(gpu)), &gpu); if (ret != 0) { LOG("nv_attach_gpus failed: %d %d", ret, errno); return false; } - return ret == 0; + return true; } -static bool nv_export_object_to_fd(int fd, int export_fd, NvHandle 
hClient, NvHandle hDevice, NvHandle hParent, NvHandle hObject) { +static bool nv_export_object_to_fd(const int fd, const int export_fd, const NvHandle hClient, const NvHandle hDevice, + const NvHandle hParent,const NvHandle hObject) { NV0000_CTRL_OS_UNIX_EXPORT_OBJECT_TO_FD_PARAMS params = { .fd = export_fd, .flags = 0, @@ -162,12 +168,12 @@ static bool nv_export_object_to_fd(int fd, int export_fd, NvHandle hClient, NvHa return nv_rm_control(fd, hClient, hClient, NV0000_CTRL_CMD_OS_UNIX_EXPORT_OBJECT_TO_FD, 0, sizeof(params), ¶ms); } -static bool nv_get_versions(int fd, char **versionString) { +static bool nv_get_versions(const int fd, char **versionString) { nv_ioctl_rm_api_version_t obj = { .cmd = '2' //query }; - int ret = ioctl(fd, _IOC(_IOC_READ|_IOC_WRITE, NV_IOCTL_MAGIC, NV_ESC_CHECK_VERSION_STR, sizeof(obj)), &obj); + const int ret = ioctl(fd, _IOC(_IOC_READ|_IOC_WRITE, NV_IOCTL_MAGIC, NV_ESC_CHECK_VERSION_STR, sizeof(obj)), &obj); if (ret != 0) { LOG("nv_check_version failed: %d %d", ret, errno); @@ -179,8 +185,8 @@ static bool nv_get_versions(int fd, char **versionString) { return obj.reply == NV_RM_API_VERSION_REPLY_RECOGNIZED; } -static bool nv0_register_fd(int nv0_fd, int nvctl_fd) { - int ret = ioctl(nv0_fd, _IOC(_IOC_READ|_IOC_WRITE, NV_IOCTL_MAGIC, NV_ESC_REGISTER_FD, sizeof(int)), &nvctl_fd); +static bool nv0_register_fd(const int nv0_fd, int nvctl_fd) { + const int ret = ioctl(nv0_fd, _IOC(_IOC_READ|_IOC_WRITE, NV_IOCTL_MAGIC, NV_ESC_REGISTER_FD, sizeof(int)), &nvctl_fd); if (ret != 0) { LOG("nv0_register_fd failed: %d %d", ret, errno); @@ -190,12 +196,12 @@ static bool nv0_register_fd(int nv0_fd, int nvctl_fd) { return true; } -static bool get_device_info(int fd, NVDriverContext *context) { +static bool get_device_info(const int fd, NVDriverContext *context) { //NVIDIA driver v545.29.02 changed the devInfo struct, and partly broke it in the process //...who adds a field to the middle of an existing struct.... 
if (context->driverMajorVersion >= 545 && context->driverMinorVersion >= 29) { struct drm_nvidia_get_dev_info_params_545 devInfo545; - int ret = ioctl(fd, DRM_IOCTL_NVIDIA_GET_DEV_INFO_545, &devInfo545); + const int ret = ioctl(fd, DRM_IOCTL_NVIDIA_GET_DEV_INFO_545, &devInfo545); if (ret != 0) { LOG("get_device_info failed: %d %d", ret, errno); @@ -208,7 +214,7 @@ static bool get_device_info(int fd, NVDriverContext *context) { context->generic_page_kind = devInfo545.generic_page_kind; } else { struct drm_nvidia_get_dev_info_params devInfo; - int ret = ioctl(fd, DRM_IOCTL_NVIDIA_GET_DEV_INFO, &devInfo); + const int ret = ioctl(fd, DRM_IOCTL_NVIDIA_GET_DEV_INFO, &devInfo); if (ret != 0) { LOG("get_device_info failed: %d %d", ret, errno); @@ -224,13 +230,13 @@ static bool get_device_info(int fd, NVDriverContext *context) { return true; } -bool get_device_uuid(NVDriverContext *context, char uuid[16]) { +bool get_device_uuid(const NVDriverContext *context, char uuid[16]) { NV0000_CTRL_GPU_GET_UUID_FROM_GPU_ID_PARAMS uuidParams = { .gpuId = context->gpu_id, .flags = NV0000_CTRL_CMD_GPU_GET_UUID_FROM_GPU_ID_FLAGS_FORMAT_BINARY | NV0000_CTRL_CMD_GPU_GET_UUID_FROM_GPU_ID_FLAGS_TYPE_SHA1 }; - int ret = nv_rm_control(context->nvctlFd, context->clientObject, context->clientObject, NV0000_CTRL_CMD_GPU_GET_UUID_FROM_GPU_ID, 0, sizeof(uuidParams), &uuidParams); + const int ret = nv_rm_control(context->nvctlFd, context->clientObject, context->clientObject, NV0000_CTRL_CMD_GPU_GET_UUID_FROM_GPU_ID, 0, sizeof(uuidParams), &uuidParams); if (ret) { return false; } @@ -242,7 +248,7 @@ bool get_device_uuid(NVDriverContext *context, char uuid[16]) { return true; } -bool init_nvdriver(NVDriverContext *context, int drmFd) { +bool init_nvdriver(NVDriverContext *context, const int drmFd) { LOG("Initing nvdriver..."); int nvctlFd = -1, nv0Fd = -1; @@ -358,7 +364,7 @@ bool free_nvdriver(NVDriverContext *context) { return true; } -bool alloc_memory(NVDriverContext *context, uint32_t size, int 
*fd) { +bool alloc_memory(const NVDriverContext *context, const uint32_t size, int *fd) { //allocate the buffer int nvctlFd2 = -1; NvHandle bufferObject = {0}; @@ -431,49 +437,55 @@ bool alloc_memory(NVDriverContext *context, uint32_t size, int *fd) { return false; } - -uint32_t calculate_image_size(uint32_t width, uint32_t height, uint8_t channels, uint8_t bitsPerChannel, uint32_t* widthInBytesOut) -{ - uint32_t gobWidthInBytes = 64; - uint32_t gobHeightInBytes = 8; - - uint32_t bytesPerChannel = bitsPerChannel/8; - uint32_t bytesPerPixel = channels * bytesPerChannel; - +uint32_t calculate_image_size(const NVDriverContext *context, NVDriverImage images[], const uint32_t width, const uint32_t height, + const uint32_t bppc, const uint32_t numPlanes, const NVFormatPlane planes[]) { //first figure out the gob layout - uint32_t log2GobsPerBlockX = 0; //TODO not sure if these are the correct numbers to start with, but they're the largest ones i've seen used - uint32_t log2GobsPerBlockY = height < 88 ? 
3 : 4; //TODO 88 is a guess, 80px high needs 3, 112px needs 4, 96px needs 4, 88px needs 4 - uint32_t log2GobsPerBlockZ = 0; - - LOG("Calculated GOB size: %dx%d (%dx%d)", gobWidthInBytes << log2GobsPerBlockX, gobHeightInBytes << log2GobsPerBlockY, log2GobsPerBlockX, log2GobsPerBlockY); - - //These two seem to be correct, but it was discovered by trial and error so I'm not 100% sure - uint32_t widthInBytes = ROUND_UP(width * bytesPerPixel, gobWidthInBytes << log2GobsPerBlockX); - uint32_t alignedHeight = ROUND_UP(height, gobHeightInBytes << log2GobsPerBlockY); - uint32_t imageSizeInBytes = widthInBytes * alignedHeight; - - LOG("Aligned image size: %dx%d = %d", widthInBytes, alignedHeight, imageSizeInBytes); - - if (widthInBytesOut != NULL) { - *widthInBytesOut = widthInBytes; - } - return imageSizeInBytes; + const uint32_t log2GobsPerBlockX = 0; + const uint32_t log2GobsPerBlockZ = 0; + + uint32_t offset = 0; + for (uint32_t i = 0; i < numPlanes; i++) { + //calculate each planes dimensions and bpp + const uint32_t planeWidth = width >> planes[i].ss.x; + const uint32_t planeHeight = height >> planes[i].ss.y; + const uint32_t bytesPerPixel = planes[i].channelCount * bppc; + + const uint32_t log2GobsPerBlockY = planeHeight < 88 ? 
3 : 4; //TODO 88 is a guess, 80px high needs 3, 112px needs 4, 96px needs 4, 88px needs 4 + + LOG("Calculated GOB size: %dx%d (%dx%d)", GOB_WIDTH_IN_BYTES << log2GobsPerBlockX, GOB_HEIGHT_IN_BYTES << log2GobsPerBlockY, log2GobsPerBlockX, log2GobsPerBlockY); + + //These two seem to be correct, but it was discovered by trial and error so I'm not 100% sure + const uint32_t widthInBytes = ROUND_UP(planeWidth * bytesPerPixel, GOB_WIDTH_IN_BYTES << log2GobsPerBlockX); + const uint32_t alignedHeight = ROUND_UP(planeHeight, GOB_HEIGHT_IN_BYTES << log2GobsPerBlockY); + images[i].width = planeWidth; + images[i].height = planeHeight; + images[i].offset = offset; + images[i].memorySize = widthInBytes * alignedHeight; + images[i].pitch = widthInBytes; + images[i].mods = DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, context->sector_layout, context->page_kind_generation, context->generic_page_kind, log2GobsPerBlockY); + images[i].fourcc = planes[i].fourcc; + images[i].log2GobsPerBlockX = log2GobsPerBlockX; + images[i].log2GobsPerBlockY = log2GobsPerBlockY; + images[i].log2GobsPerBlockZ = log2GobsPerBlockZ; + + offset += images[i].memorySize; + offset = ROUND_UP(offset, 64); + } + + return offset; } -bool alloc_buffer(NVDriverContext *context, uint32_t size, uint32_t widthInBytes, int *fd1, int *fd2, int *drmFd) { +bool alloc_buffer(NVDriverContext *context, const uint32_t size, const NVDriverImage images[], int *fd1, int *fd2, int *drmFd) { int memFd = -1; - bool ret = alloc_memory(context, size, &memFd); + const bool ret = alloc_memory(context, size, &memFd); if (!ret) { LOG("alloc_memory failed"); return false; } //now export the dma-buf - uint32_t pitchInBlocks = widthInBytes / 64; //TODO replace with better constants - - //printf("got gobsPerBlock: %ux%u %u %u %u %d\n", width, height, log2GobsPerBlockX, log2GobsPerBlockY, log2GobsPerBlockZ, pitchInBlocks); //duplicate the fd so we don't invalidate it by importing it - int memFd2 = dup(memFd); + const int memFd2 = dup(memFd); if 
(memFd2 == -1) { LOG("dup failed"); goto err; @@ -485,10 +497,10 @@ bool alloc_buffer(NVDriverContext *context, uint32_t size, uint32_t widthInBytes .layout = NvKmsSurfaceMemoryLayoutBlockLinear, .blockLinear = { .genericMemory = 0, - .pitchInBlocks = pitchInBlocks, - .log2GobsPerBlock.x = 0, - .log2GobsPerBlock.y = 4, //TODO replace with better constants - .log2GobsPerBlock.z = 0, + .pitchInBlocks = images[0].pitch / GOB_WIDTH_IN_BYTES, + .log2GobsPerBlock.x = images[0].log2GobsPerBlockX, + .log2GobsPerBlock.y = images[0].log2GobsPerBlockY, + .log2GobsPerBlock.z = images[0].log2GobsPerBlockZ, } } }; @@ -540,104 +552,3 @@ bool alloc_buffer(NVDriverContext *context, uint32_t size, uint32_t widthInBytes return false; } - -bool alloc_image(NVDriverContext *context, uint32_t width, uint32_t height, uint8_t channels, uint8_t bitsPerChannel, uint32_t fourcc, NVDriverImage *image) { - uint32_t gobWidthInBytes = 64; - - uint32_t log2GobsPerBlockX = 0; //TODO not sure if these are the correct numbers to start with, but they're the largest ones i've seen used - uint32_t log2GobsPerBlockY = height < 88 ? 
3 : 4; //TODO 88 is a guess, 80px high needs 3, 112px needs 4, 96px needs 4, 88px needs 4 - uint32_t log2GobsPerBlockZ = 0; - - uint32_t widthInBytes = 0; - uint32_t size = calculate_image_size(width, height, channels, bitsPerChannel, &widthInBytes); - - //this gets us some memory, and the fd to import into cuda - int memFd = -1; - bool ret = alloc_memory(context, size, &memFd); - if (!ret) { - LOG("alloc_memory failed"); - return false; - } - - //now export the dma-buf - uint32_t pitchInBlocks = widthInBytes / (gobWidthInBytes << log2GobsPerBlockX); - - //printf("got gobsPerBlock: %ux%u %u %u %u %d\n", width, height, log2GobsPerBlockX, log2GobsPerBlockY, log2GobsPerBlockZ, pitchInBlocks); - //duplicate the fd so we don't invalidate it by importing it - int memFd2 = dup(memFd); - if (memFd2 == -1) { - LOG("dup failed"); - goto err; - } - - struct NvKmsKapiPrivImportMemoryParams nvkmsParams = { - .memFd = memFd2, - .surfaceParams = { - .layout = NvKmsSurfaceMemoryLayoutBlockLinear, - .blockLinear = { - .genericMemory = 0, - .pitchInBlocks = pitchInBlocks, - .log2GobsPerBlock.x = log2GobsPerBlockX, - .log2GobsPerBlock.y = log2GobsPerBlockY, - .log2GobsPerBlock.z = log2GobsPerBlockZ, - } - } - }; - - struct drm_nvidia_gem_import_nvkms_memory_params params = { - .mem_size = size, - .nvkms_params_ptr = (uint64_t) &nvkmsParams, - .nvkms_params_size = context->driverMajorVersion == 470 ? 
0x20 : sizeof(nvkmsParams) //needs to be 0x20 in the 470 series driver - }; - int drmret = ioctl(context->drmFd, DRM_IOCTL_NVIDIA_GEM_IMPORT_NVKMS_MEMORY, &params); - if (drmret != 0) { - LOG("DRM_IOCTL_NVIDIA_GEM_IMPORT_NVKMS_MEMORY failed: %d %d", drmret, errno); - goto err; - } - - //export dma-buf - struct drm_prime_handle prime_handle = { - .handle = params.handle - }; - drmret = ioctl(context->drmFd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &prime_handle); - if (drmret != 0) { - LOG("DRM_IOCTL_PRIME_HANDLE_TO_FD failed: %d %d", drmret, errno); - goto err; - } - - struct drm_gem_close gem_close = { - .handle = params.handle - }; - drmret = ioctl(context->drmFd, DRM_IOCTL_GEM_CLOSE, &gem_close); - if (drmret != 0) { - LOG("DRM_IOCTL_GEM_CLOSE failed: %d %d", drmret, errno); - goto prime_err; - } - - image->width = width; - image->height = height; - image->nvFd = memFd; - image->nvFd2 = memFd2; //not sure why we can't close this one, we shouldn't need it after importing the image - image->drmFd = prime_handle.fd; - image->mods = DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, context->sector_layout, context->page_kind_generation, context->generic_page_kind, log2GobsPerBlockY); - image->offset = 0; - image->pitch = widthInBytes; - image->memorySize = size; - image->fourcc = fourcc; - - LOG("created image: %dx%d %lx %d %x", width, height, image->mods, widthInBytes, size); - - return true; - -prime_err: - if (prime_handle.fd > 0) { - close(prime_handle.fd); - } - -err: - if (memFd > 0) { - close(memFd); - } - - return false; -} diff --git a/src/direct/nv-driver.h b/src/direct/nv-driver.h index 494750a..25007e2 100644 --- a/src/direct/nv-driver.h +++ b/src/direct/nv-driver.h @@ -4,6 +4,7 @@ #include #include +#include "../common.h" #include "nvidia-drm-ioctl.h" #define ROUND_UP(N, S) ((((N) + (S) - 1) / (S)) * (S)) @@ -25,9 +26,6 @@ typedef struct { } NVDriverContext; typedef struct { - int nvFd; - int nvFd2; - int drmFd; uint32_t width; uint32_t height; uint64_t mods; @@ -35,14 +33,15
@@ typedef struct { uint32_t offset; uint32_t pitch; uint32_t fourcc; + uint32_t log2GobsPerBlockX; + uint32_t log2GobsPerBlockY; + uint32_t log2GobsPerBlockZ; } NVDriverImage; bool init_nvdriver(NVDriverContext *context, int drmFd); bool free_nvdriver(NVDriverContext *context); -bool get_device_uuid(NVDriverContext *context, char uuid[16]); -bool alloc_memory(NVDriverContext *context, uint32_t size, int *fd); -bool alloc_image(NVDriverContext *context, uint32_t width, uint32_t height, uint8_t channels, uint8_t bytesPerChannel, uint32_t fourcc, NVDriverImage *image); -bool alloc_buffer(NVDriverContext *context, uint32_t size, uint32_t widthInBytes, int *fd1, int *fd2, int *drmFd); -uint32_t calculate_image_size(uint32_t width, uint32_t height, uint8_t channels, uint8_t bitsPerChannel, uint32_t* widthInBytesOut); - +bool get_device_uuid(const NVDriverContext *context, char uuid[16]); +bool alloc_memory(const NVDriverContext *context, uint32_t size, int *fd); +bool alloc_buffer(NVDriverContext *context, uint32_t size, const NVDriverImage images[], int *fd1, int *fd2, int *drmFd); +uint32_t calculate_image_size(const NVDriverContext *context, NVDriverImage images[], uint32_t width, uint32_t height, uint32_t bppc, uint32_t numPlanes, const NVFormatPlane planes[]); #endif diff --git a/src/vabackend.h b/src/vabackend.h index 9b61724..4b590cb 100644 --- a/src/vabackend.h +++ b/src/vabackend.h @@ -11,6 +11,7 @@ #include #include "list.h" #include "direct/nv-driver.h" +#include "common.h" #define SURFACE_QUEUE_SIZE 16 #define MAX_IMAGE_COUNT 64 @@ -205,19 +206,6 @@ struct _NVCodec { typedef struct _NVCodec NVCodec; -typedef struct -{ - uint32_t x; - uint32_t y; -} NVSubSampling; - -typedef struct -{ - uint32_t channelCount; - uint32_t fourcc; - NVSubSampling ss; // subsampling -} NVFormatPlane; - typedef struct { uint32_t bppc; // bytes per pixel per channel From ecbb8955d2c89d6a89fb656358437b1cafaa22f4 Mon Sep 17 00:00:00 2001 From: Stephen Date: Thu, 23 Nov 2023 07:57:26 
+0000 Subject: [PATCH 19/30] fixed issue with single buffer export for small (144p/160p) videos having visual artifacts --- src/direct/nv-driver.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/direct/nv-driver.c b/src/direct/nv-driver.c index 2b7bddf..f527a8a 100644 --- a/src/direct/nv-driver.c +++ b/src/direct/nv-driver.c @@ -13,6 +13,8 @@ #include "nv-driver.h" #include +#include + #include "../vabackend.h" //Technically these can vary per architecture, but all the ones we support have the same values @@ -442,6 +444,7 @@ uint32_t calculate_image_size(const NVDriverContext *context, NVDriverImage imag //first figure out the gob layout const uint32_t log2GobsPerBlockX = 0; const uint32_t log2GobsPerBlockZ = 0; + const uint32_t log2GobsPerBlockY = 4; uint32_t offset = 0; for (uint32_t i = 0; i < numPlanes; i++) { @@ -450,15 +453,19 @@ uint32_t calculate_image_size(const NVDriverContext *context, NVDriverImage imag const uint32_t planeHeight = height >> planes[i].ss.y; const uint32_t bytesPerPixel = planes[i].channelCount * bppc; - const uint32_t log2GobsPerBlockY = planeHeight < 88 ? 3 : 4; //TODO 88 is a guess, 80px high needs 3, 112px needs 4, 96px needs 4, 88px needs 4 - - LOG("Calculated GOB size: %dx%d (%dx%d)", GOB_WIDTH_IN_BYTES << log2GobsPerBlockX, GOB_HEIGHT_IN_BYTES << log2GobsPerBlockY, log2GobsPerBlockX, log2GobsPerBlockY); + //Depending on the height of the allocated image, the modifiers + //needed for the exported image to work correctly change. However this can cause problems if the Y surface + //needs one modifier, and UV need another when attempting to use a single surface export (as only one modifier + //is possible). So for now we're just going to limit the minimum height to 88 pixels so we can use a single + //modifier. + //const uint32_t log2GobsPerBlockY = planeHeight < 88 ? 
3 : 4; + //LOG("Calculated GOB size: %dx%d (%dx%d)", GOB_WIDTH_IN_BYTES << log2GobsPerBlockX, GOB_HEIGHT_IN_BYTES << log2GobsPerBlockY, log2GobsPerBlockX, log2GobsPerBlockY); //These two seem to be correct, but it was discovered by trial and error so I'm not 100% sure const uint32_t widthInBytes = ROUND_UP(planeWidth * bytesPerPixel, GOB_WIDTH_IN_BYTES << log2GobsPerBlockX); - const uint32_t alignedHeight = ROUND_UP(planeHeight, GOB_HEIGHT_IN_BYTES << log2GobsPerBlockY); + const uint32_t alignedHeight = MAX(ROUND_UP(planeHeight, GOB_HEIGHT_IN_BYTES << log2GobsPerBlockY), 88); images[i].width = planeWidth; - images[i].height = planeHeight; + images[i].height = alignedHeight; images[i].offset = offset; images[i].memorySize = widthInBytes * alignedHeight; images[i].pitch = widthInBytes; From 2bfe04465b79591b65f387104273d6a0a6a1ad88 Mon Sep 17 00:00:00 2001 From: Stephen Date: Sun, 17 Dec 2023 09:06:55 +0000 Subject: [PATCH 20/30] change default backend to direct and add egl to NVD_BACKEND options --- src/vabackend.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/vabackend.c b/src/vabackend.c index ce6299c..c62b6bc 100644 --- a/src/vabackend.c +++ b/src/vabackend.c @@ -44,7 +44,7 @@ static FILE *LOG_OUTPUT; static int gpu = -1; static enum { EGL, DIRECT -} backend = EGL; +} backend = DIRECT; const NVFormatInfo formatsInfo[] = { @@ -93,8 +93,12 @@ static void init() { } char *nvdBackend = getenv("NVD_BACKEND"); - if (nvdBackend != NULL && strncmp(nvdBackend, "direct", 6) == 0) { - backend = DIRECT; + if (nvdBackend != NULL) { + if (strncmp(nvdBackend, "direct", 6) == 0) { + backend = DIRECT; + } else if (strncmp(nvdBackend, "egl", 6) == 0) { + backend = EGL; + } } //try to detect the Firefox sandbox and skip loading CUDA if detected From 5dd4fd67e75779dfe407ff0e94f83d689892fa97 Mon Sep 17 00:00:00 2001 From: Stephen Date: Sun, 17 Dec 2023 09:47:07 +0000 Subject: [PATCH 21/30] added limit to maximum surface count --- src/vabackend.c | 8 
+++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/vabackend.c b/src/vabackend.c index c62b6bc..37a4268 100644 --- a/src/vabackend.c +++ b/src/vabackend.c @@ -1015,6 +1015,12 @@ static VAStatus nvCreateContext( num_render_targets = 16; } + int surfaceCount = drv->surfaceCount != 0 ? drv->surfaceCount : num_render_targets; + if (surfaceCount > 32) { + LOG("Application requested %d surface(s), limiting to 32. This may cause issues.", surfaceCount); + surfaceCount = 32; + } + CUVIDDECODECREATEINFO vdci = { .ulWidth = vdci.ulMaxWidth = vdci.ulTargetWidth = picture_width, .ulHeight = vdci.ulMaxHeight = vdci.ulTargetHeight = picture_height, @@ -1032,7 +1038,7 @@ static VAStatus nvCreateContext( //it isn't particually efficient to do this, but it is simple .ulNumOutputSurfaces = 1, //just allocate as many surfaces as have been created since we can never have as much information as the decode to guess correctly - .ulNumDecodeSurfaces = drv->surfaceCount != 0 ? drv->surfaceCount : num_render_targets, + .ulNumDecodeSurfaces = surfaceCount, }; From da0cd89ae292a875cf530cff7a7ca8259a6c7315 Mon Sep 17 00:00:00 2001 From: Stephen <2325080+elFarto@users.noreply.github.com> Date: Sun, 17 Dec 2023 11:46:51 +0000 Subject: [PATCH 22/30] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f7b6287..af9de28 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,7 @@ In addition the following environment variables need to be set. For permanent co | Variable | Value | Reason | |---|---|---| | MOZ_DISABLE_RDD_SANDBOX | 1 | Disables the sandbox for the RDD process that the decoder runs in. | -| LIBVA_DRIVER_NAME | nvidia | For libva versions prior to 2.15, this forces libva to load the `nvidia` backend. | +| LIBVA_DRIVER_NAME | nvidia | Forces libva to load this driver. | | __EGL_VENDOR_LIBRARY_FILENAMES | /usr/share/glvnd/egl_vendor.d/10_nvidia.json | Required for the 470 driver series only. 
It overrides the list of drivers the glvnd library can use to prevent Firefox from using the MESA driver by mistake. | When libva is used it will log out some information, which can be excessive when Firefox initalises it multiple times per page. This logging can be suppressed by adding the following line to the `/etc/libva.conf` file: From c56a1c4d84d54a6eb595fd1eb3eae577f3be5d07 Mon Sep 17 00:00:00 2001 From: Stephen <2325080+elFarto@users.noreply.github.com> Date: Sun, 17 Dec 2023 11:48:05 +0000 Subject: [PATCH 23/30] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index af9de28..a88e233 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,7 @@ In addition the following environment variables need to be set. For permanent co | Variable | Value | Reason | |---|---|---| | MOZ_DISABLE_RDD_SANDBOX | 1 | Disables the sandbox for the RDD process that the decoder runs in. | -| LIBVA_DRIVER_NAME | nvidia | Forces libva to load this driver. | +| LIBVA_DRIVER_NAME | nvidia | Required for libva 2.20+, forces libva to load this driver. | | __EGL_VENDOR_LIBRARY_FILENAMES | /usr/share/glvnd/egl_vendor.d/10_nvidia.json | Required for the 470 driver series only. It overrides the list of drivers the glvnd library can use to prevent Firefox from using the MESA driver by mistake. | When libva is used it will log out some information, which can be excessive when Firefox initalises it multiple times per page. 
This logging can be suppressed by adding the following line to the `/etc/libva.conf` file: From 91b3eb17dcb4c0d154ca22bfab860b5689f61123 Mon Sep 17 00:00:00 2001 From: Stephen Date: Sun, 24 Dec 2023 09:02:51 +0000 Subject: [PATCH 24/30] Log message clean up --- src/direct/direct-export-buf.c | 7 ++++--- src/vabackend.c | 25 ++++++++++++------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/direct/direct-export-buf.c b/src/direct/direct-export-buf.c index 523bcf1..0bfaeb2 100644 --- a/src/direct/direct-export-buf.c +++ b/src/direct/direct-export-buf.c @@ -63,6 +63,7 @@ static bool direct_initExporter(NVDriver *drv) { uint8_t drmIdx = 128; char node[20] = {0, }; do { + LOG("Searching for GPU: %d %d %d", nvIdx, nvdGpu, drmIdx) snprintf(node, 20, "/dev/dri/renderD%d", drmIdx++); fd = open(node, O_RDWR|O_CLOEXEC); if (fd == -1) { @@ -164,7 +165,7 @@ static BackingImage *direct_allocateBackingImage(NVDriver *drv, NVSurface *surfa .size = backingImage->totalSize }; - LOG("Importing memory to CUDA"); + LOG("Importing memory to CUDA") if (CHECK_CUDA_RESULT(drv->cu->cuImportExternalMemory(&backingImage->extMem, &extMemDesc))) { goto import_fail; } @@ -321,7 +322,7 @@ static bool direct_realiseSurface(NVDriver *drv, NVSurface *surface) { //check again to see if it's just been created if (surface->backingImage == NULL) { //try to find a free surface - BackingImage *img = img = direct_allocateBackingImage(drv, surface); + BackingImage *img = direct_allocateBackingImage(drv, surface); if (img == NULL) { LOG("Unable to realise surface: %p (%d)", surface, surface->pictureIdx) pthread_mutex_unlock(&surface->mutex); @@ -343,7 +344,7 @@ static bool direct_exportCudaPtr(NVDriver *drv, CUdeviceptr ptr, NVSurface *surf if (ptr != 0) { copyFrameToSurface(drv, ptr, surface, pitch); } else { - LOG("exporting with null ptr"); + LOG("exporting with null ptr") } return true; diff --git a/src/vabackend.c b/src/vabackend.c index 37a4268..991dbf6 100644 --- 
a/src/vabackend.c +++ b/src/vabackend.c @@ -392,7 +392,7 @@ static void* resolveSurfaces(void *param) { } pthread_mutex_unlock(&ctx->resolveMutex); //find the last item - LOG("Reading from queue: %d %d", ctx->surfaceQueueReadIdx, ctx->surfaceQueueWriteIdx); + //LOG("Reading from queue: %d %d", ctx->surfaceQueueReadIdx, ctx->surfaceQueueWriteIdx); NVSurface *surface = ctx->surfaceQueue[ctx->surfaceQueueReadIdx++]; if (ctx->surfaceQueueReadIdx >= SURFACE_QUEUE_SIZE) { ctx->surfaceQueueReadIdx = 0; @@ -408,15 +408,15 @@ static void* resolveSurfaces(void *param) { .second_field = surface->secondField }; - LOG("Mapping surface %d", surface->pictureIdx); + //LOG("Mapping surface %d", surface->pictureIdx); if (CHECK_CUDA_RESULT(cv->cuvidMapVideoFrame(ctx->decoder, surface->pictureIdx, &deviceMemory, &pitch, &procParams))) { continue; } - LOG("Mapped surface %d to %p (%d)", surface->pictureIdx, (void*)deviceMemory, pitch); + //LOG("Mapped surface %d to %p (%d)", surface->pictureIdx, (void*)deviceMemory, pitch); //update cuarray drv->backend->exportCudaPtr(drv, deviceMemory, surface, pitch); - LOG("Surface %d exported", surface->pictureIdx); + //LOG("Surface %d exported", surface->pictureIdx); //unmap frame CHECK_CUDA_RESULT(cv->cuvidUnmapVideoFrame(ctx->decoder, deviceMemory)); } @@ -529,7 +529,6 @@ static VAStatus nvQueryConfigProfiles( //now filter out the codecs we don't support for (int i = 0; i < profiles; i++) { if (vaToCuCodec(profile_list[i]) == cudaVideoCodec_NONE) { - //LOG("Removing profile: %d", profile_list[i]) for (int x = i; x < profiles-1; x++) { profile_list[x] = profile_list[x+1]; } @@ -985,7 +984,7 @@ static VAStatus nvCreateContext( return VA_STATUS_ERROR_INVALID_CONFIG; } - LOG("with %d render targets, %d surfaces, at %dx%d", num_render_targets, drv->surfaceCount, picture_width, picture_height); + LOG("creating context with %d render targets, %d surfaces, at %dx%d", num_render_targets, drv->surfaceCount, picture_width, picture_height); //find the codec 
they've selected const NVCodec *selectedCodec = NULL; @@ -1312,7 +1311,7 @@ static VAStatus nvEndPicture( LOG("cuvidDecodePicture failed: %d", result); return VA_STATUS_ERROR_DECODING_ERROR; } - LOG("Decoded frame successfully to idx: %d (%p)", picParams->CurrPicIdx, nvCtx->renderTarget); + //LOG("Decoded frame successfully to idx: %d (%p)", picParams->CurrPicIdx, nvCtx->renderTarget); NVSurface *surface = nvCtx->renderTarget; @@ -1351,7 +1350,7 @@ static VAStatus nvSyncSurface( //wait for resolve to occur before synchronising pthread_mutex_lock(&surface->mutex); if (surface->resolving) { - LOG("Surface %d not resolved, waiting", surface->pictureIdx); + //LOG("Surface %d not resolved, waiting", surface->pictureIdx); pthread_cond_wait(&surface->cond, &surface->mutex); } pthread_mutex_unlock(&surface->mutex); @@ -2036,7 +2035,7 @@ static VAStatus nvExportSurfaceHandle( return VA_STATUS_ERROR_INVALID_SURFACE; } - LOG("Exporting surface: %d (%p)", surface->pictureIdx, surface); + //LOG("Exporting surface: %d (%p)", surface->pictureIdx, surface); CHECK_CUDA_RESULT_RETURN(cu->cuCtxPushCurrent(drv->cudaContext), VA_STATUS_ERROR_OPERATION_FAILED); @@ -2049,10 +2048,10 @@ static VAStatus nvExportSurfaceHandle( drv->backend->fillExportDescriptor(drv, surface, ptr); - LOG("Exporting with w:%d h:%d o:%d p:%d m:%" PRIx64 " o:%d p:%d m:%" PRIx64, ptr->width, ptr->height, ptr->layers[0].offset[0], - ptr->layers[0].pitch[0], ptr->objects[0].drm_format_modifier, - ptr->layers[1].offset[0], ptr->layers[1].pitch[0], - ptr->objects[1].drm_format_modifier); + //LOG("Exporting with w:%d h:%d o:%d p:%d m:%" PRIx64 " o:%d p:%d m:%" PRIx64, ptr->width, ptr->height, ptr->layers[0].offset[0], + // ptr->layers[0].pitch[0], ptr->objects[0].drm_format_modifier, + // ptr->layers[1].offset[0], ptr->layers[1].pitch[0], + // ptr->objects[1].drm_format_modifier); CHECK_CUDA_RESULT_RETURN(cu->cuCtxPopCurrent(NULL), VA_STATUS_ERROR_OPERATION_FAILED); From f9777663a9519161ad3320819170e039fe909a4f Mon 
Sep 17 00:00:00 2001 From: Stephen Date: Sun, 24 Dec 2023 09:04:13 +0000 Subject: [PATCH 25/30] Reworked GOB calculation, again. Fixes issue with wide and short video 498x124 showing green bars. --- src/direct/nv-driver.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/direct/nv-driver.c b/src/direct/nv-driver.c index f527a8a..3801385 100644 --- a/src/direct/nv-driver.c +++ b/src/direct/nv-driver.c @@ -444,7 +444,6 @@ uint32_t calculate_image_size(const NVDriverContext *context, NVDriverImage imag //first figure out the gob layout const uint32_t log2GobsPerBlockX = 0; const uint32_t log2GobsPerBlockZ = 0; - const uint32_t log2GobsPerBlockY = 4; uint32_t offset = 0; for (uint32_t i = 0; i < numPlanes; i++) { @@ -458,12 +457,12 @@ uint32_t calculate_image_size(const NVDriverContext *context, NVDriverImage imag //needs one modifier, and UV need another when attempting to use a single surface export (as only one modifier //is possible). So for now we're just going to limit the minimum height to 88 pixels so we can use a single //modifier. - //const uint32_t log2GobsPerBlockY = planeHeight < 88 ? 3 : 4; + const uint32_t log2GobsPerBlockY = planeHeight < 88 ? 
3 : 4; //LOG("Calculated GOB size: %dx%d (%dx%d)", GOB_WIDTH_IN_BYTES << log2GobsPerBlockX, GOB_HEIGHT_IN_BYTES << log2GobsPerBlockY, log2GobsPerBlockX, log2GobsPerBlockY); //These two seem to be correct, but it was discovered by trial and error so I'm not 100% sure const uint32_t widthInBytes = ROUND_UP(planeWidth * bytesPerPixel, GOB_WIDTH_IN_BYTES << log2GobsPerBlockX); - const uint32_t alignedHeight = MAX(ROUND_UP(planeHeight, GOB_HEIGHT_IN_BYTES << log2GobsPerBlockY), 88); + const uint32_t alignedHeight = ROUND_UP(planeHeight, GOB_HEIGHT_IN_BYTES << log2GobsPerBlockY); images[i].width = planeWidth; images[i].height = alignedHeight; images[i].offset = offset; From 7d5f4f2dde0e771d0679dfa9aca22ca17e16246e Mon Sep 17 00:00:00 2001 From: Kiryl Antonik Date: Sun, 21 Jan 2024 15:52:06 +0100 Subject: [PATCH 26/30] direct: Fix drm index loop condition --- src/direct/direct-export-buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/direct/direct-export-buf.c b/src/direct/direct-export-buf.c index 0bfaeb2..0347d29 100644 --- a/src/direct/direct-export-buf.c +++ b/src/direct/direct-export-buf.c @@ -82,7 +82,7 @@ static bool direct_initExporter(NVDriver *drv) { continue; } break; - } while (drmIdx < 16); + } while (drmIdx < 128 + 16); drv->drmFd = fd; LOG("Found NVIDIA GPU %d at %s", nvdGpu, node); From 9516309026e7c3db024847ee7ccc5ceede71713d Mon Sep 17 00:00:00 2001 From: Stephen Date: Sat, 3 Feb 2024 11:06:46 +0000 Subject: [PATCH 27/30] modified check in nv_get_versions to work with 470 drivers --- src/direct/nv-driver.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/direct/nv-driver.c b/src/direct/nv-driver.c index 3801385..5aac3d0 100644 --- a/src/direct/nv-driver.c +++ b/src/direct/nv-driver.c @@ -172,16 +172,11 @@ static bool nv_export_object_to_fd(const int fd, const int export_fd, const NvHa static bool nv_get_versions(const int fd, char **versionString) { nv_ioctl_rm_api_version_t obj = { - .cmd = '2' //query 
+ .cmd = '0' //query }; const int ret = ioctl(fd, _IOC(_IOC_READ|_IOC_WRITE, NV_IOCTL_MAGIC, NV_ESC_CHECK_VERSION_STR, sizeof(obj)), &obj); - if (ret != 0) { - LOG("nv_check_version failed: %d %d", ret, errno); - return false; - } - *versionString = strdup(obj.versionString); return obj.reply == NV_RM_API_VERSION_REPLY_RECOGNIZED; From a45b6034a1ca93fec498f465cb6cb891a4d8e92a Mon Sep 17 00:00:00 2001 From: Stephen Date: Sat, 3 Feb 2024 11:13:24 +0000 Subject: [PATCH 28/30] removed unused variable --- src/direct/nv-driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/direct/nv-driver.c b/src/direct/nv-driver.c index 5aac3d0..073027b 100644 --- a/src/direct/nv-driver.c +++ b/src/direct/nv-driver.c @@ -175,7 +175,7 @@ static bool nv_get_versions(const int fd, char **versionString) { .cmd = '0' //query }; - const int ret = ioctl(fd, _IOC(_IOC_READ|_IOC_WRITE, NV_IOCTL_MAGIC, NV_ESC_CHECK_VERSION_STR, sizeof(obj)), &obj); + ioctl(fd, _IOC(_IOC_READ|_IOC_WRITE, NV_IOCTL_MAGIC, NV_ESC_CHECK_VERSION_STR, sizeof(obj)), &obj); *versionString = strdup(obj.versionString); From 1d6a9d39bed38d1e2cfbe6528fafc91e1618af68 Mon Sep 17 00:00:00 2001 From: Stephen Date: Sat, 3 Feb 2024 15:02:20 +0000 Subject: [PATCH 29/30] reworked 470 version check fix --- src/direct/nv-driver.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/direct/nv-driver.c b/src/direct/nv-driver.c index 073027b..5ff79d6 100644 --- a/src/direct/nv-driver.c +++ b/src/direct/nv-driver.c @@ -172,12 +172,22 @@ static bool nv_export_object_to_fd(const int fd, const int export_fd, const NvHa static bool nv_get_versions(const int fd, char **versionString) { nv_ioctl_rm_api_version_t obj = { - .cmd = '0' //query + .cmd = '2' //query }; - ioctl(fd, _IOC(_IOC_READ|_IOC_WRITE, NV_IOCTL_MAGIC, NV_ESC_CHECK_VERSION_STR, sizeof(obj)), &obj); + const int ret = ioctl(fd, _IOC(_IOC_READ|_IOC_WRITE, NV_IOCTL_MAGIC, NV_ESC_CHECK_VERSION_STR, sizeof(obj)), &obj); - 
*versionString = strdup(obj.versionString); + if (ret != 0) { + LOG("nv_check_version failed: %d %d", ret, errno); + return false; + } + + if (strlen(obj.versionString) == 0) { + //the newer 470 series of drivers don't actually return the version number, so just substitute in a dummy one + *versionString = strdup("470.123.45"); + } else { + *versionString = strdup(obj.versionString); + } return obj.reply == NV_RM_API_VERSION_REPLY_RECOGNIZED; } From d94c592fda7d4bd74301a2d0f99f56763927aa6e Mon Sep 17 00:00:00 2001 From: "Azamat H. Hackimov" Date: Fri, 9 Feb 2024 20:54:17 +0300 Subject: [PATCH 30/30] Fix building with musl qsort_r invocations uses `__compar_d_fn_t` typedef that defined only in glibc. Added missing typedef to fix compilation on musl systems. See: https://bugs.gentoo.org/924146 Signed-off-by: Azamat H. Hackimov --- src/hevc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/hevc.c b/src/hevc.c index d58c629..a1db0a3 100644 --- a/src/hevc.c +++ b/src/hevc.c @@ -3,6 +3,10 @@ #include "vabackend.h" #include +#if !defined(__GLIBC__) +typedef int (*__compar_d_fn_t) (const void *, const void *, void *); +#endif + static const uint8_t ff_hevc_diag_scan4x4_x[16] = { 0, 0, 1, 0, 1, 2, 0, 1,