From 87e57429c72b121867368df3e55847c8657ba1a6 Mon Sep 17 00:00:00 2001 From: George Hotz Date: Mon, 16 May 2022 15:09:19 -0700 Subject: [PATCH 1/3] reduce decode latency by 2 frames --- PyNvCodec/TC/src/NvDecoder.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/PyNvCodec/TC/src/NvDecoder.cpp b/PyNvCodec/TC/src/NvDecoder.cpp index f8b18763..cda91bf9 100644 --- a/PyNvCodec/TC/src/NvDecoder.cpp +++ b/PyNvCodec/TC/src/NvDecoder.cpp @@ -614,6 +614,8 @@ NvDecoder::NvDecoder(CUstream cuStream, CUcontext cuContext, cudaVideoCodec eCodec, bool bLowLatency, int maxWidth, int maxHeight) { + bLowLatency = true; + p_impl = new NvDecoderImpl(); p_impl->m_cuvidStream = cuStream; p_impl->m_cuContext = cuContext; @@ -716,6 +718,7 @@ bool NvDecoder::DecodeLockSurface(Buffer const* encFrame, encFrame ? encFrame->GetDataAs() : nullptr; packet.payload_size = encFrame ? encFrame->GetRawMemSize() : 0U; packet.flags = CUVID_PKT_TIMESTAMP; + packet.flags |= CUVID_PKT_ENDOFPICTURE; packet.timestamp = pdata.pts; if (!decCtx.no_eos && (nullptr == packet.payload || 0 == packet.payload_size)) { From 15ecac7e65c80ae792dbf9e703a7433c2b9d6bce Mon Sep 17 00:00:00 2001 From: George Hotz Date: Mon, 16 May 2022 15:40:29 -0700 Subject: [PATCH 2/3] env var gate --- PyNvCodec/TC/inc/NvDecoder.h | 1 + PyNvCodec/TC/src/NvDecoder.cpp | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/PyNvCodec/TC/inc/NvDecoder.h b/PyNvCodec/TC/inc/NvDecoder.h index e50dd511..2f0a74be 100644 --- a/PyNvCodec/TC/inc/NvDecoder.h +++ b/PyNvCodec/TC/inc/NvDecoder.h @@ -122,4 +122,5 @@ class DllExport NvDecoder { int ReconfigureDecoder(CUVIDEOFORMAT *pVideoFormat); struct NvDecoderImpl *p_impl; + bool setEndOfPicture = false; }; diff --git a/PyNvCodec/TC/src/NvDecoder.cpp b/PyNvCodec/TC/src/NvDecoder.cpp index cda91bf9..74aa7cac 100644 --- a/PyNvCodec/TC/src/NvDecoder.cpp +++ b/PyNvCodec/TC/src/NvDecoder.cpp @@ -614,7 +614,9 @@ NvDecoder::NvDecoder(CUstream cuStream, CUcontext cuContext, cudaVideoCodec eCodec, bool bLowLatency, int maxWidth, int maxHeight) { - bLowLatency = true; + int lowLatency = getenv("NV_LOW_LATENCY") ? atoi(getenv("NV_LOW_LATENCY")) : 0; + if (lowLatency&1) setEndOfPicture = true; + if (lowLatency&2) bLowLatency = true; p_impl = new NvDecoderImpl(); p_impl->m_cuvidStream = cuStream; @@ -718,7 +720,7 @@ bool NvDecoder::DecodeLockSurface(Buffer const* encFrame, encFrame ? encFrame->GetDataAs() : nullptr; packet.payload_size = encFrame ? encFrame->GetRawMemSize() : 0U; packet.flags = CUVID_PKT_TIMESTAMP; - packet.flags |= CUVID_PKT_ENDOFPICTURE; + if (setEndOfPicture) packet.flags |= CUVID_PKT_ENDOFPICTURE; packet.timestamp = pdata.pts; if (!decCtx.no_eos && (nullptr == packet.payload || 0 == packet.payload_size)) { From c3b6c25a233b64589483e64f22719901e2cfc6c4 Mon Sep 17 00:00:00 2001 From: George Hotz Date: Mon, 23 May 2022 14:23:03 -0700 Subject: [PATCH 3/3] decode surfaces --- PyNvCodec/TC/src/NvDecoder.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/PyNvCodec/TC/src/NvDecoder.cpp b/PyNvCodec/TC/src/NvDecoder.cpp index 74aa7cac..f01072d6 100644 --- a/PyNvCodec/TC/src/NvDecoder.cpp +++ b/PyNvCodec/TC/src/NvDecoder.cpp @@ -101,6 +101,11 @@ static int GetChromaPlaneCount(cudaVideoChromaFormat eChromaFormat) unsigned long GetNumDecodeSurfaces(cudaVideoCodec eCodec, unsigned int nWidth, unsigned int nHeight) { + const char *num_decode_surfaces_str = getenv("NV_NUM_DECODE_SURFACES"); + if (num_decode_surfaces_str) { + return atoi(num_decode_surfaces_str); + } + if (eCodec == cudaVideoCodec_VP9) { return 12; }