Skip to content
This repository has been archived by the owner on Jun 10, 2024. It is now read-only.

Issue #536: add PyFFmpegDecoder unit test, fix decoder flush #537

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/PyNvCodec/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,7 @@ class PyFFmpegDemuxer:

class PyFfmpegDecoder:
def __init__(self, input: str, opts: Dict[str,str], gpu_id: int = ...) -> None: ...
def AvgFramerate(self) -> float: ...
def Codec(self) -> CudaVideoCodec: ...
def ColorRange(self) -> ColorRange: ...
def ColorSpace(self) -> ColorSpace: ...
Expand All @@ -360,6 +361,8 @@ class PyFfmpegDecoder:
def Framerate(self) -> float: ...
def GetMotionVectors(self) -> numpy.ndarray[MotionVector]: ...
def Height(self) -> int: ...
def Numframes(self) -> int: ...
def Timebase(self) -> float: ...
def Width(self) -> int: ...

class PyFrameUploader:
Expand Down
3 changes: 3 additions & 0 deletions src/PyNvCodec/inc/PyNvCodec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,9 @@ class PyFfmpegDecoder {
uint32_t Width() const;
uint32_t Height() const;
double Framerate() const;
double AvgFramerate() const;
double Timebase() const;
uint32_t Numframes() const;
ColorSpace Color_Space() const;
ColorRange Color_Range() const;
cudaVideoCodec Codec() const;
Expand Down
34 changes: 34 additions & 0 deletions src/PyNvCodec/src/PyFFMpegDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,27 @@ cudaVideoCodec PyFfmpegDecoder::Codec() const
return params.videoContext.codec;
};

double PyFfmpegDecoder::AvgFramerate() const
{
MuxingParams params;
upDecoder->GetParams(params);
return params.videoContext.avgFrameRate;
};

double PyFfmpegDecoder::Timebase() const
{
MuxingParams params;
upDecoder->GetParams(params);
return params.videoContext.timeBase;
};

uint32_t PyFfmpegDecoder::Numframes() const
{
MuxingParams params;
upDecoder->GetParams(params);
return params.videoContext.num_frames;
};

Pixel_Format PyFfmpegDecoder::PixelFormat() const
{
MuxingParams params;
Expand Down Expand Up @@ -246,6 +267,19 @@ void Init_PyFFMpegDecoder(py::module& m)
.def("Framerate", &PyFfmpegDecoder::Framerate,
R"pbdoc(
Return encoded video file framerate.
)pbdoc")
.def("AvgFramerate", &PyFfmpegDecoder::AvgFramerate,
R"pbdoc(
Return encoded video file average framerate.
)pbdoc")
.def("Timebase", &PyFfmpegDecoder::Timebase,
R"pbdoc(
Return encoded video file time base.
)pbdoc")
.def("Numframes", &PyFfmpegDecoder::Numframes,
R"pbdoc(
Return number of video frames in encoded video file.
Please note that some video containers don't store this information.
)pbdoc")
.def("ColorSpace", &PyFfmpegDecoder::Color_Space,
R"pbdoc(
Expand Down
1 change: 0 additions & 1 deletion src/TC/src/FFmpegDemuxer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,6 @@ FFmpegDemuxer::FFmpegDemuxer(AVFormatContext *fmtcx) : fmtc(fmtcx) {
throw runtime_error(ss.str());
}

//gop_size = fmtc->streams[videoStream]->codec->gop_size;
eVideoCodec = fmtc->streams[videoStream]->codecpar->codec_id;
width = fmtc->streams[videoStream]->codecpar->width;
height = fmtc->streams[videoStream]->codecpar->height;
Expand Down
59 changes: 43 additions & 16 deletions src/TC/src/FfmpegSwDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ struct FfmpegDecodeFrame_Impl {
map<AVFrameSideDataType, Buffer*> side_data;

int video_stream_idx = -1;
bool end_encode = false;
bool end_decode = false;
bool eof = false;

FfmpegDecodeFrame_Impl(const char* URL, AVDictionary* pOptions)
{
Expand Down Expand Up @@ -251,32 +252,41 @@ struct FfmpegDecodeFrame_Impl {
return true;
}

bool DecodeSingleFrame()
bool DecodeSingleFrame()
{
if (end_encode) {
if (end_decode) {
return false;
}

// Send packets to decoder until it outputs frame;
do {
// Read packets from stream until we find a video packet;
do {
if (eof) {
break;
}

auto ret = av_read_frame(fmt_ctx, &pktSrc);
if (ret < 0) {
// Flush decoder;
end_encode = true;
return DecodeSinglePacket(nullptr);

if (AVERROR_EOF == ret) {
eof = true;
break;
} else if (ret < 0) {
end_decode = true;
return false;
}
} while (pktSrc.stream_index != video_stream_idx);

auto status = DecodeSinglePacket(&pktSrc);
auto status = DecodeSinglePacket(eof ? nullptr : &pktSrc);

switch (status) {
case DEC_SUCCESS:
return true;
case DEC_ERROR:
end_decode = true;
return false;
case DEC_EOS:
end_decode = true;
return false;
case DEC_MORE:
continue;
Expand Down Expand Up @@ -332,7 +342,10 @@ struct FfmpegDecodeFrame_Impl {
DECODE_STATUS DecodeSinglePacket(const AVPacket* pktSrc)
{
auto res = avcodec_send_packet(avctx, pktSrc);
if (res < 0) {
if (AVERROR_EOF == res) {
// Flush decoder;
res = 0;
} else if (res < 0) {
cerr << "Error while sending a packet to the decoder" << endl;
cerr << "Error description: " << AvErrorToString(res) << endl;
return DEC_ERROR;
Expand All @@ -341,7 +354,6 @@ struct FfmpegDecodeFrame_Impl {
while (res >= 0) {
res = avcodec_receive_frame(avctx, frame);
if (res == AVERROR_EOF) {
cerr << "Input file is over" << endl;
return DEC_EOS;
} else if (res == AVERROR(EAGAIN)) {
return DEC_MORE;
Expand Down Expand Up @@ -394,13 +406,28 @@ TaskExecStatus FfmpegDecodeFrame::Run()
void FfmpegDecodeFrame::GetParams(MuxingParams& params)
{
memset((void*)&params, 0, sizeof(params));
auto fmtc = pImpl->fmt_ctx;
auto videoStream =
av_find_best_stream(fmtc, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);
if (videoStream < 0) {
stringstream ss;
ss << __FUNCTION__ << ": can't find video stream in input file." << endl;
throw runtime_error(ss.str());
}

params.videoContext.width = pImpl->avctx->width;
params.videoContext.height = pImpl->avctx->height;
params.videoContext.gop_size = pImpl->avctx->gop_size;
params.videoContext.width = fmtc->streams[videoStream]->codecpar->width;
params.videoContext.height = fmtc->streams[videoStream]->codecpar->height;
params.videoContext.frameRate =
(1.0 * pImpl->avctx->framerate.num) / (1.0 * pImpl->avctx->framerate.den);
(double)fmtc->streams[videoStream]->r_frame_rate.num /
(double)fmtc->streams[videoStream]->r_frame_rate.den;
params.videoContext.avgFrameRate =
(double)fmtc->streams[videoStream]->avg_frame_rate.num /
(double)fmtc->streams[videoStream]->avg_frame_rate.den;
params.videoContext.timeBase =
(double)fmtc->streams[videoStream]->time_base.num /
(double)fmtc->streams[videoStream]->time_base.den;
params.videoContext.codec = FFmpeg2NvCodecId(pImpl->avctx->codec_id);
params.videoContext.num_frames = fmtc->streams[videoStream]->nb_frames;

switch (pImpl->avctx->pix_fmt) {
case AV_PIX_FMT_YUVJ420P:
Expand Down Expand Up @@ -432,7 +459,7 @@ void FfmpegDecodeFrame::GetParams(MuxingParams& params)
break;
}

switch (pImpl->avctx->colorspace) {
switch (fmtc->streams[videoStream]->codecpar->color_space) {
case AVCOL_SPC_BT709:
params.videoContext.color_space = BT_709;
break;
Expand All @@ -445,7 +472,7 @@ void FfmpegDecodeFrame::GetParams(MuxingParams& params)
break;
}

switch (pImpl->avctx->color_range) {
switch (fmtc->streams[videoStream]->codecpar->color_range) {
case AVCOL_RANGE_MPEG:
params.videoContext.color_range = MPEG;
break;
Expand Down
27 changes: 27 additions & 0 deletions tests/gt_files.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"basic": {
"uri": "test.mp4",
"width": 848,
"height": 464,
"is_vfr": false,
"pix_fmt": "PixelFormat.NV12",
"framerate": 30,
"num_frames": 96,
"timebase": 8.1380e-5,
"color_space": "ColorSpace.BT_709",
"color_range": "ColorRange.MPEG"
},
"res_change": {
"uri": "test_res_change.h264",
"width": 848,
"height": 464,
"res_change_factor": 0.5,
"is_vfr": false,
"pix_fmt": "PixelFormat.NV12",
"framerate": 30,
"num_frames": 47,
"timebase": 8.1380e-5,
"color_space": "ColorSpace.BT_709",
"color_range": "ColorRange.MPEG"
}
}
107 changes: 107 additions & 0 deletions tests/test_PyFFmpegDecoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#
# Copyright 2023 Vision Labs LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Starting from Python 3.8 DLL search policy has changed.
# We need to add path to CUDA DLLs explicitly.
import sys
import os
from os.path import join, dirname


if os.name == "nt":
    # Starting with Python 3.8, Windows no longer searches PATH for DLL
    # dependencies, so the CUDA directories must be registered explicitly.
    # Use .get(): os.environ["CUDA_PATH"] would raise KeyError when the
    # variable is unset, making the error-reporting branch unreachable.
    cuda_path = os.environ.get("CUDA_PATH")
    if cuda_path:
        os.add_dll_directory(cuda_path)
    else:
        print("CUDA_PATH environment variable is not set.", file=sys.stderr)
        print("Can't set CUDA DLLs search path.", file=sys.stderr)
        exit(1)

    # Also register every existing directory from PATH so DLLs from minor
    # CUDA releases installed outside CUDA_PATH are found as well.
    sys_path = os.environ.get("PATH")
    if sys_path:
        paths = sys_path.split(";")
        for path in paths:
            if os.path.isdir(path):
                os.add_dll_directory(path)
    else:
        print("PATH environment variable is not set.", file=sys.stderr)
        exit(1)

import PyNvCodec as nvc
import numpy as np
import unittest
import random
import json
from pydantic import BaseModel

class GroundTruth(BaseModel):
    # Ground-truth record for one test clip, loaded from gt_files.json
    # (entries "basic" / "res_change").
    uri: str            # path to the input video file
    width: int          # frame width in pixels
    height: int         # frame height in pixels
    is_vfr: bool        # True if the clip has variable frame rate
    pix_fmt: str        # expected str(PixelFormat), e.g. "PixelFormat.NV12"
    framerate: float    # nominal frames per second
    num_frames: int     # total number of video frames in the clip
    timebase: float     # stream time base as a float (num / den)
    color_space: str    # expected str(ColorSpace), e.g. "ColorSpace.BT_709"
    color_range: str    # expected str(ColorRange), e.g. "ColorRange.MPEG"


class TestDecoderBasic(unittest.TestCase):
    """Sanity checks for nvc.PyFfmpegDecoder against the 'basic' entry
    of gt_files.json (metadata getters and full-clip decode)."""

    def __init__(self, methodName):
        super().__init__(methodName=methodName)

        # Use a context manager so the ground-truth file handle is closed
        # deterministically (the original leaked it).
        with open("gt_files.json") as f:
            data = json.load(f)["basic"]
        self.gtInfo = GroundTruth(**data)
        self.ffDec = nvc.PyFfmpegDecoder(self.gtInfo.uri, {})

    def test_width(self):
        self.assertEqual(self.gtInfo.width, self.ffDec.Width())

    def test_height(self):
        self.assertEqual(self.gtInfo.height, self.ffDec.Height())

    def test_color_space(self):
        self.assertEqual(self.gtInfo.color_space, str(self.ffDec.ColorSpace()))

    def test_color_range(self):
        self.assertEqual(self.gtInfo.color_range, str(self.ffDec.ColorRange()))

    def test_format(self):
        self.assertEqual(self.gtInfo.pix_fmt, str(self.ffDec.Format()))

    def test_framerate(self):
        self.assertEqual(self.gtInfo.framerate, self.ffDec.Framerate())

    def test_avgframerate(self):
        # Clip is constant frame rate (is_vfr: false), so the average
        # framerate must match the nominal one.
        self.assertEqual(self.gtInfo.framerate, self.ffDec.AvgFramerate())

    def test_timebase(self):
        # Time base is fractional; compare with a small tolerance.
        epsilon = 1e-4
        self.assertLessEqual(
            np.abs(self.gtInfo.timebase - self.ffDec.Timebase()), epsilon)

    def test_decode_all_frames(self):
        dec_frames = 0
        # 0-d placeholder; DecodeSingleFrame presumably resizes it to the
        # decoded frame size -- TODO confirm against the binding.
        frame = np.ndarray(dtype=np.uint8, shape=())
        while self.ffDec.DecodeSingleFrame(frame):
            dec_frames += 1
        self.assertEqual(self.gtInfo.num_frames, dec_frames)

if __name__ == "__main__":
    # Discover and run all TestCase methods when executed as a script.
    unittest.main()