diff --git a/modules/gapi/CMakeLists.txt b/modules/gapi/CMakeLists.txt index a5492c42bb4a..9c609f63f6ed 100644 --- a/modules/gapi/CMakeLists.txt +++ b/modules/gapi/CMakeLists.txt @@ -177,10 +177,13 @@ if(HAVE_GAPI_ONEVPL) src/streaming/onevpl/onevpl_cfg_params.cpp src/streaming/onevpl/onevpl_data_provider_interface_exception.cpp src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.cpp + src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp src/streaming/onevpl/accelerators/surface/surface.cpp src/streaming/onevpl/accelerators/surface/surface_pool.cpp + src/streaming/onevpl/accelerators/utils/shared_lock.cpp src/streaming/onevpl/accelerators/accel_policy_cpu.cpp src/streaming/onevpl/accelerators/accel_policy_dx11.cpp + src/streaming/onevpl/accelerators/dx11_alloc_resource.cpp src/streaming/onevpl/engine/engine_session.cpp src/streaming/onevpl/engine/processing_engine_base.cpp diff --git a/modules/gapi/perf/streaming/gapi_streaming_source_perf_tests.cpp b/modules/gapi/perf/streaming/gapi_streaming_source_perf_tests.cpp index b4b141523306..b9f09c1176f4 100644 --- a/modules/gapi/perf/streaming/gapi_streaming_source_perf_tests.cpp +++ b/modules/gapi/perf/streaming/gapi_streaming_source_perf_tests.cpp @@ -26,7 +26,8 @@ const std::string codec[] = { using source_t = std::string; using codec_t = std::string; -using source_description_t = std::tuple; +using accel_mode_t = std::string; +using source_description_t = std::tuple; class OneVPLSourcePerfTest : public TestPerfParams {}; class VideoCapSourcePerfTest : public TestPerfParams {}; @@ -38,12 +39,16 @@ PERF_TEST_P_(OneVPLSourcePerfTest, TestPerformance) const auto params = GetParam(); source_t src = findDataFile(get<0>(params)); codec_t type = get<1>(params); + accel_mode_t mode = get<2>(params); std::vector cfg_params { oneVPL_cfg_param::create("mfxImplDescription.Impl", "MFX_IMPL_TYPE_HARDWARE"), oneVPL_cfg_param::create("mfxImplDescription.mfxDecoderDescription.decoder.CodecID", type), }; + if (!mode.empty()) { + cfg_params.push_back(oneVPL_cfg_param::create("mfxImplDescription.AccelerationMode", mode)); + } auto source_ptr = make_vpl_src(src, cfg_params); Data out; TEST_CYCLE() @@ -70,8 +75,10 @@ PERF_TEST_P_(VideoCapSourcePerfTest, TestPerformance) } INSTANTIATE_TEST_CASE_P(Streaming, OneVPLSourcePerfTest, - Values(source_description_t(files[0], codec[0]), - source_description_t(files[1], codec[1]))); + Values(source_description_t(files[0], codec[0], ""), + source_description_t(files[0], codec[0], "MFX_ACCEL_MODE_VIA_D3D11"), + source_description_t(files[1], codec[1], ""), + source_description_t(files[1], codec[1], "MFX_ACCEL_MODE_VIA_D3D11"))); INSTANTIATE_TEST_CASE_P(Streaming, VideoCapSourcePerfTest, Values(files[0], diff --git a/modules/gapi/src/backends/render/grenderocv.cpp b/modules/gapi/src/backends/render/grenderocv.cpp index 265228466808..12adc47de9d5 100644 --- a/modules/gapi/src/backends/render/grenderocv.cpp +++ b/modules/gapi/src/backends/render/grenderocv.cpp @@ -4,6 +4,7 @@ #include #include +#include "logger.hpp" struct RenderOCVState { @@ -128,15 +129,13 @@ GAPI_OCV_KERNEL_ST(RenderFrameOCVImpl, cv::gapi::wip::draw::GRenderFrame, Render out = in; auto desc = out.desc(); - auto w_out = out.access(cv::MediaFrame::Access::W); - auto out_y = cv::Mat(desc.size, CV_8UC1, w_out.ptr[0], w_out.stride[0]); - auto out_uv = cv::Mat(desc.size / 2, CV_8UC2, w_out.ptr[1], w_out.stride[1]); - - auto r_in = in.access(cv::MediaFrame::Access::R); + cv::Mat upsample_uv, yuv; + { + auto r_in = in.access(cv::MediaFrame::Access::R); - auto in_y = cv::Mat(desc.size, CV_8UC1, r_in.ptr[0], r_in.stride[0]); - auto in_uv = cv::Mat(desc.size / 2, CV_8UC2, r_in.ptr[1], r_in.stride[1]); + auto in_y = cv::Mat(desc.size, CV_8UC1, r_in.ptr[0], r_in.stride[0]); + auto in_uv = cv::Mat(desc.size / 2, CV_8UC2, r_in.ptr[1], r_in.stride[1]); /* FIXME How to render correctly on NV12 format ? * @@ -157,19 +156,26 @@ GAPI_OCV_KERNEL_ST(RenderFrameOCVImpl, cv::gapi::wip::draw::GRenderFrame, Render * */ - // NV12 -> YUV - cv::Mat upsample_uv, yuv; - cv::resize(in_uv, upsample_uv, in_uv.size() * 2, cv::INTER_LINEAR); - cv::merge(std::vector{in_y, upsample_uv}, yuv); + // NV12 -> YUV + cv::resize(in_uv, upsample_uv, in_uv.size() * 2, cv::INTER_LINEAR); + cv::merge(std::vector{in_y, upsample_uv}, yuv); + } cv::gapi::wip::draw::drawPrimitivesOCVYUV(yuv, prims, state.ftpr); // YUV -> NV12 - cv::Mat out_u, out_v, uv_plane; - std::vector chs = { out_y, out_u, out_v }; - cv::split(yuv, chs); - cv::merge(std::vector{chs[1], chs[2]}, uv_plane); - cv::resize(uv_plane, out_uv, uv_plane.size() / 2, cv::INTER_LINEAR); + { + auto w_out = out.access(cv::MediaFrame::Access::W); + + auto out_y = cv::Mat(desc.size, CV_8UC1, w_out.ptr[0], w_out.stride[0]); + auto out_uv = cv::Mat(desc.size / 2, CV_8UC2, w_out.ptr[1], w_out.stride[1]); + + cv::Mat out_u, out_v, uv_plane; + std::vector chs = { out_y, out_u, out_v }; + cv::split(yuv, chs); + cv::merge(std::vector{chs[1], chs[2]}, uv_plane); + cv::resize(uv_plane, out_uv, uv_plane.size() / 2, cv::INTER_LINEAR); + } } static void setup(const cv::GFrameDesc& /* in_nv12 */, diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp index 6a9a16862db0..8dfd7946980b 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp @@ -21,6 +21,98 @@ namespace cv { namespace gapi { namespace wip { +namespace utils { + /* UTILS */ +mfxU32 GetSurfaceSize_(mfxU32 FourCC, mfxU32 width, mfxU32 height) { + mfxU32 nbytes = 0; + + mfxU32 half_width = width / 2; + mfxU32 half_height = height / 2; + switch (FourCC) { + case MFX_FOURCC_I420: + case MFX_FOURCC_NV12: + nbytes = width * height + 2 * half_width * half_height; + break; + case MFX_FOURCC_I010: + case MFX_FOURCC_P010: + nbytes = width * height + 2 * half_width * half_height; + nbytes *= 2; + break; + case MFX_FOURCC_RGB4: + nbytes = width * height * 4; + break; + default: + break; + } + + return nbytes; +} + +surface_ptr_t create_surface_RGB4_(mfxFrameInfo frameInfo, + std::shared_ptr out_buf_ptr, + size_t out_buf_ptr_offset, + size_t out_buf_size) +{ + mfxU8* buf = reinterpret_cast(out_buf_ptr.get()); + mfxU16 surfW = frameInfo.Width * 4; + mfxU16 surfH = frameInfo.Height; + (void)surfH; + + // TODO more intelligent check + if (out_buf_size <= out_buf_ptr_offset) { + GAPI_LOG_WARNING(nullptr, "Not enough buffer, ptr: " << out_buf_ptr << + ", size: " << out_buf_size << + ", offset: " << out_buf_ptr_offset << + ", W: " << surfW << + ", H: " << surfH); + GAPI_Assert(false && "Invalid offset"); + } + + std::unique_ptr handle(new mfxFrameSurface1); + memset(handle.get(), 0, sizeof(mfxFrameSurface1)); + + handle->Info = frameInfo; + handle->Data.B = buf + out_buf_ptr_offset; + handle->Data.G = handle->Data.B + 1; + handle->Data.R = handle->Data.B + 2; + handle->Data.A = handle->Data.B + 3; + handle->Data.Pitch = surfW; + + return Surface::create_surface(std::move(handle), out_buf_ptr); +} + +surface_ptr_t create_surface_other_(mfxFrameInfo frameInfo, + std::shared_ptr out_buf_ptr, + size_t out_buf_ptr_offset, + size_t out_buf_size) +{ + mfxU8* buf = reinterpret_cast(out_buf_ptr.get()); + mfxU16 surfH = frameInfo.Height; + mfxU16 surfW = (frameInfo.FourCC == MFX_FOURCC_P010) ? frameInfo.Width * 2 : frameInfo.Width; + + // TODO more intelligent check + if (out_buf_size <= + out_buf_ptr_offset + (surfW * surfH) + ((surfW / 2) * (surfH / 2))) { + GAPI_LOG_WARNING(nullptr, "Not enough buffer, ptr: " << out_buf_ptr << + ", size: " << out_buf_size << + ", offset: " << out_buf_ptr_offset << + ", W: " << surfW << + ", H: " << surfH); + GAPI_Assert(false && "Invalid offset"); + } + + std::unique_ptr handle(new mfxFrameSurface1); + memset(handle.get(), 0, sizeof(mfxFrameSurface1)); + + handle->Info = frameInfo; + handle->Data.Y = buf + out_buf_ptr_offset; + handle->Data.U = buf + out_buf_ptr_offset + (surfW * surfH); + handle->Data.V = handle->Data.U + ((surfW / 2) * (surfH / 2)); + handle->Data.Pitch = surfW; + + return Surface::create_surface(std::move(handle), out_buf_ptr); +} +} // namespace utils VPLCPUAccelerationPolicy::VPLCPUAccelerationPolicy() { GAPI_LOG_INFO(nullptr, "created"); @@ -35,6 +127,10 @@ VPLCPUAccelerationPolicy::~VPLCPUAccelerationPolicy() { GAPI_LOG_INFO(nullptr, "destroyed"); } +VPLAccelerationPolicy::AccelType VPLCPUAccelerationPolicy::get_accel_type() const { + return AccelType::CPU; +} + void VPLCPUAccelerationPolicy::init(session_t session) { (void)session; //MFXVideoCORE_SetFrameAllocator(session, mfxFrameAllocator instance) @@ -120,6 +216,38 @@ VPLCPUAccelerationPolicy::create_surface_pool(size_t pool_size, size_t surface_s return preallocated_pool_memory_ptr; } +VPLCPUAccelerationPolicy::pool_key_t +VPLCPUAccelerationPolicy::create_surface_pool(const mfxFrameAllocRequest& alloc_request, mfxVideoParam& param) { + +// External (application) allocation of decode surfaces + GAPI_LOG_DEBUG(nullptr, "Query mfxFrameAllocRequest.NumFrameSuggested: " << alloc_request.NumFrameSuggested << + ", mfxFrameAllocRequest.Type: " << alloc_request.Type); + + mfxU32 singleSurfaceSize = utils::GetSurfaceSize_(param.mfx.FrameInfo.FourCC, + param.mfx.FrameInfo.Width, + param.mfx.FrameInfo.Height); + if (!singleSurfaceSize) { + throw std::runtime_error("Cannot determine surface size for: fourCC" + + std::to_string(param.mfx.FrameInfo.FourCC) + + ", width: " + std::to_string(param.mfx.FrameInfo.Width) + + ", height: " + std::to_string(param.mfx.FrameInfo.Height)); + } + + const auto &frameInfo = param.mfx.FrameInfo; + auto surface_creator = + [&frameInfo] (std::shared_ptr out_buf_ptr, size_t out_buf_ptr_offset, + size_t out_buf_size) -> surface_ptr_t { + return (frameInfo.FourCC == MFX_FOURCC_RGB4) ? + utils::create_surface_RGB4_(frameInfo, out_buf_ptr, out_buf_ptr_offset, + out_buf_size) : + utils::create_surface_other_(frameInfo, out_buf_ptr, out_buf_ptr_offset, + out_buf_size);}; + + //TODO Configure preallocation size (how many frames we can hold) + const size_t preallocated_frames_count = 30; + return create_surface_pool(alloc_request.NumFrameSuggested * preallocated_frames_count, + singleSurfaceSize, surface_creator); +} VPLCPUAccelerationPolicy::surface_weak_ptr_t VPLCPUAccelerationPolicy::get_free_surface(pool_key_t key) { auto pool_it = pool_table.find(key); diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp index 2cfefafde7fd..aa19928d3605 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp @@ -29,9 +29,11 @@ struct VPLCPUAccelerationPolicy final : public VPLAccelerationPolicy using pool_t = CachedPool; + GAPI_EXPORTS AccelType get_accel_type() const override; GAPI_EXPORTS void init(session_t session) override; GAPI_EXPORTS void deinit(session_t session) override; - GAPI_EXPORTS pool_key_t create_surface_pool(size_t pool_size, size_t surface_size_bytes, surface_ptr_ctr_t creator) override; + GAPI_EXPORTS pool_key_t create_surface_pool(size_t pool_size, size_t surface_size_bytes, surface_ptr_ctr_t creator); + GAPI_EXPORTS pool_key_t create_surface_pool(const mfxFrameAllocRequest& alloc_request, mfxVideoParam& param) override; GAPI_EXPORTS surface_weak_ptr_t get_free_surface(pool_key_t key) override; GAPI_EXPORTS size_t get_free_surface_count(pool_key_t key) const override; GAPI_EXPORTS size_t get_surface_count(pool_key_t key) const override; diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp index 8ed5ecd5c06d..ab769e342aab 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp @@ -5,9 +5,11 @@ // Copyright (C) 2021 Intel Corporation #ifdef HAVE_ONEVPL +#include + #include "streaming/onevpl/accelerators/accel_policy_dx11.hpp" -//#include "streaming/vpl/vpl_utils.hpp" #include "streaming/onevpl/accelerators/surface/cpu_frame_adapter.hpp" +#include "streaming/onevpl/accelerators/surface/dx11_frame_adapter.hpp" #include "streaming/onevpl/accelerators/surface/surface.hpp" #include "streaming/onevpl/onevpl_utils.hpp" #include "logger.hpp" @@ -28,31 +30,106 @@ namespace cv { namespace gapi { namespace wip { -VPLDX11AccelerationPolicy::VPLDX11AccelerationPolicy() + +VPLDX11AccelerationPolicy::VPLDX11AccelerationPolicy() : + hw_handle(), + device_context(), + allocator() { -#ifdef CPU_ACCEL_ADAPTER - adapter.reset(new VPLCPUAccelerationPolicy); -#endif + // setup dx11 allocator + memset(&allocator, 0, sizeof(mfxFrameAllocator)); + allocator.Alloc = alloc_cb; + allocator.Lock = lock_cb; + allocator.Unlock = unlock_cb; + allocator.GetHDL = get_hdl_cb; + allocator.Free = free_cb; + allocator.pthis = this; } VPLDX11AccelerationPolicy::~VPLDX11AccelerationPolicy() { + for (auto& allocation_pair : allocation_table) { + allocation_pair.second.reset(); + } + + if (device_context) { + GAPI_LOG_INFO(nullptr, "release context: " << device_context); + device_context->Release(); + } + if (hw_handle) { - GAPI_LOG_INFO(nullptr, "VPLDX11AccelerationPolicy release ID3D11Device"); + GAPI_LOG_INFO(nullptr, "release ID3D11Device"); hw_handle->Release(); } } +VPLAccelerationPolicy::AccelType VPLDX11AccelerationPolicy::get_accel_type() const { + return AccelType::GPU; +} + void VPLDX11AccelerationPolicy::init(session_t session) { - mfxStatus sts = MFXVideoCORE_GetHandle(session, MFX_HANDLE_D3D11_DEVICE, reinterpret_cast(&hw_handle)); + //Create device + UINT creationFlags = 0;//D3D11_CREATE_DEVICE_BGRA_SUPPORT; + +#if defined _DEBUG || defined CV_STATIC_ANALYSIS + // If the project is in a debug build, enable debugging via SDK Layers with this flag. + creationFlags |= D3D11_CREATE_DEVICE_DEBUG; +#endif + + // This array defines the set of DirectX hardware feature levels this app will support. + // Note the ordering should be preserved. + // Don't forget to declare your application's minimum required feature level in its + // description. All applications are assumed to support 9.1 unless otherwise stated. + D3D_FEATURE_LEVEL featureLevels[] = + { + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_11_0, + D3D_FEATURE_LEVEL_10_1, + D3D_FEATURE_LEVEL_10_0, + }; + D3D_FEATURE_LEVEL featureLevel; + + // Create the Direct3D 11 API device object and a corresponding context. + HRESULT err = + D3D11CreateDevice( + nullptr, // Specify nullptr to use the default adapter. + D3D_DRIVER_TYPE_HARDWARE, + nullptr, + creationFlags, // Set set debug and Direct2D compatibility flags. + featureLevels, // List of feature levels this app can support. + ARRAYSIZE(featureLevels), + D3D11_SDK_VERSION, // Always set this to D3D11_SDK_VERSION. + &hw_handle, // Returns the Direct3D device created. + &featureLevel, // Returns feature level of device created. + &device_context // Returns the device immediate context. + ); + if(FAILED(err)) + { + throw std::logic_error("Cannot create D3D11CreateDevice, error: " + std::to_string(HRESULT_CODE(err))); + } + + // oneVPL recommendation + { + ID3D11Multithread *pD11Multithread; + device_context->QueryInterface(IID_PPV_ARGS(&pD11Multithread)); + pD11Multithread->SetMultithreadProtected(true); + pD11Multithread->Release(); + } + + mfxStatus sts = MFXVideoCORE_SetHandle(session, MFX_HANDLE_D3D11_DEVICE, (mfxHDL) hw_handle); if (sts != MFX_ERR_NONE) { - throw std::logic_error("Cannot create VPLDX11AccelerationPolicy, MFXVideoCORE_GetHandle error: " + + throw std::logic_error("Cannot create VPLDX11AccelerationPolicy, MFXVideoCORE_SetHandle error: " + mfxstatus_to_string(sts)); } - //MFXVideoCORE_SetFrameAllocator(session, mfxFrameAllocator instance) + sts = MFXVideoCORE_SetFrameAllocator(session, &allocator); + if (sts != MFX_ERR_NONE) + { + throw std::logic_error("Cannot create VPLDX11AccelerationPolicy, MFXVideoCORE_SetFrameAllocator error: " + + mfxstatus_to_string(sts)); + } GAPI_LOG_INFO(nullptr, "VPLDX11AccelerationPolicy initialized, session: " << session); } @@ -62,53 +139,291 @@ void VPLDX11AccelerationPolicy::deinit(session_t session) { } VPLDX11AccelerationPolicy::pool_key_t -VPLDX11AccelerationPolicy::create_surface_pool(size_t pool_size, size_t surface_size_bytes, - surface_ptr_ctr_t creator) { - GAPI_LOG_DEBUG(nullptr, "pool size: " << pool_size << ", surface size bytes: " << surface_size_bytes); +VPLDX11AccelerationPolicy::create_surface_pool(const mfxFrameAllocRequest& alloc_req, + mfxVideoParam& param) { -#ifdef CPU_ACCEL_ADAPTER - return adapter->create_surface_pool(pool_size, surface_size_bytes, creator); -#endif - (void)pool_size; - (void)surface_size_bytes; - (void)creator; - throw std::runtime_error("VPLDX11AccelerationPolicy::create_surface_pool() is not implemented"); + param.IOPattern = MFX_IOPATTERN_OUT_VIDEO_MEMORY; + + // allocate textures by explicit request + mfxFrameAllocResponse mfxResponse; + //TODO + mfxFrameAllocRequest alloc_request = alloc_req; + alloc_request.NumFrameSuggested = alloc_request.NumFrameSuggested * 5; + mfxStatus sts = on_alloc(&alloc_request, &mfxResponse); + if (sts != MFX_ERR_NONE) + { + throw std::logic_error("Cannot create allocate memory for surfaces, error: " + + mfxstatus_to_string(sts)); + } + + // get reference pointer + auto table_it = allocation_table.find(alloc_request.AllocId); + GAPI_DbgAssert (allocation_table.end() != table_it); + + mfxU16 numSurfaces = alloc_request.NumFrameSuggested; + + // create pool + pool_t pool; + pool.reserve(numSurfaces); + for (int i = 0; i < numSurfaces; i++) { + std::unique_ptr handle(new mfxFrameSurface1 {}); + handle->Info = param.mfx.FrameInfo; + handle->Data.MemId = mfxResponse.mids[i]; + + pool.push_back(Surface::create_surface(std::move(handle), table_it->second)); + } + + // remember pool by key + pool_key_t key = reinterpret_cast(table_it->second.get()); + GAPI_LOG_INFO(nullptr, "New pool allocated, key: " << key << + ", surface count: " << pool.total_size()); + try { + if (!pool_table.emplace(key, std::move(pool)).second) { + throw std::runtime_error(std::string("VPLDX11AccelerationPolicy::create_surface_pool - ") + + "cannot insert pool, table size: " + std::to_string(pool_table.size())); + } + } catch (const std::exception&) { + throw; + } + return key; } VPLDX11AccelerationPolicy::surface_weak_ptr_t VPLDX11AccelerationPolicy::get_free_surface(pool_key_t key) { -#ifdef CPU_ACCEL_ADAPTER - return adapter->get_free_surface(key); -#endif - (void)key; - throw std::runtime_error("VPLDX11AccelerationPolicy::get_free_surface() is not implemented"); + auto pool_it = pool_table.find(key); + if (pool_it == pool_table.end()) { + std::stringstream ss; + ss << "key is not found: " << key << ", table size: " << pool_table.size(); + const std::string& str = ss.str(); + GAPI_LOG_WARNING(nullptr, str); + throw std::runtime_error(std::string(__FUNCTION__) + " - " + str); + } + + pool_t& requested_pool = pool_it->second; + return requested_pool.find_free(); } -size_t VPLDX11AccelerationPolicy::get_free_surface_count(pool_key_t key) const { -#ifdef CPU_ACCEL_ADAPTER - return adapter->get_free_surface_count(key); -#endif - (void)key; - throw std::runtime_error("get_free_surface_count() is not implemented"); +size_t VPLDX11AccelerationPolicy::get_free_surface_count(pool_key_t) const { + GAPI_Assert(false && "get_free_surface_count() is not implemented"); } -size_t VPLDX11AccelerationPolicy::get_surface_count(pool_key_t key) const { -#ifdef CPU_ACCEL_ADAPTER - return adapter->get_surface_count(key); -#endif - (void)key; - throw std::runtime_error("VPLDX11AccelerationPolicy::get_surface_count() is not implemented"); +size_t VPLDX11AccelerationPolicy::get_surface_count(pool_key_t) const { + GAPI_Assert(false && "VPLDX11AccelerationPolicy::get_surface_count() is not implemented"); } cv::MediaFrame::AdapterPtr VPLDX11AccelerationPolicy::create_frame_adapter(pool_key_t key, mfxFrameSurface1* surface) { -#ifdef CPU_ACCEL_ADAPTER - return adapter->create_frame_adapter(key, surface); -#endif - (void)key; - (void)surface; - throw std::runtime_error("VPLDX11AccelerationPolicy::create_frame_adapter() is not implemented"); + auto pool_it = pool_table.find(key); + if (pool_it == pool_table.end()) { + std::stringstream ss; + ss << "key is not found: " << key << ", table size: " << pool_table.size(); + const std::string& str = ss.str(); + GAPI_LOG_WARNING(nullptr, str); + throw std::runtime_error(std::string(__FUNCTION__) + " - " + str); + } + + pool_t& requested_pool = pool_it->second; + return cv::MediaFrame::AdapterPtr{new VPLMediaFrameDX11Adapter(requested_pool.find_by_handle(surface))}; +} + +mfxStatus VPLDX11AccelerationPolicy::alloc_cb(mfxHDL pthis, mfxFrameAllocRequest *request, + mfxFrameAllocResponse *response) { + if (!pthis) { + return MFX_ERR_MEMORY_ALLOC; + } + + VPLDX11AccelerationPolicy *self = static_cast(pthis); + + request->NumFrameSuggested *= 5; + return self->on_alloc(request, response); +} + +mfxStatus VPLDX11AccelerationPolicy::lock_cb(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr) { + VPLDX11AccelerationPolicy *self = static_cast(pthis); + GAPI_LOG_DEBUG(nullptr, "called from: " << self ? "Policy" : "Resource"); + cv::util::suppress_unused_warning(self); + return on_lock(mid, ptr); +} + +mfxStatus VPLDX11AccelerationPolicy::unlock_cb(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr) { + VPLDX11AccelerationPolicy *self = static_cast(pthis); + GAPI_LOG_DEBUG(nullptr, "called from: " << self ? "Policy" : "Resource"); + cv::util::suppress_unused_warning(self); + return on_unlock(mid, ptr); +} + +mfxStatus VPLDX11AccelerationPolicy::get_hdl_cb(mfxHDL pthis, mfxMemId mid, mfxHDL *handle) { + if (!pthis) { + return MFX_ERR_MEMORY_ALLOC; + } + + VPLDX11AccelerationPolicy *self = static_cast(pthis); + return self->on_get_hdl(mid, handle); +} + +mfxStatus VPLDX11AccelerationPolicy::free_cb(mfxHDL pthis, mfxFrameAllocResponse *response) { + if (!pthis) { + return MFX_ERR_MEMORY_ALLOC; + } + + VPLDX11AccelerationPolicy *self = static_cast(pthis); + return self->on_free(response); +} + +mfxStatus VPLDX11AccelerationPolicy::on_alloc(const mfxFrameAllocRequest *request, + mfxFrameAllocResponse *response) { + GAPI_LOG_DEBUG(nullptr, "Requestend allocation id: " << std::to_string(request->AllocId) << + ", type: " << ext_mem_frame_type_to_cstr(request->Type) << + ", size: " << request->Info.Width << "x" << request->Info.Height << + ", frames minimum count: " << request->NumFrameMin << + ", frames sugested count: " << request->NumFrameSuggested); + auto table_it = allocation_table.find(request->AllocId); + if (allocation_table.end() != table_it) { + GAPI_LOG_WARNING(nullptr, "Allocation already exist, id: " + std::to_string(request->AllocId) + + ". Total allocation size: " + std::to_string(allocation_table.size())); + + // TODO cache + allocation_t &resources_array = table_it->second; + response->AllocId = request->AllocId; + response->NumFrameActual = request->NumFrameSuggested; + response->mids = reinterpret_cast(resources_array->data()); + + return MFX_ERR_NONE; + } + + DXGI_FORMAT colorFormat = VPLMediaFrameDX11Adapter::get_dx11_color_format(request->Info.FourCC); + + if (DXGI_FORMAT_UNKNOWN == colorFormat || colorFormat != DXGI_FORMAT_NV12) { + GAPI_LOG_WARNING(nullptr, "Unsupported fourcc :" << request->Info.FourCC); + return MFX_ERR_UNSUPPORTED; + } + + D3D11_TEXTURE2D_DESC desc = { 0 }; + + desc.Width = request->Info.Width; + desc.Height = request->Info.Height; + + desc.MipLevels = 1; + // single texture with subresources + desc.ArraySize = request->NumFrameSuggested; + desc.Format = colorFormat; + desc.SampleDesc.Count = 1; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.MiscFlags = D3D11_RESOURCE_MISC_SHARED; + desc.BindFlags = D3D11_BIND_DECODER; + + if (request->Type & MFX_MEMTYPE_SHARED_RESOURCE) { + desc.BindFlags |= D3D11_BIND_SHADER_RESOURCE; + desc.MiscFlags = D3D11_RESOURCE_MISC_SHARED; + } + + ID3D11Texture2D *pTexture2D; + HRESULT err = hw_handle->CreateTexture2D(&desc, nullptr, &pTexture2D); + if (FAILED(err)) { + GAPI_LOG_WARNING(nullptr, "Cannot create texture, error: " + std::to_string(HRESULT_CODE(err))); + return MFX_ERR_MEMORY_ALLOC; + } + + // create staging texture to read it from + desc.ArraySize = 1; + desc.Usage = D3D11_USAGE_STAGING; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + desc.BindFlags = 0; + desc.MiscFlags = 0; + std::vector staging_textures; + staging_textures.reserve(request->NumFrameSuggested); + for (int i = 0; i < request->NumFrameSuggested; i ++ ) { + ID3D11Texture2D *staging_texture_2d = nullptr; + err = hw_handle->CreateTexture2D(&desc, NULL, &staging_texture_2d); + if (FAILED(err)) { + GAPI_LOG_WARNING(nullptr, "Cannot create staging texture, error: " + std::to_string(HRESULT_CODE(err))); + return MFX_ERR_MEMORY_ALLOC; + } + staging_textures.push_back(staging_texture_2d); + } + + // for multiple subresources initialize allocation array + auto cand_resource_it = allocation_table.end(); + { + // insert into global table + auto inserted_it = + allocation_table.emplace(request->AllocId, + DX11AllocationRecord::create(request->NumFrameSuggested, + device_context, + allocator, + pTexture2D, + std::move(staging_textures))); + if (!inserted_it.second) { + GAPI_LOG_WARNING(nullptr, "Cannot assign allocation by id: " + std::to_string(request->AllocId) + + " - aldeady exist. Total allocation size: " + std::to_string(allocation_table.size())); + pTexture2D->Release(); + return MFX_ERR_MEMORY_ALLOC; + } + + GAPI_LOG_DEBUG(nullptr, "allocation by id: " << request->AllocId << + " was created, total allocations count: " << allocation_table.size()); + cand_resource_it = inserted_it.first; + } + + //fill out response + GAPI_DbgAssert(cand_resource_it != allocation_table.end() && "Invalid cand_resource_it"); + + allocation_t &resources_array = cand_resource_it->second; + response->AllocId = request->AllocId; + response->NumFrameActual = request->NumFrameSuggested; + response->mids = reinterpret_cast(resources_array->data()); + + return MFX_ERR_NONE; +} + +mfxStatus VPLDX11AccelerationPolicy::on_lock(mfxMemId mid, mfxFrameData *ptr) { + DX11AllocationRecord::AllocationId data = reinterpret_cast(mid); + if (!data) { + GAPI_LOG_WARNING(nullptr, "Allocation record is empty"); + return MFX_ERR_LOCK_MEMORY; + } + + return data->acquire_access(ptr); +} + +mfxStatus VPLDX11AccelerationPolicy::on_unlock(mfxMemId mid, mfxFrameData *ptr) { + DX11AllocationRecord::AllocationId data = reinterpret_cast(mid); + if (!data) { + return MFX_ERR_LOCK_MEMORY; + } + + return data->release_access(ptr); +} + +mfxStatus VPLDX11AccelerationPolicy::on_get_hdl(mfxMemId mid, mfxHDL *handle) { + DX11AllocationRecord::AllocationId data = reinterpret_cast(mid); + if (!data) { + return MFX_ERR_INVALID_HANDLE; + } + + mfxHDLPair *pPair = reinterpret_cast(handle); + + pPair->first = data->get_texture(); + pPair->second = (mfxHDL)reinterpret_cast(data->get_subresource()); + + GAPI_LOG_DEBUG(nullptr, "texture : " << pPair->first << ", sub id: " << pPair->second); + return MFX_ERR_NONE; +} + +mfxStatus VPLDX11AccelerationPolicy::on_free(mfxFrameAllocResponse *response) { + GAPI_LOG_DEBUG(nullptr, "Allocations count before: " << allocation_table.size() << + ", requested id: " << response->AllocId); + + auto table_it = allocation_table.find(response->AllocId); + if (allocation_table.end() == table_it) { + GAPI_LOG_WARNING(nullptr, "Cannot find allocation id: " + std::to_string(response->AllocId) + + ". Total allocation size: " + std::to_string(allocation_table.size())); + return MFX_ERR_MEMORY_ALLOC; + } + + allocation_table.erase(table_it); + return MFX_ERR_NONE; } } // namespace wip } // namespace gapi diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp index 61f9c6369594..d8f628e93fc9 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp @@ -6,22 +6,20 @@ #ifndef GAPI_STREAMING_ONEVPL_ACCELERATORS_ACCEL_POLICY_DX11_HPP #define GAPI_STREAMING_ONEVPL_ACCELERATORS_ACCEL_POLICY_DX11_HPP +#include #include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS -//TODO -#define CPU_ACCEL_ADAPTER #ifdef HAVE_ONEVPL #include #include "streaming/onevpl/accelerators/accel_policy_interface.hpp" - -#ifdef CPU_ACCEL_ADAPTER -#include "streaming/onevpl/accelerators/accel_policy_cpu.hpp" -#endif +#include "streaming/onevpl/accelerators/surface/surface_pool.hpp" +#include "streaming/onevpl/accelerators/dx11_alloc_resource.hpp" #ifdef HAVE_DIRECTX #ifdef HAVE_D3D11 #define D3D11_NO_HELPERS + #define NOMINMAX #include #include #include "opencv2/core/directx.hpp" @@ -33,32 +31,56 @@ namespace cv { namespace gapi { namespace wip { -struct VPLDX11AccelerationPolicy final: public VPLAccelerationPolicy +struct GAPI_EXPORTS VPLDX11AccelerationPolicy final: public VPLAccelerationPolicy { // GAPI_EXPORTS for tests - GAPI_EXPORTS VPLDX11AccelerationPolicy(); - GAPI_EXPORTS ~VPLDX11AccelerationPolicy(); + VPLDX11AccelerationPolicy(); + ~VPLDX11AccelerationPolicy(); + + using pool_t = CachedPool; - GAPI_EXPORTS void init(session_t session) override; - GAPI_EXPORTS void deinit(session_t session) override; - GAPI_EXPORTS pool_key_t create_surface_pool(size_t pool_size, size_t surface_size_bytes, surface_ptr_ctr_t creator) override; - GAPI_EXPORTS surface_weak_ptr_t get_free_surface(pool_key_t key) override; - GAPI_EXPORTS size_t get_free_surface_count(pool_key_t key) const override; - GAPI_EXPORTS size_t get_surface_count(pool_key_t key) const override; + AccelType get_accel_type() const override; + void init(session_t session) override; + void deinit(session_t session) override; + pool_key_t create_surface_pool(const mfxFrameAllocRequest& alloc_request, + mfxVideoParam& param) override; + surface_weak_ptr_t get_free_surface(pool_key_t key) override; + size_t get_free_surface_count(pool_key_t key) const override; + size_t get_surface_count(pool_key_t key) const override; - GAPI_EXPORTS cv::MediaFrame::AdapterPtr create_frame_adapter(pool_key_t key, - mfxFrameSurface1* surface) override; + cv::MediaFrame::AdapterPtr create_frame_adapter(pool_key_t key, + mfxFrameSurface1* surface) override; private: ID3D11Device *hw_handle; + ID3D11DeviceContext* device_context; + + mfxFrameAllocator allocator; + static mfxStatus MFX_CDECL alloc_cb(mfxHDL pthis, + mfxFrameAllocRequest *request, + mfxFrameAllocResponse *response); + static mfxStatus MFX_CDECL lock_cb(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr); + static mfxStatus MFX_CDECL unlock_cb(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr); + static mfxStatus MFX_CDECL get_hdl_cb(mfxHDL pthis, mfxMemId mid, mfxHDL *handle); + static mfxStatus MFX_CDECL free_cb(mfxHDL pthis, mfxFrameAllocResponse *response); + + virtual mfxStatus on_alloc(const mfxFrameAllocRequest *request, + mfxFrameAllocResponse *response); + static mfxStatus on_lock(mfxMemId mid, mfxFrameData *ptr); + static mfxStatus on_unlock(mfxMemId mid, mfxFrameData *ptr); + virtual mfxStatus on_get_hdl(mfxMemId mid, mfxHDL *handle); + virtual mfxStatus on_free(mfxFrameAllocResponse *response); + + using alloc_id_t = mfxU32; + using allocation_t = std::shared_ptr; + std::map allocation_table; -#ifdef CPU_ACCEL_ADAPTER - std::unique_ptr adapter; -#endif + std::map pool_table; }; } // namespace wip } // namespace gapi } // namespace cv +#undef NOMINMAX #endif // HAVE_D3D11 #endif // HAVE_DIRECTX diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp index 87b1246d257e..226db99f51b5 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp @@ -35,6 +35,12 @@ struct VPLAccelerationPolicy size_t out_buf_ptr_offset, size_t out_buf_ptr_size)>; + enum class AccelType { + CPU, + GPU + }; + + virtual AccelType get_accel_type() const = 0; virtual void init(session_t session) = 0; virtual void deinit(session_t session) = 0; @@ -42,7 +48,7 @@ struct VPLAccelerationPolicy // for existing workspace in existing pool (see realloc) // thus it is not implemented, // PLEASE provide initial memory area large enough - virtual pool_key_t create_surface_pool(size_t pool_size, size_t surface_size_bytes, surface_ptr_ctr_t creator) = 0; + virtual pool_key_t create_surface_pool(const mfxFrameAllocRequest& alloc_request, mfxVideoParam& param) = 0; virtual surface_weak_ptr_t get_free_surface(pool_key_t key) = 0; virtual size_t get_free_surface_count(pool_key_t key) const = 0; diff --git a/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.cpp b/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.cpp new file mode 100644 index 000000000000..0702a2fc7bd6 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.cpp @@ -0,0 +1,387 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2021 Intel Corporation + +#ifdef HAVE_ONEVPL +#include "streaming/onevpl/accelerators/dx11_alloc_resource.hpp" +#include "streaming/onevpl/accelerators/utils/shared_lock.hpp" +#include "logger.hpp" + +#ifdef HAVE_DIRECTX +#ifdef HAVE_D3D11 +#pragma comment(lib,"d3d11.lib") + +#define D3D11_NO_HELPERS +#define NOMINMAX +#include +#include +#include +#include "opencv2/core/directx.hpp" +#ifdef HAVE_OPENCL +#include +#endif // HAVE_OPENCL +#undef D3D11_NO_HELPERS +#undef NOMINMAX + +namespace cv { +namespace gapi { +namespace wip { + +LockAdapter::LockAdapter(mfxFrameAllocator origin_allocator) : + lockable_allocator(origin_allocator), + impl() { + GAPI_DbgAssert((lockable_allocator.Lock && lockable_allocator.Unlock) && + "Cannot create LockAdapter for empty origin allocator"); + + // abandon unusable c-allocator interfaces + // because LockAdapter requires Lock & Unlock only + lockable_allocator.Alloc = nullptr; + lockable_allocator.Free = nullptr; + lockable_allocator.pthis = nullptr; +} + +size_t LockAdapter::read_lock(mfxMemId mid, mfxFrameData &data) { + size_t prev_lock_count = 0; + if (impl) { + prev_lock_count = impl->shared_lock(); + } + + // dispatch to VPL allocator using READ access mode + mfxStatus sts = MFX_ERR_LOCK_MEMORY; + try { + sts = lockable_allocator.Lock(nullptr, mid, &data); + } catch(...) { + } + + // adapter will throw error if VPL frame allocator fails + if (sts != MFX_ERR_NONE) { + impl->unlock_shared(); + GAPI_Assert(false && "Cannot lock frame on READ using VPL allocator"); + } + + return prev_lock_count; +} + +size_t LockAdapter::unlock_read(mfxMemId mid, mfxFrameData &data) { + GAPI_DbgAssert(!impl || !is_write_acquired() && + "Reject `unlock_read` in `write_lock` state"); + lockable_allocator.Unlock(nullptr, mid, &data); + return impl ? impl->unlock_shared() : 0; +} + +void LockAdapter::write_lock(mfxMemId mid, mfxFrameData &data) { + if (impl) { + // TODO consider using `try_lock` in loop with limited iteration count + // to prevent dead-lock with WARN at least notification + impl->lock(); + } + + // dispatch to VPL allocator using READ access mode + mfxStatus sts = MFX_ERR_LOCK_MEMORY; + try { + sts = lockable_allocator.Lock(nullptr, mid, &data); + } catch(...) { + } + + // adapter will throw error if VPL frame allocator fails + if (sts != MFX_ERR_NONE) { + impl->unlock(); + GAPI_Assert(false && "Cannot lock frame on WRITE using VPL allocator"); + } +} + +bool LockAdapter::is_write_acquired() { + if(!impl) return true; + return impl->owns(); +} + +void LockAdapter::unlock_write(mfxMemId mid, mfxFrameData &data) { + GAPI_DbgAssert(is_write_acquired() && + "Reject `unlock_write` for unlocked state"); + lockable_allocator.Unlock(nullptr, mid, &data); + if (impl) { + impl->unlock(); + } +} + +SharedLock* LockAdapter::set_adaptee(SharedLock* new_impl) { + SharedLock* old_impl = impl; + impl = new_impl; + return old_impl; +} + +SharedLock* LockAdapter::get_adaptee() { + return impl; +} + +DX11AllocationItem::DX11AllocationItem(std::weak_ptr parent, + CComPtr origin_ctx, + mfxFrameAllocator origin_allocator, + CComPtr tex_ptr, + subresource_id_t subtex_id, + CComPtr staging_tex_ptr) : + LockAdapter(origin_allocator), + shared_device_context(origin_ctx), + texture_ptr(tex_ptr), + subresource_id(subtex_id), + staging_texture_ptr(staging_tex_ptr), + observer(parent) { + GAPI_DbgAssert(texture_ptr && + "Cannot create DX11AllocationItem for empty texture"); + GAPI_DbgAssert(staging_tex_ptr && + "Cannot create DX11AllocationItem for empty staging texture"); + GAPI_DbgAssert(observer.lock() && + "Cannot create DX11AllocationItem for empty parent"); +} + +DX11AllocationItem::~DX11AllocationItem() { + release(); + observer.reset(); +} + +void DX11AllocationItem::release() { + auto parent = observer.lock(); + GAPI_LOG_DEBUG(nullptr, "texture: " << texture_ptr << + ", subresource id: " << subresource_id << + ", parent: " << parent.get()); + cv::util::suppress_unused_warning(parent); +} + +CComPtr DX11AllocationItem::get_texture() { + return texture_ptr; +} + +CComPtr DX11AllocationItem::get_staging_texture() { + return staging_texture_ptr; +} + +DX11AllocationItem::subresource_id_t DX11AllocationItem::get_subresource() const { + return subresource_id; +} + +CComPtr DX11AllocationItem::get_device_ctx() { + return shared_device_context; +} + +void DX11AllocationItem::on_first_in_impl(mfxFrameData *ptr) { + D3D11_MAP mapType = D3D11_MAP_READ; + UINT mapFlags = D3D11_MAP_FLAG_DO_NOT_WAIT; + + shared_device_context->CopySubresourceRegion(get_staging_texture(), 0, + 0, 0, 0, + get_texture(), get_subresource(), + nullptr); + HRESULT err = S_OK; + D3D11_MAPPED_SUBRESOURCE lockedRect {}; + do { + err = shared_device_context->Map(get_staging_texture(), 0, mapType, mapFlags, &lockedRect); + if (S_OK != err && DXGI_ERROR_WAS_STILL_DRAWING != err) { + GAPI_LOG_WARNING(nullptr, "Cannot Map staging texture in device context, error: " << std::to_string(HRESULT_CODE(err))); + GAPI_Assert(false && "Cannot Map staging texture in device context"); + } + } while (DXGI_ERROR_WAS_STILL_DRAWING == err); + + if (FAILED(err)) { + GAPI_LOG_WARNING(nullptr, "Cannot lock frame"); + GAPI_Assert(false && "Cannot lock frame"); + return ; + } + + D3D11_TEXTURE2D_DESC desc {}; + get_texture()->GetDesc(&desc); + switch (desc.Format) { + case DXGI_FORMAT_NV12: + ptr->Pitch = (mfxU16)lockedRect.RowPitch; + ptr->Y = (mfxU8 *)lockedRect.pData; + ptr->UV = (mfxU8 *)lockedRect.pData + desc.Height * lockedRect.RowPitch; + + GAPI_Assert(ptr->Y && ptr->UV/* && ptr->V */&& "DXGI_FORMAT_NV12 locked frame data is nullptr"); + break; + default: + GAPI_LOG_WARNING(nullptr, "Unknown DXGI format: " << desc.Format); + return; + } +} + +void DX11AllocationItem::on_last_out_impl(mfxFrameData *ptr) { + shared_device_context->Unmap(get_staging_texture(), 0); + if (ptr) { + ptr->Pitch = 0; + ptr->U = ptr->V = ptr->Y = 0; + ptr->A = ptr->R = ptr->G = ptr->B = 0; + } +} + +mfxStatus DX11AllocationItem::acquire_access(mfxFrameData *ptr) { + if (is_write_acquired()) { + return exclusive_access_acquire_unsafe(ptr); + } + return shared_access_acquire_unsafe(ptr); +} + +mfxStatus DX11AllocationItem::release_access(mfxFrameData *ptr) { + if (is_write_acquired()) { + return exclusive_access_release_unsafe(ptr); + } + return shared_access_release_unsafe(ptr); +} + +mfxStatus DX11AllocationItem::shared_access_acquire_unsafe(mfxFrameData *ptr) { + GAPI_LOG_DEBUG(nullptr, "acquire READ lock: " << this); + GAPI_LOG_DEBUG(nullptr, "texture: " << get_texture() << + ", sub id: " << get_subresource()); + // shared access requires elastic barrier + // first-in visited thread uses resource mapping on host memory + // subsequent threads reuses mapped memory + // + // exclusive access is prohibited while any one shared access has been obtained + visit_in(ptr); + + if (!(ptr->Y && (ptr->UV || (ptr->U && ptr->V)))) { + GAPI_LOG_WARNING(nullptr, "No any data obtained: " << this); + return MFX_ERR_LOCK_MEMORY; + } + GAPI_LOG_DEBUG(nullptr, "READ access granted: " << this); + return MFX_ERR_NONE; +} + +mfxStatus DX11AllocationItem::shared_access_release_unsafe(mfxFrameData *ptr) { + GAPI_LOG_DEBUG(nullptr, "releasing READ lock: " << this); + GAPI_LOG_DEBUG(nullptr, "texture: " << get_texture() << + ", sub id: " << get_subresource()); + // releasing shared access requires elastic barrier + // last-out thread must make memory unmapping then and only then no more + // read access is coming. If another read-access goes into critical section + // (or waiting for acees) we must drop off unmapping procedure + visit_out(ptr); + + GAPI_LOG_DEBUG(nullptr, "access on READ released: " << this); + return MFX_ERR_NONE; +} + +mfxStatus DX11AllocationItem::exclusive_access_acquire_unsafe(mfxFrameData *ptr) { + GAPI_LOG_DEBUG(nullptr, "acquire WRITE lock: " << this); + GAPI_LOG_DEBUG(nullptr, "texture: " << get_texture() << + ", sub id: " << get_subresource()); + D3D11_MAP mapType = D3D11_MAP_WRITE; + UINT mapFlags = D3D11_MAP_FLAG_DO_NOT_WAIT; + + HRESULT err = S_OK; + D3D11_MAPPED_SUBRESOURCE lockedRect {}; + do { + err = get_device_ctx()->Map(get_staging_texture(), 0, mapType, mapFlags, &lockedRect); + if (S_OK != err && DXGI_ERROR_WAS_STILL_DRAWING != err) { + GAPI_LOG_WARNING(nullptr, "Cannot Map staging texture in device context, error: " << std::to_string(HRESULT_CODE(err))); + return MFX_ERR_LOCK_MEMORY; + } + } while (DXGI_ERROR_WAS_STILL_DRAWING == err); + + if (FAILED(err)) { + GAPI_LOG_WARNING(nullptr, "Cannot lock frame"); + return MFX_ERR_LOCK_MEMORY; + } + + D3D11_TEXTURE2D_DESC desc {}; + get_texture()->GetDesc(&desc); + switch (desc.Format) { + case DXGI_FORMAT_NV12: + ptr->Pitch = (mfxU16)lockedRect.RowPitch; + ptr->Y = (mfxU8 *)lockedRect.pData; + ptr->UV = (mfxU8 *)lockedRect.pData + desc.Height * lockedRect.RowPitch; + if (!ptr->Y || !ptr->UV) { + GAPI_LOG_WARNING(nullptr, "DXGI_FORMAT_NV12 locked frame data is nullptr"); + return MFX_ERR_LOCK_MEMORY; + } + break; + default: + GAPI_LOG_WARNING(nullptr, "Unknown DXGI format: " << desc.Format); + return MFX_ERR_LOCK_MEMORY; + } + + GAPI_LOG_DEBUG(nullptr, "WRITE access granted: " << this); + return MFX_ERR_NONE; +} + +mfxStatus DX11AllocationItem::exclusive_access_release_unsafe(mfxFrameData *ptr) { + GAPI_LOG_DEBUG(nullptr, "releasing WRITE lock: " << this); + GAPI_LOG_DEBUG(nullptr, "texture: " << get_texture() << + ", sub id: " << get_subresource()); + + get_device_ctx()->Unmap(get_staging_texture(), 0); + + get_device_ctx()->CopySubresourceRegion(get_texture(), + get_subresource(), + 0, 0, 0, + get_staging_texture(), 0, + nullptr); + + if (ptr) { + ptr->Pitch = 0; + ptr->U = ptr->V = ptr->Y = 0; + ptr->A = ptr->R = ptr->G = ptr->B = 0; + } + GAPI_LOG_DEBUG(nullptr, "access on WRITE released: " << this); + return MFX_ERR_NONE; +} + +DX11AllocationRecord::DX11AllocationRecord() = default; + +DX11AllocationRecord::~DX11AllocationRecord() { + GAPI_LOG_DEBUG(nullptr, "record: " << this << + ", subresources count: " << resources.size()); + + for (AllocationId id : resources) { + delete id; + } + resources.clear(); + + GAPI_LOG_DEBUG(nullptr, "release final referenced texture: " << texture_ptr); + if(texture_ptr) { + texture_ptr->Release(); + } +} + +void DX11AllocationRecord::init(unsigned int items, + CComPtr origin_ctx, + mfxFrameAllocator origin_allocator, + ID3D11Texture2D* texture, + std::vector &&staging_textures) { + GAPI_DbgAssert(items != 0 && "Cannot create DX11AllocationRecord with empty items"); + GAPI_DbgAssert(items == staging_textures.size() && "Allocation items count and staging size are not equal"); + GAPI_DbgAssert(origin_ctx && + "Cannot create DX11AllocationItem for empty origin_ctx"); + auto shared_allocator_copy = origin_allocator; + GAPI_DbgAssert((shared_allocator_copy.Lock && shared_allocator_copy.Unlock) && + "Cannot create DX11AllocationItem for empty origin allocator"); + + // abandon unusable c-allocator interfaces + shared_allocator_copy.Alloc = nullptr; + shared_allocator_copy.Free = nullptr; + shared_allocator_copy.pthis = nullptr; + + + GAPI_LOG_DEBUG(nullptr, "subresources count: " << items << ", text: " << texture) + resources.reserve(items); + texture_ptr = texture; // no AddRef here, because DX11AllocationRecord receive ownership it here + for(unsigned int i = 0; i < items; i++ ) { + resources.emplace_back(new DX11AllocationItem(get_ptr(), origin_ctx, shared_allocator_copy, + texture, i, staging_textures[i])); + } +} + +DX11AllocationRecord::Ptr DX11AllocationRecord::get_ptr() { + return shared_from_this(); +} + +DX11AllocationRecord::AllocationId* DX11AllocationRecord::data() { + return resources.data(); +} + +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // HAVE_D3D11 +#endif // HAVE_DIRECTX +#endif // HAVE_ONEVPL diff --git a/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.hpp b/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.hpp new file mode 100644 index 000000000000..5fff2672cbed --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.hpp @@ -0,0 +1,138 @@ +#ifndef GAPI_STREAMING_ONEVPL_ACCEL_DX11_ALLOC_RESOURCE_HPP +#define GAPI_STREAMING_ONEVPL_ACCEL_DX11_ALLOC_RESOURCE_HPP + +#include + +#include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS +#include + +#ifdef HAVE_ONEVPL +#include +#include "streaming/onevpl/accelerators/utils/elastic_barrier.hpp" + +#ifdef HAVE_DIRECTX +#ifdef HAVE_D3D11 +#pragma comment(lib,"d3d11.lib") + +#define D3D11_NO_HELPERS +#define NOMINMAX +#include +#include +#include +#include +#include "opencv2/core/directx.hpp" +#ifdef HAVE_OPENCL +#include +#endif // HAVE_OPENCL +#undef D3D11_NO_HELPERS +#undef NOMINMAX + +namespace cv { +namespace gapi { +namespace wip { + +class SharedLock; +struct GAPI_EXPORTS LockAdapter { + LockAdapter(mfxFrameAllocator origin_allocator); + + // GAPI_EXPORTS for tests + size_t read_lock(mfxMemId mid, mfxFrameData &data); + size_t unlock_read(mfxMemId mid, mfxFrameData &data); + + void write_lock(mfxMemId mid, mfxFrameData &data); + bool is_write_acquired(); + void unlock_write(mfxMemId mid, mfxFrameData &data); + + SharedLock* set_adaptee(SharedLock* new_impl); + SharedLock* get_adaptee(); +private: + LockAdapter(const LockAdapter&) = delete; + LockAdapter(LockAdapter&&) = delete; + LockAdapter& operator= (const LockAdapter&) = delete; + LockAdapter& operator= (LockAdapter&&) = delete; + + mfxFrameAllocator lockable_allocator; + SharedLock* impl; +}; + +struct DX11AllocationRecord; +struct DX11AllocationItem : public LockAdapter, + public elastic_barrier { + using subresource_id_t = unsigned int; + + friend struct DX11AllocationRecord; + friend class elastic_barrier; + ~DX11AllocationItem(); + + void release(); + CComPtr get_texture(); + CComPtr get_staging_texture(); + DX11AllocationItem::subresource_id_t get_subresource() const; + + CComPtr get_device_ctx(); + + // public transactional access to resources. + // implements dispatching through different access acquisition modes. + // current acquisition mode determined by `LockAdapter` with `is_write_acquired()` + mfxStatus acquire_access(mfxFrameData *ptr); + mfxStatus release_access(mfxFrameData *ptr); +private: + DX11AllocationItem(std::weak_ptr parent, + CComPtr origin_ctx, + mfxFrameAllocator origin_allocator, + CComPtr texture_ptr, + subresource_id_t subresource_id, + CComPtr staging_tex_ptr); + + // elastic barrier interface impl + void on_first_in_impl(mfxFrameData *ptr); + void on_last_out_impl(mfxFrameData *ptr); + + mfxStatus shared_access_acquire_unsafe(mfxFrameData *ptr); + mfxStatus shared_access_release_unsafe(mfxFrameData *ptr); + mfxStatus exclusive_access_acquire_unsafe(mfxFrameData *ptr); + mfxStatus exclusive_access_release_unsafe(mfxFrameData *ptr); + + CComPtr shared_device_context; + + CComPtr texture_ptr; + subresource_id_t subresource_id = 0; + CComPtr staging_texture_ptr; + std::weak_ptr observer; +}; + +struct DX11AllocationRecord : public std::enable_shared_from_this { + + using Ptr = std::shared_ptr; + + ~DX11AllocationRecord(); + + template + static Ptr create(Args&& ...args) { + std::shared_ptr record(new DX11AllocationRecord); + record->init(std::forward(args)...); + return record; + } + + Ptr get_ptr(); + + // Raw ptr is required as a part of VPL `Mid` c-interface + // which requires contiguous memory + using AllocationId = DX11AllocationItem*; + AllocationId* data(); +private: + DX11AllocationRecord(); + void init(unsigned int items, CComPtr origin_ctx, + mfxFrameAllocator origin_allocator, + ID3D11Texture2D* texture, std::vector &&staging_textures); + + std::vector resources; + ID3D11Texture2D* texture_ptr = nullptr; +}; +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // HAVE_D3D11 +#endif // HAVE_DIRECTX +#endif // HAVE_ONEVPL +#endif // GAPI_STREAMING_ONEVPL_ACCEL_DX11_ALLOC_RESOURCE_HPP diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp new file mode 100644 index 000000000000..5319a24eae5f --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp @@ -0,0 +1,227 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2021 Intel Corporation + +#include "streaming/onevpl/accelerators/surface/dx11_frame_adapter.hpp" +#include "streaming/onevpl/accelerators/dx11_alloc_resource.hpp" +#include "streaming/onevpl/accelerators/surface/surface.hpp" +#include "logger.hpp" + +#ifdef HAVE_ONEVPL + +#if (MFX_VERSION >= 2000) +#include +#endif + +#include + +namespace cv { +namespace gapi { +namespace wip { + + +void lock_mid(mfxMemId mid, mfxFrameData &data, MediaFrame::Access mode) { + LockAdapter* alloc_data = reinterpret_cast(mid); + if (mode == MediaFrame::Access::R) { + alloc_data->read_lock(mid, data); + } else { + alloc_data->write_lock(mid, data); + } +} + +void unlock_mid(mfxMemId mid, mfxFrameData &data, MediaFrame::Access mode) { + LockAdapter* alloc_data = reinterpret_cast(data.MemId); + if (mode == MediaFrame::Access::R) { + alloc_data->unlock_read(mid, data); + } else { + alloc_data->unlock_write(mid, data); + } +} + +VPLMediaFrameDX11Adapter::VPLMediaFrameDX11Adapter(std::shared_ptr surface): + parent_surface_ptr(surface) { + + GAPI_Assert(parent_surface_ptr && "Surface is nullptr"); + parent_surface_ptr->obtain_lock(); + + + const Surface::info_t& info = parent_surface_ptr->get_info(); + Surface::data_t& data = parent_surface_ptr->get_data(); + + LockAdapter* alloc_data = reinterpret_cast(data.MemId); + alloc_data->set_adaptee(this); + + GAPI_LOG_DEBUG(nullptr, "surface: " << parent_surface_ptr->get_handle() << + ", w: " << info.Width << ", h: " << info.Height << + ", p: " << data.Pitch); +} + +VPLMediaFrameDX11Adapter::~VPLMediaFrameDX11Adapter() { + + // Each VPLMediaFrameDX11Adapter releases mfx surface counter + // The last VPLMediaFrameDX11Adapter releases shared Surface pointer + // The last surface pointer releases workspace memory + Surface::data_t& data = parent_surface_ptr->get_data(); + LockAdapter* alloc_data = reinterpret_cast(data.MemId); + alloc_data->set_adaptee(nullptr); + + parent_surface_ptr->release_lock(); +} + +cv::GFrameDesc VPLMediaFrameDX11Adapter::meta() const { + GFrameDesc desc; + const Surface::info_t& info = parent_surface_ptr->get_info(); + switch(info.FourCC) + { + case MFX_FOURCC_I420: + throw std::runtime_error("MediaFrame doesn't support I420 type"); + break; + case MFX_FOURCC_NV12: + desc.fmt = MediaFormat::NV12; + break; + default: + throw std::runtime_error("MediaFrame unknown 'fmt' type: " + std::to_string(info.FourCC)); + } + + desc.size = cv::Size{info.Width, info.Height}; + return desc; +} + +MediaFrame::View VPLMediaFrameDX11Adapter::access(MediaFrame::Access mode) { + + Surface::data_t& data = parent_surface_ptr->get_data(); + const Surface::info_t& info = parent_surface_ptr->get_info(); + void* frame_id = reinterpret_cast(this); + + GAPI_LOG_DEBUG(nullptr, "START lock frame in surface: " << parent_surface_ptr->get_handle() << + ", frame id: " << frame_id); + + // lock MT + lock_mid(data.MemId, data, mode); + + GAPI_LOG_DEBUG(nullptr, "FINISH lock frame in surface: " << parent_surface_ptr->get_handle() << + ", frame id: " << frame_id); + using stride_t = typename cv::MediaFrame::View::Strides::value_type; + stride_t pitch = static_cast(data.Pitch); + + //TODO + auto parent_surface_ptr_copy = parent_surface_ptr; + switch(info.FourCC) { + case MFX_FOURCC_I420: + { + GAPI_Assert(data.Y && data.U && data.V && "MFX_FOURCC_I420 frame data is nullptr"); + cv::MediaFrame::View::Ptrs pp = { + data.Y, + data.U, + data.V, + nullptr + }; + cv::MediaFrame::View::Strides ss = { + pitch, + pitch / 2, + pitch / 2, 0u + }; + return cv::MediaFrame::View(std::move(pp), std::move(ss), + [parent_surface_ptr_copy, + frame_id, mode] () { + parent_surface_ptr_copy->obtain_lock(); + + auto& data = parent_surface_ptr_copy->get_data(); + GAPI_LOG_DEBUG(nullptr, "START unlock frame in surface: " << parent_surface_ptr_copy->get_handle() << + ", frame id: " << frame_id); + unlock_mid(data.MemId, data, mode); + + GAPI_LOG_DEBUG(nullptr, "FINISH unlock frame in surface: " << parent_surface_ptr_copy->get_handle() << + ", frame id: " << frame_id); + + parent_surface_ptr_copy->release_lock(); + }); + } + case MFX_FOURCC_NV12: + { + if (!data.Y || !data.UV) { + GAPI_LOG_WARNING(nullptr, "Empty data detected!!! for surface: " << parent_surface_ptr->get_handle() << + ", frame id: " << frame_id); + } + GAPI_Assert(data.Y && data.UV && "MFX_FOURCC_NV12 frame data is nullptr"); + cv::MediaFrame::View::Ptrs pp = { + data.Y, + data.UV, nullptr, nullptr + }; + cv::MediaFrame::View::Strides ss = { + pitch, + pitch, 0u, 0u + }; + return cv::MediaFrame::View(std::move(pp), std::move(ss), + [parent_surface_ptr_copy, + frame_id, mode] () { + parent_surface_ptr_copy->obtain_lock(); + + auto& data = parent_surface_ptr_copy->get_data(); + GAPI_LOG_DEBUG(nullptr, "START unlock frame in surface: " << parent_surface_ptr_copy->get_handle() << + ", frame id: " << frame_id); + unlock_mid(data.MemId, data, mode); + + GAPI_LOG_DEBUG(nullptr, "FINISH unlock frame in surface: " << parent_surface_ptr_copy->get_handle() << + ", frame id: " << frame_id); + parent_surface_ptr_copy->release_lock(); + }); + } + break; + default: + throw std::runtime_error("MediaFrame unknown 'fmt' type: " + std::to_string(info.FourCC)); + } +} + +cv::util::any VPLMediaFrameDX11Adapter::blobParams() const { + GAPI_Assert("VPLMediaFrameDX11Adapter::blobParams() is not implemented"); + return {}; +} + +void VPLMediaFrameDX11Adapter::serialize(cv::gapi::s11n::IOStream&) { + GAPI_Assert("VPLMediaFrameDX11Adapter::serialize() is not implemented"); +} + +void VPLMediaFrameDX11Adapter::deserialize(cv::gapi::s11n::IIStream&) { + GAPI_Assert("VPLMediaFrameDX11Adapter::deserialize() is not implemented"); +} + +DXGI_FORMAT VPLMediaFrameDX11Adapter::get_dx11_color_format(uint32_t mfx_fourcc) { + switch (mfx_fourcc) { + case MFX_FOURCC_NV12: + return DXGI_FORMAT_NV12; + + case MFX_FOURCC_YUY2: + return DXGI_FORMAT_YUY2; + + case MFX_FOURCC_RGB4: + return DXGI_FORMAT_B8G8R8A8_UNORM; + + case MFX_FOURCC_P8: + case MFX_FOURCC_P8_TEXTURE: + return DXGI_FORMAT_P8; + + case MFX_FOURCC_ARGB16: + case MFX_FOURCC_ABGR16: + return DXGI_FORMAT_R16G16B16A16_UNORM; + + case MFX_FOURCC_P010: + return DXGI_FORMAT_P010; + + case MFX_FOURCC_A2RGB10: + return DXGI_FORMAT_R10G10B10A2_UNORM; + + case DXGI_FORMAT_AYUV: + case MFX_FOURCC_AYUV: + return DXGI_FORMAT_AYUV; + + default: + return DXGI_FORMAT_UNKNOWN; + } +} +} // namespace wip +} // namespace gapi +} // namespace cv +#endif HAVE_ONEVPL diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.hpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.hpp new file mode 100644 index 000000000000..f99212841403 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.hpp @@ -0,0 +1,64 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2021 Intel Corporation + +#ifndef GAPI_STREAMING_ONEVPL_ACCELERATORS_SURFACE_DX11_FRAME_ADAPTER_HPP +#define GAPI_STREAMING_ONEVPL_ACCELERATORS_SURFACE_DX11_FRAME_ADAPTER_HPP +#include + +#include +#include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS + +#include "streaming/onevpl/accelerators/utils/shared_lock.hpp" +#ifdef HAVE_ONEVPL +#if (MFX_VERSION >= 2000) +#include +#endif + +#include + +#ifdef HAVE_DIRECTX +#ifdef HAVE_D3D11 + #define D3D11_NO_HELPERS + #define NOMINMAX + #include + #include + #include "opencv2/core/directx.hpp" + #ifdef HAVE_OPENCL + #include + #endif + +namespace cv { +namespace gapi { +namespace wip { + +class Surface; +class VPLMediaFrameDX11Adapter final: public cv::MediaFrame::IAdapter, + public SharedLock { +public: + // GAPI_EXPORTS for tests + GAPI_EXPORTS VPLMediaFrameDX11Adapter(std::shared_ptr assoc_surface); + GAPI_EXPORTS ~VPLMediaFrameDX11Adapter(); + cv::GFrameDesc meta() const override; + MediaFrame::View access(MediaFrame::Access) override; + + // The default implementation does nothing + cv::util::any blobParams() const override; + void serialize(cv::gapi::s11n::IOStream&) override; + void deserialize(cv::gapi::s11n::IIStream&) override; + + static DXGI_FORMAT get_dx11_color_format(uint32_t mfx_fourcc); +private: + std::shared_ptr parent_surface_ptr; + mfxFrameAllocator allocator; +}; +} // namespace wip +} // namespace gapi +} // namespace cv +#undef NOMINMAX +#endif // HAVE_D3D11 +#endif // HAVE_DIRECTX +#endif // HAVE_ONEVPL +#endif // GAPI_STREAMING_ONEVPL_ACCELERATORS_SURFACE_DX11_FRAME_ADAPTER_HPP diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/surface.cpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/surface.cpp index 5d315412e1be..970305105652 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/surface/surface.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/surface.cpp @@ -19,14 +19,14 @@ Surface::Surface(std::unique_ptr&& surf, std::shared_ptr associa mirrored_locked_count() { GAPI_Assert(mfx_surface && "Surface is nullptr"); - mirrored_locked_count.store(mfx_surface->Data.Locked); GAPI_LOG_DEBUG(nullptr, "create surface: " << mfx_surface << ", locked count: " << mfx_surface->Data.Locked); } Surface::~Surface() { GAPI_LOG_DEBUG(nullptr, "destroy surface: " << mfx_surface << - ", worspace memory counter: " << workspace_memory_ptr.use_count()); + ", worspace memory counter: " << + workspace_memory_ptr.use_count()); } std::shared_ptr Surface::create_surface(std::unique_ptr&& surf, @@ -47,14 +47,16 @@ const Surface::data_t& Surface::get_data() const { return mfx_surface->Data; } +Surface::data_t& Surface::get_data() { + return const_cast(static_cast(this)->get_data()); +} + size_t Surface::get_locks_count() const { - return mirrored_locked_count.load(); + return mirrored_locked_count.load() + mfx_surface->Data.Locked; } size_t Surface::obtain_lock() { size_t locked_count = mirrored_locked_count.fetch_add(1); - GAPI_Assert(locked_count < std::numeric_limits::max() && "Too many references "); - mfx_surface->Data.Locked = static_cast(locked_count + 1); GAPI_LOG_DEBUG(nullptr, "surface: " << mfx_surface.get() << ", locked times: " << locked_count + 1); return locked_count; // return preceding value @@ -62,9 +64,7 @@ size_t Surface::obtain_lock() { size_t Surface::release_lock() { size_t locked_count = mirrored_locked_count.fetch_sub(1); - GAPI_Assert(locked_count < std::numeric_limits::max() && "Too many references "); GAPI_Assert(locked_count && "Surface lock counter is invalid"); - mfx_surface->Data.Locked = static_cast(locked_count - 1); GAPI_LOG_DEBUG(nullptr, "surface: " << mfx_surface.get() << ", locked times: " << locked_count - 1); return locked_count; // return preceding value diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/surface.hpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/surface.hpp index fbb790c97109..dddb80e619b7 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/surface/surface.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/surface.hpp @@ -25,12 +25,8 @@ namespace gapi { namespace wip { class Surface { - using handle_t = mfxFrameSurface1; - - std::shared_ptr workspace_memory_ptr; - std::unique_ptr mfx_surface; - std::atomic mirrored_locked_count; public: + using handle_t = mfxFrameSurface1; using info_t = mfxFrameInfo; using data_t = mfxFrameData; @@ -42,6 +38,7 @@ class Surface { GAPI_EXPORTS handle_t* get_handle() const; GAPI_EXPORTS const info_t& get_info() const; GAPI_EXPORTS const data_t& get_data() const; + GAPI_EXPORTS data_t& get_data(); GAPI_EXPORTS size_t get_locks_count() const; @@ -50,6 +47,11 @@ class Surface { GAPI_EXPORTS size_t release_lock(); private: Surface(std::unique_ptr&& surf, std::shared_ptr accociated_memory); + + std::shared_ptr workspace_memory_ptr; + std::unique_ptr mfx_surface; + std::atomic mirrored_locked_count; + }; using surface_ptr_t = std::shared_ptr; diff --git a/modules/gapi/src/streaming/onevpl/accelerators/utils/elastic_barrier.hpp b/modules/gapi/src/streaming/onevpl/accelerators/utils/elastic_barrier.hpp new file mode 100644 index 000000000000..604d2ca6ec16 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/accelerators/utils/elastic_barrier.hpp @@ -0,0 +1,316 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2021 Intel Corporation + +#ifndef GAPI_STREAMING_ONEVPL_ACCELERATORS_UTILS_ELASTIC_BARRIER_HPP +#define GAPI_STREAMING_ONEVPL_ACCELERATORS_UTILS_ELASTIC_BARRIER_HPP +#include + +namespace cv { +namespace gapi { +namespace wip { + +template +class elastic_barrier { +public: + using self_t = Impl; + elastic_barrier() : + incoming_requests(), + outgoing_requests(), + pending_requests(), + reinit(false) { + } + + self_t* get_self() { + return static_cast(this); + } + + template + void visit_in (Args&& ...args) { + on_lock(std::forward(args)...); + } + + template + void visit_out (Args&& ...args) { + on_unlock(std::forward(args)...); + } + +protected: + ~elastic_barrier() = default; + +private: + std::atomic incoming_requests; + std::atomic outgoing_requests; + std::atomic pending_requests; + std::atomic reinit; + + template + void on_first_in(Args&& ...args) { + get_self()->on_first_in_impl(std::forward(args)...); + } + + template + void on_last_out(Args&& ...args) { + get_self()->on_last_out_impl(std::forward(args)...); + } + + template + void on_lock(Args&& ...args) { + // Read access is more complex + // each `incoming` request must check in before acquire resource + size_t thread_id = incoming_requests.fetch_add(1); + if (thread_id == 0) { + /* + * only one `incoming` request is allowable to init resource + * at first time + * let's filter out the first one by `thread_id` + * + * The first one `incoming` request becomes main `incoming` request + * */ + if (outgoing_requests.load() == 0) { + get_self()->on_first_in(std::forward(args)...); + /* + * The main `incoming` request finished resource initialization + * and became `outgoing` + * + * Non empty `outgoing` count means that + * other further `incoming` (or busy-wait) requests + * are getting on with its job without resource initialization, + * because main `incoming` request has already initialized it at here + * */ + outgoing_requests.fetch_add(1); + /*GAPI_Assert(ptr->Y && (ptr->UV || (ptr->U && ptr->V)) && + "on_lock: data must be correct after first init"); + */ + return; + } + GAPI_Assert(false && "Unknown state"); + return; + } else { + /* + * CASE 1) + * + * busy wait for others `incoming` requests for resource initialization + * besides main `incoming` request which are getting on + * resource initialization at this point + * + * */ + + // OR + + /* + * CASE 2) + * + * busy wait for ALL `incoming` request for resource initialization + * including main `incoming` request. It will happen if + * new `incoming` requests had came here while resource was getting on deinit + * in `on_unlock` in another processing thread. + * In this case no actual main `incoming` request is available and + * all `incoming` requests must be in busy-wait stare + * + * */ + + // Each `incoming` request became `busy-wait` request + size_t busy_thread_id = pending_requests.fetch_add(1); + + /* + * CASE 1) + * + * Non empty `outgoing` requests count means that other further `incoming` or + * `busy-wait` request are getting on with its job + * without resource initialization because + * main thread has already initialized it at here + * */ + while (outgoing_requests.load() == 0) { + + // OR + + /* + * CASE 2) + * + * In case of NO master `incoming `request is available and doesn't + * provide resource initialization. All `incoming` requests must be in + * busy-wait state. + * If it is not true then CASE 1) is going on + * + * OR + * + * `on_unlock` is in deinitialization phase in another thread. + * Both cases mean busy-wait state here + * */ + if (pending_requests.load() == incoming_requests.load()) { + /* + * CASE 2) ONLY + * + * It will happen if 'on_unlock` in another thread + * finishes its execution only + * + * `on_unlock` in another thread might finished with either + * deinitialization action or without deinitialization action + * (the call off deinitialization case) + * + * We must not continue at here (without reinit) + * if deinitialization happens in `on_unlock` in another thread. + * So try it on + * */ + + // only single `busy-wait` request must make sure about possible + // deinitialization. So first `busy-wait` request becomes + // main `busy-wait` request + if (busy_thread_id == 0) { + bool expected_reinit = true; + if (!reinit.compare_exchange_strong(expected_reinit, false)) { + /* + * deinitialization called off in `on_unlock` + * because new `incoming` request had appeared at here before + * `on_unlock` started deinit procedure in another thread. + * So no reinit required because no deinit had happended + * + * main `busy-wait` request must break busy-wait state + * and become `outgoing` request. + * Non empty `outgoing` count means that other + * further `incoming` requests or + * `busy-wait` requests are getting on with its job + * without resource initialization/reinitialization + * because no deinit happened in `on_unlock` + * in another thread + * */ + break; //just quit busy loop + } else { + /* Deinitialization had happened in `on_unlock` + * in another thread right before + * new `incoming` requests appeared. + * So main `busy-wait` request must start reinit procedure + */ + get_self()->on_first_in(std::forward(args)...); + + /* + * Main `busy-wait` request has finished reinit procedure + * and becomes `outgong` request. + * Non empty `outgoing` count means that other + * further `incoming` requests or + * `busy-wait` requests are getting on with its job + * without resource initialization because + * main `busy-wait` request + * has already re-initialized it at here + */ + outgoing_requests.fetch_add(1); + pending_requests.fetch_sub(1); + /* + GAPI_LOG_DEBUG(nullptr, "REcharged, data: " << data); + GAPI_Assert(ptr->Y && (ptr->UV || (ptr->U && ptr->V)) && + "on_lock: data must be correct after subsequent reinit"); + */ + return; + } + } + } + } + + // All non main requests became `outgoing` and look at on initialized resource + outgoing_requests++; + + // Each `busy-wait` request are not busy-wait now + pending_requests.fetch_sub(1); + } + + /* + GAPI_Assert(ptr->Y && (ptr->UV || (ptr->U && ptr->V)) && + "on_lock: data must exist for charging `outgoing_requests`"); + */ + return; + } + + template + void on_unlock(Args&& ...args) { + // Read unlock + /* + * Each released `outgoing` request checks out to doesn't use resource anymore. + * The last `outgoing` request becomes main `outgoing` request and + * must deinitialize resource if no `incoming` or `busy-wait` requests + * are waiting for it + */ + size_t thread_id = outgoing_requests.fetch_sub(1); + if (thread_id == 1) { + /* + * Make sure that no another `incoming` (including `busy-wait) + * exists. + * But beforehand its must make sure that no `incoming` or `pending` + * requests are exist. + * + * The main `outgoing` request is an one of `incoming` request + * (it is the oldest one in the current `incoming` bunch) and still + * holds resource in initialized state (thus we compare with 1). + * We must not deinitialize resource before decrease + * `incoming` requests counter because + * after it has got 0 value in `on_lock` another thread + * will start initialize resource procedure which will get conflict + * with current deinitialize procedure + * + * From this point, all `on_lock` request in another thread would + * become `busy-wait` without reaching main `incoming` state (CASE 2) + * */ + if (incoming_requests.load() == 1) { + /* + * The main `outgoing` request is ready to deinit shared resource + * in unconflicting manner. + * + * This is a critical section for single thread for main `outgoing` + * request + * + * CASE 2 only available in `on_lock` thread + * */ + get_self()->on_last_out(std::forward(args)...); + + /* + * Before main `outgoinq` request become released it must notify + * subsequent `busy-wait` requests in `on_lock` in another thread + * that main `busy-wait` must start reinit resource procedure + * */ + reinit.store(true); + /* + GAPI_Assert(!ptr->Y && !(ptr->UV || (ptr->U && ptr->V)) && + "on_unlock: data must be cleared after last `incoming_requests`"); +*/ + /* + * Deinitialize procedure is finished and main `outgoing` request + * (it is the oldest one in `incoming` request) must become released + * + * Right after when we decrease `incoming` counter + * the condition for equality + * `busy-wait` and `incoming` counter will become true (CASE 2 only) + * in `on_lock` in another threads. After that + * a main `busy-wait` request would check `reinit` condition + * */ + incoming_requests.fetch_sub(1); + return; + } + + /* + * At this point we have guarantee that new `incoming` requests + * had became increased in `on_lock` in another thread right before + * current thread deinitialize resource. + * + * So call off deinitialization procedure here + * */ + } + + /* + GAPI_Assert(ptr->Y && (ptr->UV || (ptr->U && ptr->V)) && + "on_unlock: data must exist till last `outgoing_requests`"); + */ + incoming_requests.fetch_sub(1); + } + + elastic_barrier(const elastic_barrier&) = delete; + elastic_barrier(elastic_barrier&&) = delete; + elastic_barrier& operator() (const elastic_barrier&) = delete; + elastic_barrier& operator() (elastic_barrier&&) = delete; +}; +} // namespace wip +} // namespace gapi +} // namespace cv + +#endif // GAPI_STREAMING_ONEVPL_ACCELERATORS_UTILS_ELASTIC_BARRIER_HPP diff --git a/modules/gapi/src/streaming/onevpl/accelerators/utils/shared_lock.cpp b/modules/gapi/src/streaming/onevpl/accelerators/utils/shared_lock.cpp new file mode 100644 index 000000000000..04d19f976f3d --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/accelerators/utils/shared_lock.cpp @@ -0,0 +1,53 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2021 Intel Corporation + +#include "streaming/onevpl/accelerators/utils/shared_lock.hpp" + + +namespace cv { +namespace gapi { +namespace wip { + +size_t SharedLock::shared_lock() { + int curr_value; + do { + do { + // acquire if no writer, multiple readers are allowed + curr_value = counter.load(); + } while (EXCLUSIVE_ACCESS == curr_value); + } while (!counter.compare_exchange_weak(curr_value, curr_value + 1)); + + //return prev value + return curr_value; +} + +size_t SharedLock::unlock_shared() { + return counter.fetch_sub(1); +} + +void SharedLock::lock() { + int curr_value; + do { + // acquire if no readers only + curr_value = 0; + } while (!counter.compare_exchange_weak(curr_value, EXCLUSIVE_ACCESS)); +} + +bool SharedLock::try_lock() { + int curr_value = 0; + return counter.compare_exchange_strong(curr_value, EXCLUSIVE_ACCESS); +} + +void SharedLock::unlock() { + counter.store(0); +} + +bool SharedLock::owns() const { + return (counter.load() == EXCLUSIVE_ACCESS); +} +} // namespace wip +} // namespace gapi +} // namespace cv diff --git a/modules/gapi/src/streaming/onevpl/accelerators/utils/shared_lock.hpp b/modules/gapi/src/streaming/onevpl/accelerators/utils/shared_lock.hpp new file mode 100644 index 000000000000..579fe0140988 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/accelerators/utils/shared_lock.hpp @@ -0,0 +1,48 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2021 Intel Corporation + +#ifndef GAPI_STREAMING_ONEVPL_ACCELERATORS_SURFACE_SHARED_LOCK_HPP +#define GAPI_STREAMING_ONEVPL_ACCELERATORS_SURFACE_SHARED_LOCK_HPP + +#include +#include + +#include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS + +namespace cv { +namespace gapi { +namespace wip { + +class GAPI_EXPORTS SharedLock { +public: + SharedLock() = default; + ~SharedLock() = default; + + size_t shared_lock(); + size_t unlock_shared(); + + void lock(); + bool try_lock(); + void unlock(); + + bool owns() const; +private: + enum { + EXCLUSIVE_ACCESS = -1 + }; + + SharedLock(const SharedLock&) = delete; + SharedLock& operator= (const SharedLock&) = delete; + SharedLock(SharedLock&&) = delete; + SharedLock& operator== (SharedLock&&) = delete; + + std::atomic counter; +}; +} // namespace wip +} // namespace gapi +} // namespace cv + +#endif // GAPI_STREAMING_ONEVPL_ACCELERATORS_SURFACE_SHARED_LOCK_HPP diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp index e5fbe928bd87..bb828bf1a97a 100644 --- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp @@ -12,103 +12,13 @@ #include "streaming/onevpl/accelerators/accel_policy_interface.hpp" #include "streaming/onevpl/accelerators/surface/surface.hpp" #include "streaming/onevpl/onevpl_utils.hpp" -//TODO #include "streaming/vpl/vpl_utils.hpp" +#include "streaming/onevpl/onevpl_cfg_params_parser.hpp" #include "logger.hpp" namespace cv { namespace gapi { namespace wip { -/* UTILS */ -mfxU32 GetSurfaceSize_(mfxU32 FourCC, mfxU32 width, mfxU32 height) { - mfxU32 nbytes = 0; - - mfxU32 half_width = width / 2; - mfxU32 half_height = height / 2; - switch (FourCC) { - case MFX_FOURCC_I420: - case MFX_FOURCC_NV12: - nbytes = width * height + 2 * half_width * half_height; - break; - case MFX_FOURCC_I010: - case MFX_FOURCC_P010: - nbytes = width * height + 2 * half_width * half_height; - nbytes *= 2; - break; - case MFX_FOURCC_RGB4: - nbytes = width * height * 4; - break; - default: - break; - } - - return nbytes; -} - -surface_ptr_t create_surface_RGB4_(mfxFrameInfo frameInfo, - std::shared_ptr out_buf_ptr, - size_t out_buf_ptr_offset, - size_t out_buf_size) -{ - mfxU8* buf = reinterpret_cast(out_buf_ptr.get()); - mfxU16 surfW = frameInfo.Width * 4; - mfxU16 surfH = frameInfo.Height; - (void)surfH; - - // TODO more intelligent check - if (out_buf_size <= out_buf_ptr_offset) { - GAPI_LOG_WARNING(nullptr, "Not enough buffer, ptr: " << out_buf_ptr << - ", size: " << out_buf_size << - ", offset: " << out_buf_ptr_offset << - ", W: " << surfW << - ", H: " << surfH); - GAPI_Assert(false && "Invalid offset"); - } - - std::unique_ptr handle(new mfxFrameSurface1); - memset(handle.get(), 0, sizeof(mfxFrameSurface1)); - - handle->Info = frameInfo; - handle->Data.B = buf + out_buf_ptr_offset; - handle->Data.G = handle->Data.B + 1; - handle->Data.R = handle->Data.B + 2; - handle->Data.A = handle->Data.B + 3; - handle->Data.Pitch = surfW; - - return Surface::create_surface(std::move(handle), out_buf_ptr); -} - -surface_ptr_t create_surface_other_(mfxFrameInfo frameInfo, - std::shared_ptr out_buf_ptr, - size_t out_buf_ptr_offset, - size_t out_buf_size) -{ - mfxU8* buf = reinterpret_cast(out_buf_ptr.get()); - mfxU16 surfH = frameInfo.Height; - mfxU16 surfW = (frameInfo.FourCC == MFX_FOURCC_P010) ? frameInfo.Width * 2 : frameInfo.Width; - - // TODO more intelligent check - if (out_buf_size <= - out_buf_ptr_offset + (surfW * surfH) + ((surfW / 2) * (surfH / 2))) { - GAPI_LOG_WARNING(nullptr, "Not enough buffer, ptr: " << out_buf_ptr << - ", size: " << out_buf_size << - ", offset: " << out_buf_ptr_offset << - ", W: " << surfW << - ", H: " << surfH); - GAPI_Assert(false && "Invalid offset"); - } - - std::unique_ptr handle(new mfxFrameSurface1); - memset(handle.get(), 0, sizeof(mfxFrameSurface1)); - - handle->Info = frameInfo; - handle->Data.Y = buf + out_buf_ptr_offset; - handle->Data.U = buf + out_buf_ptr_offset + (surfW * surfH); - handle->Data.V = handle->Data.U + ((surfW / 2) * (surfH / 2)); - handle->Data.Pitch = surfW; - - return Surface::create_surface(std::move(handle), out_buf_ptr); -} VPLLegacyDecodeEngineAsync::VPLLegacyDecodeEngineAsync(std::unique_ptr&& accel) : ProcessingEngineBase(std::move(accel)) { @@ -160,13 +70,14 @@ VPLLegacyDecodeEngineAsync::VPLLegacyDecodeEngineAsync(std::unique_ptr ExecutionStatus { LegacyDecodeSessionAsync& my_sess = static_cast(sess); + do { if (!my_sess.sync_queue.empty()) // FIFO: check the oldest async operation complete { LegacyDecodeSessionAsync::op_handle_t& pending_op = my_sess.sync_queue.front(); @@ -183,11 +95,14 @@ VPLLegacyDecodeEngineAsync::VPLLegacyDecodeEngineAsync(std::unique_ptr provider) +std::shared_ptr +VPLLegacyDecodeEngineAsync::initialize_session(mfxSession mfx_session, + const std::vector& cfg_params, + std::shared_ptr provider) { - mfxFrameAllocRequest decRequest = {}; + GAPI_DbgAssert(provider && "Cannot create decoder, data provider is nullptr"); + + // Find codec ID from config + auto dec_it = std::find_if(cfg_params.begin(), cfg_params.end(), [] (const oneVPL_cfg_param& value) { + return value.get_name() == "mfxImplDescription.mfxDecoderDescription.decoder.CodecID"; + }); + if (dec_it == cfg_params.end()) { + throw std::logic_error("Cannot determine DecoderID from oneVPL config. Abort"); + } + + mfxVariant decoder = cfg_param_to_mfx_variant(*dec_it); + + // fill input bitstream + mfxBitstream bitstream{}; + const int BITSTREAM_BUFFER_SIZE = 2000000; + bitstream.MaxLength = BITSTREAM_BUFFER_SIZE; + bitstream.Data = (mfxU8 *)calloc(bitstream.MaxLength, sizeof(mfxU8)); + if(!bitstream.Data) { + throw std::runtime_error("Cannot allocate bitstream.Data bytes: " + + std::to_string(bitstream.MaxLength * sizeof(mfxU8))); + } + + bitstream.CodecId = decoder.Data.U32; + mfxStatus sts = ReadEncodedStream(bitstream, provider); + if(MFX_ERR_NONE != sts) { + throw std::runtime_error("Error reading bitstream, error: " + + mfxstatus_to_string(sts)); + } + + // init session + acceleration_policy->init(mfx_session); + + // Retrieve the frame information from input stream + mfxVideoParam mfxDecParams {}; + mfxDecParams.mfx.CodecId = decoder.Data.U32; + VPLAccelerationPolicy::AccelType accel_type = acceleration_policy->get_accel_type(); + if (accel_type == VPLAccelerationPolicy::AccelType::GPU) { + mfxDecParams.IOPattern = MFX_IOPATTERN_OUT_VIDEO_MEMORY; + } else { + mfxDecParams.IOPattern = MFX_IOPATTERN_OUT_SYSTEM_MEMORY; + } + + sts = MFXVideoDECODE_DecodeHeader(mfx_session, &bitstream, &mfxDecParams); + if(MFX_ERR_NONE != sts) { + throw std::runtime_error("Error decoding header, error: " + + mfxstatus_to_string(sts)); + } + + mfxFrameAllocRequest decRequest {}; + // Query number required surfaces for decoder - MFXVideoDECODE_QueryIOSurf(mfx_session, &decoder_param.param, &decRequest); + MFXVideoDECODE_QueryIOSurf(mfx_session, &mfxDecParams, &decRequest); // External (application) allocation of decode surfaces GAPI_LOG_DEBUG(nullptr, "Query IOSurf for session: " << mfx_session << + ", mfxFrameAllocRequest.NumFrameMin: " << decRequest.NumFrameMin << ", mfxFrameAllocRequest.NumFrameSuggested: " << decRequest.NumFrameSuggested << ", mfxFrameAllocRequest.Type: " << decRequest.Type); - mfxU32 singleSurfaceSize = GetSurfaceSize_(decoder_param.param.mfx.FrameInfo.FourCC, - decoder_param.param.mfx.FrameInfo.Width, - decoder_param.param.mfx.FrameInfo.Height); - if (!singleSurfaceSize) { - throw std::runtime_error("Cannot determine surface size for: fourCC" + - std::to_string(decoder_param.param.mfx.FrameInfo.FourCC) + - ", width: " + std::to_string(decoder_param.param.mfx.FrameInfo.Width) + - ", height: " + std::to_string(decoder_param.param.mfx.FrameInfo.Height)); + VPLAccelerationPolicy::pool_key_t decode_pool_key = + acceleration_policy->create_surface_pool(decRequest, mfxDecParams); + + // Input parameters finished, now initialize decode + // create decoder for session accoring to header recovered from source file + sts = MFXVideoDECODE_Init(mfx_session, &mfxDecParams); + if (MFX_ERR_NONE != sts) { + throw std::runtime_error("Error initializing Decode, error: " + + mfxstatus_to_string(sts)); } - const auto &frameInfo = decoder_param.param.mfx.FrameInfo; - auto surface_creator = - [&frameInfo] (std::shared_ptr out_buf_ptr, size_t out_buf_ptr_offset, - size_t out_buf_size) -> surface_ptr_t { - return (frameInfo.FourCC == MFX_FOURCC_RGB4) ? - create_surface_RGB4_(frameInfo, out_buf_ptr, out_buf_ptr_offset, - out_buf_size) : - create_surface_other_(frameInfo, out_buf_ptr, out_buf_ptr_offset, - out_buf_size);}; - - //TODO Configure preallocation size (how many frames we can hold) - const size_t preallocated_frames_count = 30; - VPLAccelerationPolicy::pool_key_t decode_pool_key = - acceleration_policy->create_surface_pool(decRequest.NumFrameSuggested * preallocated_frames_count, - singleSurfaceSize, - surface_creator); + DecoderParams decoder_param {bitstream, mfxDecParams}; // create session std::shared_ptr sess_ptr = @@ -247,6 +199,7 @@ void VPLLegacyDecodeEngineAsync::initialize_session(mfxSession mfx_session, sess_ptr->init_surface_pool(decode_pool_key); // prepare working decode surface sess_ptr->swap_surface(*this); + return sess_ptr; } ProcessingEngineBase::ExecutionStatus VPLLegacyDecodeEngineAsync::execute_op(operation_t& op, EngineSession& sess) { @@ -280,6 +233,7 @@ ProcessingEngineBase::ExecutionStatus VPLLegacyDecodeEngineAsync::process_error( } catch (const std::exception& ex) { GAPI_LOG_WARNING(nullptr, "[" << sess.session << "] error: " << ex.what() << "Abort"); + return ExecutionStatus::Continue; // read more data } } case MFX_ERR_MORE_DATA: // The function requires more bitstream at input before decoding can proceed @@ -301,6 +255,7 @@ ProcessingEngineBase::ExecutionStatus VPLLegacyDecodeEngineAsync::process_error( } catch (const std::exception& ex) { GAPI_LOG_WARNING(nullptr, "[" << sess.session << "] error: " << ex.what() << "Abort"); + return ExecutionStatus::Continue; // read more data } break; } @@ -345,6 +300,7 @@ ProcessingEngineBase::ExecutionStatus VPLLegacyDecodeEngineAsync::process_error( } catch (const std::exception& ex) { GAPI_LOG_WARNING(nullptr, "[" << sess.session << "] error: " << ex.what() << "Abort"); + return ExecutionStatus::Continue; } default: GAPI_LOG_WARNING(nullptr, "Unknown status code: " << mfxstatus_to_string(status) << diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp index db6ac8e53636..78140b291b69 100644 --- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp @@ -31,8 +31,9 @@ class VPLLegacyDecodeEngineAsync : public ProcessingEngineBase { public: VPLLegacyDecodeEngineAsync(std::unique_ptr&& accel); - void initialize_session(mfxSession mfx_session, DecoderParams&& decoder_param, - std::shared_ptr provider) override; + std::shared_ptr initialize_session(mfxSession mfx_session, + const std::vector& cfg_params, + std::shared_ptr provider) override; private: ExecutionStatus execute_op(operation_t& op, EngineSession& sess) override; diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp index 7cb145d83df9..172e2a283571 100644 --- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp @@ -51,6 +51,7 @@ void LegacyDecodeSessionAsync::swap_surface(VPLLegacyDecodeEngineAsync& engine) } catch (const std::exception& ex) { GAPI_LOG_WARNING(nullptr, "[" << session << "] error: " << ex.what() << "Abort"); + throw ; } } @@ -69,6 +70,10 @@ Data::Meta LegacyDecodeSessionAsync::generate_frame_meta() { }; return meta; } + +const mfxVideoParam& LegacyDecodeSessionAsync::get_video_param() const { + return mfx_decoder_param; +} } // namespace wip } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp index 6dc7752a7199..b92554fea6eb 100644 --- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp @@ -37,11 +37,11 @@ class LegacyDecodeSessionAsync : public EngineSession { void swap_surface(VPLLegacyDecodeEngineAsync& engine); void init_surface_pool(VPLAccelerationPolicy::pool_key_t key); - mfxVideoParam mfx_decoder_param; - std::shared_ptr data_provider; - Data::Meta generate_frame_meta(); + const mfxVideoParam& get_video_param() const override; private: + mfxVideoParam mfx_decoder_param; + std::shared_ptr data_provider; VPLAccelerationPolicy::pool_key_t decoder_pool_id; mfxFrameAllocRequest request; diff --git a/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp b/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp index 2a1dcb75661f..533340e958f1 100644 --- a/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp @@ -38,6 +38,8 @@ struct GAPI_EXPORTS EngineSession { EngineSession(mfxSession sess, mfxBitstream&& str); std::string error_code_to_str() const; virtual ~EngineSession(); + + virtual const mfxVideoParam& get_video_param() const = 0; }; } // namespace wip } // namespace gapi diff --git a/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.cpp b/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.cpp index c0fa33aa9aa8..404b4d1b1e5f 100644 --- a/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.cpp @@ -18,7 +18,8 @@ ProcessingEngineBase::ProcessingEngineBase(std::unique_ptr #include "streaming/onevpl/engine/engine_session.hpp" +#include #include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS namespace cv { @@ -45,9 +46,9 @@ class GAPI_EXPORTS ProcessingEngineBase { ProcessingEngineBase(std::unique_ptr&& accel); virtual ~ProcessingEngineBase(); - virtual void initialize_session(mfxSession mfx_session, - DecoderParams&& decoder_param, - std::shared_ptr provider) = 0; + virtual std::shared_ptr initialize_session(mfxSession mfx_session, + const std::vector& cfg_params, + std::shared_ptr provider) = 0; ExecutionStatus process(mfxSession session); size_t get_ready_frames_count() const; diff --git a/modules/gapi/src/streaming/onevpl/onevpl_source_priv.cpp b/modules/gapi/src/streaming/onevpl/onevpl_source_priv.cpp index 85c3c8077941..ef81721667a8 100644 --- a/modules/gapi/src/streaming/onevpl/onevpl_source_priv.cpp +++ b/modules/gapi/src/streaming/onevpl/onevpl_source_priv.cpp @@ -73,6 +73,7 @@ OneVPLSource::Priv::Priv(std::shared_ptr provider, const std::vec this->mfx_handle_configs.resize(cfg_params.size()); // Set handle config params + GAPI_LOG_INFO(nullptr, "Creating VPL config from input params"); auto cfg_param_it = cfg_params.begin(); for (mfxConfig& cfg_inst : mfx_handle_configs) { cfg_inst = MFXCreateConfig(mfx_handle); @@ -85,6 +86,7 @@ OneVPLSource::Priv::Priv(std::shared_ptr provider, const std::vec ++cfg_param_it; continue; } + GAPI_LOG_DEBUG(nullptr, "Apply major param: " << cfg_param_it->get_name()); mfxVariant mfx_param = cfg_param_to_mfx_variant(*cfg_param_it); mfxStatus sts = MFXSetConfigFilterProperty(cfg_inst, (mfxU8 *)cfg_param_it->get_name().c_str(), @@ -181,15 +183,6 @@ OneVPLSource::Priv::Priv(std::shared_ptr provider, const std::vec // initialize decoder try { - // Find codec ID from config - auto dec_it = std::find_if(cfg_params.begin(), cfg_params.end(), [] (const oneVPL_cfg_param& value) - { - return value.get_name() == "mfxImplDescription.mfxDecoderDescription.decoder.CodecID"; - }); - if (dec_it == cfg_params.end()) { - throw std::logic_error("Cannot determine DecoderID from oneVPL config. Abort"); - } - // create session driving engine if required if (!engine) { @@ -209,12 +202,28 @@ OneVPLSource::Priv::Priv(std::shared_ptr provider, const std::vec } } - //create decoder for session accoring to header recovered from source file - DecoderParams decoder_param = create_decoder_from_file(*dec_it, provider); - // create engine session for processing mfx session pipeline - engine->initialize_session(mfx_session, std::move(decoder_param), - provider); + auto engine_session_ptr = engine->initialize_session(mfx_session, cfg_params, + provider); + + const mfxVideoParam& video_param = engine_session_ptr->get_video_param(); + + // set valid description + description.size = cv::Size { + video_param.mfx.FrameInfo.Width, + video_param.mfx.FrameInfo.Height}; + switch(video_param.mfx.FrameInfo.FourCC) { + case MFX_FOURCC_I420: + throw std::runtime_error("Cannot parse GMetaArg description: MediaFrame doesn't support I420 type"); + case MFX_FOURCC_NV12: + description.fmt = cv::MediaFormat::NV12; + break; + default: + throw std::runtime_error("Cannot parse GMetaArg description: MediaFrame unknown 'fmt' type: " + + std::to_string(video_param.mfx.FrameInfo.FourCC)); + } + description_is_valid = true; + } catch(const std::exception& ex) { std::stringstream ss; ss << ex.what() << ". Unload VPL session: " << mfx_session; @@ -237,73 +246,12 @@ OneVPLSource::Priv::Priv(std::shared_ptr provider, const std::vec OneVPLSource::Priv::~Priv() { + engine.reset(); + GAPI_LOG_INFO(nullptr, "Unload MFX handle: " << mfx_handle); MFXUnload(mfx_handle); } -DecoderParams OneVPLSource::Priv::create_decoder_from_file(const oneVPL_cfg_param& decoder_cfg, - std::shared_ptr provider) -{ - GAPI_DbgAssert(provider && "Cannot create decoder, data provider is nullptr"); - - mfxBitstream bitstream{}; - const int BITSTREAM_BUFFER_SIZE = 2000000; - bitstream.MaxLength = BITSTREAM_BUFFER_SIZE; - bitstream.Data = (mfxU8 *)calloc(bitstream.MaxLength, sizeof(mfxU8)); - if(!bitstream.Data) { - throw std::runtime_error("Cannot allocate bitstream.Data bytes: " + - std::to_string(bitstream.MaxLength * sizeof(mfxU8))); - } - - mfxVariant decoder = cfg_param_to_mfx_variant(decoder_cfg); - // according to oneVPL documentation references - // https://spec.oneapi.io/versions/latest/elements/oneVPL/source/API_ref/VPL_disp_api_struct.html - // mfxVariant is an `union` type and considered different meaning for different param ids - // So CodecId has U32 data type - bitstream.CodecId = decoder.Data.U32; - - mfxStatus sts = ReadEncodedStream(bitstream, provider); - if(MFX_ERR_NONE != sts) { - throw std::runtime_error("Error reading bitstream, error: " + - mfxstatus_to_string(sts)); - } - - // Retrieve the frame information from input stream - mfxVideoParam mfxDecParams {}; - mfxDecParams.mfx.CodecId = decoder.Data.U32; - mfxDecParams.IOPattern = MFX_IOPATTERN_OUT_SYSTEM_MEMORY;//MFX_IOPATTERN_OUT_VIDEO_MEMORY; - sts = MFXVideoDECODE_DecodeHeader(mfx_session, &bitstream, &mfxDecParams); - if(MFX_ERR_NONE != sts) { - throw std::runtime_error("Error decoding header, error: " + - mfxstatus_to_string(sts)); - } - - // Input parameters finished, now initialize decode - sts = MFXVideoDECODE_Init(mfx_session, &mfxDecParams); - if (MFX_ERR_NONE != sts) { - throw std::runtime_error("Error initializing Decode, error: " + - mfxstatus_to_string(sts)); - } - - // set valid description - description.size = cv::Size { - mfxDecParams.mfx.FrameInfo.Width, - mfxDecParams.mfx.FrameInfo.Height}; - switch(mfxDecParams.mfx.FrameInfo.FourCC) { - case MFX_FOURCC_I420: - throw std::runtime_error("Cannot parse GMetaArg description: MediaFrame doesn't support I420 type"); - case MFX_FOURCC_NV12: - description.fmt = cv::MediaFormat::NV12; - break; - default: - throw std::runtime_error("Cannot parse GMetaArg description: MediaFrame unknown 'fmt' type: " + - std::to_string(mfxDecParams.mfx.FrameInfo.FourCC)); - } - description_is_valid = true; - - return {bitstream, mfxDecParams}; -} - std::unique_ptr OneVPLSource::Priv::initializeHWAccel() { std::unique_ptr ret; diff --git a/modules/gapi/src/streaming/onevpl/onevpl_utils.cpp b/modules/gapi/src/streaming/onevpl/onevpl_utils.cpp index 7ecc3f5abb84..4c08c9116e60 100644 --- a/modules/gapi/src/streaming/onevpl/onevpl_utils.cpp +++ b/modules/gapi/src/streaming/onevpl/onevpl_utils.cpp @@ -14,6 +14,12 @@ #include "streaming/onevpl/onevpl_utils.hpp" #include "logger.hpp" +#define ONEVPL_STRINGIFY_CASE(value) \ + case value: return #value; + +#define APPEND_STRINGIFY_MASK_N_ERASE(value, pref, mask) \ + if (value & mask) { ss << pref << #mask; value ^= mask; } + namespace cv { namespace gapi { namespace wip { @@ -398,6 +404,31 @@ std::string mfxstatus_to_string(mfxStatus err) { ret += std::to_string(err) + ">"; return ret; } + +std::string ext_mem_frame_type_to_cstr(int type) { + std::stringstream ss; + APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_DXVA2_DECODER_TARGET); + APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_DXVA2_PROCESSOR_TARGET); + //APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET); + //APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET); + APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_SYSTEM_MEMORY); + APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_RESERVED1); + APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_FROM_ENCODE); + APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_FROM_DECODE); + APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_FROM_VPPIN); + APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_FROM_VPPOUT); + APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_FROM_ENC); + APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_INTERNAL_FRAME); + APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_EXTERNAL_FRAME); + APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_EXPORT_FRAME); + //APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_SHARED_RESOURCE); + APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_VIDEO_MEMORY_ENCODER_TARGET); + + if (type != 0) { + ss << "(rest: " << std::to_string(type) << ")"; + } + return ss.str(); +} } // namespace wip } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/streaming/onevpl/onevpl_utils.hpp b/modules/gapi/src/streaming/onevpl/onevpl_utils.hpp index 68c06ca67432..92c70caa51b6 100644 --- a/modules/gapi/src/streaming/onevpl/onevpl_utils.hpp +++ b/modules/gapi/src/streaming/onevpl/onevpl_utils.hpp @@ -41,6 +41,8 @@ mfxU32 cstr_to_mfx_version(const char* cstr); std::string mfxstatus_to_string(mfxStatus err); std::ostream& operator<< (std::ostream& out, const mfxImplDescription& idesc); + +std::string ext_mem_frame_type_to_cstr(int type); } // namespace wip } // namespace gapi } // namespace cv diff --git a/modules/gapi/test/common/gapi_streaming_tests_common.hpp b/modules/gapi/test/common/gapi_streaming_tests_common.hpp new file mode 100644 index 000000000000..9c8968e78022 --- /dev/null +++ b/modules/gapi/test/common/gapi_streaming_tests_common.hpp @@ -0,0 +1,56 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2021 Intel Corporation + +#ifndef OPENCV_GAPI_STREAMING_TESTS_COMMON_HPP +#define OPENCV_GAPI_STREAMING_TESTS_COMMON_HPP + +#include "gapi_tests_common.hpp" +#include + +#ifdef HAVE_ONEVPL + +#if (MFX_VERSION >= 2000) +#include +#endif + +#include +#endif // HAVE_ONEVPL + +namespace opencv_test { +namespace streaming { +namespace onevpl { + +struct StreamDataProvider : public cv::gapi::wip::IDataProvider { + + StreamDataProvider(std::istream& in) : data_stream (in) { + EXPECT_TRUE(in); + } + + size_t provide_data(size_t out_data_size, void* out_data_buf) override { + data_stream.read(reinterpret_cast(out_data_buf), out_data_size); + return data_stream.gcount(); + } + bool empty() const override { + return data_stream.eof() || data_stream.bad(); + } +private: + std::istream& data_stream; +}; + +static const unsigned char hevc_header[] = { + 0x00, 0x00, 0x00, 0x01, 0x40, 0x01, 0x0C, 0x06, 0xFF, 0xFF, 0x01, 0x40, 0x00, + 0x00, 0x03, 0x00, 0x80, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x78, 0x00, + 0x00, 0x04, 0x02, 0x10, 0x30, 0x00, 0x00, 0x03, 0x00, 0x10, 0x00, 0x00, 0x03, + 0x01, 0xE5, 0x00, 0x00, 0x00, 0x01, 0x42, 0x01, 0x06, 0x01, 0x40, 0x00, 0x00, + 0x03, 0x00, 0x80, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x78, 0x00, 0x00, + 0xA0, 0x10, 0x20, 0x61, 0x63, 0x41, 0x00, 0x86, 0x49, 0x1B, 0x2B, 0x20, 0x00, + 0x00, 0x00, 0x01, 0x44, 0x01, 0xC0, 0x71, 0xC0, 0xD9, 0x20, 0x00, 0x00, 0x00, + 0x01, 0x26, 0x01, 0xAF, 0x0C +}; +} // namespace onevpl +} // namespace streaming +} // namespace opencv_test +#endif // OPENCV_GAPI_STREAMING_TESTS_HPP diff --git a/modules/gapi/test/streaming/gapi_streaming_tests.cpp b/modules/gapi/test/streaming/gapi_streaming_tests.cpp index 8749f47b1834..97b44efac5da 100644 --- a/modules/gapi/test/streaming/gapi_streaming_tests.cpp +++ b/modules/gapi/test/streaming/gapi_streaming_tests.cpp @@ -7,7 +7,7 @@ #include "../test_precomp.hpp" -#include "../common/gapi_tests_common.hpp" +#include "../common/gapi_streaming_tests_common.hpp" #include // sleep_for (Delay) @@ -24,17 +24,7 @@ #include #include #include - -#include - -#ifdef HAVE_ONEVPL - -#if (MFX_VERSION >= 2000) -#include -#endif - -#include -#endif // HAVE_ONEVPL +#include namespace opencv_test { @@ -131,7 +121,7 @@ struct GAPI_Streaming: public ::testing::TestWithParam(out_data_buf), out_data_size); - return data_stream.gcount(); - } - bool empty() const override { - return data_stream.eof() || data_stream.bad(); - } -private: - std::istream& data_stream; -}; } // anonymous namespace TEST_P(GAPI_Streaming, SmokeTest_ConstInput_GMat) @@ -2242,16 +2215,7 @@ TEST(GAPI_Streaming, TestPythonAPI) } #ifdef HAVE_ONEVPL -const unsigned char hevc_header[] = { - 0x00, 0x00, 0x00, 0x01, 0x40, 0x01, 0x0C, 0x06, 0xFF, 0xFF, 0x01, 0x40, 0x00, - 0x00, 0x03, 0x00, 0x80, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x78, 0x00, - 0x00, 0x04, 0x02, 0x10, 0x30, 0x00, 0x00, 0x03, 0x00, 0x10, 0x00, 0x00, 0x03, - 0x01, 0xE5, 0x00, 0x00, 0x00, 0x01, 0x42, 0x01, 0x06, 0x01, 0x40, 0x00, 0x00, - 0x03, 0x00, 0x80, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x78, 0x00, 0x00, - 0xA0, 0x10, 0x20, 0x61, 0x63, 0x41, 0x00, 0x86, 0x49, 0x1B, 0x2B, 0x20, 0x00, - 0x00, 0x00, 0x01, 0x44, 0x01, 0xC0, 0x71, 0xC0, 0xD9, 0x20, 0x00, 0x00, 0x00, - 0x01, 0x26, 0x01, 0xAF, 0x0C -}; + TEST(OneVPL_Source, Init) { using cfg_param = cv::gapi::wip::oneVPL_cfg_param; @@ -2264,9 +2228,10 @@ TEST(OneVPL_Source, Init) src_params.push_back(cfg_param::create("mfxImplDescription.mfxDecoderDescription.decoder.CodecID", MFX_CODEC_HEVC)); std::stringstream stream(std::ios_base::in | std::ios_base::out | std::ios_base::binary); - EXPECT_TRUE(stream.write(reinterpret_cast(const_cast(hevc_header)), - sizeof(hevc_header))); - std::shared_ptr stream_data_provider = std::make_shared(stream); + EXPECT_TRUE(stream.write(reinterpret_cast(const_cast(streaming::onevpl::hevc_header)), + sizeof(streaming::onevpl::hevc_header))); + std::shared_ptr stream_data_provider = + std::make_shared(stream); cv::Ptr cap; bool cap_created = false; diff --git a/modules/gapi/test/streaming/gapi_streaming_utils_test.cpp b/modules/gapi/test/streaming/gapi_streaming_utils_test.cpp new file mode 100644 index 000000000000..69c8891f6246 --- /dev/null +++ b/modules/gapi/test/streaming/gapi_streaming_utils_test.cpp @@ -0,0 +1,351 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2021 Intel Corporation + + +#include "../test_precomp.hpp" + +#include "../common/gapi_streaming_tests_common.hpp" + +#include +#include + +#define private public +#include "streaming/onevpl/accelerators/utils/shared_lock.hpp" +#undef private + +#include "streaming/onevpl/accelerators/utils/elastic_barrier.hpp" + +namespace opencv_test +{ +namespace +{ +using cv::gapi::wip::SharedLock; + +struct TestBarrier : public cv::gapi::wip::elastic_barrier { + void on_first_in_impl(size_t visitor_id) { + + static std::atomic thread_counter{}; + thread_counter++; + EXPECT_EQ(thread_counter.load(), 1); + + visitors_in.insert(visitor_id); + last_visitor_id = visitor_id; + + thread_counter--; + EXPECT_EQ(thread_counter.load(), 0); + } + + void on_last_out_impl(size_t visitor_id) { + + static std::atomic thread_counter{}; + thread_counter++; + EXPECT_EQ(thread_counter.load(), 1); + + visitors_out.insert(visitor_id); + last_visitor_id = visitor_id; + + thread_counter--; + EXPECT_EQ(thread_counter.load(), 0); + } + + size_t last_visitor_id = 0; + std::set visitors_in; + std::set visitors_out; +}; + +TEST(SharedLock, Create) { + SharedLock lock; + EXPECT_EQ(lock.counter.load(), 0); +} + +TEST(SharedLock, Read_SingleThread) +{ + SharedLock lock; + + const size_t single_thread_read_count = 100; + for(size_t i = 0; i < single_thread_read_count; i++) { + lock.shared_lock(); + EXPECT_FALSE(lock.owns()); + } + EXPECT_EQ(lock.counter.load(), single_thread_read_count); + + for(size_t i = 0; i < single_thread_read_count; i++) { + lock.unlock_shared(); + EXPECT_FALSE(lock.owns()); + } + + EXPECT_EQ(lock.counter.load(), 0); +} + +TEST(SharedLock, TryLock_SingleThread) +{ + SharedLock lock; + + EXPECT_TRUE(lock.try_lock()); + EXPECT_TRUE(lock.owns()); + + lock.unlock(); + EXPECT_FALSE(lock.owns()); + EXPECT_EQ(lock.counter.load(), 0); +} + +TEST(SharedLock, Write_SingleThread) +{ + SharedLock lock; + + lock.lock(); + EXPECT_TRUE(lock.owns()); + + lock.unlock(); + EXPECT_FALSE(lock.owns()); + EXPECT_EQ(lock.counter.load(), 0); +} + +TEST(SharedLock, TryLockTryLock_SingleThread) +{ + SharedLock lock; + + lock.try_lock(); + EXPECT_FALSE(lock.try_lock()); + lock.unlock(); + + EXPECT_FALSE(lock.owns()); +} + +TEST(SharedLock, ReadTryLock_SingleThread) +{ + SharedLock lock; + + lock.shared_lock(); + EXPECT_FALSE(lock.owns()); + EXPECT_FALSE(lock.try_lock()); + lock.unlock_shared(); + + EXPECT_TRUE(lock.try_lock()); + EXPECT_TRUE(lock.owns()); + lock.unlock(); +} + +TEST(SharedLock, WriteTryLock_SingleThread) +{ + SharedLock lock; + + lock.lock(); + EXPECT_TRUE(lock.owns()); + EXPECT_FALSE(lock.try_lock()); + lock.unlock(); + + EXPECT_TRUE(lock.try_lock()); + EXPECT_TRUE(lock.owns()); + lock.unlock(); +} + + +TEST(SharedLock, Write_MultiThread) +{ + SharedLock lock; + + std::promise barrier; + std::shared_future sync = barrier.get_future(); + + const size_t work_count = 3; + const size_t inc_count = 10000000; + size_t shared_value = 0; + auto work = [&lock, &shared_value](size_t count) { + for (size_t i = 0; i < count; i ++) { + lock.lock(); + shared_value ++; + lock.unlock(); + } + }; + + std::thread worker_thread([&barrier, sync, work, inc_count] () { + + std::thread sub_worker([&barrier, work, inc_count] () { + barrier.set_value(); + work(inc_count); + }); + + sync.wait(); + work(inc_count); + sub_worker.join(); + }); + sync.wait(); + + work(inc_count); + worker_thread.join(); + + EXPECT_EQ(shared_value, inc_count * 3); +} + +TEST(SharedLock, ReadWrite_MultiThread) +{ + SharedLock lock; + + std::promise barrier; + std::future sync = barrier.get_future(); + + const size_t inc_count = 10000000; + size_t shared_value = 0; + auto write_work = [&lock, &shared_value](size_t count) { + for (size_t i = 0; i < count; i ++) { + lock.lock(); + shared_value ++; + lock.unlock(); + } + }; + + auto read_work = [&lock, &shared_value](size_t count) { + + auto old_shared_value = shared_value; + for (size_t i = 0; i < count; i ++) { + lock.shared_lock(); + EXPECT_TRUE(shared_value >= old_shared_value); + old_shared_value = shared_value; + lock.unlock_shared(); + } + }; + + std::thread writer_thread([&barrier, write_work, inc_count] () { + barrier.set_value(); + write_work(inc_count); + }); + sync.wait(); + + read_work(inc_count); + writer_thread.join(); + + EXPECT_EQ(shared_value, inc_count); +} + + +TEST(ElasticBarrier, single_thread_visit) +{ + TestBarrier barrier; + + const size_t max_visit_count = 10000; + size_t visit_id = 0; + for (visit_id = 0; visit_id < max_visit_count; visit_id++) { + barrier.visit_in(visit_id); + EXPECT_EQ(barrier.visitors_in.size(), 1); + } + EXPECT_EQ(barrier.last_visitor_id, 0); + EXPECT_EQ(barrier.visitors_out.size(), 0); + + for (visit_id = 0; visit_id < max_visit_count; visit_id++) { + barrier.visit_out(visit_id); + EXPECT_EQ(barrier.visitors_in.size(), 1); + } + EXPECT_EQ(barrier.last_visitor_id, visit_id - 1); + EXPECT_EQ(barrier.visitors_out.size(), 1); +} + + +TEST(ElasticBarrier, multi_thread_visit) +{ + TestBarrier tested_barrier; + + const size_t max_visit_count = 10000000; + std::atomic visit_in_wait_counter{}; + std::promise start_sync_barrier; + std::shared_future start_sync = start_sync_barrier.get_future(); + std::promise phase_sync_barrier; + std::shared_future phase_sync = phase_sync_barrier.get_future(); + + auto visit_worker_job = [&tested_barrier, + max_visit_count, + &visit_in_wait_counter, + start_sync, + phase_sync] (size_t worker_id) { + + start_sync.wait(); + + // first phase + const size_t begin_range = worker_id * max_visit_count; + const size_t end_range = (worker_id + 1) * max_visit_count; + for (size_t visit_id = begin_range; visit_id < end_range; visit_id++) { + tested_barrier.visit_in(visit_id); + } + + // notify all worker first phase ready + visit_in_wait_counter.fetch_add(1); + + // wait main second phase + phase_sync.wait(); + + // second phase + for (size_t visit_id = begin_range; visit_id < end_range; visit_id++) { + tested_barrier.visit_out(visit_id); + } + }; + + auto visit_main_job = [&tested_barrier, + max_visit_count, + &visit_in_wait_counter, + &phase_sync_barrier] (size_t total_workers_count, + size_t worker_id) { + + const size_t begin_range = worker_id * max_visit_count; + const size_t end_range = (worker_id + 1) * max_visit_count; + for (size_t visit_id = begin_range; visit_id < end_range; visit_id++) { + tested_barrier.visit_in(visit_id); + } + + // wait all workers first phase done + visit_in_wait_counter.fetch_add(1); + while (visit_in_wait_counter.load() != total_workers_count) { + std::this_thread::yield(); + }; + + // TEST invariant: last_visitor_id MUST be one from any FIRST worker visitor_id + bool one_of_available_ids_matched = false; + for (size_t id = 0; id < total_workers_count; id ++) { + size_t expected_last_visitor_for_id = id * max_visit_count; + one_of_available_ids_matched |= + (tested_barrier.last_visitor_id == expected_last_visitor_for_id) ; + } + EXPECT_TRUE(one_of_available_ids_matched); + + // unblock all workers to work out second phase + phase_sync_barrier.set_value(); + + // continue second phase + for (size_t visit_id = begin_range; visit_id < end_range; visit_id++) { + tested_barrier.visit_out(visit_id); + } + }; + + size_t max_worker_count = std::thread::hardware_concurrency(); + if (max_worker_count < 2) { + max_worker_count = 2; // logical 2 threads required at least + } + std::vector workers; + workers.reserve(max_worker_count); + for (size_t worker_id = 1; worker_id < max_worker_count; worker_id++) { + workers.emplace_back(visit_worker_job, worker_id); + } + + // let's go for first phase + start_sync_barrier.set_value(); + + // utilize main thread as well + visit_main_job(max_worker_count, 0); + + // join all threads second phase + for (auto& w : workers) { + w.join(); + } + + // TEST invariant: last_visitor_id MUST be one from any LATTER worker visitor_id + bool one_of_available_ids_matched = false; + for (size_t id = 0; id < max_worker_count; id ++) { + one_of_available_ids_matched |= + (tested_barrier.last_visitor_id == ((id + 1) * max_visit_count - 1)) ; + } + EXPECT_TRUE(one_of_available_ids_matched); +} +} +} // opencv_test diff --git a/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp b/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp index 707d57ba75bf..d425bd526bae 100644 --- a/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp +++ b/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp @@ -7,7 +7,7 @@ #include "../test_precomp.hpp" -#include "../common/gapi_tests_common.hpp" +#include "../common/gapi_streaming_tests_common.hpp" #include #include @@ -27,12 +27,13 @@ #include #include -#include - #ifdef HAVE_ONEVPL #include "streaming/onevpl/accelerators/surface/surface.hpp" #include "streaming/onevpl/accelerators/surface/cpu_frame_adapter.hpp" #include "streaming/onevpl/accelerators/accel_policy_cpu.hpp" +#include "streaming/onevpl/accelerators/accel_policy_dx11.hpp" +#include "streaming/onevpl/accelerators/dx11_alloc_resource.hpp" +#include "streaming/onevpl/accelerators/utils/shared_lock.hpp" #include #include "streaming/onevpl/engine/processing_engine_base.hpp" #include "streaming/onevpl/engine/engine_session.hpp" @@ -56,6 +57,11 @@ struct TestProcessingSession : public cv::gapi::wip::EngineSession { TestProcessingSession(mfxSession mfx_session) : EngineSession(mfx_session, {}) { } + + const mfxVideoParam& get_video_param() const override { + static mfxVideoParam empty; + return empty; + } }; struct TestProcessingEngine: public cv::gapi::wip::ProcessingEngineBase { @@ -94,14 +100,67 @@ struct TestProcessingEngine: public cv::gapi::wip::ProcessingEngineBase { ); } - void initialize_session(mfxSession mfx_session, - cv::gapi::wip::DecoderParams&&, - std::shared_ptr) override { + std::shared_ptr + initialize_session(mfxSession mfx_session, + const std::vector&, + std::shared_ptr) override { + + return register_session(mfx_session); + } +}; - register_session(mfx_session); +template +class TestLockableAllocator { +public : + using self_t = TestLockableAllocator; + mfxFrameAllocator get() { + return m_allocator; } +private: + TestLockableAllocator(mfxFrameAllocator allocator) : + m_allocator(allocator) { + } + + static mfxStatus MFX_CDECL lock_cb(mfxHDL, mfxMemId mid, mfxFrameData *ptr) { + auto it = lock_processor_table.find(mid); + EXPECT_TRUE(it != lock_processor_table.end()); + return it->second(mid, ptr); + } + static mfxStatus MFX_CDECL unlock_cb(mfxHDL, mfxMemId mid, mfxFrameData *ptr) { + auto it = unlock_processor_table.find(mid); + EXPECT_TRUE(it != unlock_processor_table.end()); + return it->second(mid, ptr); + } + + template + friend TestLockableAllocator create_test_allocator(mfxMemId, L, U); + + static std::map lock_processor_table; + static std::map unlock_processor_table; + + mfxFrameAllocator m_allocator; }; +template +std::map TestLockableAllocator::lock_processor_table {}; + +template +std::map TestLockableAllocator::unlock_processor_table {}; + +template +TestLockableAllocator +create_test_allocator(mfxMemId mid, LockProcessor lock_p, UnlockProcessor unlock_p) { + mfxFrameAllocator allocator {}; + + TestLockableAllocator::lock_processor_table[mid] = lock_p; + allocator.Lock = &TestLockableAllocator::lock_cb; + + TestLockableAllocator::unlock_processor_table[mid] = unlock_p; + allocator.Unlock = &TestLockableAllocator::unlock_cb; + + return TestLockableAllocator {allocator}; +} + TEST(OneVPL_Source_Surface, InitSurface) { using namespace cv::gapi::wip; @@ -419,7 +478,7 @@ TEST(OneVPL_Source_ProcessingEngine, Init) TestProcessingEngine engine(std::move(accel)); mfxSession mfx_session{}; - engine.initialize_session(mfx_session, DecoderParams{}, std::shared_ptr{}); + engine.initialize_session(mfx_session, {}, std::shared_ptr{}); EXPECT_EQ(engine.get_ready_frames_count(), 0); ProcessingEngineBase::ExecutionStatus ret = engine.process(mfx_session); @@ -442,6 +501,174 @@ TEST(OneVPL_Source_ProcessingEngine, Init) Data frame; engine.get_frame(frame); } + +TEST(OneVPL_Source_DX11_Accel, Init) +{ + using namespace cv::gapi::wip; + VPLDX11AccelerationPolicy accel; + + mfxLoader mfx_handle = MFXLoad(); + + mfxConfig cfg_inst_0 = MFXCreateConfig(mfx_handle); + EXPECT_TRUE(cfg_inst_0); + mfxVariant mfx_param_0; + mfx_param_0.Type = MFX_VARIANT_TYPE_U32; + mfx_param_0.Data.U32 = MFX_IMPL_TYPE_HARDWARE; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_0,(mfxU8 *)"mfxImplDescription.Impl", + mfx_param_0), MFX_ERR_NONE); + + mfxConfig cfg_inst_1 = MFXCreateConfig(mfx_handle); + EXPECT_TRUE(cfg_inst_1); + mfxVariant mfx_param_1; + mfx_param_1.Type = MFX_VARIANT_TYPE_U32; + mfx_param_1.Data.U32 = MFX_ACCEL_MODE_VIA_D3D11; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_1,(mfxU8 *)"mfxImplDescription.AccelerationMode", + mfx_param_1), MFX_ERR_NONE); + + mfxConfig cfg_inst_2 = MFXCreateConfig(mfx_handle); + EXPECT_TRUE(cfg_inst_2); + mfxVariant mfx_param_2; + mfx_param_2.Type = MFX_VARIANT_TYPE_U32; + mfx_param_2.Data.U32 = MFX_CODEC_HEVC; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_2,(mfxU8 *)"mfxImplDescription.mfxDecoderDescription.decoder.CodecID", + mfx_param_2), MFX_ERR_NONE); + + // create session + mfxSession mfx_session{}; + mfxStatus sts = MFXCreateSession(mfx_handle, 0, &mfx_session); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // assign acceleration + EXPECT_NO_THROW(accel.init(mfx_session)); + + // create proper bitstream + mfxBitstream bitstream{}; + const int BITSTREAM_BUFFER_SIZE = 2000000; + bitstream.MaxLength = BITSTREAM_BUFFER_SIZE; + bitstream.Data = (mfxU8 *)calloc(bitstream.MaxLength, sizeof(mfxU8)); + EXPECT_TRUE(bitstream.Data); + + // simulate read stream + bitstream.DataOffset = 0; + bitstream.DataLength = sizeof(streaming::onevpl::hevc_header) * sizeof(streaming::onevpl::hevc_header[0]); + memcpy(bitstream.Data, streaming::onevpl::hevc_header, bitstream.DataLength); + bitstream.CodecId = MFX_CODEC_HEVC; + + // prepare dec params + mfxVideoParam mfxDecParams {}; + mfxDecParams.mfx.CodecId = bitstream.CodecId; + mfxDecParams.IOPattern = MFX_IOPATTERN_OUT_VIDEO_MEMORY; + sts = MFXVideoDECODE_DecodeHeader(mfx_session, &bitstream, &mfxDecParams); + EXPECT_EQ(MFX_ERR_NONE, sts); + + mfxFrameAllocRequest request{}; + memset(&request, 0, sizeof(request)); + sts = MFXVideoDECODE_QueryIOSurf(mfx_session, &mfxDecParams, &request); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // Allocate surfaces for decoder + VPLAccelerationPolicy::pool_key_t key = accel.create_surface_pool(request, + mfxDecParams); + auto cand_surface = accel.get_free_surface(key).lock(); + + sts = MFXVideoDECODE_Init(mfx_session, &mfxDecParams); + EXPECT_EQ(MFX_ERR_NONE, sts); + + MFXVideoDECODE_Close(mfx_session); + EXPECT_EQ(MFX_ERR_NONE, sts); + + EXPECT_NO_THROW(accel.deinit(mfx_session)); + MFXClose(mfx_session); + MFXUnload(mfx_handle); +} + +TEST(OneVPL_Source_DX11_FrameLockable, LockUnlock_without_Adaptee) +{ + using namespace cv::gapi::wip; + mfxMemId mid = 0; + int lock_counter = 0; + int unlock_counter = 0; + + std::function lock = + [&lock_counter] (mfxMemId, mfxFrameData *) { + lock_counter ++; + return MFX_ERR_NONE; + }; + std::function unlock = + [&unlock_counter] (mfxMemId, mfxFrameData *) { + unlock_counter++; + return MFX_ERR_NONE; + }; + + auto test_allocator = create_test_allocator(mid, lock, unlock); + LockAdapter adapter(test_allocator.get()); + + mfxFrameData data; + const int exec_count = 123; + for (int i = 0; i < exec_count; i ++) { + EXPECT_EQ(adapter.read_lock(mid, data), 0); + adapter.write_lock(mid, data); + EXPECT_EQ(adapter.unlock_read(mid, data), 0); + adapter.unlock_write(mid, data); + } + + EXPECT_EQ(lock_counter, exec_count * 2); + EXPECT_EQ(unlock_counter, exec_count * 2); +} + +TEST(OneVPL_Source_DX11_FrameLockable, LockUnlock_with_Adaptee) +{ + using namespace cv::gapi::wip; + mfxMemId mid = 0; + int r_lock_counter = 0; + int r_unlock_counter = 0; + int w_lock_counter = 0; + int w_unlock_counter = 0; + + SharedLock adaptee; + std::function lock = + [&r_lock_counter, &w_lock_counter, &adaptee] (mfxMemId, mfxFrameData *) { + if (adaptee.owns()) { + w_lock_counter ++; + } else { + r_lock_counter ++; + } + return MFX_ERR_NONE; + }; + std::function unlock = + [&r_unlock_counter, &w_unlock_counter, &adaptee] (mfxMemId, mfxFrameData *) { + if (adaptee.owns()) { + w_unlock_counter ++; + } else { + r_unlock_counter ++; + } + return MFX_ERR_NONE; + }; + + auto test_allocator = create_test_allocator(mid, lock, unlock); + LockAdapter adapter(test_allocator.get()); + + adapter.set_adaptee(&adaptee); + + mfxFrameData data; + const int exec_count = 123; + for (int i = 0; i < exec_count; i ++) { + EXPECT_EQ(adapter.read_lock(mid, data), 0); + EXPECT_FALSE(adaptee.try_lock()); + + EXPECT_EQ(adapter.unlock_read(mid, data), 1); + EXPECT_TRUE(adaptee.try_lock()); + adaptee.unlock(); + + adapter.write_lock(mid, data); + adapter.unlock_write(mid, data); + } + + EXPECT_EQ(r_lock_counter, exec_count); + EXPECT_EQ(r_unlock_counter, exec_count); + EXPECT_EQ(w_lock_counter, exec_count); + EXPECT_EQ(w_unlock_counter, exec_count); +} } } // namespace opencv_test #endif // HAVE_ONEVPL