-
-
Notifications
You must be signed in to change notification settings - Fork 740
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
renderer_vulkan: Introduce shader HLE system with copy shader impleme…
…ntation. (#1683) * renderer_vulkan: Introduce shader HLE system with copy shader implementation. Co-authored-by: TheTurtle <[email protected]> * buffer_cache: Handle obtaining buffer views partially within buffers. * vk_shader_hle: Make more efficient --------- Co-authored-by: TheTurtle <[email protected]>
- Loading branch information
1 parent
41fd1c8
commit e5e1aba
Showing
9 changed files
with
195 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project | ||
// SPDX-License-Identifier: GPL-2.0-or-later | ||
|
||
#include "shader_recompiler/info.h" | ||
#include "video_core/renderer_vulkan/vk_scheduler.h" | ||
#include "video_core/renderer_vulkan/vk_shader_hle.h" | ||
|
||
#include "vk_rasterizer.h" | ||
|
||
namespace Vulkan { | ||
|
||
static constexpr u64 COPY_SHADER_HASH = 0xfefebf9f; | ||
|
||
bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, | ||
Rasterizer& rasterizer) { | ||
auto& scheduler = rasterizer.GetScheduler(); | ||
auto& buffer_cache = rasterizer.GetBufferCache(); | ||
|
||
// Copy shader defines three formatted buffers as inputs: control, source, and destination. | ||
const auto ctl_buf_sharp = info.texture_buffers[0].GetSharp(info); | ||
const auto src_buf_sharp = info.texture_buffers[1].GetSharp(info); | ||
const auto dst_buf_sharp = info.texture_buffers[2].GetSharp(info); | ||
const auto buf_stride = src_buf_sharp.GetStride(); | ||
ASSERT(buf_stride == dst_buf_sharp.GetStride()); | ||
|
||
struct CopyShaderControl { | ||
u32 dst_idx; | ||
u32 src_idx; | ||
u32 end; | ||
}; | ||
static_assert(sizeof(CopyShaderControl) == 12); | ||
ASSERT(ctl_buf_sharp.GetStride() == sizeof(CopyShaderControl)); | ||
const auto ctl_buf = reinterpret_cast<const CopyShaderControl*>(ctl_buf_sharp.base_address); | ||
|
||
static std::vector<vk::BufferCopy> copies; | ||
copies.clear(); | ||
copies.reserve(regs.cs_program.dim_x); | ||
|
||
for (u32 i = 0; i < regs.cs_program.dim_x; i++) { | ||
const auto& [dst_idx, src_idx, end] = ctl_buf[i]; | ||
const u32 local_dst_offset = dst_idx * buf_stride; | ||
const u32 local_src_offset = src_idx * buf_stride; | ||
const u32 local_size = (end + 1) * buf_stride; | ||
copies.emplace_back(local_src_offset, local_dst_offset, local_size); | ||
} | ||
|
||
scheduler.EndRendering(); | ||
|
||
static constexpr vk::MemoryBarrier READ_BARRIER{ | ||
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite, | ||
.dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, | ||
}; | ||
static constexpr vk::MemoryBarrier WRITE_BARRIER{ | ||
.srcAccessMask = vk::AccessFlagBits::eTransferWrite, | ||
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, | ||
}; | ||
scheduler.CommandBuffer().pipelineBarrier( | ||
vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, | ||
vk::DependencyFlagBits::eByRegion, READ_BARRIER, {}, {}); | ||
|
||
static constexpr vk::DeviceSize MaxDistanceForMerge = 64_MB; | ||
u32 batch_start = 0; | ||
u32 batch_end = 1; | ||
|
||
while (batch_end < copies.size()) { | ||
// Place first copy into the current batch | ||
const auto& copy = copies[batch_start]; | ||
auto src_offset_min = copy.srcOffset; | ||
auto src_offset_max = copy.srcOffset + copy.size; | ||
auto dst_offset_min = copy.dstOffset; | ||
auto dst_offset_max = copy.dstOffset + copy.size; | ||
|
||
for (int i = batch_start + 1; i < copies.size(); i++) { | ||
// Compute new src and dst bounds if we were to batch this copy | ||
const auto [src_offset, dst_offset, size] = copies[i]; | ||
auto new_src_offset_min = std::min(src_offset_min, src_offset); | ||
auto new_src_offset_max = std::max(src_offset_max, src_offset + size); | ||
if (new_src_offset_max - new_src_offset_min > MaxDistanceForMerge) { | ||
continue; | ||
} | ||
|
||
auto new_dst_offset_min = std::min(dst_offset_min, dst_offset); | ||
auto new_dst_offset_max = std::max(dst_offset_max, dst_offset + size); | ||
if (new_dst_offset_max - new_dst_offset_min > MaxDistanceForMerge) { | ||
continue; | ||
} | ||
|
||
// We can batch this copy | ||
src_offset_min = new_src_offset_min; | ||
src_offset_max = new_src_offset_max; | ||
dst_offset_min = new_dst_offset_min; | ||
dst_offset_max = new_dst_offset_max; | ||
if (i != batch_end) { | ||
std::swap(copies[i], copies[batch_end]); | ||
} | ||
++batch_end; | ||
} | ||
|
||
// Obtain buffers for the total source and destination ranges. | ||
const auto [src_buf, src_buf_offset] = | ||
buffer_cache.ObtainBuffer(src_buf_sharp.base_address + src_offset_min, | ||
src_offset_max - src_offset_min, false, false); | ||
const auto [dst_buf, dst_buf_offset] = | ||
buffer_cache.ObtainBuffer(dst_buf_sharp.base_address + dst_offset_min, | ||
dst_offset_max - dst_offset_min, true, false); | ||
|
||
// Apply found buffer base. | ||
const auto vk_copies = std::span{copies}.subspan(batch_start, batch_end - batch_start); | ||
for (auto& copy : vk_copies) { | ||
copy.srcOffset = copy.srcOffset - src_offset_min + src_buf_offset; | ||
copy.dstOffset = copy.dstOffset - dst_offset_min + dst_buf_offset; | ||
} | ||
|
||
// Execute buffer copies. | ||
LOG_TRACE(Render_Vulkan, "HLE buffer copy: src_size = {}, dst_size = {}", | ||
src_offset_max - src_offset_min, dst_offset_max - dst_offset_min); | ||
scheduler.CommandBuffer().copyBuffer(src_buf->Handle(), dst_buf->Handle(), vk_copies); | ||
batch_start = batch_end; | ||
++batch_end; | ||
} | ||
|
||
scheduler.CommandBuffer().pipelineBarrier( | ||
vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, | ||
vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); | ||
|
||
return true; | ||
} | ||
|
||
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, | ||
Rasterizer& rasterizer) { | ||
switch (info.pgm_hash) { | ||
case COPY_SHADER_HASH: | ||
return ExecuteCopyShaderHLE(info, regs, rasterizer); | ||
default: | ||
return false; | ||
} | ||
} | ||
|
||
} // namespace Vulkan |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project | ||
// SPDX-License-Identifier: GPL-2.0-or-later | ||
|
||
#pragma once | ||
|
||
#include "video_core/amdgpu/liverpool.h" | ||
|
||
namespace Shader { | ||
struct Info; | ||
} | ||
|
||
namespace Vulkan { | ||
|
||
class Rasterizer; | ||
|
||
/// Attempts to execute a shader using HLE if possible. | ||
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs, | ||
Rasterizer& rasterizer); | ||
|
||
} // namespace Vulkan |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters