Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update parallelRDP and MoltenVK #1634

Merged
merged 3 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ares/n64/vulkan/parallel-rdp/COMMIT
Original file line number Diff line number Diff line change
@@ -1 +1 @@
43bc31642cc70d04adb828a285e68cdbde7110a9
fe5becd13638873db90d46e7ba7d48255971f82a
10 changes: 5 additions & 5 deletions ares/n64/vulkan/parallel-rdp/parallel-rdp/rdp_renderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,7 @@ static int normalize_dzpix(int dz)
else if (dz == 0)
return 1;

unsigned bit = 31 - leading_zeroes(dz);
unsigned bit = 31 - Util::leading_zeroes(dz);
return 1 << (bit + 1);
}

Expand Down Expand Up @@ -1680,7 +1680,7 @@ void Renderer::submit_span_setup_jobs(Vulkan::CommandBuffer &cmd, bool upscale)
cmd.set_buffer_view(1, 0, *instance.gpu.span_info_jobs_view);
cmd.set_specialization_constant_mask(3);
cmd.set_specialization_constant(0, (upscale ? caps.upscaling : 1) * ImplementationConstants::DefaultWorkgroupSize);
cmd.set_specialization_constant(1, upscale ? trailing_zeroes(caps.upscaling) : 0u);
cmd.set_specialization_constant(1, upscale ? Util::trailing_zeroes(caps.upscaling) : 0u);

Vulkan::QueryPoolHandle begin_ts, end_ts;
if (caps.timestamp >= 2)
Expand Down Expand Up @@ -1780,7 +1780,7 @@ void Renderer::submit_rasterization(Vulkan::CommandBuffer &cmd, Vulkan::Buffer &
if (caps.timestamp >= 2)
start_ts = cmd.write_timestamp(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);

uint32_t scale_log2_bit = (upscaling ? trailing_zeroes(caps.upscaling) : 0u) << RASTERIZATION_UPSCALING_LOG2_BIT_OFFSET;
uint32_t scale_log2_bit = (upscaling ? Util::trailing_zeroes(caps.upscaling) : 0u) << RASTERIZATION_UPSCALING_LOG2_BIT_OFFSET;

for (size_t i = 0; i < stream.static_raster_state_cache.size(); i++)
{
Expand Down Expand Up @@ -1891,7 +1891,7 @@ void Renderer::submit_tile_binning_combined(Vulkan::CommandBuffer &cmd, bool ups
if (supports_subgroup_size_control(32, subgroup_size))
{
cmd.enable_subgroup_size_control(true);
cmd.set_subgroup_size_log2(true, 5, trailing_zeroes(subgroup_size));
cmd.set_subgroup_size_log2(true, 5, Util::trailing_zeroes(subgroup_size));
}
}
else
Expand Down Expand Up @@ -2092,7 +2092,7 @@ void Renderer::submit_depth_blend(Vulkan::CommandBuffer &cmd, Vulkan::Buffer &tm
cmd.set_specialization_constant(5, Limits::MaxPrimitives);
cmd.set_specialization_constant(6, upscaled ? caps.max_width : Limits::MaxWidth);
cmd.set_specialization_constant(7, uint32_t(force_write_mask || (!is_host_coherent && !upscaled)) |
((upscaled ? trailing_zeroes(caps.upscaling) : 0u) << 1u));
((upscaled ? Util::trailing_zeroes(caps.upscaling) : 0u) << 1u));

if (upscaled)
cmd.set_storage_buffer(0, 0, *upscaling_multisampled_rdram);
Expand Down
102,697 changes: 48,826 additions & 53,871 deletions ares/n64/vulkan/parallel-rdp/parallel-rdp/shaders/slangmosh.hpp

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,7 @@ Vulkan::ImageHandle VideoInterface::vram_fetch_stage(const Registers &regs, unsi
async_cmd->set_specialization_constant_mask(7);
async_cmd->set_specialization_constant(0, uint32_t(rdram_size));
async_cmd->set_specialization_constant(1, regs.status & (VI_CONTROL_TYPE_MASK | VI_CONTROL_META_AA_BIT));
async_cmd->set_specialization_constant(2, trailing_zeroes(scaling_factor));
async_cmd->set_specialization_constant(2, Util::trailing_zeroes(scaling_factor));

async_cmd->push_constants(&push, 0, sizeof(push));
async_cmd->dispatch((extract_width + 15) / 16,
Expand Down
4 changes: 4 additions & 0 deletions ares/n64/vulkan/parallel-rdp/util/aligned_alloc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,18 @@ struct AlignedAllocation
// Class-specific scalar allocation function: obtains `size` bytes from
// ::Util::memalign_alloc using alignof(T) as the alignment.
// Throws std::bad_alloc on allocation failure when the translation unit is
// built with C++ exceptions enabled; otherwise the (possibly null) pointer
// is returned to the caller unchanged.
static void *operator new(size_t size)
{
    void *ret = ::Util::memalign_alloc(alignof(T), size);
    // __EXCEPTIONS is only predefined by GCC/Clang. Also accept the standard
    // feature-test macro (__cpp_exceptions) and MSVC's _CPPUNWIND so the
    // failure check is not silently compiled out on other toolchains.
#if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)
    if (!ret)
        throw std::bad_alloc();
#endif
    return ret;
}

// Class-specific array allocation function: obtains `size` bytes from
// ::Util::memalign_alloc using alignof(T) as the alignment.
// Throws std::bad_alloc on allocation failure when the translation unit is
// built with C++ exceptions enabled; otherwise the (possibly null) pointer
// is returned to the caller unchanged.
static void *operator new[](size_t size)
{
    void *ret = ::Util::memalign_alloc(alignof(T), size);
    // __EXCEPTIONS is only predefined by GCC/Clang. Also accept the standard
    // feature-test macro (__cpp_exceptions) and MSVC's _CPPUNWIND so the
    // failure check is not silently compiled out on other toolchains.
#if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)
    if (!ret)
        throw std::bad_alloc();
#endif
    return ret;
}

Expand Down
37 changes: 23 additions & 14 deletions ares/n64/vulkan/parallel-rdp/util/bitops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,22 @@
namespace Util
{
#ifdef __GNUC__
#define leading_zeroes(x) ((x) == 0 ? 32 : __builtin_clz(x))
#define trailing_zeroes(x) ((x) == 0 ? 32 : __builtin_ctz(x))
#define trailing_ones(x) __builtin_ctz(~uint32_t(x))
#define leading_zeroes64(x) ((x) == 0 ? 64 : __builtin_clzll(x))
#define trailing_zeroes64(x) ((x) == 0 ? 64 : __builtin_ctzll(x))
#define trailing_ones64(x) __builtin_ctzll(~uint64_t(x))
#define popcount32(x) __builtin_popcount(x)
// Builtin-backed helper macros. The `_`-suffixed names are kept so the inline
// wrappers below (and any existing users of the macros) keep working.
// __builtin_clz/__builtin_ctz (and their ll variants) are undefined for a zero
// argument, so every macro that can hand them zero is guarded explicitly.
#define leading_zeroes_(x) ((x) == 0 ? 32 : __builtin_clz(x))
#define trailing_zeroes_(x) ((x) == 0 ? 32 : __builtin_ctz(x))
// Trailing ones of x == trailing zeroes of ~x. Routed through the guarded
// macro so an all-ones input yields 32 instead of calling __builtin_ctz(0).
#define trailing_ones_(x) trailing_zeroes_(~uint32_t(x))
#define leading_zeroes64_(x) ((x) == 0 ? 64 : __builtin_clzll(x))
#define trailing_zeroes64_(x) ((x) == 0 ? 64 : __builtin_ctzll(x))
// Same as trailing_ones_, for 64-bit inputs: all-ones yields 64.
#define trailing_ones64_(x) trailing_zeroes64_(~uint64_t(x))
#define popcount32_(x) __builtin_popcount(x)

// Typed function wrappers around the macros above.
// Number of leading zero bits in x; 32 when x == 0.
static inline uint32_t leading_zeroes(uint32_t x) { return leading_zeroes_(x); }
// Number of trailing zero bits in x; 32 when x == 0.
static inline uint32_t trailing_zeroes(uint32_t x) { return trailing_zeroes_(x); }
// Number of trailing one bits in x; 32 when every bit of x is set.
static inline uint32_t trailing_ones(uint32_t x) { return trailing_ones_(x); }
// Number of leading zero bits in the 64-bit x; 64 when x == 0.
static inline uint32_t leading_zeroes64(uint64_t x) { return leading_zeroes64_(x); }
// Number of trailing zero bits in the 64-bit x; 64 when x == 0.
static inline uint32_t trailing_zeroes64(uint64_t x) { return trailing_zeroes64_(x); }
// Number of trailing one bits in the 64-bit x; 64 when every bit of x is set.
static inline uint32_t trailing_ones64(uint64_t x) { return trailing_ones64_(x); }
// Number of set bits in x.
static inline uint32_t popcount32(uint32_t x) { return popcount32_(x); }

#elif defined(_MSC_VER)
namespace Internal
{
Expand Down Expand Up @@ -81,13 +90,13 @@ static inline uint32_t ctz64(uint64_t x)
}
}

#define popcount32(x) ::Util::Internal::popcount32(x)
#define leading_zeroes(x) ::Util::Internal::clz(x)
#define trailing_zeroes(x) ::Util::Internal::ctz(x)
#define trailing_ones(x) ::Util::Internal::ctz(~uint32_t(x))
#define leading_zeroes64(x) ::Util::Internal::clz64(x)
#define trailing_zeroes64(x) ::Util::Internal::ctz64(x)
#define trailing_ones64(x) ::Util::Internal::ctz64(~uint64_t(x))
// MSVC flavour of the public bit-scan helpers. Each one is a typed function
// that forwards to the matching helper in the Internal namespace.

// Forwards to Internal::clz for a 32-bit value.
static inline uint32_t leading_zeroes(uint32_t x)
{
    return Internal::clz(x);
}

// Forwards to Internal::ctz for a 32-bit value.
static inline uint32_t trailing_zeroes(uint32_t x)
{
    return Internal::ctz(x);
}

// Applies Internal::ctz to the bitwise complement of x.
static inline uint32_t trailing_ones(uint32_t x)
{
    const uint32_t inverted = ~x;
    return Internal::ctz(inverted);
}

// Forwards to Internal::clz64 for a 64-bit value.
static inline uint32_t leading_zeroes64(uint64_t x)
{
    return Internal::clz64(x);
}

// Forwards to Internal::ctz64 for a 64-bit value.
static inline uint32_t trailing_zeroes64(uint64_t x)
{
    return Internal::ctz64(x);
}

// Applies Internal::ctz64 to the bitwise complement of x.
static inline uint32_t trailing_ones64(uint64_t x)
{
    const uint64_t inverted = ~x;
    return Internal::ctz64(inverted);
}

// Forwards to Internal::popcount32.
static inline uint32_t popcount32(uint32_t x)
{
    return Internal::popcount32(x);
}
#else
#error "Implement me."
#endif
Expand Down
4 changes: 2 additions & 2 deletions ares/n64/vulkan/parallel-rdp/util/timer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ int64_t get_current_time_nsecs()
return int64_t(double(li.QuadPart) * static_qpc_freq.inv_freq);
#else
struct timespec ts = {};
#ifdef ANDROID
#if defined(ANDROID) || defined(__FreeBSD__)
constexpr auto timebase = CLOCK_MONOTONIC;
#else
constexpr auto timebase = CLOCK_MONOTONIC_RAW;
Expand All @@ -128,4 +128,4 @@ double Timer::end()
auto nt = get_current_time_nsecs();
return double(nt - t) * 1e-9;
}
}
}
Loading
Loading