From 2e7bd6e9ae308065ef63e16d780388640afff9aa Mon Sep 17 00:00:00 2001 From: past-due <30942300+past-due@users.noreply.github.com> Date: Fri, 26 Mar 2021 17:30:24 -0400 Subject: [PATCH 1/2] Add meshoptimizer submodule --- .gitmodules | 3 +++ 3rdparty/CMakeLists.txt | 7 +++++++ 3rdparty/meshoptimizer | 1 + COPYING.NONGPL | 2 ++ lib/ivis_opengl/CMakeLists.txt | 2 +- pkg/copyright | 4 ++++ 6 files changed, 18 insertions(+), 1 deletion(-) create mode 160000 3rdparty/meshoptimizer diff --git a/.gitmodules b/.gitmodules index e465cb1e56a..8e7da022d39 100644 --- a/.gitmodules +++ b/.gitmodules @@ -65,3 +65,6 @@ [submodule "data/terrain_overrides/high"] path = data/terrain_overrides/high url = https://github.com/Warzone2100/data-terrain-high.git +[submodule "3rdparty/meshoptimizer"] + path = 3rdparty/meshoptimizer + url = https://github.com/zeux/meshoptimizer.git diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt index 2abaf2280d9..90b0f2cd240 100644 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -194,3 +194,10 @@ if (WZ_PROFILING_NVTX) find_package(CUDAToolkit REQUIRED VERSION 5.0) set(PROFILING_NVTX_INCLUDE ${CUDAToolkit_INCLUDE_DIRS} PARENT_SCOPE) endif () + + +# meshoptimizer +add_subdirectory(meshoptimizer EXCLUDE_FROM_ALL) +set_property(TARGET meshoptimizer PROPERTY FOLDER "3rdparty") +set_property(TARGET meshoptimizer PROPERTY XCODE_ATTRIBUTE_CLANG_WARN_COMMA NO) # -Wcomma +set_property(TARGET meshoptimizer PROPERTY XCODE_ATTRIBUTE_WARNING_CFLAGS "-Wno-cast-align") diff --git a/3rdparty/meshoptimizer b/3rdparty/meshoptimizer new file mode 160000 index 00000000000..c21d3be6ddf --- /dev/null +++ b/3rdparty/meshoptimizer @@ -0,0 +1 @@ +Subproject commit c21d3be6ddf627f8ca852ba4b6db9903b0557858 diff --git a/COPYING.NONGPL b/COPYING.NONGPL index 1b1eaaac8ae..dd2290bcf96 100644 --- a/COPYING.NONGPL +++ b/COPYING.NONGPL @@ -17,6 +17,8 @@ data/base/texpages/page-25-sky-urban.png - MIT, Copyright (c) 2013-2018 Niels Lohmann 3rdparty/LRUCache11/* - 
ISC License, Copyright (c) 2012-22 SAURAV MOHAPATRA (https://github.com/mohaps/lrucache11) +3rdparty/meshoptimizer/* + - MIT, Copyright (c) 2016-2023 Arseny Kapoulkine (https://github.com/zeux/meshoptimizer/) 3rdparty/micro-ecc/* - BSD, Copyright (c) 2014, Kenneth MacKay (https://github.com/kmackay/micro-ecc) 3rdparty/sha/* diff --git a/lib/ivis_opengl/CMakeLists.txt b/lib/ivis_opengl/CMakeLists.txt index aa88418dc60..9d74cbcac9b 100644 --- a/lib/ivis_opengl/CMakeLists.txt +++ b/lib/ivis_opengl/CMakeLists.txt @@ -68,7 +68,7 @@ include(WZTargetConfiguration) WZ_TARGET_CONFIGURATION(ivis-opengl) target_include_directories(ivis-opengl PRIVATE ${HARFBUZZ_INCLUDE_DIRS} ${FREETYPE_INCLUDE_DIR_ft2build}) -target_link_libraries(ivis-opengl PRIVATE framework launchinfo PNG::PNG ${HARFBUZZ_LIBRARIES} ${FREETYPE_LIBRARIES}) +target_link_libraries(ivis-opengl PRIVATE framework launchinfo PNG::PNG ${HARFBUZZ_LIBRARIES} ${FREETYPE_LIBRARIES} meshoptimizer) target_link_libraries(ivis-opengl PUBLIC glad) target_link_libraries(ivis-opengl PUBLIC optional-lite) if (WZ_DEBUG_GFX_API_LEAKS) diff --git a/pkg/copyright b/pkg/copyright index c105f7f2cce..c96208bf727 100644 --- a/pkg/copyright +++ b/pkg/copyright @@ -155,6 +155,10 @@ Files: 3rdparty/LRUCache11/* Copyright: 2012-2022 SAURAV MOHAPATRA License: ISC +Files: 3rdparty/meshoptimizer/* +Copyright: 2016-2023 Arseny Kapoulkine +License: Expat + Files: 3rdparty/mINI/* Copyright: 2018 Danijel Durakovic License: Expat From af0742c5b53fc786f882e57afd215d0b05ac610e Mon Sep 17 00:00:00 2001 From: past-due <30942300+past-due@users.noreply.github.com> Date: Fri, 26 Mar 2021 17:35:38 -0400 Subject: [PATCH 2/2] imdload: Perform mesh cache + fetch optimization --- lib/ivis_opengl/imdload.cpp | 121 +++++++++++++++++++++++++++++++++--- lib/ivis_opengl/ivisdef.h | 3 +- lib/ivis_opengl/piedraw.cpp | 22 +++---- 3 files changed, 127 insertions(+), 19 deletions(-) diff --git a/lib/ivis_opengl/imdload.cpp b/lib/ivis_opengl/imdload.cpp index 
bf85af6b97a..27b6a2bd52e 100644
--- a/lib/ivis_opengl/imdload.cpp
+++ b/lib/ivis_opengl/imdload.cpp
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include
 
 #include "lib/framework/frame.h"
 #include "lib/framework/string_ext.h"
@@ -45,6 +46,8 @@
 #include
 using Vector4f = glm::vec4;
 
+#include <meshoptimizer.h>
+
 // Scale animation numbers from int to float
 #define INT_SCALE 1000
 
@@ -1254,6 +1257,103 @@ void finishTangentsGeneration()
 	}
 }
 
+// Deduplicates and reorders the geometry of the level that is currently being loaded.
+//
+// Works on the file-level scratch buffers (vertices, normals, texcoords, the optional
+// tangents, and indices) and runs them through meshoptimizer: re-indexing (vertices that
+// are identical in every stream are merged), then vertex cache, overdraw and vertex fetch
+// optimization. The number of indices is unchanged; the number of vertices may shrink, in
+// which case s.vertexCount and the file-level vertexCount are both updated.
+//
+// If the buffers are empty or inconsistent, nothing is modified.
+//
+// NOTE: Only intended to be called from _imd_load_level after all buffers are ready
+static inline void _imd_load_level_optimize(iIMDShape &s)
+{
+	const size_t index_count = indices.size();
+	const size_t initial_vertex_count = vertexCount;
+	const size_t vec3_stride = sizeof(gfx_api::gfxFloat) * 3;
+	const size_t vec4_stride = sizeof(gfx_api::gfxFloat) * 4;
+
+	// Bail out on empty or inconsistent input: taking the address of element 0 of an empty
+	// vector is undefined behaviour, and meshoptimizer expects whole triangles and streams
+	// that cover every vertex. The caller checks for empty buffers after this call.
+	const bool streams_cover_vertices =
+		vertices.size() >= initial_vertex_count * 3
+		&& normals.size() >= initial_vertex_count * 3
+		&& texcoords.size() >= initial_vertex_count * 4
+		&& (tangents.empty() || tangents.size() >= initial_vertex_count * 4);
+	if (index_count == 0 || index_count % 3 != 0 || initial_vertex_count == 0 || !streams_cover_vertices)
+	{
+		return;
+	}
+
+	std::vector<meshopt_Stream> streams = {
+		{vertices.data(), vec3_stride, vec3_stride},
+		{normals.data(), vec3_stride, vec3_stride},
+		{texcoords.data(), vec4_stride, vec4_stride}
+	};
+	if (!tangents.empty())
+	{
+		streams.push_back({tangents.data(), vec4_stride, vec4_stride});
+	}
+
+	// convert to std::vector<unsigned int>, as expected by meshopt
+	static std::vector<unsigned int> indices_uint; // Static, to save allocations.
+	indices_uint.assign(indices.begin(), indices.end());
+
+	// Indexing (the remap table needs one entry per input vertex)
+	std::vector<unsigned int> remap(initial_vertex_count);
+	const size_t vertex_count = meshopt_generateVertexRemapMulti(remap.data(), indices_uint.data(), index_count, initial_vertex_count, streams.data(), streams.size());
+
+	if (vertex_count != initial_vertex_count)
+	{
+		debug(LOG_3D, "imd[_load_level_optimize] = Reduced vertices: %zu -> %zu", initial_vertex_count, vertex_count);
+	}
+
+	// Remaps one vertex stream in place, then trims it to the new vertex count.
+	const auto remapStream = [&](std::vector<gfx_api::gfxFloat> &stream, size_t components, size_t source_count) {
+		meshopt_remapVertexBuffer(stream.data(), stream.data(), source_count, sizeof(gfx_api::gfxFloat) * components, remap.data());
+		stream.resize(vertex_count * components);
+	};
+	// Applies the current remap table to every vertex stream.
+	const auto remapAllStreams = [&](size_t source_count) {
+		remapStream(vertices, 3, source_count);
+		remapStream(normals, 3, source_count);
+		remapStream(texcoords, 4, source_count);
+		if (!tangents.empty())
+		{
+			remapStream(tangents, 4, source_count);
+		}
+	};
+
+	// Remapping in place keeps the capacity of the static index scratch buffer.
+	meshopt_remapIndexBuffer(indices_uint.data(), indices_uint.data(), index_count, remap.data());
+	remapAllStreams(initial_vertex_count);
+
+	// Vertex cache optimization
+	meshopt_optimizeVertexCache(indices_uint.data(), indices_uint.data(), index_count, vertex_count);
+
+	// Overdraw optimization (threshold 1.05f: allow up to 5% worse vertex cache efficiency)
+	meshopt_optimizeOverdraw(indices_uint.data(), indices_uint.data(), index_count, vertices.data(), vertex_count, vec3_stride, 1.05f);
+
+	// Vertex fetch optimization
+	meshopt_optimizeVertexFetchRemap(remap.data(), indices_uint.data(), index_count, vertex_count);
+	meshopt_remapIndexBuffer(indices_uint.data(), indices_uint.data(), index_count, remap.data());
+	remapAllStreams(vertex_count);
+
+	// Publish the new vertex count. The file-level counter is updated too, because
+	// _imd_load_level re-assigns s.vertexCount from it right after this call returns.
+	// NOTE(review): assumes nothing later in the loader needs the pre-optimization count - confirm.
+	s.vertexCount = vertex_count;
+	vertexCount = static_cast<decltype(vertexCount)>(vertex_count);
+
+	// transform indices back (the vertex count never grows, so every index still fits)
+	indices.clear();
+	std::transform(indices_uint.begin(), indices_uint.end(), std::back_inserter(indices),
+				   [](unsigned int c) -> uint16_t { return static_cast<uint16_t>(c); });
+}
+
 /*!
* Load shape levels recursively * \param ppFileData Pointer to the data (usually read from a file) @@ -1543,6 +1639,8 @@ static std::unique_ptr _imd_load_level(const WzString &filename, cons indices.emplace_back(addVertex(s, 2, &p, npol, pie_level_normals)); } + ASSERT(indices.size() == s.polys.size() * 3, "???"); + s.indicesCount = indices.size(); s.vertexCount = vertexCount; // Tangents are optional, only if normals were loaded and passed sanity check above @@ -1554,15 +1652,17 @@ static std::unique_ptr _imd_load_level(const WzString &filename, cons for (size_t i = 0; i < indices.size(); i += 3) calculateTangentsForTriangle(indices[i], indices[i+1], indices[i+2]); finishTangentsGeneration(); - - if (!tangents.empty()) - { - if (!s.buffers[VBO_TANGENT]) - s.buffers[VBO_TANGENT] = gfx_api::context::get().create_buffer_object(gfx_api::buffer::usage::vertex_buffer, gfx_api::context::buffer_storage_hint::static_draw, "tangent buffer"); - s.buffers[VBO_TANGENT]->upload(tangents.size() * sizeof(gfx_api::gfxFloat), tangents.data()); - } + } + else + { + tangents.resize(0); + bitangents.resize(0); } + _imd_load_level_optimize(s); + s.vertexCount = vertexCount; + s.indicesCount = indices.size(); + if (!s.buffers[VBO_VERTEX]) s.buffers[VBO_VERTEX] = gfx_api::context::get().create_buffer_object(gfx_api::buffer::usage::vertex_buffer, gfx_api::context::buffer_storage_hint::static_draw, "vertex buffer"); if (vertices.empty()) @@ -1594,6 +1694,13 @@ static std::unique_ptr _imd_load_level(const WzString &filename, cons debug(LOG_ERROR, "_imd_load_level: file corrupt? 
- no texcoords?: %s (key: %s)", filename.toUtf8().c_str(), key.c_str()); } s.buffers[VBO_TEXCOORD]->upload(texcoords.size() * sizeof(gfx_api::gfxFloat), texcoords.data()); + + if (!tangents.empty()) + { + if (!s.buffers[VBO_TANGENT]) + s.buffers[VBO_TANGENT] = gfx_api::context::get().create_buffer_object(gfx_api::buffer::usage::vertex_buffer, gfx_api::context::buffer_storage_hint::static_draw, "tangent buffer"); + s.buffers[VBO_TANGENT]->upload(tangents.size() * sizeof(gfx_api::gfxFloat), tangents.data()); + } } indices.resize(0); diff --git a/lib/ivis_opengl/ivisdef.h b/lib/ivis_opengl/ivisdef.h index 5ffe4e50e5e..4640f402937 100644 --- a/lib/ivis_opengl/ivisdef.h +++ b/lib/ivis_opengl/ivisdef.h @@ -201,7 +201,8 @@ struct iIMDShape // The new rendering data gfx_api::buffer* buffers[VBO_COUNT] = { nullptr }; - uint16_t vertexCount = 0; + size_t vertexCount = 0; + size_t indicesCount = 0; // the number of polys * 3 // object animation (animating a level, rather than its texture) std::vector objanimdata; diff --git a/lib/ivis_opengl/piedraw.cpp b/lib/ivis_opengl/piedraw.cpp index 8238bf42a8c..edf20dd7a78 100644 --- a/lib/ivis_opengl/piedraw.cpp +++ b/lib/ivis_opengl/piedraw.cpp @@ -311,8 +311,8 @@ void pie_Draw3DButton(const iIMDShape *shape, PIELIGHT teamcolour, const glm::ma gfx_api::Draw3DShapeOpaque::get().bind_textures(&pie_Texture(textures.texpage), tcmask, normalmap, specularmap); gfx_api::Draw3DShapeOpaque::get().bind_vertex_buffers(shape->buffers[VBO_VERTEX], shape->buffers[VBO_NORMAL], shape->buffers[VBO_TEXCOORD], pTangentBuffer); gfx_api::context::get().bind_index_buffer(*shape->buffers[VBO_INDEX], gfx_api::index_type::u16); - gfx_api::Draw3DShapeOpaque::get().draw_elements(shape->polys.size() * 3, 0); - polyCount += shape->polys.size(); + gfx_api::Draw3DShapeOpaque::get().draw_elements(shape->indicesCount, 0); + polyCount += shape->indicesCount / 3; gfx_api::Draw3DShapeOpaque::get().unbind_vertex_buffers(shape->buffers[VBO_VERTEX], 
shape->buffers[VBO_NORMAL], shape->buffers[VBO_TEXCOORD], pTangentBuffer); gfx_api::context::get().unbind_index_buffer(*shape->buffers[VBO_INDEX]); } @@ -430,7 +430,7 @@ static void draw3dShapeTemplated(const templatedState &lastState, ShaderOnce& gl AdditivePSO::get().bind_textures(&pie_Texture(textures.texpage), tcmask, normalmap, specularmap); } AdditivePSO::get().set_uniforms_at(2, instanceUniforms); - AdditivePSO::get().draw_elements(shape->polys.size() * 3, 0); + AdditivePSO::get().draw_elements(shape->indicesCount, 0); // AdditivePSO::get().unbind_vertex_buffers(shape->buffers[VBO_VERTEX], shape->buffers[VBO_NORMAL], shape->buffers[VBO_TEXCOORD]); } else if (pieFlag & pie_TRANSLUCENT) @@ -448,7 +448,7 @@ static void draw3dShapeTemplated(const templatedState &lastState, ShaderOnce& gl AlphaPSO::get().bind_textures(&pie_Texture(textures.texpage), tcmask, normalmap, specularmap); } AlphaPSO::get().set_uniforms_at(2, instanceUniforms); - AlphaPSO::get().draw_elements(shape->polys.size() * 3, 0); + AlphaPSO::get().draw_elements(shape->indicesCount, 0); // AlphaPSO::get().unbind_vertex_buffers(shape->buffers[VBO_VERTEX], shape->buffers[VBO_NORMAL], shape->buffers[VBO_TEXCOORD]); } else @@ -464,7 +464,7 @@ static void draw3dShapeTemplated(const templatedState &lastState, ShaderOnce& gl AlphaNoDepthWRTPSO::get().bind_textures(&pie_Texture(textures.texpage), tcmask, normalmap, specularmap); } AlphaNoDepthWRTPSO::get().set_uniforms_at(2, instanceUniforms); - AlphaNoDepthWRTPSO::get().draw_elements(shape->polys.size() * 3, 0); + AlphaNoDepthWRTPSO::get().draw_elements(shape->indicesCount, 0); // AlphaPSO::get().unbind_vertex_buffers(shape->buffers[VBO_VERTEX], shape->buffers[VBO_NORMAL], shape->buffers[VBO_TEXCOORD]); } } @@ -481,7 +481,7 @@ static void draw3dShapeTemplated(const templatedState &lastState, ShaderOnce& gl PremultipliedPSO::get().bind_textures(&pie_Texture(textures.texpage), tcmask, normalmap, specularmap); } PremultipliedPSO::get().set_uniforms_at(2, 
instanceUniforms); - PremultipliedPSO::get().draw_elements(shape->polys.size() * 3, 0); + PremultipliedPSO::get().draw_elements(shape->indicesCount, 0); // PremultipliedPSO::get().unbind_vertex_buffers(shape->buffers[VBO_VERTEX], shape->buffers[VBO_NORMAL], shape->buffers[VBO_TEXCOORD]); } else @@ -497,7 +497,7 @@ static void draw3dShapeTemplated(const templatedState &lastState, ShaderOnce& gl OpaquePSO::get().bind_textures(&pie_Texture(textures.texpage), tcmask, normalmap, specularmap); } OpaquePSO::get().set_uniforms_at(2, instanceUniforms); - OpaquePSO::get().draw_elements(shape->polys.size() * 3, 0); + OpaquePSO::get().draw_elements(shape->indicesCount, 0); // OpaquePSO::get().unbind_vertex_buffers(shape->buffers[VBO_VERTEX], shape->buffers[VBO_NORMAL], shape->buffers[VBO_TEXCOORD]); } } @@ -579,7 +579,7 @@ static templatedState pie_Draw3DShape2(const templatedState &lastState, ShaderOn draw3dShapeTemplated(lastState, globalsOnce, globalUniforms, colour, teamcolour, stretchDepth, ecmState, globalUniforms.ViewMatrix * modelMatrix, shape, pieFlag, frame); } - polyCount += shape->polys.size(); + polyCount += (shape->indicesCount / 3); return currentState; } @@ -1412,7 +1412,7 @@ static void drawInstanced3dShapeTemplated_Inner(ShaderOnce& globalsOnce, const g std::make_tuple(instanceDataBuffer, instanceBufferOffset) }); Draw3DInstancedPSO::get().bind_textures(&pie_Texture(textures.texpage), tcmask, normalmap, specularmap, gfx_api::context::get().getDepthTexture(), lightmapTexture); - Draw3DInstancedPSO::get().draw_elements_instanced(shape->polys.size() * 3, 0, instance_count); + Draw3DInstancedPSO::get().draw_elements_instanced(shape->indicesCount, 0, instance_count); // Draw3DInstancedPSO::get().unbind_vertex_buffers(shape->buffers[VBO_VERTEX], shape->buffers[VBO_NORMAL], shape->buffers[VBO_TEXCOORD]); } @@ -1439,7 +1439,7 @@ static void drawInstanced3dShapeDepthOnly(ShaderOnce& globalsOnce, const gfx_api std::make_tuple(instanceDataBuffer, instanceBufferOffset) 
}); // gfx_api::Draw3DShapeDepthOnly_Instanced::get().bind_textures(&pie_Texture(shape->texpage), tcmask, normalmap, specularmap); - gfx_api::Draw3DShapeDepthOnly_Instanced::get().draw_elements_instanced(shape->polys.size() * 3, 0, instance_count); + gfx_api::Draw3DShapeDepthOnly_Instanced::get().draw_elements_instanced(shape->indicesCount, 0, instance_count); // Draw3DInstancedPSO::get().unbind_vertex_buffers(shape->buffers[VBO_VERTEX], shape->buffers[VBO_NORMAL], shape->buffers[VBO_TEXCOORD]); } @@ -1534,7 +1534,7 @@ static void pie_Draw3DShape2_Instanced(ShaderOnce& globalsOnce, const gfx_api::D drawInstanced3dShapeTemplated(globalsOnce, globalUniforms, shape, pieFlag, instanceDataBuffer, instanceBufferOffset, instance_count, lightmapTexture); } - polyCount += shape->polys.size(); + polyCount += shape->indicesCount / 3; } void InstancedMeshRenderer::Draw3DShapes_Instanced(uint64_t currentGameFrame, ShaderOnce& globalsOnce, const gfx_api::Draw3DShapeInstancedGlobalUniforms& globalUniforms, int drawParts, bool depthPass)