From 09ffa360f09cbf878372be6dbbafcc0711210613 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Sun, 31 Dec 2023 17:46:24 +0100 Subject: [PATCH 1/3] nvtx: Make nvtx easier to package, and add more scope. --- 3rdparty/CMakeLists.txt | 10 ++++- src/CMakeLists.txt | 2 +- src/advvis.cpp | 2 + src/atmos.cpp | 3 ++ src/bucket3d.cpp | 2 + src/display.cpp | 1 + src/display3d.cpp | 98 ++++++++++++++++++++++++++--------------- src/effects.cpp | 2 + src/profiling.cpp | 3 ++ src/shadowcascades.cpp | 2 + src/terrain.cpp | 3 ++ 11 files changed, 90 insertions(+), 38 deletions(-) diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt index 6d7dd8bb10d..0f2c6498c1c 100644 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -206,6 +206,14 @@ if (WZ_ENABLE_BASIS_UNIVERSAL) endif(WZ_ENABLE_BASIS_UNIVERSAL) if (WZ_PROFILING_NVTX) - find_package(CUDAToolkit REQUIRED VERSION 5.0) + include(FetchContent) + FetchContent_Declare( + nvtx + GIT_REPOSITORY https://github.com/NVIDIA/NVTX.git + GIT_TAG a1ceb0677f67371ed29a2b1c022794f077db5fe7 + ) + + FetchContent_MakeAvailable(nvtx) set(PROFILING_NVTX_INCLUDE ${CUDAToolkit_INCLUDE_DIRS} PARENT_SCOPE) + endif () diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4fe629bc320..4c65f854e83 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -125,7 +125,7 @@ target_link_libraries(warzone2100 SQLite::SQLite3) target_link_libraries(warzone2100 SQLiteCpp) if (WZ_PROFILING_NVTX) - target_include_directories(warzone2100 PRIVATE ${PROFILING_NVTX_INCLUDE}) + target_link_libraries(warzone2100 nvtx3-cpp) endif() set(_curl_gnutls_thread_safe_fix FALSE) diff --git a/src/advvis.cpp b/src/advvis.cpp index 8ec84d6b917..57568427ec0 100644 --- a/src/advvis.cpp +++ b/src/advvis.cpp @@ -26,6 +26,7 @@ #include "lib/framework/frame.h" #include "advvis.h" +#include "profiling.h" #include "map.h" // ------------------------------------------------------------------------------------ @@ -53,6 +54,7 @@ inline float getTileIllumination(const MAPTILE *psTile) // ------------------------------------------------------------------------------------ void avUpdateTiles() { + WZ_PROFILE_SCOPE(avUpdateTiles); const int len = mapHeight * mapWidth; const int playermask = 1 << selectedPlayer; UDWORD i = 0; diff --git a/src/atmos.cpp b/src/atmos.cpp index 710ebeea35d..5cc13140c60 100644 --- a/src/atmos.cpp +++ b/src/atmos.cpp @@ -33,6 +33,7 @@ #include "loop.h" #include "map.h" #include "miscimd.h" +#include "profiling.h" #include "lib/gamelib/gtime.h" #include @@ -247,6 +248,7 @@ static void atmosAddParticle(const Vector3f &pos, AP_TYPE type) /* Move the particles */ void atmosUpdateSystem() { + WZ_PROFILE_SCOPE(atmosUpdateSystem); UDWORD i; UDWORD numberToAdd; Vector3f pos; @@ -324,6 +326,7 @@ static inline void renderParticleInternal(ATPART *psPart, const glm::mat4 &viewM void atmosDrawParticles(const glm::mat4 &viewMatrix, const glm::mat4 &perspectiveViewMatrix) { + WZ_PROFILE_SCOPE(atmosDrawParticles); UDWORD i; if (weather == WT_NONE) diff --git a/src/bucket3d.cpp b/src/bucket3d.cpp index ea891dcaa20..986b355fa4b 100644 --- a/src/bucket3d.cpp +++ b/src/bucket3d.cpp @@ -34,6 +34,7 @@ #include "display3d.h" #include "effects.h" #include "miscimd.h" +#include "profiling.h" #include @@ -384,6 +385,7 @@ void bucketAddTypeToList(RENDER_TYPE objectType, void *pObject, const glm::mat4 /* render Objects in list */ void bucketRenderCurrentList(const glm::mat4 &viewMatrix, const glm::mat4 &perspectiveViewMatrix) { + WZ_PROFILE_SCOPE(bucketRenderCurrentList); std::sort(bucketArray.begin(), bucketArray.end()); for (auto thisTag = bucketArray.cbegin(); thisTag != bucketArray.cend(); ++thisTag) diff --git a/src/display.cpp b/src/display.cpp index 8cad6cc0b02..8c0fbfbd48f 100644 --- a/src/display.cpp +++ b/src/display.cpp @@ -31,6 +31,7 @@ #include "lib/ivis_opengl/pietypes.h" #include "lib/framework/fixedpoint.h" #include "lib/framework/wzapp.h" +#include "profiling.h" #include "action.h" #include "display.h" diff --git a/src/display3d.cpp b/src/display3d.cpp index 544fed589a8..29e89e5bee7 100644 --- a/src/display3d.cpp +++ b/src/display3d.cpp @@ -82,6 +82,7 @@ #include "advvis.h" #include "cmddroid.h" #include "terrain.h" +#include "profiling.h" #include "warzoneconfig.h" #include "multistat.h" #include "animation.h" @@ -967,6 +968,7 @@ static void setupConnectionStatusForm() /// Render the 3D world void draw3DScene() { + WZ_PROFILE_SCOPE(draw3DScene); wzPerfBegin(PERF_START_FRAME, "Start 3D scene"); /* What frame number are we on? */ @@ -1288,6 +1290,7 @@ glm::mat4 getBiasedShadowMapMVPMatrix(glm::mat4 lightOrthoMatrix, const glm::mat /// Draw the terrain and all droids, missiles and other objects on it static void drawTiles(iView *player) { + WZ_PROFILE_SCOPE(drawTiles); // draw terrain auto currShadowMode = pie_getShadowMode(); @@ -1354,52 +1357,58 @@ static void drawTiles(iView *player) // update the fog of war... FIXME: Remove this const glm::mat4 tileCalcPerspectiveViewMatrix = perspectiveMatrix * baseViewMatrix; auto currTerrainShaderType = getTerrainShaderType(); - for (int i = -visibleTiles.y / 2, idx = 0; i <= visibleTiles.y / 2; i++, ++idx) { - /* Go through the x's */ - for (int j = -visibleTiles.x / 2, jdx = 0; j <= visibleTiles.x / 2; j++, ++jdx) + WZ_PROFILE_SCOPE(init_lightmap); + for (int i = -visibleTiles.y / 2, idx = 0; i <= visibleTiles.y / 2; i++, ++idx) { - Vector2i screen(0, 0); - Position pos; + /* Go through the x's */ + for (int j = -visibleTiles.x / 2, jdx = 0; j <= visibleTiles.x / 2; j++, ++jdx) + { + Vector2i screen(0, 0); + Position pos; - pos.x = world_coord(j); - pos.z = -world_coord(i); - pos.y = 0; + pos.x = world_coord(j); + pos.z = -world_coord(i); + pos.y = 0; - if (tileOnMap(playerXTile + j, playerZTile + i)) - { - MAPTILE *psTile = mapTile(playerXTile + j, playerZTile + i); + if (tileOnMap(playerXTile + j, playerZTile + i)) + { + MAPTILE* psTile = mapTile(playerXTile + j, playerZTile + i); - pos.y = map_TileHeight(playerXTile + j, playerZTile + i); - setTileColour(playerXTile + j, playerZTile + i, pal_SetBrightness((currTerrainShaderType == TerrainShaderType::SINGLE_PASS) ? 0 : static_cast(psTile->level))); + pos.y = map_TileHeight(playerXTile + j, playerZTile + i); + setTileColour(playerXTile + j, playerZTile + i, pal_SetBrightness((currTerrainShaderType == TerrainShaderType::SINGLE_PASS) ? 0 : static_cast(psTile->level))); + } + tileScreenInfo[idx][jdx].z = pie_RotateProjectWithPerspective(&pos, tileCalcPerspectiveViewMatrix, &screen); + tileScreenInfo[idx][jdx].x = screen.x; + tileScreenInfo[idx][jdx].y = screen.y; } - tileScreenInfo[idx][jdx].z = pie_RotateProjectWithPerspective(&pos, tileCalcPerspectiveViewMatrix, &screen); - tileScreenInfo[idx][jdx].x = screen.x; - tileScreenInfo[idx][jdx].y = screen.y; } } // Determine whether each tile in the drawable range is actually visible on-screen // (used for more accurate clipping elsewhere) - for (int idx = 0; idx < visibleTiles.y; ++idx) { - for (int jdx = 0; jdx < visibleTiles.x; ++jdx) + WZ_PROFILE_SCOPE(tile_Culling); + for (int idx = 0; idx < visibleTiles.y; ++idx) { - QUAD quad; + for (int jdx = 0; jdx < visibleTiles.x; ++jdx) + { + QUAD quad; - quad.coords[0].x = tileScreenInfo[idx + 0][jdx + 0].x; - quad.coords[0].y = tileScreenInfo[idx + 0][jdx + 0].y; + quad.coords[0].x = tileScreenInfo[idx + 0][jdx + 0].x; + quad.coords[0].y = tileScreenInfo[idx + 0][jdx + 0].y; - quad.coords[1].x = tileScreenInfo[idx + 0][jdx + 1].x; - quad.coords[1].y = tileScreenInfo[idx + 0][jdx + 1].y; + quad.coords[1].x = tileScreenInfo[idx + 0][jdx + 1].x; + quad.coords[1].y = tileScreenInfo[idx + 0][jdx + 1].y; - quad.coords[2].x = tileScreenInfo[idx + 1][jdx + 1].x; - quad.coords[2].y = tileScreenInfo[idx + 1][jdx + 1].y; + quad.coords[2].x = tileScreenInfo[idx + 1][jdx + 1].x; + quad.coords[2].y = tileScreenInfo[idx + 1][jdx + 1].y; - quad.coords[3].x = tileScreenInfo[idx + 1][jdx + 0].x; - quad.coords[3].y = tileScreenInfo[idx + 1][jdx + 0].y; + quad.coords[3].x = tileScreenInfo[idx + 1][jdx + 0].x; + quad.coords[3].y = tileScreenInfo[idx + 1][jdx + 0].y; - tileScreenVisible[idx][jdx] = quadIntersectsWithScreen(quad); + tileScreenVisible[idx][jdx] = quadIntersectsWithScreen(quad); + } } } @@ -1461,6 +1470,7 @@ static void drawTiles(iView *player) if (currShadowMode == ShadowMode::Shadow_Mapping) { + WZ_PROFILE_SCOPE(ShadowMapping); for (size_t i = 0; i < numShadowCascades; ++i) { gfx_api::context::get().beginDepthPass(i); @@ -1493,7 +1503,10 @@ static void drawTiles(iView *player) wzPerfEnd(PERF_WATER); wzPerfBegin(PERF_MODELS, "3D scene - models"); - pie_DrawAllMeshes(currentGameFrame, perspectiveMatrix, viewMatrix, shadowCascadesInfo, false); + { + WZ_PROFILE_SCOPE(pie_DrawAllMeshes); + pie_DrawAllMeshes(currentGameFrame, perspectiveMatrix, viewMatrix, shadowCascadesInfo, false); + } wzPerfEnd(PERF_MODELS); if (!gamePaused()) @@ -1502,15 +1515,21 @@ static void drawTiles(iView *player) } locateMouse(); - gfx_api::context::get().endSceneRenderPass(); + { + WZ_PROFILE_SCOPE(endSceneRenderPass); + gfx_api::context::get().endSceneRenderPass(); + } // Draw the scene to the default framebuffer - gfx_api::WorldToScreenPSO::get().bind(); - gfx_api::WorldToScreenPSO::get().bind_constants({1.0f}); - gfx_api::WorldToScreenPSO::get().bind_vertex_buffers(pScreenTriangleVBO); - gfx_api::WorldToScreenPSO::get().bind_textures(gfx_api::context::get().getSceneTexture()); - gfx_api::WorldToScreenPSO::get().draw(3, 0); - gfx_api::WorldToScreenPSO::get().unbind_vertex_buffers(pScreenTriangleVBO); + { + WZ_PROFILE_SCOPE(copyToFBO); + gfx_api::WorldToScreenPSO::get().bind(); + gfx_api::WorldToScreenPSO::get().bind_constants({1.0f}); + gfx_api::WorldToScreenPSO::get().bind_vertex_buffers(pScreenTriangleVBO); + gfx_api::WorldToScreenPSO::get().bind_textures(gfx_api::context::get().getSceneTexture()); + gfx_api::WorldToScreenPSO::get().draw(3, 0); + gfx_api::WorldToScreenPSO::get().unbind_vertex_buffers(pScreenTriangleVBO); + } } /// Initialise the fog, skybox and some other stuff @@ -1774,6 +1793,7 @@ static void calcFlagPosScreenCoords(SDWORD *pX, SDWORD *pY, SDWORD *pR, const gl /// Decide whether to render a projectile, and make sure it will be drawn static void display3DProjectiles(const glm::mat4 &viewMatrix, const glm::mat4 &perspectiveViewMatrix) { + WZ_PROFILE_SCOPE(display3DProjectiles); PROJECTILE *psObj = proj_GetFirst(); while (psObj != nullptr) { @@ -1946,6 +1966,7 @@ void renderProjectile(PROJECTILE *psCurr, const glm::mat4 &viewMatrix, const glm /// Draw the buildings static void displayStaticObjects(const glm::mat4 &viewMatrix, const glm::mat4 &perspectiveViewMatrix) { + WZ_PROFILE_SCOPE(displayStaticObjects); // to solve the flickering edges of baseplates // pie_SetDepthOffset(-1.0f); @@ -2213,6 +2234,7 @@ void displayBlueprints(const glm::mat4 &viewMatrix, const glm::mat4 &perspective /// Draw Factory Delivery Points static void displayDelivPoints(const glm::mat4& viewMatrix, const glm::mat4 &perspectiveViewMatrix) { + WZ_PROFILE_SCOPE(displayDelivPoints); if (selectedPlayer >= MAX_PLAYERS) { return; /* no-op */ } for (const auto& psDelivPoint : apsFlagPosLists[selectedPlayer]) { @@ -2226,6 +2248,7 @@ static void displayDelivPoints(const glm::mat4& viewMatrix, const glm::mat4 &per /// Draw the features static void displayFeatures(const glm::mat4 &viewMatrix, const glm::mat4 &perspectiveViewMatrix) { + WZ_PROFILE_SCOPE(displayFeatures); // player can only be 0 for the features. /* Go through all the features */ @@ -2293,6 +2316,7 @@ static void displayProximityMsgs(const glm::mat4& viewMatrix, const glm::mat4 &p /// Draw the droids static void displayDynamicObjects(const glm::mat4 &viewMatrix, const glm::mat4 &perspectiveViewMatrix) { + WZ_PROFILE_SCOPE(displayDynamicObjects); /* Need to go through all the droid lists */ for (unsigned player = 0; player < MAX_PLAYERS; ++player) { @@ -3821,6 +3845,7 @@ static void locateMouse() /// Render the sky and surroundings static void renderSurroundings(const glm::mat4& projectionMatrix, const glm::mat4 &skyboxViewMatrix) { + WZ_PROFILE_SCOPE(renderSurroundings); // Render skybox relative to ground (i.e. undo player y translation) // then move it somewhat below ground level for the blending effect // rotate it @@ -4297,6 +4322,7 @@ static void drawDroidSensorLock(DROID *psDroid) /// Draw the construction lines for all construction droids static void doConstructionLines(const glm::mat4 &viewMatrix) { + WZ_PROFILE_SCOPE(doConstructionLines); for (unsigned i = 0; i < MAX_PLAYERS; i++) { for (DROID *psDroid : apsDroidLists[i]) diff --git a/src/effects.cpp b/src/effects.cpp index c2d15f4347e..43a5c768304 100644 --- a/src/effects.cpp +++ b/src/effects.cpp @@ -69,6 +69,7 @@ #include "multiplay.h" #include "component.h" +#include "profiling.h" #ifndef GLM_ENABLE_EXPERIMENTAL #define GLM_ENABLE_EXPERIMENTAL @@ -475,6 +476,7 @@ void addEffect(const Vector3i *pos, EFFECT_GROUP group, EFFECT_TYPE type, bool s /* Calls all the update functions for each different currently active effect */ void processEffects(const glm::mat4 &perspectiveViewMatrix) { + WZ_PROFILE_SCOPE(processEffects); for (auto it = activeList.begin(); it != activeList.end(); ) { EFFECT *psEffect = *it; diff --git a/src/profiling.cpp b/src/profiling.cpp index c119f0dc024..fd476ba47e3 100644 --- a/src/profiling.cpp +++ b/src/profiling.cpp @@ -25,7 +25,10 @@ #include #ifdef WZ_PROFILING_NVTX +#pragma warning( push ) +#pragma warning( disable : 4191 ) #include +#pragma warning( pop ) #endif #ifdef WZ_PROFILING_VTUNE diff --git a/src/shadowcascades.cpp b/src/shadowcascades.cpp index 87d5ffed774..1bbda03ba31 100644 --- a/src/shadowcascades.cpp +++ b/src/shadowcascades.cpp @@ -46,6 +46,7 @@ #include "shadowcascades.h" #include "display3d.h" +#include "profiling.h" #include "lib/framework/fixedpoint.h" #include "lib/ivis_opengl/piematrix.h" @@ -59,6 +60,7 @@ float cascadeSplitLambda = 0.3f; void calculateShadowCascades(const iView *player, float terrainDistance, const glm::mat4& baseViewMatrix, const glm::vec3& lightInvDir, size_t SHADOW_MAP_CASCADE_COUNT, std::vector& output) { + WZ_PROFILE_SCOPE(calculateShadowCascades); output.clear(); output.resize(SHADOW_MAP_CASCADE_COUNT); diff --git a/src/terrain.cpp b/src/terrain.cpp index d3efa5bebe7..524673ddcf2 100644 --- a/src/terrain.cpp +++ b/src/terrain.cpp @@ -59,6 +59,7 @@ #include "hci.h" #include "loop.h" #include "wzcrashhandlingproviders.h" +#include "profiling.h" #include @@ -1975,6 +1976,7 @@ static void drawTerrainCombined(const glm::mat4 &ModelViewProjection, const glm: void perFrameTerrainUpdates() { + WZ_PROFILE_SCOPE(perFrameTerrainUpdates); /////////////////////////////////// // set up the lightmap texture @@ -2014,6 +2016,7 @@ void drawTerrainDepthOnly(const glm::mat4 &mvp) */ void drawTerrain(const glm::mat4 &mvp, const glm::mat4& viewMatrix, const Vector3f &cameraPos, const Vector3f &sunPos, const ShadowCascadesInfo& shadowCascades) { + WZ_PROFILE_SCOPE(drawTerrain); const glm::vec4& paramsXLight = lightmapValues.paramsXLight; const glm::vec4& paramsYLight = lightmapValues.paramsYLight; const glm::mat4& lightMatrix = lightmapValues.lightMatrix; From d5e4c06583aaf668f3fb842133d59cee0cdbf6d8 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Sun, 31 Dec 2023 17:46:35 +0100 Subject: [PATCH 2/3] freebsd: fix build --- lib/framework/macros.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/framework/macros.h b/lib/framework/macros.h index 18f8a8da7f5..08945d68824 100644 --- a/lib/framework/macros.h +++ b/lib/framework/macros.h @@ -23,8 +23,13 @@ #ifndef MACROS_H #define MACROS_H +#ifndef MIN #define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif + +#ifndef MAX #define MAX(a, b) (((a) > (b)) ? (a) : (b)) +#endif #define ABSDIF(a,b) ((a)>(b) ? (a)-(b) : (b)-(a)) From e8ca34b2cfd57038745be6b37b5aad8e66754fbc Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Mon, 1 Jan 2024 00:20:12 +0100 Subject: [PATCH 3/3] nvtx: Implement review suggestion. --- src/profiling.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/profiling.cpp b/src/profiling.cpp index fd476ba47e3..12ebb22bfa8 100644 --- a/src/profiling.cpp +++ b/src/profiling.cpp @@ -25,10 +25,14 @@ #include #ifdef WZ_PROFILING_NVTX -#pragma warning( push ) -#pragma warning( disable : 4191 ) +#if defined( _MSC_VER ) +# pragma warning( push ) +# pragma warning( disable : 4191 ) +#endif #include -#pragma warning( pop ) +#if defined( _MSC_VER ) +# pragma warning( pop ) +#endif #endif #ifdef WZ_PROFILING_VTUNE