diff --git a/CMakeLists.txt b/CMakeLists.txt index 69cbc039b0..cffa6c7e76 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -147,6 +147,7 @@ endif(PLASMA_EXTERNAL_RELEASE) # Pipeline Renderers cmake_dependent_option(PLASMA_PIPELINE_DX "Enable DirectX rendering pipeline?" ON "DirectX_FOUND" OFF) cmake_dependent_option(PLASMA_PIPELINE_GL "Enable OpenGL rendering pipeline?" ON "TARGET epoxy::epoxy" OFF) +cmake_dependent_option(PLASMA_PIPELINE_METAL "Enable Metal rendering pipeline?" ON "APPLE" OFF) if(PLASMA_PIPELINE_DX) add_definitions(-DPLASMA_PIPELINE_DX) @@ -156,6 +157,10 @@ if(PLASMA_PIPELINE_GL) add_definitions(-DPLASMA_PIPELINE_GL) endif(PLASMA_PIPELINE_GL) +if(PLASMA_PIPELINE_METAL) + add_definitions(-DPLASMA_PIPELINE_METAL) +endif(PLASMA_PIPELINE_METAL) + # Allow us to disable certain parts of the build option(PLASMA_BUILD_CLIENT "Do we want to build plClient?" ON) cmake_dependent_option(PLASMA_BUILD_MAX_PLUGIN "Do we want to build the 3ds Max plugin?" OFF "TARGET 3dsm" OFF) diff --git a/Sources/Plasma/Apps/plClient/CMakeLists.txt b/Sources/Plasma/Apps/plClient/CMakeLists.txt index 72b9ace7a8..4da137eac5 100644 --- a/Sources/Plasma/Apps/plClient/CMakeLists.txt +++ b/Sources/Plasma/Apps/plClient/CMakeLists.txt @@ -110,7 +110,6 @@ elseif(APPLE) Mac-Cocoa/PLSServerStatus.h ) list(APPEND plClient_RESOURCES - Mac-Cocoa/Assets.xcassets Mac-Cocoa/banner.png Mac-Cocoa/banner@2x.png Mac-Cocoa/MainMenu.xib @@ -158,8 +157,17 @@ if(APPLE) XCODE_ATTRIBUTE_CODE_SIGN_ENTITLEMENTS "${CMAKE_CURRENT_SOURCE_DIR}/Mac-Cocoa/plClient.entitlements" XCODE_ATTRIBUTE_ENABLE_HARDENED_RUNTIME "YES" XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "YES" + XCODE_ATTRIBUTE_PRODUCT_BUNDLE_IDENTIFIER org.Huru.UruExplorer ) target_compile_options(plClient PRIVATE -fobjc-arc) + target_sources(plClient PRIVATE Mac-Cocoa/Assets.xcassets) + set_source_files_properties(Mac-Cocoa/Assets.xcassets ${RESOURCES} PROPERTIES + MACOSX_PACKAGE_LOCATION Resources + ) + install( + TARGETS plClient + DESTINATION 
client + ) if(PLASMA_APPLE_DEVELOPMENT_TEAM_ID) set_target_properties(plClient PROPERTIES @@ -229,6 +237,8 @@ target_link_libraries( pfPython $<$:pfDXPipeline> $<$:pfGLPipeline> + $<$:pfMetalPipeline> + $<$:pfMetalPipelineShaders> CURL::libcurl "$<$:-framework Cocoa>" "$<$:-framework QuartzCore>" diff --git a/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSServerStatus.mm b/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSServerStatus.mm index 2f75d32d7d..3e21430638 100644 --- a/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSServerStatus.mm +++ b/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSServerStatus.mm @@ -64,6 +64,12 @@ - (void)loadServerStatus { NSString* urlString = [NSString stringWithSTString:GetServerStatusUrl()]; NSURL* url = [NSURL URLWithString:urlString]; + + if (!url || !url.host) { + self.serverStatusString = @""; + return; + } + NSURLSessionConfiguration* URLSessionConfiguration = [NSURLSessionConfiguration ephemeralSessionConfiguration]; NSURLSession* session = [NSURLSession sessionWithConfiguration:URLSessionConfiguration diff --git a/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSView.mm b/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSView.mm index ef2c973705..a2a89b74d7 100644 --- a/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSView.mm +++ b/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSView.mm @@ -276,12 +276,6 @@ - (void)resizeDrawable:(CGFloat)scaleFactor } #if PLASMA_PIPELINE_METAL - if (newSize.width == _metalLayer.drawableSize.width && - newSize.height == _metalLayer.drawableSize.height) - { - return; - } - _metalLayer.drawableSize = newSize; #endif [self.delegate renderView:self diff --git a/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm b/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm index a861f94403..0101728f20 100644 --- a/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm +++ b/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm @@ -42,6 +42,12 @@ // System Frameworks #import +#ifdef PLASMA_PIPELINE_GL +#import +#endif +#ifdef PLASMA_PIPELINE_METAL +#import +#endif #import // 
Cocoa client @@ -63,7 +69,13 @@ #include "plCmdParser.h" #include "pfConsoleCore/pfConsoleEngine.h" #include "pfGameGUIMgr/pfGameGUIMgr.h" +#ifdef PLASMA_PIPELINE_GL +#include "pfGLPipeline/plGLPipeline.h" +#endif #include "plInputCore/plInputDevice.h" +#ifdef PLASMA_PIPELINE_METAL +#include "pfMetalPipeline/plMetalPipeline.h" +#endif #include "plMessage/plDisplayScaleChangedMsg.h" #include "plMessageBox/hsMessageBox.h" #include "plNetClient/plNetClientMgr.h" @@ -162,6 +174,8 @@ @interface AppDelegate : NSWindowController device = ((CAMetalLayer *) self.window.contentView.layer).device; +#ifdef HS_DEBUGGING + [self.window setTitle:[NSString stringWithFormat:@"%@ - %@, %@", + productTitle, +#ifdef __arm64__ + @"ARM64", +#else + @"x86_64", +#endif + device.name]]; +#else [self.window setTitle:productTitle]; +#endif + +#else + [NSString stringWithSTString:plProduct::LongName()]; +#endif } - (NSApplicationTerminateReply)applicationShouldTerminate:(NSApplication*)sender @@ -517,6 +549,23 @@ - (NSApplicationPresentationOptions)window:(NSWindow*)window NSApplicationPresentationAutoHideMenuBar; } +- (void)observeValueForKeyPath:(NSString *)keyPath ofObject:(id)object change:(NSDictionary *)change context:(void *)context +{ + if (context == DeviceDidChangeContext) { + // this may not happen on the main queue + dispatch_async(dispatch_get_main_queue(), ^{ + [self updateWindowTitle]; + }); + } else { + [super observeValueForKeyPath:keyPath ofObject:object change:change context:context]; + } +} + +- (void)dealloc +{ + [_renderLayer removeObserver:self forKeyPath:@"device" context:DeviceDidChangeContext]; +} + @end void PumpMessageQueueProc() diff --git a/Sources/Plasma/Apps/plClient/plClient.cpp b/Sources/Plasma/Apps/plClient/plClient.cpp index 9f6b741dd9..2ef6ae8a38 100644 --- a/Sources/Plasma/Apps/plClient/plClient.cpp +++ b/Sources/Plasma/Apps/plClient/plClient.cpp @@ -148,6 +148,9 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #ifdef PLASMA_PIPELINE_GL #include "pfGLPipeline/plGLPipeline.h" #endif +#ifdef PLASMA_PIPELINE_METAL + #include "pfMetalPipeline/plMetalPipeline.h" +#endif #include "pfJournalBook/pfJournalBook.h" #include "pfLocalizationMgr/pfLocalizationMgr.h" #include "pfMoviePlayer/plMoviePlayer.h" @@ -428,6 +431,11 @@ plPipeline* plClient::ICreatePipeline(hsWindowHndl disp, hsWindowHndl hWnd, cons if (renderer == hsG3DDeviceSelector::kDevTypeOpenGL) return new plGLPipeline(disp, hWnd, devMode); #endif + +#ifdef PLASMA_PIPELINE_METAL + if (renderer == hsG3DDeviceSelector::kDevTypeMetal) + return new plMetalPipeline(disp, hWnd, devMode); +#endif return new plNullPipeline(disp, hWnd, devMode); } diff --git a/Sources/Plasma/CoreLib/HeadSpin.h b/Sources/Plasma/CoreLib/HeadSpin.h index ffafc38f65..6f0cf62804 100644 --- a/Sources/Plasma/CoreLib/HeadSpin.h +++ b/Sources/Plasma/CoreLib/HeadSpin.h @@ -73,6 +73,9 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com typedef HINSTANCE HMODULE; typedef long HRESULT; typedef void* HANDLE; +#elif HS_BUILD_FOR_MACOS + typedef void* hsWindowHndl; + typedef void* hsWindowInst; #else typedef int32_t* hsWindowHndl; typedef int32_t* hsWindowInst; diff --git a/Sources/Plasma/CoreLib/plQuality.h b/Sources/Plasma/CoreLib/plQuality.h index 7d2f3b5bec..b0ae10f40e 100644 --- a/Sources/Plasma/CoreLib/plQuality.h +++ b/Sources/Plasma/CoreLib/plQuality.h @@ -64,6 +64,7 @@ class plQuality friend class plClient; friend class plQualitySlider; friend class plDXPipeline; + friend class plMetalPipeline; // Set by the app according to user preference. 
static void SetQuality(int q); diff --git a/Sources/Plasma/FeatureLib/CMakeLists.txt b/Sources/Plasma/FeatureLib/CMakeLists.txt index 345d0cc065..2586232833 100644 --- a/Sources/Plasma/FeatureLib/CMakeLists.txt +++ b/Sources/Plasma/FeatureLib/CMakeLists.txt @@ -23,6 +23,9 @@ add_subdirectory(pfGameScoreMgr) if(PLASMA_PIPELINE_GL) add_subdirectory(pfGLPipeline) endif() +if(PLASMA_PIPELINE_METAL) + add_subdirectory(pfMetalPipeline) +endif() add_subdirectory(pfJournalBook) add_subdirectory(pfLocalizationMgr) add_subdirectory(pfMessage) diff --git a/Sources/Plasma/FeatureLib/inc/CMakeLists.txt b/Sources/Plasma/FeatureLib/inc/CMakeLists.txt index 1469ade8b1..777c9a3e32 100644 --- a/Sources/Plasma/FeatureLib/inc/CMakeLists.txt +++ b/Sources/Plasma/FeatureLib/inc/CMakeLists.txt @@ -14,6 +14,9 @@ target_link_libraries(pfFeatureInc pfGameMgr pfJournalBook pfMessage + $<$:pfDXPipeline> + $<$:pfGLPipeline> + $<$:pfMetalPipeline> pfPython pfSurface ) diff --git a/Sources/Plasma/FeatureLib/inc/pfAllCreatables.h b/Sources/Plasma/FeatureLib/inc/pfAllCreatables.h index a42818ffa8..7caacf6adf 100644 --- a/Sources/Plasma/FeatureLib/inc/pfAllCreatables.h +++ b/Sources/Plasma/FeatureLib/inc/pfAllCreatables.h @@ -61,6 +61,10 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include "pfGLPipeline/pfGLPipelineCreatable.h" #endif +#ifdef PLASMA_PIPELINE_METAL + #include "pfMetalPipeline/pfMetalPipelineCreatable.h" +#endif + #include "pfJournalBook/pfJournalBookCreatable.h" #include "pfMessage/pfMessageCreatable.h" #include "pfPython/pfPythonCreatable.h" diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt new file mode 100644 index 0000000000..2fc73c1de7 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt @@ -0,0 +1,82 @@ +include(FetchContent) +# Download Apple's metal-cpp archive; URL_HASH makes CMake verify its SHA-256 before use. +FetchContent_Declare( + metalcpp + URL_HASH SHA256=0afd87ca851465191ae4e3980aa036c7e9e02fe32e7c760ac1a74244aae6023b + URL "https://developer.apple.com/metal/cpp/files/metal-cpp_macOS13.3_iOS16.4.zip" +) + +FetchContent_MakeAvailable(metalcpp) + +set(pfMetalPipeline_SOURCES + plMetalDevice.cpp + plMetalDeviceRefs.cpp + plMetalMaterialShaderRef.cpp + plMetalPipeline.cpp + plMetalPipelineState.cpp + plMetalPlateManager.cpp + plMetalShader.cpp + plMetalFragmentShader.cpp + plMetalVertexShader.cpp + plMetalTextFont.cpp + plMetalEnumerate.mm + plMetalDevicePerformanceShaders.mm +) + +set(pfMetalPipeline_HEADERS + plMetalDevice.h + plMetalDeviceRef.h + plMetalMaterialShaderRef.h + plMetalPipeline.h + plMetalPipelineState.h + plMetalPlateManager.h + plMetalShader.h + plMetalTextFont.h + plMetalFragmentShader.h + plMetalVertexShader.h + ShaderSrc/ShaderTypes.h + ShaderSrc/ShaderVertex.h + pfMetalPipelineCreatable.h +) + +plasma_library(pfMetalPipeline SOURCES ${pfMetalPipeline_SOURCES} ${pfMetalPipeline_HEADERS}) +target_link_libraries(pfMetalPipeline + PUBLIC + CoreLib + pnNucleusInc + plPipeline + "-framework Metal" + "-framework MetalPerformanceShaders" + PRIVATE + plStatusLog + INTERFACE + pnFactory +) + +target_include_directories(pfMetalPipeline PUBLIC ${metalcpp_SOURCE_DIR}) +target_include_directories(pfMetalPipeline PUBLIC "ShaderSrc") + +source_group("Source Files" 
FILES ${pfMetalPipeline_SOURCES}) +source_group("Header Files" FILES ${pfMetalPipeline_HEADERS}) + +add_library(pfMetalPipelineShaders INTERFACE) +set(pfMetalPipeline_SHADERS + ShaderSrc/FixedPipelineShaders.metal + ShaderSrc/PlateShaders.metal + ShaderSrc/BiasNormals.metal + ShaderSrc/CompCosines.metal + ShaderSrc/WaveSet7.metal + ShaderSrc/Grass.metal + ShaderSrc/WaveDecEnv.metal + ShaderSrc/Avatar.metal + ShaderSrc/WaveDec1Lay_7.metal + ShaderSrc/WaveRip.metal + ShaderSrc/Clear.metal + ShaderSrc/GammaCorrection.metal + ShaderSrc/TextFontShader.metal +) +set_source_files_properties(${pfMetalPipeline_SHADERS} TARGET_DIRECTORY plClient PROPERTIES LANGUAGE METAL) +# source group does not work with an interface library in Xcode, but maybe someday... +source_group("Metal Shaders" FILES ${pfMetalPipeline_SHADERS}) + +target_sources(pfMetalPipelineShaders INTERFACE ${pfMetalPipeline_SHADERS}) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal new file mode 100644 index 0000000000..e828db9f79 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal @@ -0,0 +1,77 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include +using namespace metal; + +// Data handed from PreprocessAvatarVertexShader to PreprocessAvatarFragmentShader. +typedef struct +{ + float4 position [[position]]; + float2 uvPosition; + half4 color; +} PreprocessAvatarTexturesInOut; +// Per-vertex input: 2D position in attribute 0, UV coordinate in attribute 1. +typedef struct +{ + float2 position [[attribute(0)]]; + float2 uvPosition [[attribute(1)]]; +} PreprocessAvatarVertex; +// Emits the 2D position at z = 0, w = 1, forwards the UV, and writes color as explicit zero. +vertex PreprocessAvatarTexturesInOut PreprocessAvatarVertexShader(PreprocessAvatarVertex in [[stage_in]]) +{ + return { float4(in.position.x, in.position.y, 0.0, 1.0 ), in.uvPosition, half4(0.0h) }; +} +// Samples layer at the interpolated UV (linear filtering, clamp-to-zero) and multiplies by blendColor. +fragment half4 PreprocessAvatarFragmentShader(PreprocessAvatarTexturesInOut in [[stage_in]], + texture2d layer [[ texture(0) ]], + constant float4& blendColor [[ buffer(0 )]]) +{ + constexpr sampler colorSampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::clamp_to_zero); + + half4 colorSample = layer.sample(colorSampler, in.uvPosition.xy) * half4(blendColor); + + return colorSample; +} diff --git 
a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal new file mode 100644 index 0000000000..bb055a7feb --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal @@ -0,0 +1,124 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include +using namespace metal; + +#include "ShaderVertex.h" + +typedef struct +{ + float4 TexU0; + float4 TexV0; + + float4 TexU1; + float4 TexV1; + + float4 Numbers; + + float4 ScaleBias; +} vs_BiasNormalsUniforms; + +typedef struct +{ + float4 position [[position]]; + float4 texCoord0; + float4 texCoord1; + //not actually colors, just emulating the registers + float4 color1; + float4 color2; +} vs_BiasNormalsOut; + +vertex vs_BiasNormalsOut vs_BiasNormals(Vertex in [[ stage_in ]], + constant vs_BiasNormalsUniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) +{ + vs_BiasNormalsOut out; + + out.position = float4(in.position, 1.0); + + out.texCoord0 = float4(dot(float4(in.texCoord1, 1.0), uniforms.TexU0), + dot(float4(in.texCoord1, 1.0), uniforms.TexV0), + 0.f, + 1.f); + + out.texCoord1 = float4(dot(float4(in.texCoord1, 1.0), uniforms.TexU1), + dot(float4(in.texCoord1, 1.0), uniforms.TexV1), + 0.f, + 1.f); + + out.color1 = uniforms.ScaleBias.xxzz; + out.color2 = uniforms.ScaleBias.yyzz; + + return out; +} + +fragment float4 ps_BiasNormals(vs_BiasNormalsOut in [[ stage_in ]], + texture2d t0 [[ texture(0) ]], + texture2d t1 [[ texture(1) ]]) +{ + // Sum two samples and apply the scale/bias forwarded by the vertex stage. + // + // t0 is sampled with texCoord0 and t1 with texCoord1; vs_BiasNormals builds + // those two coordinate sets from different uniform rows (TexU0/TexV0 and + // TexU1/TexV1). + // + // rgb: each sample is re-centered by subtracting 0.5, the two are added, + // then multiplied by color1 and offset by color2. + // a: the two alpha values are added unmodified. + // + // color1 and color2 are not colors; they carry ScaleBias.xxzz and + // ScaleBias.yyzz from the vertex stage. + // + // Both textures share one linear, repeating sampler. 
+ + constexpr sampler colorSampler = sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::repeat); + + float4 sample1 = t0.sample(colorSampler, in.texCoord0.xy); + float4 sample2 = t1.sample(colorSampler, in.texCoord1.xy); + float4 out = float4(sample1.rgb - 0.5 + sample2.rgb - 0.5, sample1.a + sample2.a); + out.rgb = (out.rgb * in.color1.rgb) + in.color2.rgb; + + return out; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Clear.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Clear.metal new file mode 100644 index 0000000000..7669bdc516 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Clear.metal @@ -0,0 +1,84 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include +using namespace metal; +// Function constants selecting which outputs and arguments of clearFragment exist. +constant const bool ShouldClearDepth [[ function_constant(0) ]]; +constant const bool ShouldClearColor [[ function_constant(1) ]]; + +struct ClearVertexIn +{ + float2 position [[ attribute(0) ]]; +}; + +struct ClearVertexOut +{ + float4 position [[ position ]]; +}; + +struct ClearFragmentOut +{ + float depth [[depth(any), function_constant(ShouldClearDepth)]]; + half4 color [[color(0), function_constant(ShouldClearColor)]]; +}; + +vertex ClearVertexOut clearVertex(ClearVertexIn in [[ stage_in ]]) +{ + ClearVertexOut out; + // Pass the 2D position through unchanged (z fixed at 0.5, w at 1.0). We're clearing in NDC space. 
+ out.position = float4(in.position, 0.5, 1.0); + return out; +} +// Writes clearDepth to the depth output and clearColor to color(0), each only when its function constant is true. +fragment ClearFragmentOut clearFragment(constant half4& clearColor [[ buffer(0), function_constant(ShouldClearColor) ]], + constant float& clearDepth [[ buffer(1), function_constant(ShouldClearDepth) ]]) +{ + ClearFragmentOut out; + if (ShouldClearDepth) { + out.depth = clearDepth; + } + if (ShouldClearColor) { + out.color = clearColor; + } + return out; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal new file mode 100644 index 0000000000..a3620fc26f --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal @@ -0,0 +1,133 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include +using namespace metal; + +#include "ShaderVertex.h" + +typedef struct +{ + float4 c0; + float4 c1; + float4 c2; + float4 c3; + float4 c4; +} vs_CompCosinesUniforms; + +typedef struct +{ + float4 position [[position]]; + float4 texCoord0; + float4 texCoord1; + float4 texCoord2; + float4 texCoord3; +} vs_CompCosinesnInOut; +// Builds four texture coordinates whose x is dot((in.texCoord1, 1), c0..c3); y and z stay 0 and w stays 1. +vertex vs_CompCosinesnInOut vs_CompCosines(Vertex in [[ stage_in ]], + constant vs_CompCosinesUniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) +{ + vs_CompCosinesnInOut out; + + out.position = float4(in.position, 1.0); + + float4 texCoord = float4(0, 0, 0, 1); + texCoord.x = dot(float4(in.texCoord1, 1.0), uniforms.c0); + out.texCoord0 = texCoord; + texCoord.x = dot(float4(in.texCoord1, 1.0), uniforms.c1); + out.texCoord1 = texCoord; + texCoord.x = dot(float4(in.texCoord1, 1.0), uniforms.c2); + out.texCoord2 = texCoord; + texCoord.x = dot(float4(in.texCoord1, 1.0), uniforms.c3); 
+ out.texCoord3 = texCoord; + + return out; +} +// Fragment-stage constants: c0..c3 weight the four samples, c4 scales the sum and c5 offsets it. +typedef struct +{ + float4 c0; + float4 c1; + float4 c2; + float4 c3; + float4 c4; + float4 c5; +} ps_CompCosinesUniforms; +// NOTE(review): the uniforms buffer index below reuses a VertexShaderArgument* enumerator in a fragment function - confirm this is the intended binding. +fragment float4 ps_CompCosines(vs_CompCosinesnInOut in [[ stage_in ]], + texture2d t0 [[ texture(0) ]], + texture2d t1 [[ texture(1) ]], + texture2d t2 [[ texture(2) ]], + texture2d t3 [[ texture(3) ]], + constant ps_CompCosinesUniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) +{ + // Composite the cosines together. + // Input map is cosine(pix) for each of + // the 4 waves. + // + // The constants are set up so: + // Nx = -freq * amp * dirX * cos(pix); + // Ny = -freq * amp * dirY * cos(pix); + // So c[i].x = -freq[i] * amp[i] * dirX[i] + // etc. + // All textures are: + // (r,g,b,a) = (cos(), cos(), 1, 1) + // + // So c[0].z = 1, but all other c[i].z = 0 + // Note also that c4 (scale) and c5 (offset) are applied to the sum at the end. + + constexpr sampler colorSampler = sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::repeat); + + float4 out = 2.f * (t0.sample(colorSampler, fract(in.texCoord0.xy)) - 0.5f) * uniforms.c0; + out += 2.f * (t1.sample(colorSampler, fract(in.texCoord1.xy)) - 0.5f) * uniforms.c1; + out += 2.f * (t2.sample(colorSampler, fract(in.texCoord2.xy)) - 0.5f) * uniforms.c2; + out += 2.f * (t3.sample(colorSampler, fract(in.texCoord3.xy)) - 0.5f) * uniforms.c3; + // Now scale by c4 and offset by c5 to bias it back into range [0..1] for output. 
+ out *= uniforms.c4; + out += uniforms.c5; + out.b = 1.f; + out.a = 1.f; + return out; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal new file mode 100644 index 0000000000..6b9bbc8884 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -0,0 +1,701 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. 
Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + + +#include +using namespace metal; +// File for Metal kernel and shader functions + +#include +#include + +// Including header shared between this Metal shader code and Swift/C code executing Metal API commands +#include "ShaderVertex.h" +#include "ShaderTypes.h" + +#define GMAT_STATE_ENUM_START(name) enum name { +#define GMAT_STATE_ENUM_VALUE(name, val) name = val, +#define GMAT_STATE_ENUM_END(name) }; + +#include "hsGMatStateEnums.h" + +enum plUVWSrcModifiers: uint32_t +{ + kUVWPassThru = 0x00000000, + kUVWIdxMask = 0x0000ffff, + kUVWNormal = 0x00010000, + kUVWPosition = 0x00020000, + kUVWReflect = 0x00030000 +}; + +using namespace metal; + +constant const uint8_t sourceType1 [[ function_constant(FunctionConstantSources + 0) ]]; +constant const uint8_t sourceType2 [[ function_constant(FunctionConstantSources + 1) ]]; +constant const uint8_t sourceType3 [[ function_constant(FunctionConstantSources + 2) ]]; +constant const uint8_t sourceType4 [[ function_constant(FunctionConstantSources + 3) ]]; +constant const uint8_t sourceType5 [[ function_constant(FunctionConstantSources + 4) ]]; +constant const uint8_t sourceType6 [[ function_constant(FunctionConstantSources + 5) ]]; +constant const uint8_t sourceType7 [[ function_constant(FunctionConstantSources + 6) ]]; +constant const uint8_t sourceType8 [[ function_constant(FunctionConstantSources + 7) ]]; + +constant const uint32_t blendModes1 [[ function_constant(FunctionConstantBlendModes + 0) ]]; +constant const uint32_t blendModes2 [[ function_constant(FunctionConstantBlendModes + 1) ]]; +constant const uint32_t blendModes3 [[ function_constant(FunctionConstantBlendModes + 
2) ]]; +constant const uint32_t blendModes4 [[ function_constant(FunctionConstantBlendModes + 3) ]]; +constant const uint32_t blendModes5 [[ function_constant(FunctionConstantBlendModes + 4) ]]; +constant const uint32_t blendModes6 [[ function_constant(FunctionConstantBlendModes + 5) ]]; +constant const uint32_t blendModes7 [[ function_constant(FunctionConstantBlendModes + 6) ]]; +constant const uint32_t blendModes8 [[ function_constant(FunctionConstantBlendModes + 7) ]]; + +constant const uint32_t miscFlags1 [[ function_constant(FunctionConstantLayerFlags + 0) ]]; +constant const uint32_t miscFlags2 [[ function_constant(FunctionConstantLayerFlags + 1) ]]; +constant const uint32_t miscFlags3 [[ function_constant(FunctionConstantLayerFlags + 2) ]]; +constant const uint32_t miscFlags4 [[ function_constant(FunctionConstantLayerFlags + 3) ]]; +constant const uint32_t miscFlags5 [[ function_constant(FunctionConstantLayerFlags + 4) ]]; +constant const uint32_t miscFlags6 [[ function_constant(FunctionConstantLayerFlags + 5) ]]; +constant const uint32_t miscFlags7 [[ function_constant(FunctionConstantLayerFlags + 6) ]]; +constant const uint32_t miscFlags8 [[ function_constant(FunctionConstantLayerFlags + 7) ]]; + +#define MAX_BLEND_PASSES 8 +constant const uint8_t sourceTypes[MAX_BLEND_PASSES] = { sourceType1, sourceType2, sourceType3, sourceType4, sourceType5, sourceType6, sourceType7, sourceType8}; +constant const uint32_t blendModes[MAX_BLEND_PASSES] = { blendModes1, blendModes2, blendModes3, blendModes4, blendModes5, blendModes6, blendModes7, blendModes8}; +constant const uint32_t miscFlags[MAX_BLEND_PASSES] = { miscFlags1, miscFlags2, miscFlags3, miscFlags4, miscFlags5, miscFlags6, miscFlags7, miscFlags8}; + constant const uint8_t passCount = (sourceType1 > 0) + (sourceType2 > 0) + (sourceType3 > 0) + (sourceType4 > 0) + (sourceType5 > 0) + (sourceType6 > 0) + (sourceType7 > 0) + (sourceType8 > 0); + +constant const bool has2DTexture1 = (sourceType1 == PassTypeTexture 
&& hasLayer1); +constant const bool has2DTexture2 = (sourceType2 == PassTypeTexture && hasLayer2); +constant const bool has2DTexture3 = (sourceType3 == PassTypeTexture && hasLayer3); +constant const bool has2DTexture4 = (sourceType4 == PassTypeTexture && hasLayer4); +constant const bool has2DTexture5 = (sourceType5 == PassTypeTexture && hasLayer5); +constant const bool has2DTexture6 = (sourceType6 == PassTypeTexture && hasLayer6); +constant const bool has2DTexture7 = (sourceType7 == PassTypeTexture && hasLayer7); +constant const bool has2DTexture8 = (sourceType8 == PassTypeTexture && hasLayer8); + +constant const bool hasCubicTexture1 = (sourceType1 == PassTypeCubicTexture && hasLayer1); +constant const bool hasCubicTexture2 = (sourceType2 == PassTypeCubicTexture && hasLayer2); +constant const bool hasCubicTexture3 = (sourceType3 == PassTypeCubicTexture && hasLayer3); +constant const bool hasCubicTexture4 = (sourceType4 == PassTypeCubicTexture && hasLayer4); +constant const bool hasCubicTexture5 = (sourceType5 == PassTypeCubicTexture && hasLayer5); +constant const bool hasCubicTexture6 = (sourceType6 == PassTypeCubicTexture && hasLayer6); +constant const bool hasCubicTexture7 = (sourceType7 == PassTypeCubicTexture && hasLayer7); +constant const bool hasCubicTexture8 = (sourceType8 == PassTypeCubicTexture && hasLayer8); + +struct FragmentShaderArguments +{ + texture2d textures [[ texture(FragmentShaderArgumentAttributeTextures), function_constant(has2DTexture1) ]]; + texture2d texture2 [[ texture(FragmentShaderArgumentAttributeTextures + 1), function_constant(has2DTexture2) ]]; + texture2d texture3 [[ texture(FragmentShaderArgumentAttributeTextures + 2), function_constant(has2DTexture3) ]]; + texture2d texture4 [[ texture(FragmentShaderArgumentAttributeTextures + 3), function_constant(has2DTexture4) ]]; + texture2d texture5 [[ texture(FragmentShaderArgumentAttributeTextures + 4), function_constant(has2DTexture5) ]]; + texture2d texture6 [[ 
texture(FragmentShaderArgumentAttributeTextures + 5), function_constant(has2DTexture6) ]]; + texture2d texture7 [[ texture(FragmentShaderArgumentAttributeTextures + 6), function_constant(has2DTexture7) ]]; + texture2d texture8 [[ texture(FragmentShaderArgumentAttributeTextures + 7), function_constant(has2DTexture8) ]]; + texturecube cubicTextures [[ texture(FragmentShaderArgumentAttributeCubicTextures), function_constant(hasCubicTexture1) ]]; + texturecube cubicTexture2 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 1), function_constant(hasCubicTexture2) ]]; + texturecube cubicTexture3 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 2), function_constant(hasCubicTexture3) ]]; + texturecube cubicTexture4 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 3), function_constant(hasCubicTexture4) ]]; + texturecube cubicTexture5 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 4), function_constant(hasCubicTexture5) ]]; + texturecube cubicTexture6 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 5), function_constant(hasCubicTexture6) ]]; + texturecube cubicTexture7 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 6), function_constant(hasCubicTexture7) ]]; + texturecube cubicTexture8 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 7), function_constant(hasCubicTexture8) ]]; + const constant plMetalFragmentShaderArgumentBuffer* bufferedUniforms [[ buffer(FragmentShaderArgumentUniforms) ]]; + half4 sampleLayer(const size_t index, const half4 vertexColor, const uint8_t passType, float3 sampleCoord) const; + // number of layers is variable, so have to declare these samplers the ugly way + sampler samplers [[ sampler(0), function_constant(hasLayer1) ]]; + sampler sampler2 [[ sampler(1), function_constant(hasLayer2) ]]; + sampler sampler3 [[ sampler(2), function_constant(hasLayer3) ]]; + sampler sampler4 [[ sampler(3), function_constant(hasLayer4) ]]; + sampler sampler5 [[ sampler(4), 
// Vertex-to-fragment interpolants for the fixed-function style pipeline shaders.
typedef struct
{
    // Clip-space position consumed by the rasterizer.
    float4 position [[position]];
    // One texture coordinate per layer; each member only exists in the
    // specialized shader when the matching hasLayerN constant is true.
    float3 texCoord1 [[function_constant(hasLayer1)]];
    float3 texCoord2 [[function_constant(hasLayer2)]];
    float3 texCoord3 [[function_constant(hasLayer3)]];
    float3 texCoord4 [[function_constant(hasLayer4)]];
    float3 texCoord5 [[function_constant(hasLayer5)]];
    float3 texCoord6 [[function_constant(hasLayer6)]];
    float3 texCoord7 [[function_constant(hasLayer7)]];
    float3 texCoord8 [[function_constant(hasLayer8)]];
    // Lit vertex color written by the vertex shader; interpolated with
    // centroid sampling and perspective correction.
    half4 vtxColor [[ centroid_perspective ]];
    // Fog term produced by VertexUniforms::calcFog in the vertex shader.
    half4 fogColor;
} ColorInOut;
+ const half4 inColor = half4(in.color.b, in.color.g, in.color.r, in.color.a) / half4(255.f); + + const half3 MAmbient = mix(inColor.rgb, uniforms.ambientCol, uniforms.ambientSrc); + const half4 MDiffuse = mix(inColor, uniforms.diffuseCol, uniforms.diffuseSrc); + const half3 MEmissive = mix(inColor.rgb, uniforms.emissiveCol, uniforms.emissiveSrc); + + half3 LAmbient = half3(0.h, 0.h, 0.h); + half3 LDiffuse = half3(0.h, 0.h, 0.h); + + const float3 Ndirection = normalize(float4(in.normal, 0.f) * uniforms.localToWorldMatrix).xyz; + + float4 position = float4(in.position, 1.f) * uniforms.localToWorldMatrix; + if (temp_hasOnlyWeight1) { + const float4 position2 = blendMatrix1 * float4(in.position, 1.f); + position = (in.weight1 * position) + ((1.f - in.weight1) * position2); + } + + for (size_t i = 0; i < lights.count; i++) { + constant const plMetalShaderLightSource *lightSource = &lights.lampSources[i]; + if (lightSource->scale == 0.0h) + continue; + + // direction.w is attenuation + float4 direction; + + if (lightSource->position.w == 0.f) { + // Directional Light with no attenuation + direction = float4(-(lightSource->direction).xyz, 1.f); + } else { + // Omni Light in all directions + const float3 v2l = lightSource->position.xyz - position.xyz; + const float distance = length(v2l); + direction.xyz = normalize(v2l); + + direction.w = 1.f / (lightSource->constAtten + lightSource->linAtten * distance + lightSource->quadAtten * pow(distance, 2.f)); + + if (lightSource->spotProps.x > 0.f) { + // Spot Light with cone falloff + const float theta = dot(direction.xyz, normalize(-lightSource->direction).xyz); + // inner cutoff + const float gamma = lightSource->spotProps.y; + // outer cutoff + const float phi = lightSource->spotProps.z; + const float epsilon = (gamma - phi); + const float intensity = clamp((theta - phi) / epsilon, 0.f, 1.f); + + direction.w *= pow(intensity, lightSource->spotProps.x); + } + } + + LAmbient.rgb = LAmbient.rgb + half3(direction.w * 
/*
 Returns the source color for one layer pass.

 index       - pass index; selects the texture, sampler and misc flags to use
 vertexColor - interpolated vertex color, returned unchanged for color passes
 passType    - one of the plMetalLayerPassType values for this pass
 sampleCoord - texture coordinate for this pass (xy for 2D, xyz for cube maps)

 Unknown pass types produce transparent black.
 */
half4 FragmentShaderArguments::sampleLayer(const size_t index, const half4 vertexColor, const uint8_t passType, float3 sampleCoord) const
{
    // Color passes never touch a texture, so bail out before any coordinate work.
    if (passType == PassTypeColor) {
        return vertexColor;
    }

    // Projected layers carry the divisor in z.
    const bool needsPerspectiveDivide = (miscFlags[index] & kMiscPerspProjection) != 0;
    if (needsPerspectiveDivide) {
        sampleCoord.xy /= sampleCoord.z;
    }

    // The texture and sampler members are declared one after another, so the
    // first member's address is indexed like an array to reach pass `index`.
    if (passType == PassTypeTexture) {
        return (&textures)[index].sample((&samplers)[index], sampleCoord.xy);
    }

    if (passType == PassTypeCubicTexture) {
        return (&cubicTextures)[index].sample((&samplers)[index], sampleCoord.xyz);
    }

    return half4(0.h);
}
/*
 Blends one upper layer sample into the running color.

 srcSample  - sample of the layer being applied (taken by value; modified locally)
 destSample - running color from the previous layers; updated in place
 blendFlags - blend flags for this layer (mode selected by kBlendMask plus modifier bits)
 */
constexpr void blend(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags)
{
    const bool invertAlpha = (blendFlags & kBlendInvertAlpha) != 0;

    // Optional color inversion is applied to the incoming sample before any mode runs.
    if (blendFlags & kBlendInvertColor) {
        srcSample.rgb = 1.0h - srcSample.rgb;
    }

    const uint32_t blendMode = blendFlags & kBlendMask;
    switch (blendMode)
    {
        case kBlendAddColorTimesAlpha:
            //hsAssert(false, "Blend mode unsupported on upper layers");
            break;

        case kBlendAlpha:
        {
            // Color stage: always driven by the un-inverted texture alpha.
            if (!(blendFlags & kBlendNoTexColor)) {
                if (invertAlpha) {
                    // color = texture.rgb + (texture.a * prev)
                    destSample.rgb = (srcSample.rgb + (srcSample.a * destSample.rgb));
                } else {
                    // color = mix(prev, texture.rgb, texture.a)
                    destSample.rgb = mix(destSample.rgb, srcSample.rgb, srcSample.a);
                }
            }

            // Alpha stage: optionally invert, then combine per the alpha op bits.
            if (invertAlpha) {
                // 1.0 - texture.a
                srcSample.a = 1.0h - srcSample.a;
            }

            const uint32_t alphaOp = blendFlags & ( kBlendAlphaAdd | kBlendAlphaMult );
            if (alphaOp == kBlendAlphaAdd) {
                destSample.a = srcSample.a + destSample.a;
            } else if (alphaOp == kBlendAlphaMult) {
                destSample.a = srcSample.a * destSample.a;
            }
            // Any other combination leaves the running alpha untouched.
            break;
        }

        case kBlendAdd:
            // color = texture.rgb + prev
            destSample.rgb = srcSample.rgb + destSample.rgb;
            break;

        case kBlendMult:
            // color = color * prev
            destSample.rgb = srcSample.rgb * destSample.rgb;
            break;

        case kBlendDot3:
            // color = (color.r * prev.r + color.g * prev.g + color.b * prev.b)
            // The scalar result is written to every channel, alpha included.
            destSample = dot(srcSample.rgb, destSample.rgb);
            break;

        case kBlendAddSigned:
            // color = color + prev - 0.5
            destSample.rgb = srcSample.rgb + destSample.rgb - 0.5h;
            break;

        case kBlendAddSigned2X:
            // color = (color + prev - 0.5) * 2
            destSample.rgb = 2.0h * (srcSample.rgb + destSample.rgb - 0.5h);
            break;

        case 0:
            // No blend mode: color = texture.rgb
            destSample.rgb = srcSample.rgb;
            break;
    }
}
 This re-implements shadows in a programmable pipeline without altering the light state. This change should
 allow lights to be managed more efficiently, since the same light no longer needs to be changed multiple
 times mid-render. The Direct3D pipeline would alter lights mid-render to control the strength of the shadow
 cast onto a mesh. Instead, this shader takes a shadow state struct that describes the shadow source and has
 strength as a discrete property. There is no need to push an entirely new light table.
    // alpha blend goes here
+ +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
// Vertex layout for the full-screen gamma correction pass.
// gammaCorrectVertex reads these directly out of buffer(0) by vertex_id, so the
// attribute indices are not used for fetching there; they are kept distinct so
// the struct stays valid if it is ever bound as a [[stage_in]] argument
// (the two members previously both claimed attribute 0).
struct GammaVertexIn
{
    // Position passed straight through as the xy of the output position.
    float2 position [[ attribute(0) ]];
    // Texture coordinate forwarded to the fragment stage.
    float2 texturePosition [[ attribute(1) ]];
};
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
/*
 Grass vertex shader: displaces vertices with a sum of four waves and passes
 color and UVs on to ps_GrassShader.

 in       - stage-in vertex (position, color, texCoord1 are read)
 uniforms - grass constants; intConstants and piConstants are reserved in the
            buffer layout but not read here

 Returns the displaced, projected position plus normalized color and texCoord.
 */
vertex vs_GrassInOut vs_GrassShader(Vertex in [[stage_in]],
                                    constant vs_GrassUniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]])
{
    vs_GrassInOut out;

    // Phase of each of the four waves (one per float4 lane): project the vertex
    // XY onto the per-wave direction. The Y term must use waveDirY; using
    // waveDirX for both terms ignored the Y component of every wave direction.
    float4 r0 = (in.position.x * uniforms.waveDirX) + (in.position.y * uniforms.waveDirY);

    r0 += (uniforms.time.x * uniforms.waveSpeed); // scale by speed and add to X,Y input
    r0 = fract(r0);

    // Remap the [0, 1) fraction to [-pi, pi).
    r0 = (r0 - 0.5f) * M_PI_F * 2.f;

    // Odd powers of the phase for the polynomial below.
    float4 pow2 = r0 * r0;
    float4 pow3 = pow2 * r0;
    float4 pow5 = pow2 * pow3;
    float4 pow7 = pow2 * pow5;
    float4 pow9 = pow2 * pow7;

    // Odd-power polynomial with coefficients from sinConstants
    // (presumably a series approximation of sine -- confirm against the caller).
    r0 += pow3 * uniforms.sinConstants.x;
    r0 += pow5 * uniforms.sinConstants.y;
    r0 += pow7 * uniforms.sinConstants.z;
    r0 += pow9 * uniforms.sinConstants.w;

    // Combine the four wave values into a displacement along each axis.
    float3 offset = float3(
                           dot(r0, uniforms.waveDistortX),
                           dot(r0, uniforms.waveDistortY),
                           dot(r0, uniforms.waveDistortZ)
                           );

    offset *= (2.f * (1.f - in.texCoord1.y)); // mult by Y tex coord. So the waves only affect the top verts

    float4 position = float4(in.position.xyz + offset, 1);
    out.position = position * uniforms.Local2NDC;

    // NOTE(review): pipelineVertexShader swaps the r and b channels of in.color
    // before use; this shader does not. Confirm which ordering is intended here.
    out.color = float4(in.color.r, in.color.g, in.color.b, in.color.a) / 255.f;
    out.texCoord = float4(in.texCoord1, 0.f);

    return out;
}
/*
 Vertex shader for screen-space plates.

 in       - plate vertex (2D position and texture coordinate)
 uniforms - shared vertex uniforms; projectionMatrix, localToWorldMatrix and
            uvTransforms[0] are read
 v_id     - vertex index (unused)

 Returns the transformed position with Y negated, the transformed texture
 coordinate with V flipped, and a constant +Z normal.
 */
vertex ColorInOut plateVertexShader(PlateVertex in [[stage_in]],
                                    constant VertexUniforms & uniforms [[ buffer(VertexShaderArgumentFixedFunctionUniforms) ]],
                                    uint v_id [[ vertex_id ]])
{
    // Position: projection first, then the plate's local-to-world transform.
    const float4 projected = float4(in.position, 0.f, 1.f) * uniforms.projectionMatrix;
    float4 platePosition = projected * uniforms.localToWorldMatrix;
    platePosition.y *= -1.f;

    // Texture coordinate: apply the first UV transform, then flip V.
    float3 plateUV = (float4(in.texCoord, 1.0) * uniforms.uvTransforms[0].transform).xyz;
    plateUV.y = 1.f - plateUV.y;

    ColorInOut out;
    out.position = platePosition;
    out.texCoord = plateUV;
    out.normal = float4(0.f, 0.f, 1.f, 0.f);
    return out;
}
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#ifndef ShaderTypes_h +#define ShaderTypes_h + +#include + +#ifndef __METAL_VERSION__ +#include + +typedef _Float16 half; +typedef __attribute__((__ext_vector_type__(2))) half half2; +typedef __attribute__((__ext_vector_type__(3))) half half3; +typedef __attribute__((__ext_vector_type__(4))) half half4; +#endif + +enum plMetalVertexShaderArgument +{ + /// Material State + VertexShaderArgumentFixedFunctionUniforms = 2, + /// Uniform table for Plasma dynamic shaders + VertexShaderArgumentMaterialShaderUniforms = 3, + /// Light Table + VertexShaderArgumentLights = 4, + /// Blend matrix for GPU side animation blending + VertexShaderArgumentBlendMatrix1 = 6, + /// Describes the state of a shadow caster for shadow cast shader + VertexShaderArgumentShadowState = 9 +}; + +enum plMetalFragmentShaderArgumentIndex +{ + /// Texture is a legacy argument for the simpler plate shader + FragmentShaderArgumentTexture = 1, + /// Fragment uniforms + 
/// Function constant indices used to specialize the shaders at pipeline build
/// time. Entries that reserve eight values are indexed as `base + layer` by the
/// shader source.
enum plMetalFunctionConstant
{
    /// Number of UVs in the FVF vertex layout.
    FunctionConstantNumUVs = 0,
    /// Number of layers the shader will need to render
    FunctionConstantNumLayers = 1,
    /// Source type of the material texture. Metal needs to know if the texture will
    /// be cubic or 2D in advance. Eight values reserved.
    FunctionConstantSources = 2,
    /// Blend modes for each of the layers. Eight values reserved.
    FunctionConstantBlendModes = 10,
    /// Render flags for each layer. Eight values reserved.
    FunctionConstantLayerFlags = 18,
    /// Number of weights in the FVF vertex layout.
    FunctionConstantNumWeights = 26,
};
const float4 normal, const float4 camPosition) constant; + half4 calcFog(float4 camPosition) constant; +#endif +}; +#ifndef __METAL_VERSION__ +static_assert(std::is_trivial_v, "VertexUniforms must be a trivial type!"); +#endif + +#define kMetalMaxLightCount 32 + +struct plMetalLights +{ + uint8_t count; + plMetalShaderLightSource lampSources[kMetalMaxLightCount]; +}; +#ifndef __METAL_VERSION__ +static_assert(std::is_trivial_v, "plMetalLights must be a trivial type!"); +#endif + +struct plShadowState +{ + simd::float3 lightPosition; + simd::float3 lightDirection; + bool directional; + float power; + half opacity; +}; +#ifndef __METAL_VERSION__ +static_assert(std::is_trivial_v, "plShadowState must be a trivial type!"); +#endif + +#endif /* ShaderTypes_h */ + diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderVertex.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderVertex.h new file mode 100644 index 0000000000..c6b94e1964 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderVertex.h @@ -0,0 +1,82 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+
+Additional permissions under GNU GPL version 3 section 7
+
+If you modify this Program, or any covered work, by linking or
+combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK,
+NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent
+JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK
+(or a modified version of those libraries),
+containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA,
+PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG
+JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the
+licensors of this Program grant you additional
+permission to convey the resulting work. Corresponding Source for a
+non-source form of such a combination shall include the source code for
+the parts of OpenSSL and IJG JPEG Library used as well as that of the covered
+work.
+
+You can contact Cyan Worlds, Inc. by email legal@cyan.com
+ or by snail mail at:
+ Cyan Worlds, Inc.
+ 14617 N Newport Hwy
+ Mead, WA 99021
+
+*==LICENSE==*/
+#include "ShaderTypes.h"
+
+// Number of UV sets, supplied as a Metal function constant. hasTextureN is
+// true when at least N UV sets are present and gates texCoordN in Vertex.
+constant ushort num_uvs [[ function_constant(FunctionConstantNumUVs) ]];
+constant bool hasTexture1 = num_uvs > 0;
+constant bool hasTexture2 = num_uvs > 1;
+constant bool hasTexture3 = num_uvs > 2;
+constant bool hasTexture4 = num_uvs > 3;
+constant bool hasTexture5 = num_uvs > 4;
+constant bool hasTexture6 = num_uvs > 5;
+constant bool hasTexture7 = num_uvs > 6;
+constant bool hasTexture8 = num_uvs > 7;
+
+// Number of layers, supplied as a function constant. hasLayerN is true when
+// at least N layers are present (these flags are not consumed in this header).
+constant uint8_t num_layers [[ function_constant(FunctionConstantNumLayers) ]];
+constant bool hasLayer1 = num_layers > 0;
+constant bool hasLayer2 = num_layers > 1;
+constant bool hasLayer3 = num_layers > 2;
+constant bool hasLayer4 = num_layers > 3;
+constant bool hasLayer5 = num_layers > 4;
+constant bool hasLayer6 = num_layers > 5;
+constant bool hasLayer7 = num_layers > 6;
+constant bool hasLayer8 = num_layers > 7;
+
+// Number of vertex weights, supplied as a function constant. hasWeight1 gates
+// the weight1 attribute; temp_hasOnlyWeight1 is true for exactly one weight.
+constant uint8_t num_weights [[ function_constant(FunctionConstantNumWeights) ]];
+constant bool hasWeight1 = num_weights > 0;
+constant bool temp_hasOnlyWeight1 = num_weights == 1;
+
+// Stage-in vertex layout shared by the shaders that include this header.
+// position, normal and color are always present; weight1 and texCoord1..8
+// exist only when their function-constant boolean is true. Texture
+// coordinates occupy consecutive attribute slots from VertexAttributeTexcoord.
+typedef struct
+{
+    float3 position [[attribute(VertexAttributePosition)]];
+    float3 normal [[attribute(VertexAttributeNormal)]];
+    float weight1 [[attribute(VertexAttributeWeights), function_constant(hasWeight1)]];
+    float3 texCoord1 [[attribute(VertexAttributeTexcoord), function_constant(hasTexture1)]];
+    float3 texCoord2 [[attribute(VertexAttributeTexcoord+1), function_constant(hasTexture2)]];
+    float3 texCoord3 [[attribute(VertexAttributeTexcoord+2), function_constant(hasTexture3)]];
+    float3 texCoord4 [[attribute(VertexAttributeTexcoord+3), function_constant(hasTexture4)]];
+    float3 texCoord5 [[attribute(VertexAttributeTexcoord+4), function_constant(hasTexture5)]];
+    float3 texCoord6 [[attribute(VertexAttributeTexcoord+5), function_constant(hasTexture6)]];
+    float3 texCoord7 [[attribute(VertexAttributeTexcoord+6), function_constant(hasTexture7)]];
+    float3 texCoord8 [[attribute(VertexAttributeTexcoord+7), function_constant(hasTexture8)]];
+    uchar4 color [[attribute(VertexAttributeColor)]];
+} Vertex;
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/TextFontShader.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/TextFontShader.metal
new file mode 100644
index 0000000000..8d4b396082
--- /dev/null
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/TextFontShader.metal
@@ -0,0 +1,96 @@
+/*==LICENSE==*
+
+CyanWorlds.com Engine - MMOG client, server and tools
+Copyright (C) 2011 Cyan Worlds, Inc.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc.
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include +using namespace metal; + +#include +#include + +#import "ShaderTypes.h" + + +using namespace metal; + +typedef struct +{ + packed_float3 position; + uchar4 color; + packed_float3 UV; +} Vertex; + +typedef struct +{ + float4 position [[position]]; + float3 texCoord; + float4 normal; + half4 color; +} ColorInOut; + +vertex ColorInOut textFontVertexShader(constant Vertex *in [[ buffer(0) ]], + constant matrix_float4x4 & transform [[ buffer(1) ]], + uint v_id [[vertex_id]]) +{ + ColorInOut out; + + Vertex vert = in[v_id]; + float4 position = float4(vert.position, 1.0); + out.position = (transform * position); + out.texCoord = vert.UV; + out.normal = float4(0.0, 0.0, 1.0, 0.0); + out.color = half4(vert.color.b, vert.color.g, vert.color.r, vert.color.a) / 255.0f; + + return out; +} + +fragment half4 textFontFragmentShader(ColorInOut in [[stage_in]], + texture2d colorMap [[ texture(0) ]]) +{ + constexpr sampler colorSampler(mip_filter::nearest, + mag_filter::nearest, + min_filter::nearest); + + half4 colorSample = colorMap.sample(colorSampler, in.texCoord.xy); + colorSample *= in.color; + + return colorSample; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal new file mode 100644 index 0000000000..bbce3cfcaa --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal @@ -0,0 +1,285 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc.
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include +using namespace metal; + +#include "ShaderVertex.h" + +typedef struct +{ + matrix_float4x4 WorldToNDC; + float4 Frequency; + float4 Phase; + float4 Amplitude; + float4 DirectionX; + float4 DirectionY; + float4 Scrunch; // UNUSED + float4 SinConsts; + float4 CosConsts; + float4 PiConsts; + float4 NumericConsts; + float4 Tex0_Row0; + float4 Tex0_Row1; + float4 Tex1_Row0; + float4 Tex1_Row1; + float4 L2WRow0; + float4 L2WRow1; + float4 L2WRow2; + float4 Lengths; + float4 WaterLevel; + float4 DepthFalloff; + float4 MinAtten; + float4 Bias; // Only using one slot + float4 MatColor; + float4 CameraPos; // Only used by DecalEnv + float4 EnvAdjust; // Only used by DecalEnv + float4 FogSet; + float4 QADirX; + float4 QADirY; + + float4 DirXW; // Only used by DecalEnv + float4 DirYW; // Only used by DecalEnv + float4 WK; // Only used by DecalEnv + float4 DirXSqKW; // Only used by DecalEnv + float4 DirXDirYKW; // Only used by DecalEnv + float4 DirYSqKW; // Only used by DecalEnv +} vs_WaveDev1Lay_7Uniforms; + +typedef struct +{ + float4 position [[position]]; + half4 c0; + float4 texCoord0; + half4 fog; +} vs_WaveDev1Lay_7InOut; + +vertex vs_WaveDev1Lay_7InOut vs_WaveDec1Lay_7(Vertex in [[stage_in]], + constant vs_WaveDev1Lay_7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) +{ + vs_WaveDev1Lay_7InOut out; + // Store our input position in world space in r6 + float4 worldPosition = float4(0); + worldPosition.x = dot(float4(in.position, 1.0), uniforms.L2WRow0); + worldPosition.y = dot(float4(in.position, 1.0), uniforms.L2WRow1); + worldPosition.z = dot(float4(in.position, 1.0), uniforms.L2WRow2); + // Fill out our w (m4x3 doesn't touch w). 
+ worldPosition.w = 1.0; + + // + + // Input diffuse v5 color is: + // v5.r = overall transparency + // v5.g = illumination + // v5.b = overall wave scaling + // + // v5.a is: + // v5.w = 1/(2.f * edge length) + // So per wave filtering is: + // min(max( (waveLen * v5.wwww) - 1), 0), 1.f); + // So a wave effect starts dying out when the wave is 4 times the sampling frequency, + // and is completely filtered at 2 times sampling frequency. + + // We'd like to make this autocalculated based on the depth of the water. + // The frequency filtering (v5.w) still needs to be calculated offline, because + // it's dependent on edge length, but the first 3 filterings can be calculated + // based on this vertex. + // Basically, we want the transparency, reflection strength, and wave scaling + // to go to zero as the water depth goes to zero. Linear falloffs are as good + // a place to start as any. + // + // depth = waterlevel - r6.z => depth in feet (may be negative) + // depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath + // atten = minAtten + depthNorm * (maxAtten - minAtten); + // These are all vector ops. + // This provides separate ramp ups for each of the channels (they reach full unfiltered + // values at different depths), but doesn't provide separate controls for where they + // go to zero (they all go to zero at zero depth). For that we need an offset. An offset + // in feet (depth) is probably the most intuitive. So that changes the first calculation + // of depth to: + // depth = waterlevel - r6.z + offset + // = (waterlevel + offset) - r6.z + // And since we only need offsets for 3 channels, we can make the waterlevel constant + // waterlevel[chan] = watertableheight + offset[chan], + // with waterlevel.w = watertableheight. + // + // So: + // c22 = waterlevel + offset + // c23 = (maxAtten - minAtten) / depthFalloff + // c24 = minAtten. 
+ // And in particular: + // c22.w = waterlevel + // c23.w = 1.f; + // c24.w = 0; + // So r4.w is the depth of this vertex in feet. + + // Dot our position with our direction vectors. + float4 distance = uniforms.DirectionX * worldPosition.xxxx; + distance += uniforms.DirectionY * worldPosition.yyyy; + + // + // dist = mad( dist, kFreq.xyzw, kPhase.xyzw); + distance = (distance * uniforms.Frequency) + uniforms.Phase; + + // // Now we need dist mod'd into range [-Pi..Pi] + // dist *= rcp(kTwoPi); + distance += uniforms.PiConsts.zzzz; + distance *= (1.0f/(2.0f * M_PI_F)); + // dist = frac(dist); + distance = fract(distance); + // dist *= kTwoPi; + distance *= (2.0f * M_PI_F); + // dist += -kPi; + distance += -M_PI_F; + + // + // sincos(dist, sinDist, cosDist); + // sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + // cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z + + float4 pow2 = distance * distance; // r0^2 + float4 pow3 = pow2 * distance; // r0^3 - probably stall + float4 pow4 = pow2 * pow2; // r0^4 + float4 pow5 = pow2 * pow3; // r0^5 + float4 pow7 = pow2 * pow5; // r0^7 + + //r1 + float4 cosDist = 1 + pow2 * uniforms.CosConsts.y + pow4 * uniforms.CosConsts.z; + //r2 + float4 sinDist = distance + pow3 * uniforms.SinConsts.y + pow5 * uniforms.SinConsts.z; + + cosDist = ((pow3 * pow3) * uniforms.CosConsts.w) + cosDist; + sinDist = (pow7 * uniforms.SinConsts.w) + sinDist; + + // Calc our depth based filtering here into r4 (because we don't use it again + // after here, and we need our filtering shortly). + float4 depth = uniforms.WaterLevel - worldPosition.zzzz; + depth *= uniforms.DepthFalloff; + depth += uniforms.MinAtten; + // Clamp .xyz to range [0..1] + depth = clamp(depth, 0, 1); + + // Calc our filter (see above). 
+ float4 inColor = float4(in.color) / 255.0f; + float4 filter = inColor.wwww * uniforms.Lengths; + filter = max(filter, uniforms.NumericConsts.xxxx); + filter = min(filter, uniforms.NumericConsts.zzzz); + + //mov r2, r1; + // r2 == sinDist + // r1 == cosDist + // sinDist *= filter; + sinDist *= filter; + // sinDist *= kAmplitude.xyzw + sinDist *= uniforms.Amplitude; + // r5 is now T = sum(Ai * sin()) + // METAL NOTE: from here on, r5 is sinDist + // height = dp4(sinDist, kOne); + // accumPos.z += height; (but accumPos.z is currently 0). + float4 accumPos = float4(0); + accumPos.x = dot(sinDist, uniforms.NumericConsts.zzzz); + accumPos.y = accumPos.x * depth.z; + accumPos.z = accumPos.y + uniforms.WaterLevel.w; + worldPosition.z = max(worldPosition.z, accumPos.z); // CLAMP + // r8.x == wave height relative to 0 + // r8.y == dampened wave relative to 0 + // r8.z == dampened wave height in world space + // r6.z == wave height clamped to never go beneath ground level + // + // cosDist *= kAmplitude.xyzw; // Combine? + //METAL NOTE: cosDist is now r7 + cosDist *= uniforms.Amplitude; + // cosDist *= filter; + cosDist *= filter; + // Pos = (in.x + S, in.y + R, r6.z) + // S = sum(k Dir.x A cos()) + // R = sum(k Dir.y A cos()) + // c30 = k Dir.x A + // c31 = k Dir.y A + // S = sum(cosDist * c30); + worldPosition.xy += float2( + dot(cosDist, uniforms.QADirX), + dot(cosDist, uniforms.QADirY) + ); + + // Bias our vert up a bit to compensate for precision errors. + // In particular, our filter coefficients are coming in as + // interpolated bytes, so there's bound to be a lot of slop + // from that. We've got a free slot in c25.x, so we'll use that. + // A better implementation would be to bias and scale our screen + // vert, effectively pushing the vert toward the camera without + // actually moving it, but this is easier and might work just + // as well. 
+ worldPosition.z += uniforms.Bias.x; + + // + // // Transform position to screen + // + // + out.position = worldPosition * uniforms.WorldToNDC; + out.fog = (out.position.w + uniforms.FogSet.x) * uniforms.FogSet.y; + + // Output color is vertex green + // Output alpha is vertex red (vtx alpha is used for wave filtering) + // Whole thing modulated by material color/opacity. + + out.c0 = half4(in.color.yyyz)/255.0 * half4(uniforms.MatColor); + + // Usual texture transform + out.texCoord0.x = dot(float4(in.texCoord1, 1.0), uniforms.Tex0_Row0); + out.texCoord0.y = dot(float4(in.texCoord1, 1.0), uniforms.Tex0_Row1); + out.texCoord0.z = 0.0f; + out.texCoord0.w = 0.0f; + + return out; +} + +fragment half4 ps_CbaseAbase(vs_WaveDev1Lay_7InOut in [[stage_in]], + texture2d texture [[ texture(0) ]]) +{ + + constexpr sampler colorSampler = sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::repeat); + return texture.sample(colorSampler, in.texCoord0.xy) * in.c0; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal new file mode 100644 index 0000000000..b70c05c335 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal @@ -0,0 +1,439 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. 
If not, see <http://www.gnu.org/licenses/>. + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc.
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include +using namespace metal; + +#include "ShaderVertex.h" + +typedef struct +{ + matrix_float4x4 WorldToNDC; + float4 Frequency; + float4 Phase; + float4 Amplitude; + float4 DirectionX; + float4 DirectionY; + float4 Scrunch; // UNUSED + float4 SinConsts; + float4 CosConsts; + float4 PiConsts; + float4 NumericConsts; + float4 Tex0_Row0; + float4 Tex0_Row1; + float4 Tex1_Row0; + float4 Tex1_Row1; + float4 L2WRow0; + float4 L2WRow1; + float4 L2WRow2; + float4 Lengths; + float4 WaterLevel; + float4 DepthFalloff; + float4 MinAtten; + float4 Bias; // Only using one slot + float4 MatColor; + float4 CameraPos; // Only used by DecalEnv + float4 EnvAdjust; // Only used by DecalEnv + float4 FogSet; + float4 QADirX; + float4 QADirY; + + float4 DirXW; // Only used by DecalEnv + float4 DirYW; // Only used by DecalEnv + float4 WK; // Only used by DecalEnv + float4 DirXSqKW; // Only used by DecalEnv + float4 DirXDirYKW; // Only used by DecalEnv + float4 DirYSqKW; // Only used by DecalEnv +} vs_WaveDecEnv7Uniforms; + +typedef struct +{ + float4 position [[position]]; + float4 c1; + float4 texCoord0; + float4 texCoord1; + float4 texCoord2; + float4 texCoord3; + float fog; +} vs_WaveDecEnv7InOut; + +vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[ stage_in ]], + constant vs_WaveDecEnv7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) +{ + vs_WaveDecEnv7InOut out; + + // Store our input position in world space in r6 + float4 worldPosition = float4(0); + worldPosition.x = dot(float4(in.position, 1.0), uniforms.L2WRow0); + worldPosition.y = dot(float4(in.position, 1.0), uniforms.L2WRow1); + worldPosition.z = dot(float4(in.position, 1.0), uniforms.L2WRow2); + // Fill out our w (m4x3 doesn't touch w). 
+ worldPosition.w = 1.0; + + // + + // Input diffuse v5 color is: + // v5.r = overall transparency + // v5.g = illumination + // v5.b = overall wave scaling + // + // v5.a is: + // v5.w = 1/(2.f * edge length) + // So per wave filtering is: + // min(max( (waveLen * v5.wwww) - 1), 0), 1.f); + // So a wave effect starts dying out when the wave is 4 times the sampling frequency, + // and is completely filtered at 2 times sampling frequency. + + // We'd like to make this autocalculated based on the depth of the water. + // The frequency filtering (v5.w) still needs to be calculated offline, because + // it's dependent on edge length, but the first 3 filterings can be calculated + // based on this vertex. + // Basically, we want the transparency, reflection strength, and wave scaling + // to go to zero as the water depth goes to zero. Linear falloffs are as good + // a place to start as any. + // + // depth = waterlevel - r6.z => depth in feet (may be negative) + // depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath + // atten = minAtten + depthNorm * (maxAtten - minAtten); + // These are all vector ops. + // This provides separate ramp ups for each of the channels (they reach full unfiltered + // values at different depths), but doesn't provide separate controls for where they + // go to zero (they all go to zero at zero depth). For that we need an offset. An offset + // in feet (depth) is probably the most intuitive. So that changes the first calculation + // of depth to: + // depth = waterlevel - r6.z + offset + // = (waterlevel + offset) - r6.z + // And since we only need offsets for 3 channels, we can make the waterlevel constant + // waterlevel[chan] = watertableheight + offset[chan], + // with waterlevel.w = watertableheight. + // + // So: + // c22 = waterlevel + offset + // c23 = (maxAtten - minAtten) / depthFalloff + // c24 = minAtten. 
+ // And in particular: + // c22.w = waterlevel + // c23.w = 1.f; + // c24.w = 0; + // So r4.w is the depth of this vertex in feet. + + // Dot our position with our direction vectors. + float4 distance = uniforms.DirectionX * worldPosition.xxxx; + distance += uniforms.DirectionY * worldPosition.yyyy; + + // + // dist = mad( dist, kFreq.xyzw, kPhase.xyzw); + distance = (distance * uniforms.Frequency) + uniforms.Phase; + + // // Now we need dist mod'd into range [-Pi..Pi] + // dist *= rcp(kTwoPi); + distance += uniforms.PiConsts.zzzz; + distance *= 1.0f / uniforms.PiConsts.wwww; + + // dist = frac(dist); + distance = fract(distance); + // dist *= kTwoPi; + distance *= uniforms.PiConsts.wwww; + // dist += -kPi; + distance -= uniforms.PiConsts.zzzz; + + // + // sincos(dist, sinDist, cosDist); + // sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + // cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z + + float4 pow2 = distance * distance; // r0^2 + float4 pow3 = pow2 * distance; // r0^3 - probably stall + float4 pow4 = pow2 * pow2; // r0^4 + float4 pow5 = pow2 * pow3; // r0^5 + float4 pow7 = pow2 * pow5; // r0^7 + + //r1 + float4 cosDist = 1 + pow2 * uniforms.CosConsts.y + pow4 * uniforms.CosConsts.z; + //r2 + float4 sinDist = distance + pow3 * uniforms.SinConsts.y + pow5 * uniforms.SinConsts.z; + + cosDist = ((pow3 * pow3) * uniforms.CosConsts.w) + cosDist; + sinDist = (pow7 * uniforms.SinConsts.w) + sinDist; + + // Calc our depth based filtering here into r4 (because we don't use it again + // after here, and we need our filtering shortly). + float4 depth = uniforms.WaterLevel - worldPosition.zzzz; + depth *= uniforms.DepthFalloff; + depth += uniforms.MinAtten; + // Clamp .xyz to range [0..1] + depth = clamp(depth, 0, 1); + + // Calc our filter (see above). 
+ float4 inColor = float4(in.color) / 255.0f; + float4 filter = inColor.wwww * uniforms.Lengths; + filter = max(filter, uniforms.NumericConsts.xxxx); + filter = min(filter, uniforms.NumericConsts.zzzz); + + //mov r2, r1; + // r2 == sinDist + // r1 == cosDist + // sinDist *= filter; + sinDist *= filter; + // sinDist *= kAmplitude.xyzw + sinDist *= uniforms.Amplitude; + // r5 is now T = sum(Ai * sin()) + // METAL NOTE: from here on, r5 is sinDist + // height = dp4(sinDist, kOne); + // accumPos.z += height; (but accumPos.z is currently 0). + float4 accumPos = float4(0); + accumPos.x = dot(sinDist, uniforms.NumericConsts.zzzz); + accumPos.y = accumPos.x * depth.z; + accumPos.z = accumPos.y + uniforms.WaterLevel.w; + worldPosition.z = max(worldPosition.z, accumPos.z); // CLAMP + // r8.x == wave height relative to 0 + // r8.y == dampened wave relative to 0 + // r8.z == dampened wave height in world space + // r6.z == wave height clamped to never go beneath ground level + // + // cosDist *= filter; + cosDist *= filter; + // Pos = (in.x + S, in.y + R, r6.z) + // S = sum(k Dir.x A cos()) + // R = sum(k Dir.y A cos()) + // c30 = k Dir.x A + // c31 = k Dir.y A + // S = sum(cosDist * c30); + worldPosition.xy += float2( + dot(cosDist, uniforms.QADirX), + dot(cosDist, uniforms.QADirY) + ); + + // Bias our vert up a bit to compensate for precision errors. + // In particular, our filter coefficients are coming in as + // interpolated bytes, so there's bound to be a lot of slop + // from that. We've got a free slot in c25.x, so we'll use that. + // A better implementation would be to bias and scale our screen + // vert, effectively pushing the vert toward the camera without + // actually moving it, but this is easier and might work just + // as well. 
+ worldPosition.z += uniforms.Bias.x; + + // + // // Transform position to screen + // + // + out.position = worldPosition * uniforms.WorldToNDC; + out.fog = (out.position.w + uniforms.FogSet.x) * uniforms.FogSet.y; + + // Now onto texture coordinate generation. + // + // First is the usual texture transform + out.texCoord0 = float4( + dot(float4(in.texCoord1, 1.0), uniforms.Tex0_Row0), + dot(float4(in.texCoord1, 1.0), uniforms.Tex0_Row1), + uniforms.NumericConsts.zz + ); + + // Calculate our basis vectors as input into our tex3x3vspec + // First we get our basis set off our surface. This is + // Okay, here we go: + // W == sum(k w Dir.x^2 A sin()) x + // V == sum(k w Dir.x Dir.y A sin()) x + // U == sum(k w Dir.y^2 A sin()) x + // + // T == sum(A sin()) + // + // S == sum(k Dir.x A cos()) + // R == sum(k Dir.y A cos()) + // + // Q == sum(k w A cos()) x + // + // M == sum(A cos()) + // + // P == sum(w Dir.x A cos()) x + // N == sum(w Dir.y A cos()) x + // + // Then: + // Pos = (in.x + S, in.y + R, waterheight + T) // Already done above. + // + // Bin = (1 - W, -V, P) + // Tan = (-V, 1 - U, N) + // Nor = (-P, -N, 1 - Q) + // + // The matrix + // |Bx, Tx, Nx| + // |By, Ty, Ny| + // |Bz, Tz, Nz| + // is surface2world, but we still need to fold in + // texture2surface. We'll go with the generalized + // (not assuming a flat surface) partials of dPos/dU and dPos/dV + // as coming in as uv coords v8 and v9. 
+ // Then, if r5 = v8 X v9, then texture to surface is + // |v8.x, v9.x, r5.x| + // |v8.y, v9.y, r5.y| + // |v8.z, v9.z, r5.z| + // + // So, let's say we calc 3 vectors, + // r7 = (Bx, Tx, Nx) + // r8 = (By, Ty, Ny) + // r9 = (Bz, Tz, Nz) + // + // Then surface2world * texture2surface = + // |r7 dot v8, r7 dot v9, r7 dot r5| + // |r8 dot v8, r8 dot v9, r8 dot r5| + // |r9 dot v8, r9 dot v9, r9 dot r5| + // + // We will need r5 as v8 X v9 + + float4 r7 = float4(in.texCoord2, 1.0); + float4 r5 = float4(0); + r5.xyz = r7.yzx * in.texCoord3.zxy; + r5.xyz = (r7.zxy * -in.texCoord3.yzx) + r5.xyz; + + // Okay, r1 currently has the vector of cosines, and r2 has vector of sines. + // Everything will want that times amplitude, so go ahead and fold that in. + cosDist *= uniforms.Amplitude; + + r7.x = dot(sinDist, -uniforms.DirXSqKW); + r7.y = dot(sinDist, -uniforms.DirXDirYKW); + r7.z = dot(cosDist, -uniforms.DirXW); + r7.x += uniforms.NumericConsts.z; + + float4 r8 = float4(0); + r8.x = dot(sinDist, -uniforms.DirXDirYKW); + r8.y = dot(sinDist, -uniforms.DirYSqKW); + r8.z = dot(cosDist, -uniforms.DirYW); + r8.y = r8.y + uniforms.NumericConsts.z; + + float4 r9 = out.position; + r9.z = dot(cosDist, -uniforms.WK); + r9.x = -r7.z; + r9.y = -r8.z; + r9.z = r9.z + uniforms.NumericConsts.z; + + // Okay, got everything we need, construct r1-3 as surface2world*texture2surface. + float4 r1, r2, r3 = float4(0); + r1.x = dot(r7.xyz, in.texCoord2); + r1.y = dot(r7.xyz, in.texCoord3); + r1.z = dot(r7.xyz, r5.xyz); + + r2.x = dot(r8.xyz, in.texCoord2); + r2.y = dot(r8.xyz, in.texCoord3); + r2.z = dot(r8.xyz, r5.xyz); + + r3.x = dot(r9.xyz, in.texCoord2); + r3.y = dot(r9.xyz, in.texCoord3); + r3.z = dot(r9.xyz, r5.xyz); + + // Following section is debug only to skip the per-vert tangent space axes. 
+ //add r1, c13.zxxx, r7.zzxw; + //add r2, c13.xzxx, r7.zzyw; + // + //mov r3.x, -r7.x; + //mov r3.y, -r7.y; + //mov r3.zw, c13.zz; + + // See vs_WaveFixedFin6.inl for derivation of the following + float4 r0 = worldPosition - uniforms.CameraPos; + r0 *= rsqrt(dot(r0.xyz, r0.xyz)); + + float4 r10 = float4(0); + r10.x = dot(r0.xyz, uniforms.EnvAdjust.xyz); + r10.y = (r10.x * r10.x) - uniforms.EnvAdjust.w; + + r10.z = (r10.y * rsqrt(r10.y)) + r10.x; + r0.xyz = (r0.xyz * r10.zzz) - uniforms.EnvAdjust.xyz; + + // ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump. + r0.xyz = normalize(r0.xyz); + + r1.w = -r0.x; + r2.w = -r0.y; + r3.w = -r0.z; + + // Now r1-r3 are texture2world, with the eye-ray vector in .w. We just + // need to normalize them and bung them into output UV's 1-3. + // Note we're accounting for our environment map being flipped from + // D3D (and all rational thought) by putting r2 into UV3 and r3 into UV2. + r10.w = uniforms.NumericConsts.z; + r10.x = rsqrt(dot(r1.xyz, r1.xyz)); + out.texCoord1 = r1 * r10.xxxw; + + r10.x = rsqrt(dot(r3.xyz, r3.xyz)); + out.texCoord2 = r3 * r10.xxxw; + + r10.x = rsqrt(dot(r2.xyz, r2.xyz)); + out.texCoord3 = r2 * r10.xxxw; + + float4 matColor = uniforms.MatColor; + out.c1 = clamp(float4(in.color).yyyz/255.0 * matColor, 0.0, 1.0); + + return out; +} + +fragment float4 ps_WaveDecEnv(vs_WaveDecEnv7InOut in [[ stage_in ]], + texture2d normalMap [[ texture(0) ]], + texturecube environmentMap [[ texture(FragmentShaderArgumentAttributeCubicTextures + 1) ]]) +{ + // Very simular to ps_WaveFixed.inl. Only the final coloring is different. + // Even though so far they are identical. 
+ + constexpr sampler colorSampler = sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::repeat); + float4 t0 = 2 * (normalMap.sample(colorSampler, in.texCoord0.xy) - 0.5); + float u = dot(in.texCoord1.xyz, t0.xyz); + float v = dot(in.texCoord2.xyz, t0.xyz); + float w = dot(in.texCoord3.xyz, t0.xyz); + + float3 N = float3(u, v, w); + float3 E = float3(in.texCoord1.w, in.texCoord2.w, in.texCoord3.w); + + //float3 coord = reflect(E, N); + float3 coord = 2*(dot(N, E) / dot(N, N))*N - E; + + // t3 now has our reflected environment map value + // We've (presumably) attenuated the effect on a vertex basis + // and have our color w/ attenuated alpha in v0. So all we need + // is to multiply t3 by v0 into r0 and we're done. + float4 out = float4(environmentMap.sample(colorSampler, coord)); + out.rgb = (out.rgb * in.c1.rgb); + out.a = t0.a * in.c1.a; + return out; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal new file mode 100644 index 0000000000..1b59e2bed4 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal @@ -0,0 +1,305 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include +using namespace metal; + +#include "ShaderVertex.h" + +typedef struct +{ + matrix_float4x4 WorldToNDC; + float4 FogSet; + float4 Frequency; + float4 Phase; + float4 Amplitude; + float4 DirectionX; + float4 DirectionY; + float4 QADirX; + float4 QADirY; + float4 Scrunch; + float4 SinConsts; + float4 CosConsts; + float4 PiConsts; + float4 NumericConsts; + float4 CameraPos; + float4 WindRot; + float4 Tex0_Row0; + float4 Tex0_Row1; + float4 Tex0_Row2; + float4 Tex1_Row0; + float4 Tex1_Row1; + float4 Tex1_Row2; + float4 L2WRow0; + float4 L2WRow1; + float4 L2WRow2; + float4 L2WRow3; + float4 Lengths; + float4 WaterLevel; + float4 DepthFalloff; + float4 MinAtten; + float4 TexConsts; + float4 LifeConsts; + float4 RampBias; +} vs_WaveRip7Uniforms; + +typedef struct +{ + float4 position [[position]]; + half4 c1; + float2 texCoord0; + float fog; +} waveRipInOut; + +vertex waveRipInOut vs_WaveRip7(Vertex in [[stage_in]], + constant 
vs_WaveRip7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) +{ + waveRipInOut out; + + // Store our input position in world space in r6 + float4 worldPosition = float4(0); + worldPosition.x = dot(float4(in.position, 1.0), uniforms.L2WRow0); + worldPosition.y = dot(float4(in.position, 1.0), uniforms.L2WRow1); + worldPosition.z = dot(float4(in.position, 1.0), uniforms.L2WRow2); + // Fill out our w (m4x3 doesn't touch w). + worldPosition.w = 1.0; + + // + + // Input diffuse v5 color is: + // v5.r = overall transparency + // v5.g = illumination + // v5.b = overall wave scaling + // + // v5.a is: + // v5.w = 1/(2.f * edge length) + // So per wave filtering is: + // min(max( (waveLen * v5.wwww) - 1), 0), 1.f); + // So a wave effect starts dying out when the wave is 4 times the sampling frequency, + // and is completely filtered at 2 times sampling frequency. + + // We'd like to make this autocalculated based on the depth of the water. + // The frequency filtering (v5.w) still needs to be calculated offline, because + // it's dependent on edge length, but the first 3 filterings can be calculated + // based on this vertex. + // Basically, we want the transparency, reflection strength, and wave scaling + // to go to zero as the water depth goes to zero. Linear falloffs are as good + // a place to start as any. + // + // depth = waterlevel - r6.z => depth in feet (may be negative) + // depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath + // atten = minAtten + depthNorm * (maxAtten - minAtten); + // These are all vector ops. + // This provides separate ramp ups for each of the channels (they reach full unfiltered + // values at different depths), but doesn't provide separate controls for where they + // go to zero (they all go to zero at zero depth). For that we need an offset. An offset + // in feet (depth) is probably the most intuitive. 
So that changes the first calculation + // of depth to: + // depth = waterlevel - r6.z + offset + // = (waterlevel + offset) - r6.z + // And since we only need offsets for 3 channels, we can make the waterlevel constant + // waterlevel[chan] = watertableheight + offset[chan], + // with waterlevel.w = watertableheight. + // + // So: + // c22 = waterlevel + offset + // c23 = (maxAtten - minAtten) / depthFalloff + // c24 = minAtten. + // And in particular: + // c22.w = waterlevel + // c23.w = 1.f; + // c24.w = 0; + // So r4.w is the depth of this vertex in feet. + + // Dot our position with our direction vectors. + float4 distance = uniforms.DirectionX * worldPosition.xxxx; + distance += uniforms.DirectionY * worldPosition.yyyy; + + // + // dist = mad( dist, kFreq.xyzw, kPhase.xyzw); + distance = (distance * uniforms.Frequency) + uniforms.Phase; + + // // Now we need dist mod'd into range [-Pi..Pi] + // dist *= rcp(kTwoPi); + distance += uniforms.PiConsts.zzzz; + distance *= 1.0f / uniforms.PiConsts.wwww; + + // dist = frac(dist); + distance = fract(distance); + // dist *= kTwoPi; + distance *= uniforms.PiConsts.wwww; + // dist += -kPi; + distance -= uniforms.PiConsts.zzzz; + + // + // sincos(dist, sinDist, cosDist); + // sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + // cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z + + float4 pow2 = distance * distance; // r0^2 + float4 pow3 = pow2 * distance; // r0^3 - probably stall + float4 pow4 = pow2 * pow2; // r0^4 + float4 pow5 = pow2 * pow3; // r0^5 + float4 pow7 = pow2 * pow5; // r0^7 + + //r1 + float4 cosDist = 1 + pow2 * uniforms.CosConsts.y + pow4 * uniforms.CosConsts.z; + //r2 + float4 sinDist = distance + pow3 * uniforms.SinConsts.y + pow5 * uniforms.SinConsts.z; + + cosDist = ((pow3 * pow3) * uniforms.CosConsts.w) + cosDist; + sinDist = (pow7 * uniforms.SinConsts.w) + sinDist; + + // Calc our depth based filtering here into r4 (because we don't use it again + // after here, and we need our filtering shortly). 
+ float4 depth = uniforms.WaterLevel - worldPosition.zzzz; + depth *= uniforms.DepthFalloff; + depth += uniforms.MinAtten; + // Clamp .xyz to range [0..1] + depth = clamp(depth, 0, 1); + + // Calc our filter (see above). + float4 inColor = float4(in.color) / 255.0f; + float4 filter = inColor.wwww * uniforms.Lengths; + filter = clamp(filter, 0.0f, 1.0f); + + //mov r2, r1; + // r2 == sinDist + // r1 == cosDist + // sinDist *= filter; + sinDist *= filter; + // sinDist *= kAmplitude.xyzw + sinDist *= uniforms.Amplitude; + // r5 is now T = sum(Ai * sin()) + // METAL NOTE: from here on, r5 is sinDist + // height = dp4(sinDist, kOne); + // accumPos.z += height; (but accumPos.z is currently 0). + float4 accumPos = float4(0); + accumPos.x = dot(sinDist, uniforms.NumericConsts.zzzz); + accumPos.y = accumPos.x * depth.z; + accumPos.z = accumPos.y + uniforms.WaterLevel.w; + worldPosition.z = max(worldPosition.z, accumPos.z); // CLAMP + // r8.x == wave height relative to 0 + // r8.y == dampened wave relative to 0 + // r8.z == dampened wave height in world space + // r6.z == wave height clamped to never go beneath ground level + // + // cosDist *= filter; + cosDist *= filter; + // Pos = (in.x + S, in.y + R, r6.z) + // S = sum(k Dir.x A cos()) + // R = sum(k Dir.y A cos()) + // c30 = k Dir.x A + // c31 = k Dir.y A + // S = sum(cosDist * c30); + worldPosition.xy += float2( + dot(cosDist, uniforms.QADirX), + dot(cosDist, uniforms.QADirY) + ); + + // Bias our vert up a bit to compensate for precision errors. + // In particular, our filter coefficients are coming in as + // interpolated bytes, so there's bound to be a lot of slop + // from that. We've got a free slot in c25.x, so we'll use that. + // A better implementation would be to bias and scale our screen + // vert, effectively pushing the vert toward the camera without + // actually moving it, but this is easier and might work just + // as well. 
+ worldPosition.z += uniforms.RampBias.z; + + // + // // Transform position to screen + // + // + out.position = worldPosition * uniforms.WorldToNDC; + out.fog = (out.position.w + uniforms.FogSet.x) * uniforms.FogSet.y; + + // Dyna Stuff + // Constants + // c33 = fC1U, fC2U, fC1V, fC2V + // c34 = fInitAtten, t, life, 1.f / (life-decay) + // c35 = ramp, 1.f / ramp, BIAS (positive is up), FREE + // + // Vertex Info + // v7.z = fBirth (because we don't use it for anything else). + // + // Initialize r1.zw to 0,1 + + // Calc r1.x = age, r1.y = atten + // age = t - birth. + const float age = uniforms.LifeConsts.y - in.texCoord1.z; + // atten = clamp0_1(age / ramp) * clamp0_1((life-age) / (life-decay)); + // first clamp0_1(age/ramp) + const float atten = clamp(age * uniforms.RampBias.y, 0.0f, 1.0f) + * clamp((uniforms.LifeConsts.z - age) * uniforms.LifeConsts.w, 0.0f, 1.0f); + + // color is (atten, atten, atten, 1.f) + // Need to calculate opacity we would have had from vs_WaveFixedFin7.inl + // Right now that's just modulating by r4.y. 
+ + out.c1 = (depth.y * uniforms.LifeConsts.x) * half4(atten, atten, atten, 1.0h); + + // UVW = (inUVW - 0.5) * scale + 0.5 + // where: + // scale = (fC1U / (age * fC2U + 1.f)), fC1V / (age * fC2U + 1.f), 1.f, 1.f + float2 scale = age * uniforms.TexConsts.yw; + scale += 1.0f; + scale = (1.0f/scale); + scale *= uniforms.TexConsts.xz; + out.texCoord0 = in.texCoord1.xy - 0.5f; + out.texCoord0 *= scale.xy; + out.texCoord0 += 0.5f; + + return out; +} + +fragment half4 ps_WaveRip(waveRipInOut in [[stage_in]], + texture2d texture [[ texture(0) ]]) +{ + constexpr sampler colorSampler = sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::clamp_to_edge); + half4 t0 = texture.sample(colorSampler, in.texCoord0.xy); + + return t0 * in.c1; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal new file mode 100644 index 0000000000..d0efefcce6 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal @@ -0,0 +1,482 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include +using namespace metal; + +#include "ShaderVertex.h" + +typedef struct +{ + matrix_float4x4 WorldToNDC; + float4 WaterTint; + float4 Frequency; + float4 Phase; + float4 Amplitude; + float4 DirectionX; + float4 DirectionY; + float4 UVScale; + float4 SpecAtten; + float4 Scrunch; + float4 SinConsts; + float4 CosConsts; + float4 PiConsts; + float4 NumericConsts; + float4 CameraPos; + float4 WindRot; + float4 EnvAdjust; + float4 EnvTint; + float4 LocalToWorldRow1; + float4 LocalToWorldRow2; + float4 LocalToWorldRow3; + float4 Lengths; + float4 WaterLevel; + float4 DepthFalloff; + float4 MinAtten; + float4 FogSet; + float4 DirXK; + float4 DirYK; + float4 DirXW; + float4 DirYW; + float4 WK; + float4 DirXSqKW; + float4 DirXDirYKW; + float4 DirYSqKW; +} vs_WaveFixedFin7Uniforms; + +typedef struct +{ + float4 position [[position]]; + float4 c1; + float4 c2; + float4 texCoord0; + float4 texCoord1; + float4 texCoord2; + float4 texCoord3; + 
float fog; +} vs_WaveFixedFin7InOut; + +vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], + constant vs_WaveFixedFin7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) +{ + vs_WaveFixedFin7InOut out; + + // Store our input position in world space in r6 + float3 column1 = float3(uniforms.LocalToWorldRow1[0], uniforms.LocalToWorldRow2[0], uniforms.LocalToWorldRow3[0]); + float3 column2 = float3(uniforms.LocalToWorldRow1[1], uniforms.LocalToWorldRow2[1], uniforms.LocalToWorldRow3[1]); + float3 column3 = float3(uniforms.LocalToWorldRow1[2], uniforms.LocalToWorldRow2[2], uniforms.LocalToWorldRow3[2]); + float3 column4 = float3(uniforms.LocalToWorldRow1[3], uniforms.LocalToWorldRow2[3], uniforms.LocalToWorldRow3[3]); + + matrix_float4x3 localToWorld; + localToWorld[0] = column1; + localToWorld[1] = column2; + localToWorld[2] = column3; + localToWorld[3] = column4; + + float4 worldPosition = float4(localToWorld * float4(in.position, 1.0), 1.0); + + // + + // Input diffuse v5 color is: + // v5.r = overall transparency + // v5.g = reflection strength (transparency) + // v5.b = overall wave scaling + // + // v5.a is: + // v5.w = 1/(2.f * edge length) + // So per wave filtering is: + // min(max( (waveLen * v5.wwww) - 1), 0), 1.f); + // So a wave effect starts dying out when the wave is 4 times the sampling frequency, + // and is completely filtered at 2 times sampling frequency. + + // We'd like to make this autocalculated based on the depth of the water. + // The frequency filtering (v5.w) still needs to be calculated offline, because + // it's dependent on edge length, but the first 3 filterings can be calculated + // based on this vertex. + // Basically, we want the transparency, reflection strength, and wave scaling + // to go to zero as the water depth goes to zero. Linear falloffs are as good + // a place to start as any. 
+ // + // depth = waterlevel - r6.z => depth in feet (may be negative) + // depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath + // atten = minAtten + depthNorm * (maxAtten - minAtten); + // These are all vector ops. + // This provides separate ramp ups for each of the channels (they reach full unfiltered + // values at different depths), but doesn't provide separate controls for where they + // go to zero (they all go to zero at zero depth). For that we need an offset. An offset + // in feet (depth) is probably the most intuitive. So that changes the first calculation + // of depth to: + // depth = waterlevel - r6.z + offset + // = (waterlevel + offset) - r6.z + // And since we only need offsets for 3 channels, we can make the waterlevel constant + // waterlevel[chan] = watertableheight + offset[chan], + // with waterlevel.w = watertableheight. + // + // So: + // c25 = waterlevel + offset + // c26 = (maxAtten - minAtten) / depthFalloff + // c27 = minAtten. + // And in particular: + // c25.w = waterlevel + // c26.w = 1.f; + // c27.w = 0; + // So r4.w is the depth of this vertex in feet. + + // Dot our position with our direction vectors. 
+ + float4 distance = uniforms.DirectionX * worldPosition.xxxx; + distance = (uniforms.DirectionY * worldPosition.yyyy) + distance; + + // + // dist = mad( dist, kFreq.xyzw, kPhase.xyzw); + distance = distance * uniforms.Frequency; + distance = distance + uniforms.Phase; + // + // // Now we need dist mod'd into range [-Pi..Pi] + // dist *= rcp(kTwoPi); + float4 piRecip = 1.0f / uniforms.PiConsts.wwww; + distance = distance + uniforms.PiConsts.zzzz; + distance *= piRecip; + // dist = frac(dist); + distance = fract(distance); + // dist *= kTwoPi; + distance *= uniforms.PiConsts.wwww; + // dist += -kPi; + distance -= uniforms.PiConsts.zzzz; + + //Metal's pow function does not like negative bases + //Doing the same thing as the DX assembly until I know more about why + + float4 pow2 = distance * distance; // r0^2 + float4 pow3 = pow2 * distance; // r0^3 - probably stall + float4 pow4 = pow2 * pow2; // r0^4 + float4 pow5 = pow2 * pow3; // r0^5 + float4 pow7 = pow2 * pow5; // r0^7 + + // + // sincos(dist, sinDist, cosDist); + // sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + // cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z + //r1 + float4 cosDist = 1 + pow2 * uniforms.CosConsts.y + pow4 * uniforms.CosConsts.z; + //r2 + float4 sinDist = distance + pow3 * uniforms.SinConsts.y + pow5 * uniforms.SinConsts.z; + + cosDist = ((pow3 * pow3) * uniforms.CosConsts.w) + cosDist; + sinDist = (pow7 * uniforms.SinConsts.w) + sinDist; + + + // Calc our depth based filtering here into r4 (because we don't use it again + // after here, and we need our filtering shortly). + float4 depth = uniforms.WaterLevel - worldPosition.zzzz; + depth *= uniforms.DepthFalloff; + depth += uniforms.MinAtten; + // Clamp .xyz to range [0..1] + depth = clamp(depth, 0, 1); + + // Calc our filter (see above).
+ float4 inColor = float4(in.color) / 255.0f; + float4 filter = inColor.wwww * uniforms.Lengths; + filter = max(filter, uniforms.NumericConsts.xxxx); + filter = min(filter, uniforms.NumericConsts.zzzz); + + //mov r2, r1; + // r2 == sinDist + // r1 == cosDist + // sinDist *= filter; + sinDist *= filter; + // sinDist *= kAmplitude.xyzw + sinDist *= uniforms.Amplitude; + // r5 is now T = sum(Ai * sin()) + // METAL NOTE: from here on, r5 is sinDist + // height = dp4(sinDist, kOne); + // accumPos.z += height; (but accumPos.z is currently 0). + float4 accumPos = 0; + accumPos.x = dot(sinDist, uniforms.NumericConsts.zzzz); + accumPos.y = accumPos.x * depth.z; + accumPos.z = accumPos.y + uniforms.WaterLevel.w; + worldPosition.z = max(worldPosition.z, accumPos.z); // CLAMP + // r8.x == wave height relative to 0 + // r8.y == dampened wave relative to 0 + // r8.z == dampened wave height in world space + // r6.z == wave height clamped to never go beneath ground level + // + // cosDist *= kAmplitude.xyzw; // Combine? 
+ //METAL NOTE: cosDist is now r7 + cosDist *= uniforms.Amplitude; + // cosDist *= filter; + cosDist *= filter; + // r7 is now M = sum(Ai * cos()) + + // Okay, here we go: + // W == sum(k w Dir.x^2 A sin()) + // V == sum(k w Dir.x Dir.y A sin()) + // U == sum(k w Dir.y^2 A sin()) + // + // T == sum(A sin()) + // + // S == sum(k Dir.x A cos()) + // R == sum(k Dir.y A cos()) + // + // Q == sum(k w A cos()) + // + // M == sum(A cos()) + // + // P == sum(w Dir.x A cos()) + // N == sum(w Dir.y A cos()) + // + // Then: + // Pos = (in.x + S, in.y + R, waterheight + T) + // + // Bin = (1 - W, -V, P) + // Tan = (-V, 1 - U, N) + // Nor = (-P, -N, 1 - Q) + // + // But we want the transpose of that to go into r1-r3 + + worldPosition.x += dot(cosDist, uniforms.DirXK); + worldPosition.y += dot(cosDist, uniforms.DirYK); + + float4 r1, r2, r3 = 0; + + r1.x = dot(sinDist, -uniforms.DirXSqKW); + r2.x = dot(sinDist, -uniforms.DirXDirYKW); + r3.x = dot(cosDist, uniforms.DirXW); + r1.x = r1.x + uniforms.NumericConsts.z; + + r1.y = dot(sinDist, -uniforms.DirXDirYKW); + r2.y = dot(sinDist, -uniforms.DirYSqKW); + r3.y = dot(cosDist, uniforms.DirYW); + r2.y = r2.y + uniforms.NumericConsts.z; + + r1.z = dot(cosDist, -uniforms.DirXW); + r2.z = dot(cosDist, -uniforms.DirYW); + r3.z = dot(sinDist, -uniforms.WK); + r3.z = r3.z + uniforms.NumericConsts.z; + + // Calculate our normalized vector from camera to vtx. + // We'll use that a couple of times coming up. + float4 r5 = worldPosition - uniforms.CameraPos; + float4 r10; + r10.x = rsqrt(dot(r5.xyz, r5.xyz)); + r5 = r5 * r10.xxxx; + r5.w = 1.0 / r10.x; + + // Calculate our specular attenuation from and into r5.w. + // r5.w starts off the distance from vtx to camera. + // Once we've turned it into an attenuation factor, we + // scale the x and y of our normal map (through the transform bases) + // so that in the distance, the normal map is flat. Note that the + // geometry in the distance isn't necessarily flat. 
We want to apply + // this scale to the normal read from the normal map before it is + // transformed into surface space. + r5.w += uniforms.SpecAtten.x; + r5.w *= uniforms.SpecAtten.y; + r5.w = min(r5.w, uniforms.NumericConsts.z); + r5.w = max(r5.w, uniforms.NumericConsts.x); + r5.w *= r5.w; // Square it to account for perspective + r5.w *= uniforms.SpecAtten.z; + + // So, our "finitized" eyeray is: + // camPos + D * t - envCenter = D * t - (envCenter - camPos) + // with + // D = (pos - camPos) / |pos - camPos| // normalized usual eyeray + // and + // t = D dot F + sqrt( (D dot F)^2 - G ) + // with + // F = (envCenter - camPos) => c19.xyz + // G = F^2 - R^2 => c19.w + // R = environment radius. => unused + // + // This all derives from the positive root of equation + // (camPos + (pos - camPos) * t - envCenter)^2 = R^2, + // In other words, where on a sphere of radius R centered about envCenter + // does the ray from the real camera position through this point hit. + // + // Note that F, G, and R are all constants (one point, two scalars). + // + // So first we calculate D into r0, + // then D dot F into r10.x, + // then (D dot F)^2 - G into r10.y + // then rsq( (D dot F)^2 - G ) into r9.x; + // then t = r10.z = r10.x + r10.y * r9.x; + // and + // r0 = D * t - (envCenter - camPos) + // = r0 * r10.zzzz - F; + // + //https://developer.download.nvidia.com/books/HTML/gpugems/gpugems_ch01.html + + + float4 r0 = float4(0); + + { + float3 D = r5.xyz; + float3 F = uniforms.EnvAdjust.xyz; + float G = uniforms.EnvAdjust.w; + // METAL NOTE: HLSL 1.1 always applies an abs operation to values it's about to sqrt + float3 t = dot(D.xyz, F.xyz) + sqrt(abs(pow(abs(dot(D.xyz, F.xyz)), 2) - G));// r10.z = D dot F + SQRT((D dot F)^2 - G) + r0.xyz = (D * t) - F; // r0.xyz = D * t - (envCenter - camPos) + } + + // ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump. 
+ r0.xyz = normalize(r0.xyz); + + r1.w = -r0.x; + r2.w = -r0.y; + r3.w = -r0.z; + + r0.zw = uniforms.NumericConsts.xz; + + float4 r11 = float4(0); + + r0.x = dot(r1.xyz, r1.xyz); + r0.xy = rsqrt(r0.x); + r0.x *= r5.w; + out.texCoord1 = r1 * r0.xxyw; + r11.x = r1.z * r0.y; + + r0.x = dot(r2.xyz, r2.xyz); + r0.xy = rsqrt(r0.x); + r0.x *= r5.w; + out.texCoord3 = r2 * r0.xxyw; + r11.y = r2.z * r0.y; + + r0.x = dot(r3.xyz, r3.xyz); + r0.xy = rsqrt(r0.x); + r0.x *= r5.w; + out.texCoord2 = r3 * r0.xxyw; + r11.z = r3.z * r0.y; + + /* + // Want: + // oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x) + // oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y) + // oT3 = (BIN.z, TAN.z, NORM.z, view2pos.z) + // with BIN, TAN, and NORM normalized. + // Unnormalized, we have + // BIN = (1, 0, -r7.x) where r7 == accumCos + // TAN = (0, 1, -r7.y) + // NORM= (r7.x, r7.y, 1) + // So, unnormalized, we have + // oT1 = (1, 0, r7.x, view2pos.x) + // oT2 = (0, 1, r7.y, view2pos.y) + // oT3 = (-r7.x, -r7.y, 1, view2pos.z) + // which is just reversing the signs on the accumCos + // terms above. So the normalized version is just + // reversing the signs on the normalized version above. + */ + //mov oT3, r4; + + // + // // Transform position to screen + // + // + float4 r9; + r9 = worldPosition * uniforms.WorldToNDC; + r10.x = r9.w + uniforms.FogSet.x; + out.fog = r10.x * uniforms.FogSet.y; + out.position = r9; + + // Transform our uvw + out.texCoord0 = float4(in.position.xy * uniforms.UVScale.x, + 0, 1); + + // Questionable attenuation follows + // vector from this point to camera and normalize stashed in r5 + // Dot that with the computed normal + r1.x = dot(-r5, r11); + r1.x = r1.x * inColor.z; + r1.xyzw = uniforms.NumericConsts.z - r1.x; + r1.w += uniforms.NumericConsts.z; + r1.w *= uniforms.NumericConsts.y; + // No need to clamp, since the destination register (in the pixel shader) + // will saturate [0..1] anyway.
+ r1 *= depth.yyyx; // HACKTESTCOLOR + //R in the in color is the alpha value, but remember it's encoded ARGB + r1.w *= inColor.g; + r1.w *= uniforms.WaterTint.w; + out.c1 = clamp(r1 * uniforms.EnvTint, 0, 1); + out.c2 = uniforms.WaterTint; // SEENORM + + return out; +} + +fragment float4 ps_WaveFixed(vs_WaveFixedFin7InOut in [[stage_in]], + texture2d normalMap [[ texture(0) ]], + texturecube environmentMap [[ texture(FragmentShaderArgumentAttributeCubicTextures + 3) ]]) +{ + // Short pixel shader. Use the texm3x3vspec to do a per-pixel + // reflected lookup into our environment map. + // Input: + // t0 - Normal map in tangent space. Apply _bx2 modifier to shift + // [0..255] -> [-1..1] + // t1 - UVW = tangent + eye2pos.x, map ignored. + // t2 - UVW = binormal + eye2pos.y, map ignored + // t3 - UVW = normal + eye2pos.z, map = environment cube map + // v0 - attenuating color/alpha. + // See docs on texm3x3vspec for explanation of the eye2pos wackiness. + // Output: + // r0 = reflected lookup from environment map X input v0. + // Since environment map has alpha = 255, the output of this + // shader can be used for either alpha or additive blending, + // as long as v0 is fed in appropriately. 
+ + constexpr sampler colorSampler = sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::repeat); + float3 t0 = 2 * (normalMap.sample(colorSampler, in.texCoord0.xy).rgb - 0.5); + float u = dot(in.texCoord1.xyz, t0); + float v = dot(in.texCoord2.xyz, t0); + float w = dot(in.texCoord3.xyz, t0); + + float3 N = float3(u, v, w); + float3 E = float3(in.texCoord1.w, in.texCoord2.w, in.texCoord3.w); + + //float3 coord = reflect(E, N); + float3 coord = 2*(dot(N, E) / dot(N, N))*N - E; + + float4 out = float4(environmentMap.sample(colorSampler, coord)); + out = (out * in.c1) + in.c2; + out.a = in.c1.a; + return out; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/pfMetalPipelineCreatable.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/pfMetalPipelineCreatable.h new file mode 100644 index 0000000000..c09dd945ed --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/pfMetalPipelineCreatable.h @@ -0,0 +1,49 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#ifndef pfMetalPipelineCreatable_inc +#define pfMetalPipelineCreatable_inc + +#include "plMetalPipeline.h" +REGISTER_NONCREATABLE(plMetalPipeline); + +#endif // pfMetalPipelineCreatable_inc diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp new file mode 100644 index 0000000000..1166cefc4e --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -0,0 +1,1352 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version.
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc.
+    14617 N Newport Hwy
+    Mead, WA 99021
+
+*==LICENSE==*/
+
+// We need to define these once and only once for Metal somewhere
+// in a cpp file before the Metal-cpp include (via plMetalDevice)
+#define NS_PRIVATE_IMPLEMENTATION
+#define CA_PRIVATE_IMPLEMENTATION
+#define MTL_PRIVATE_IMPLEMENTATION
+
+#include "plMetalDevice.h"
+
+#include "hsDarwin.h"
+#include "hsThread.h"
+
+#include "plDrawable/plGBufferGroup.h"
+#include "plGImage/plCubicEnvironmap.h"
+#include "plGImage/plMipmap.h"
+#include "plPipeline/plRenderTarget.h"
+
+#include "pfMetalPipeline/plMetalPipeline.h"
+#include "pfMetalPipeline/plMetalPipelineState.h"
+#include "pfMetalPipeline/ShaderSrc/ShaderTypes.h"
+
+/// Inline helpers for getting/setting data in a vertex buffer
+
+/// Copies one T from src to dst, then advances both pointers by sizeof(T).
+template
+static inline void inlCopy(uint8_t*& src, uint8_t*& dst)
+{
+    T* src_ptr = reinterpret_cast(src);
+    T* dst_ptr = reinterpret_cast(dst);
+    *dst_ptr = *src_ptr;
+    src += sizeof(T);
+    dst += sizeof(T);
+}
+
+/// Copies sz raw bytes from src to dst, then advances both pointers by sz.
+static inline void inlCopy(const uint8_t*& src, uint8_t*& dst, size_t sz)
+{
+    memcpy(dst, src, sz);
+    src += sz;
+    dst += sz;
+}
+
+/// Reads one T from src into *val and returns the address just past it.
+template
+static inline const uint8_t* inlExtract(const uint8_t* src, T* val)
+{
+    const T* ptr = reinterpret_cast(src);
+    *val = *ptr++;
+    return reinterpret_cast(ptr);
+}
+
+/// hsPoint3 specialization: copies three floats from src, then stores 1.f
+/// as a fourth float. Returns src advanced by the three floats read.
+/// NOTE(review): the fourth store lands one float past the three that were
+/// copied - presumably val points at storage with room for four floats; confirm.
+template<>
+inline const uint8_t* inlExtract(const uint8_t* src, hsPoint3* val)
+{
+    const float* src_ptr = reinterpret_cast(src);
+    float* dst_ptr = reinterpret_cast(val);
+    *dst_ptr++ = *src_ptr++;
+    *dst_ptr++ = *src_ptr++;
+    *dst_ptr++ = *src_ptr++;
+    *dst_ptr = 1.f;
+    return reinterpret_cast(src_ptr);
+}
+
+/// hsVector3 specialization: copies three floats from src, then stores 0.f
+/// as a fourth float. Returns src advanced by the three floats read.
+/// NOTE(review): same fourth-float store as the hsPoint3 variant; confirm
+/// that val has room for it.
+template<>
+inline const uint8_t* inlExtract(const uint8_t* src, hsVector3* val)
+{
+    const float* src_ptr = reinterpret_cast(src);
+    float* dst_ptr = reinterpret_cast(val);
+    *dst_ptr++ = *src_ptr++;
+    *dst_ptr++ = *src_ptr++;
+    *dst_ptr++ = *src_ptr++;
+    *dst_ptr = 0.f;
+    return reinterpret_cast(src_ptr);
+}
+
+/// Advances src past N consecutive values of type T without reading them.
+template
+static inline void inlSkip(uint8_t*& src)
+{
+    src += sizeof(T) * N;
+}
+
+template
+static
inline uint8_t* inlStuff(uint8_t* dst, const T* val)
+{
+    // Store *val at dst and hand back the address just past the stored value
+    T* ptr = reinterpret_cast(dst);
+    *ptr++ = *val;
+    return reinterpret_cast(ptr);
+}
+
+/// Converts a Plasma matrix into a SIMD 4x4 matrix.
+/// When the source is flagged as identity its fMap contents are not read;
+/// the SIMD identity constant is copied instead. Returns dst for chaining.
+matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst)
+{
+    constexpr auto matrixSize = sizeof(matrix_float4x4);
+    if (src.fFlags & hsMatrix44::kIsIdent) {
+        memcpy(dst, &matrix_identity_float4x4, matrixSize);
+    } else {
+        memcpy(dst, &src.fMap, matrixSize);
+    }
+
+    return dst;
+}
+
+/// Not implemented for Metal: asserts and reports failure.
+bool plMetalDevice::InitDevice()
+{
+    // FIXME: Should Metal adopt InitDevice like OGL?
+    hsAssert(0, "InitDevice not implemented for Metal rendering");
+
+    // A bool function must return on every path; flowing off the end is
+    // undefined behaviour. Report failure since nothing was initialised.
+    return false;
+}
+
+/// Not implemented for Metal: asserts and does nothing else.
+void plMetalDevice::Shutdown()
+{
+    // FIXME: Should Metal adopt Shutdown like OGL?
+    hsAssert(0, "Shutdown not implemented for Metal rendering");
+}
+
+/// Rebuilds the four material sampler states (every repeat/clamp combination
+/// of the S and T address modes) using the requested maximum anisotropy.
+void plMetalDevice::SetMaxAnsiotropy(uint8_t maxAnsiotropy)
+{
+    // setup the material pass samplers
+    // load them all at once and then let the shader pick
+
+    // Metal only accepts anisotropy values in the range 1...16
+    if (maxAnsiotropy == 0)
+        maxAnsiotropy = 1;
+    else if (maxAnsiotropy > 16)
+        maxAnsiotropy = 16;
+
+    if (fSamplerStates[0] != nullptr) {
+        ReleaseSamplerStates();
+    }
+
+    MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init();
+    samplerDescriptor->setMaxAnisotropy(maxAnsiotropy);
+    samplerDescriptor->setMinFilter(MTL::SamplerMinMagFilterLinear);
+    samplerDescriptor->setMagFilter(MTL::SamplerMinMagFilterLinear);
+    samplerDescriptor->setMipFilter(MTL::SamplerMipFilterLinear);
+
+    samplerDescriptor->setSAddressMode(MTL::SamplerAddressModeRepeat);
+    samplerDescriptor->setTAddressMode(MTL::SamplerAddressModeRepeat);
+    fSamplerStates[0] = fMetalDevice->newSamplerState(samplerDescriptor);
+
+    samplerDescriptor->setSAddressMode(MTL::SamplerAddressModeClampToEdge);
+    samplerDescriptor->setTAddressMode(MTL::SamplerAddressModeRepeat);
+    fSamplerStates[1] = fMetalDevice->newSamplerState(samplerDescriptor);
+
+    samplerDescriptor->setSAddressMode(MTL::SamplerAddressModeRepeat);
+    samplerDescriptor->setTAddressMode(MTL::SamplerAddressModeClampToEdge);
+    fSamplerStates[2] =
fMetalDevice->newSamplerState(samplerDescriptor);
+
+    samplerDescriptor->setSAddressMode(MTL::SamplerAddressModeClampToEdge);
+    samplerDescriptor->setTAddressMode(MTL::SamplerAddressModeClampToEdge);
+    fSamplerStates[3] = fMetalDevice->newSamplerState(samplerDescriptor);
+    samplerDescriptor->release();
+}
+
+/// Picks the MSAA sample count to render with. Requests of 2 and 4 map to
+/// themselves, 6 maps to 8, and every other value maps to 1. The result is
+/// then halved until the device reports support for it (or it reaches 1).
+void plMetalDevice::SetMSAASampleCount(uint8_t sampleCount)
+{
+    // Plasma has some MSAA levels that don't completely correspond to what Metal can do
+    // Best fit them to levels Metal can do. Once they are best fit see if the hardware
+    // is capable.
+
+    uint8_t actualSampleCount = 1;
+    if (sampleCount == 6) {
+        actualSampleCount = 8;
+    } else if (sampleCount == 4) {
+        actualSampleCount = 4;
+    } else if (sampleCount == 2) {
+        actualSampleCount = 2;
+    }
+
+    while (actualSampleCount != 1) {
+        if (fMetalDevice->supportsTextureSampleCount(actualSampleCount)) {
+            break;
+        }
+        actualSampleCount /= 2;
+    }
+
+    fSampleCount = actualSampleCount;
+}
+
+/// Releases the four material sampler states and nulls their slots.
+/// Callers in this file only invoke it once fSamplerStates[0] is non-null.
+void plMetalDevice::ReleaseSamplerStates()
+{
+    fSamplerStates[0]->release();
+    fSamplerStates[0] = nullptr;
+
+    fSamplerStates[1]->release();
+    fSamplerStates[1] = nullptr;
+
+    fSamplerStates[2]->release();
+    fSamplerStates[2] = nullptr;
+
+    fSamplerStates[3]->release();
+    fSamplerStates[3] = nullptr;
+}
+
+/// Clears the current target.
+/// @param shouldClearColor  whether the colour attachment is cleared
+/// @param clearColor        colour to clear to
+/// @param shouldClearDepth  whether the depth attachment is cleared
+/// @param clearDepth        depth value to clear to
+void plMetalDevice::Clear(bool shouldClearColor, simd_float4 clearColor, bool shouldClearDepth, float clearDepth)
+{
+    /*
+     In Metal, a clear is an argument to the drawable loading operation,
+     not an operation that can be done freely at any time. So lets handle
+     a clear two ways:
+     1) If we're in the middle of a rendering pass, manually clear.
+     2) If we're at the beginning of a render pass, note the clear color
+     we should use to clear the framebuffer at load.
+     */
+
+    if (fCurrentRenderTargetCommandEncoder) {
+        // We're mid flight, we'll need to manually paint the clear color
+
+        // Convert the requested colour to half precision for the shader.
+        // This local must not reuse the parameter's name: the shadowing
+        // version copied an uninitialised value onto itself.
+        half4 halfClearColor;
+        halfClearColor[0] = clearColor.x;
+        halfClearColor[1] = clearColor.y;
+        halfClearColor[2] = clearColor.z;
+        halfClearColor[3] = clearColor.w;
+        plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalClearPipelineState(this, shouldClearColor, shouldClearDepth).GetRenderPipelineState();
+
+        const MTL::RenderPipelineState* pipelineState = linkedPipeline->pipelineState;
+        CurrentRenderCommandEncoder()->setRenderPipelineState(pipelineState);
+
+        // Triangle strip covering all of clip space
+        float clearCoords[8] = {
+            -1, -1,
+            1, -1,
+            -1, 1,
+            1, 1};
+        CurrentRenderCommandEncoder()->setDepthStencilState(fNoZReadStencilState);
+
+        CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone);
+        CurrentRenderCommandEncoder()->setVertexBytes(&clearCoords, sizeof(clearCoords), 0);
+        CurrentRenderCommandEncoder()->setFragmentBytes(&halfClearColor, sizeof(halfClearColor), 0);
+        // Pass the caller's depth rather than a shadowing local fixed at 1.0
+        CurrentRenderCommandEncoder()->setFragmentBytes(&clearDepth, sizeof(float), 1);
+        CurrentRenderCommandEncoder()->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4));
+    } else {
+        // Render has not started yet! Note which clear color we should use
+        // for clearing the render buffer when we load it.
+
+        // Colour and depth are recorded independently so that a depth-only
+        // clear request is not dropped.
+        if (fCurrentRenderTarget) {
+            if (shouldClearColor) {
+                fClearRenderTargetColor = clearColor;
+                fShouldClearRenderTarget = true;
+            }
+            if (shouldClearDepth) {
+                fClearRenderTargetDepth = clearDepth;
+            }
+        } else {
+            if (shouldClearColor) {
+                fClearDrawableColor = clearColor;
+                fShouldClearDrawable = true;
+            }
+            if (shouldClearDepth) {
+                fClearDrawableDepth = clearDepth;
+            }
+        }
+
+        /*
+         Clear needs to count as a render operation, but Metal treats
+         it as an argument when starting a new render encoder. If a
+         render pass only cleared, but never rendered any content,
+         the clear would never happen because no render encoder would
+         be created.
+
+         Force render encoder creation to force the clear to happen.
+         */
+
+        CurrentRenderCommandEncoder();
+    }
+}
+
+/// Ends any encoder in flight and starts a new render command encoder for the
+/// current target (an offscreen render target, or the drawable when none is
+/// set), applying the clear colour/depth recorded by Clear().
+void plMetalDevice::BeginNewRenderPass()
+{
+    // lazily create the screen render encoder if it does not yet exist
+    if (!fCurrentOffscreenCommandBuffer && !fCurrentRenderTargetCommandEncoder) {
+        SetRenderTarget(nullptr);
+    }
+
+    if (fCurrentRenderTargetCommandEncoder) {
+        // if we have an existing render target, submit its commands and release it
+        // if we need to come back to this render target, we can always create a new render
+        // pass descriptor and submit more commands
+        fCurrentRenderTargetCommandEncoder->endEncoding();
+        fCurrentRenderTargetCommandEncoder->release();
+        fCurrentRenderTargetCommandEncoder = nullptr;
+    }
+
+    MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor();
+    renderPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionStore);
+
+    if (fCurrentRenderTarget) {
+        renderPassDescriptor->colorAttachments()->object(0)->setClearColor(MTL::ClearColor(fClearRenderTargetColor.x, fClearRenderTargetColor.y, fClearRenderTargetColor.z, fClearRenderTargetColor.w));
+        if (fShouldClearRenderTarget) {
+            renderPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionClear);
+        } else {
+            renderPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionLoad);
+        }
+
+        if (fCurrentRenderTarget->GetZDepth()) {
+            plMetalRenderTargetRef* deviceTarget = (plMetalRenderTargetRef*)fCurrentRenderTarget->GetDeviceRef();
+            renderPassDescriptor->depthAttachment()->setTexture(deviceTarget->fDepthBuffer);
+            renderPassDescriptor->depthAttachment()->setClearDepth(fClearRenderTargetDepth);
+            renderPassDescriptor->depthAttachment()->setStoreAction(MTL::StoreActionDontCare);
+            renderPassDescriptor->depthAttachment()->setLoadAction(MTL::LoadActionClear);
+        }
+
+        renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentOutputTexture);
+        fCurrentRenderTargetCommandEncoder = fCurrentOffscreenCommandBuffer->renderCommandEncoder(renderPassDescriptor)->retain();
+    } else {
+        renderPassDescriptor->colorAttachments()->object(0)->setClearColor(MTL::ClearColor(fClearDrawableColor.x, fClearDrawableColor.y, fClearDrawableColor.z, fClearDrawableColor.w));
+        if (fShouldClearDrawable) {
+            renderPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionClear);
+        } else {
+            renderPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionLoad);
+        }
+
+        renderPassDescriptor->depthAttachment()->setClearDepth(fClearDrawableDepth);
+        renderPassDescriptor->depthAttachment()->setLoadAction(MTL::LoadActionClear);
+        renderPassDescriptor->depthAttachment()->setTexture(fCurrentDrawableDepthTexture);
+        renderPassDescriptor->depthAttachment()->setStoreAction(MTL::StoreActionDontCare);
+
+        if (fSampleCount == 1) {
+            if (NeedsPostprocessing()) {
+                renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentUnprocessedOutputTexture);
+            } else {
+                renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentOutputTexture);
+            }
+        } else {
+            renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentMSAAOutputTexture);
+
+            // if we need postprocessing, output to the main pass texture
+            // otherwise we can go straight to the drawable
+            if (NeedsPostprocessing()) {
+                renderPassDescriptor->colorAttachments()->object(0)->setResolveTexture(fCurrentUnprocessedOutputTexture);
+            } else {
+                renderPassDescriptor->colorAttachments()->object(0)->setResolveTexture(fCurrentFragmentOutputTexture);
+            }
+
+            renderPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionMultisampleResolve);
+        }
+
+        fCurrentRenderTargetCommandEncoder = fCurrentCommandBuffer->renderCommandEncoder(renderPassDescriptor)->retain();
+    }
+
+    fCurrentRenderTargetCommandEncoder->setFragmentSamplerStates(fSamplerStates, NS::Range::Make(0, 4));
+}
+
+void
plMetalDevice::SetRenderTarget(plRenderTarget* target)
+{
+    /*
+     If the current render target is already the drawable (no offscreen target
+     is set and an encoder is active) and we're being asked to set the render
+     target to the drawable again, don't do anything.
+     We used to allow starting new passes on the same drawable but that would break
+     memoryless buffers on Apple Silicon that don't survive between passes.
+     */
+    if ((!fCurrentRenderTarget && !target) && fCurrentRenderTargetCommandEncoder) {
+        return;
+    }
+    if (fCurrentRenderTargetCommandEncoder) {
+        // if we have an existing render target, submit its commands and release it
+        // if we need to come back to this render target, we can always create a new render
+        // pass descriptor and submit more commands
+        fCurrentRenderTargetCommandEncoder->endEncoding();
+        fCurrentRenderTargetCommandEncoder->release();
+        fCurrentRenderTargetCommandEncoder = nullptr;
+    }
+
+    // Flush the command buffer belonging to the previous offscreen target
+    if (fCurrentOffscreenCommandBuffer) {
+        if (fCurrentRenderTarget && fCurrentRenderTarget->GetFlags() & plRenderTarget::kIsOffscreen) {
+            // if our target was offscreen, go ahead and blit back. Something will want this data.
+            MTL::BlitCommandEncoder* blitEncoder = fCurrentOffscreenCommandBuffer->blitCommandEncoder();
+            blitEncoder->synchronizeResource(fCurrentFragmentOutputTexture);
+            blitEncoder->endEncoding();
+        }
+        fCurrentOffscreenCommandBuffer->commit();
+        if (fCurrentRenderTarget && fCurrentRenderTarget->GetFlags() & plRenderTarget::kIsOffscreen) {
+            // if it's an offscreen buffer, wait for completion
+            // something is probably going to want to synchronously grab data
+            fCurrentOffscreenCommandBuffer->waitUntilCompleted();
+        }
+        fCurrentOffscreenCommandBuffer->release();
+        fCurrentOffscreenCommandBuffer = nullptr;
+    }
+
+    fCurrentRenderTarget = target;
+
+    if (fCurrentRenderTarget && fShouldClearRenderTarget == false) {
+        // clear if a clear color wasn't already set
+        fClearRenderTargetColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f);
+        fShouldClearRenderTarget = true;
+        fClearRenderTargetDepth = 1.0;
+    }
+
+    if (fCurrentRenderTarget) {
+        // Offscreen target: make sure it has a device ref, then give it its
+        // own command buffer and render into its texture
+        if (!target->GetDeviceRef()) {
+            fPipeline->MakeRenderTargetRef(target);
+        }
+        plMetalRenderTargetRef* deviceTarget = (plMetalRenderTargetRef*)target->GetDeviceRef();
+        fCurrentOffscreenCommandBuffer = fCommandQueue->commandBuffer();
+        fCurrentOffscreenCommandBuffer->retain();
+        fCurrentFragmentOutputTexture = deviceTarget->fTexture;
+
+        if (deviceTarget->fDepthBuffer) {
+            fCurrentDepthFormat = MTL::PixelFormatDepth32Float_Stencil8;
+        } else {
+            fCurrentDepthFormat = MTL::PixelFormatInvalid;
+        }
+    } else {
+        // No target: render into the current drawable's texture
+        fCurrentFragmentOutputTexture = fCurrentDrawable->texture();
+        fCurrentDepthFormat = MTL::PixelFormatDepth32Float_Stencil8;
+    }
+}
+
+// Creates the system default Metal device and its command queue, then builds
+// the fixed set of depth/stencil states from one reused descriptor.
+plMetalDevice::plMetalDevice()
+    : fErrorMsg(),
+      fActiveThread(hsThread::ThisThreadHash()),
+      fCurrentDrawable(),
+      fCommandQueue(),
+      fCurrentRenderTargetCommandEncoder(),
+      fCurrentDrawableDepthTexture(),
+      fCurrentFragmentOutputTexture(),
+      fCurrentCommandBuffer(),
+      fCurrentOffscreenCommandBuffer(),
+      fCurrentRenderTarget(),
+      fNewPipelineStateMap(),
+      fCurrentFragmentMSAAOutputTexture(),
+      fCurrentUnprocessedOutputTexture(),
+      fGammaLUTTexture(),
+      fGammaAdjustState(),
+      fBlitCommandBuffer(),
+      fBlitCommandEncoder()
+{
+    fClearRenderTargetColor = {0.0, 0.0, 0.0, 1.0};
+    fClearDrawableColor = {0.0, 0.0, 0.0, 1.0};
+    // Only slot 0 is nulled here; SetMaxAnsiotropy tests slot 0 to decide
+    // whether sampler states already exist
+    fSamplerStates[0] = nullptr;
+
+    fMetalDevice = MTL::CreateSystemDefaultDevice();
+    fCommandQueue = fMetalDevice->newCommandQueue();
+
+    // set up all the depth stencil states
+    MTL::DepthStencilDescriptor* depthDescriptor = MTL::DepthStencilDescriptor::alloc()->init();
+
+    // Depth test always passes, depth is written
+    depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionAlways);
+    depthDescriptor->setDepthWriteEnabled(true);
+    depthDescriptor->setLabel(NS::String::string("No Z Read", NS::UTF8StringEncoding));
+    fNoZReadStencilState = fMetalDevice->newDepthStencilState(depthDescriptor);
+
+    // Depth is tested (less-or-equal) but not written
+    depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionLessEqual);
+    depthDescriptor->setDepthWriteEnabled(false);
+    depthDescriptor->setLabel(NS::String::string("No Z Write", NS::UTF8StringEncoding));
+    fNoZWriteStencilState = fMetalDevice->newDepthStencilState(depthDescriptor);
+
+    // Depth test always passes and depth is not written
+    depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionAlways);
+    depthDescriptor->setDepthWriteEnabled(false);
+    depthDescriptor->setLabel(NS::String::string("No Z Read or Write", NS::UTF8StringEncoding));
+    fNoZReadOrWriteStencilState = fMetalDevice->newDepthStencilState(depthDescriptor);
+
+    // Default: depth is tested (less-or-equal) and written
+    depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionLessEqual);
+    depthDescriptor->setLabel(NS::String::string("Z Read and Write", NS::UTF8StringEncoding));
+    depthDescriptor->setDepthWriteEnabled(true);
+    fDefaultStencilState = fMetalDevice->newDepthStencilState(depthDescriptor);
+
+    // Reversed comparison (greater-or-equal), depth is written
+    depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionGreaterEqual);
+    depthDescriptor->setLabel(NS::String::string("Reverse Z", NS::UTF8StringEncoding));
+    depthDescriptor->setDepthWriteEnabled(true);
+    fReverseZStencilState = fMetalDevice->newDepthStencilState(depthDescriptor);
+
+    depthDescriptor->release();
+}
+
+/// Applies the pipeline's current view transform viewport (left, top, width,
+/// height) to the active render encoder with a depth range of 0..1.
+void plMetalDevice::SetViewport()
+{
+    CurrentRenderCommandEncoder()->setViewport({(double)fPipeline->GetViewTransform().GetViewPortLeft(),
+                                                (double)fPipeline->GetViewTransform().GetViewPortTop(),
+                                                (double)fPipeline->GetViewTransform().GetViewPortWidth(),
+                                                (double)fPipeline->GetViewTransform().GetViewPortHeight(),
+                                                0.f, 1.f});
+}
+
+/// Records the calling thread as the active render thread. Always returns true.
+bool plMetalDevice::BeginRender()
+{
+    // Both branches of the previous check ended with fActiveThread equal to
+    // the calling thread's hash, so only write when it actually differs.
+    const auto callingThread = hsThread::ThisThreadHash();
+    if (fActiveThread != callingThread) {
+        fActiveThread = callingThread;
+    }
+
+    return true;
+}
+
+/// Returns the size in bytes of one vertex for the given plGBufferGroup format:
+/// position + normal, two packed colours, an optional single skin weight, and
+/// one hsPoint3 per UV channel.
+static uint32_t IGetBufferFormatSize(uint8_t format)
+{
+    uint32_t size = sizeof(hsPoint3) * 2 + sizeof(hsColor32) * 2; // Position and normal, and two packed colors
+
+    switch (format & plGBufferGroup::kSkinWeightMask) {
+        case plGBufferGroup::kSkinNoWeights:
+            break;
+        case plGBufferGroup::kSkin1Weight:
+            size += sizeof(float);
+            break;
+        default:
+            hsAssert(false, "Invalid skin weight value in IGetBufferFormatSize()");
+    }
+
+    size += sizeof(hsPoint3) * plGBufferGroup::CalcNumUVs(format);
+
+    return size;
+}
+
+/// Initialises vRef to describe vertex buffer idx of owner, registers it with
+/// the owner, and drops the caller's reference. No GPU buffer is created here;
+/// the ref is left marked dirty.
+void plMetalDevice::SetupVertexBufferRef(plGBufferGroup* owner, uint32_t idx, plMetalDevice::VertexBufferRef* vRef)
+{
+    uint8_t format = owner->GetVertexFormat();
+
+    // Skinned buffers are stored without weights/indices and treated as volatile
+    if (format & plGBufferGroup::kSkinIndices) {
+        format &= ~(plGBufferGroup::kSkinWeightMask | plGBufferGroup::kSkinIndices);
+        format |= plGBufferGroup::kSkinNoWeights; // Should do nothing, but just in case...
+        vRef->SetSkinned(true);
+        vRef->SetVolatile(true);
+    }
+
+    const uint32_t vertSize = IGetBufferFormatSize(format); // vertex stride
+    const uint32_t numVerts = owner->GetVertBufferCount(idx);
+
+    vRef->fOwner = owner;
+    vRef->fCount = numVerts;
+    vRef->fVertexSize = vertSize;
+    vRef->fFormat = format;
+    vRef->fRefTime = 0;
+
+    vRef->SetDirty(true);
+    vRef->SetRebuiltSinceUsed(true);
+    vRef->fData = nullptr;
+
+    vRef->SetVolatile(vRef->Volatile() || owner->AreVertsVolatile());
+
+    vRef->fIndex = idx;
+
+    owner->SetVertexBufferRef(idx, vRef);
+
+    hsRefCnt_SafeUnRef(vRef);
+}
+
+/// Ensures a non-volatile vertex buffer ref has a GPU buffer, filling it from
+/// the owning group on first use.
+void plMetalDevice::CheckStaticVertexBuffer(plMetalDevice::VertexBufferRef* vRef, plGBufferGroup* owner, uint32_t idx)
+{
+    hsAssert(!vRef->Volatile(), "Creating a managed vertex buffer for a volatile buffer ref");
+
+    if (!vRef->GetBuffer()) {
+        FillVertexBufferRef(vRef, owner, idx);
+
+        // This is currently a no op, but this would let the buffer know it can
+        // unload the system memory copy, since we have a managed version now.
+        owner->PurgeVertBuffer(idx);
+    }
+}
+
+/// Creates a managed Metal buffer for vertex buffer idx of group, fills it,
+/// and attaches it to ref. Data comes from ref->fData when present; otherwise
+/// it is assembled cell by cell from the group's vertex and colour streams.
+void plMetalDevice::FillVertexBufferRef(VertexBufferRef* ref, plGBufferGroup* group, uint32_t idx)
+{
+    const uint32_t vertSize = ref->fVertexSize;
+    const uint32_t vertStart = group->GetVertBufferStart(idx) * vertSize;
+    const uint32_t size = group->GetVertBufferEnd(idx) * vertSize - vertStart;
+
+    if (ref->GetBuffer()) {
+        hsAssert(size <= ref->GetBuffer()->length(), "Allocated buffer does not fit fill data");
+    }
+
+    if (!size) {
+        return;
+    }
+
+    // newBuffer takes MTL::ResourceOptions, not MTL::StorageMode. The two
+    // enums use different bit positions, so passing StorageModeManaged here
+    // selected a different option than intended.
+    MTL::Buffer* metalBuffer = fMetalDevice->newBuffer(size, MTL::ResourceStorageModeManaged);
+    ref->SetBuffer(metalBuffer);
+    uint8_t* buffer = (uint8_t*)ref->GetBuffer()->contents();
+
+    if (ref->fData) {
+        memcpy(buffer, ref->fData + vertStart, size);
+    } else {
+        hsAssert(0 == vertStart, "Offsets on non-interleaved data not supported");
+        hsAssert(group->GetVertBufferCount(idx) * vertSize == size, "Trailing dead space on non-interleaved data not supported");
+
+        uint8_t* ptr = buffer;
+
+        // Bytes per vertex that follow position + normal in the source stream
+        const uint32_t vertSmallSize = group->GetVertexLiteStride() - sizeof(hsPoint3) * 2;
+        uint8_t* srcVPtr = group->GetVertBufferData(idx);
+        plGBufferColor* const srcCPtr = group->GetColorBufferData(idx);
+
+        const size_t numCells = group->GetNumCells(idx);
+        for (size_t i = 0; i < numCells; i++) {
+            plGBufferCell* cell = group->GetCell(idx, i);
+
+            if (cell->fColorStart == uint32_t(-1)) {
+                /// Interleaved, do straight copy
+                memcpy(ptr, srcVPtr + cell->fVtxStart, cell->fLength * vertSize);
+                ptr += cell->fLength * vertSize;
+                hsAssert(size <= cell->fLength * vertSize, "Interleaved copy size mismatch");
+            } else {
+                hsStatusMessage("Non interleaved data");
+
+                /// Separated, gotta interleave
+                uint8_t* tempVPtr = srcVPtr + cell->fVtxStart;
+                plGBufferColor* tempCPtr = srcCPtr + cell->fColorStart;
+                for (uint32_t j = 0; j < cell->fLength; j++) {
+                    // position + normal
+                    memcpy(ptr, tempVPtr, sizeof(hsPoint3) * 2);
+                    ptr += sizeof(hsPoint3) * 2;
+                    tempVPtr += sizeof(hsPoint3) * 2;
+
+                    // packed colours come from the separate colour stream
+                    memcpy(ptr, &tempCPtr->fDiffuse, sizeof(uint32_t));
+                    ptr += sizeof(uint32_t);
+                    memcpy(ptr, &tempCPtr->fSpecular, sizeof(uint32_t));
+                    ptr += sizeof(uint32_t);
+
+                    // remainder of the vertex
+                    memcpy(ptr, tempVPtr, vertSmallSize);
+                    ptr += vertSmallSize;
+                    tempVPtr += vertSmallSize;
+                    tempCPtr++;
+                }
+            }
+        }
+
+        hsAssert((ptr - buffer) == size, "Didn't fill the buffer?");
+    }
+
+    // Managed storage keeps separate CPU and GPU copies; tell Metal which
+    // bytes the CPU wrote so they get synchronised to the GPU.
+    metalBuffer->didModifyRange(NS::Range(0, size));
+
+    metalBuffer->release();
+
+    /// Unlock and clean up
+    ref->SetRebuiltSinceUsed(true);
+    ref->SetDirty(false);
+}
+
+/// Repacks the group's source vertices into ref->fData for a volatile buffer,
+/// dropping skin weights (and skin indices when the format has them) from
+/// each vertex.
+void plMetalDevice::FillVolatileVertexBufferRef(plMetalDevice::VertexBufferRef* ref, plGBufferGroup* group, uint32_t idx)
+{
+    uint8_t* dst = ref->fData;
+    uint8_t* src = group->GetVertBufferData(idx);
+
+    size_t uvChanSize = plGBufferGroup::CalcNumUVs(group->GetVertexFormat()) * sizeof(hsPoint3);
+    uint8_t numWeights = (group->GetVertexFormat() & plGBufferGroup::kSkinWeightMask) >> 4;
+
+    for (uint32_t i = 0; i < ref->fCount; ++i) {
+        inlCopy(src, dst); // pre-pos
+
+        src += numWeights * sizeof(float); // weights
+
+        if (group->GetVertexFormat() & plGBufferGroup::kSkinIndices)
+            inlSkip(src); // indices
+
+        inlCopy(src, dst); // pre-normal
+        inlCopy(src, dst); // diffuse
+        inlCopy(src, dst); // specular
+
+        // UVWs
+        memcpy(dst, src, uvChanSize);
+        src += uvChanSize;
+        dst += uvChanSize;
+    }
+}
+
+/// Initialises iRef to describe index buffer idx of owner, registers it with
+/// the owner, and drops the caller's reference. The ref is left marked dirty.
+void plMetalDevice::SetupIndexBufferRef(plGBufferGroup* owner, uint32_t idx, plMetalDevice::IndexBufferRef* iRef)
+{
+    uint32_t numIndices = owner->GetIndexBufferCount(idx);
+    iRef->fCount = numIndices;
+    iRef->fOwner = owner;
+    iRef->fIndex = idx;
+    iRef->fRefTime = 0;
+
+    iRef->SetDirty(true);
+    iRef->SetRebuiltSinceUsed(true);
+
+    // Finish configuring the ref before giving up our reference to it
+    iRef->SetVolatile(owner->AreIdxVolatile());
+
+    owner->SetIndexBufferRef(idx, iRef);
+    hsRefCnt_SafeUnRef(iRef);
+}
+
+/// Marks an index buffer ref that has indices but no GPU buffer as
+/// non-volatile and dirty.
+void plMetalDevice::CheckIndexBuffer(plMetalDevice::IndexBufferRef* iRef)
+{
+    if (!iRef->GetBuffer() && iRef->fCount) {
+        iRef->SetVolatile(false);
+
+        iRef->SetDirty(true);
+        iRef->SetRebuiltSinceUsed(true);
+    }
+}
+
+/// Uploads the used range of index buffer idx into iRef's Metal buffer,
+/// (re)allocating the buffer when it is missing or smaller than the full
+/// index buffer.
+void plMetalDevice::FillIndexBufferRef(plMetalDevice::IndexBufferRef* iRef, plGBufferGroup* owner, uint32_t idx)
+{
+    uint32_t startIdx = owner->GetIndexBufferStart(idx);
+    uint32_t fullSize = owner->GetIndexBufferCount(idx) * sizeof(uint16_t);
+    uint32_t size = (owner->GetIndexBufferEnd(idx) - startIdx) * sizeof(uint16_t);
+
+    if (!size) {
+        return;
+    }
+
+    iRef->PrepareForWrite();
+    MTL::Buffer* indexBuffer = iRef->GetBuffer();
+    bool createdBuffer = false;
+    if (!indexBuffer || indexBuffer->length() < fullSize) {
+        indexBuffer = fMetalDevice->newBuffer(fullSize, MTL::ResourceStorageModeManaged);
+        iRef->SetBuffer(indexBuffer);
+        createdBuffer = true;
+    }
+
+    memcpy(((uint16_t*)indexBuffer->contents()) + startIdx, owner->GetIndexBufferData(idx) + startIdx, size);
+    // didModifyRange works in bytes: startIdx is an index count, so scale it
+    // to a byte offset to match the region written above.
+    indexBuffer->didModifyRange(NS::Range(startIdx * sizeof(uint16_t), size));
+
+    // Drop our creation reference only once we are done touching the buffer
+    if (createdBuffer) {
+        indexBuffer->release();
+    }
+
+    iRef->SetDirty(false);
+}
+
+/// Chooses the Metal pixel format for img, stores it on tRef, registers tRef
+/// as the image's device ref, and drops the caller's reference. No texture is
+/// created here; the ref is left marked dirty.
+void plMetalDevice::SetupTextureRef(plBitmap* img, plMetalDevice::TextureRef* tRef)
+{
+    tRef->fOwner = img;
+
+    plBitmap* imageToCheck = img;
+
+    // if it's a cubic texture, check the first face. The root img will give a false format that will cause us to decode wrong.
+    plCubicEnvironmap* cubicImg = dynamic_cast(img);
+    if (cubicImg) {
+        imageToCheck = cubicImg->GetFace(0);
+    }
+
+    if (imageToCheck->IsCompressed()) {
+        switch (imageToCheck->fDirectXInfo.fCompressionType) {
+            case plBitmap::DirectXInfo::kDXT1:
+                tRef->fFormat = MTL::PixelFormatBC1_RGBA;
+                break;
+            case plBitmap::DirectXInfo::kDXT5:
+                tRef->fFormat = MTL::PixelFormatBC3_RGBA;
+                break;
+        }
+    } else {
+        switch (imageToCheck->fUncompressedInfo.fType) {
+            case plBitmap::UncompressedInfo::kRGB8888:
+                tRef->fFormat = MTL::PixelFormatBGRA8Unorm;
+                break;
+            case plBitmap::UncompressedInfo::kRGB4444:
+                // we'll convert this on load to 8 bits per channel
+                // Metal doesn't support 4 bits per channel on all hardware
+                tRef->fFormat = MTL::PixelFormatBGRA8Unorm;
+                break;
+            case plBitmap::UncompressedInfo::kRGB1555:
+                tRef->fFormat = MTL::PixelFormatBGR5A1Unorm;
+                break;
+            case plBitmap::UncompressedInfo::kInten8:
+                tRef->fFormat = MTL::PixelFormatR8Uint;
+                break;
+            case plBitmap::UncompressedInfo::kAInten88:
+                tRef->fFormat = MTL::PixelFormatRG8Uint;
+                break;
+        }
+    }
+
+    tRef->SetDirty(true);
+
+    img->SetDeviceRef(tRef);
+    hsRefCnt_SafeUnRef(tRef);
+}
+
+/// Releases the objects that depend on the framebuffer format: the
+/// unprocessed output texture and the gamma adjust pipeline state.
+void plMetalDevice::ReleaseFramebufferObjects()
+{
+    if (fCurrentUnprocessedOutputTexture)
+        fCurrentUnprocessedOutputTexture->release();
+    // Null the pointer that was just released so it cannot be used or
+    // released a second time.
+    fCurrentUnprocessedOutputTexture = nullptr;
+    // The fragment output pointer was also cleared here before; keep doing so.
+    fCurrentFragmentOutputTexture = nullptr;
+
+    if (fGammaAdjustState)
+        fGammaAdjustState->release();
+    fGammaAdjustState = nullptr;
+}
+
+/// Records the framebuffer pixel format, releasing format-dependent objects
+/// when it changes.
+void plMetalDevice::SetFramebufferFormat(MTL::PixelFormat format)
+{
+    if (fFramebufferFormat != format) {
+        ReleaseFramebufferObjects();
+        fFramebufferFormat = format;
+    }
+}
+
+/// Marks the texture ref dirty when it has no Metal texture yet.
+void plMetalDevice::CheckTexture(plMetalDevice::TextureRef* tRef)
+{
+    if (!tRef->fTexture) {
+        tRef->SetDirty(true);
+    }
+}
+
+/// For compressed mipmaps, lowers tRef->fLevels until the level it names has
+/// both dimensions divisible by 4; fLevels becomes -1 when no level qualifies.
+/// Uncompressed mipmaps are left untouched. Leaves the mipmap's current level
+/// changed as a side effect.
+/// @return the number of usable levels (fLevels + 1), or 0 when none qualify.
+uint plMetalDevice::ConfigureAllowedLevels(plMetalDevice::TextureRef* tRef, plMipmap* mipmap)
+{
+    if (mipmap->IsCompressed()) {
+        mipmap->SetCurrLevel(tRef->fLevels);
+        while ((mipmap->GetCurrWidth() | mipmap->GetCurrHeight()) & 0x03) {
+            tRef->fLevels--;
+            hsAssert(tRef->fLevels >= 0, "How was this ever compressed?");
+            if (tRef->fLevels < 0) {
+                tRef->fLevels = -1;
+                break;
+            }
+            mipmap->SetCurrLevel(tRef->fLevels);
+        }
+    }
+
+    // A non-void function must return a value on every path; the result was
+    // previously left undefined.
+    return static_cast<uint>(tRef->fLevels + 1);
+}
+
+/// Uploads every allowed mip level of img into the given slice of
+/// tRef->fTexture, labels the texture with the image's key name, and clears
+/// the dirty flag.
+void plMetalDevice::PopulateTexture(plMetalDevice::TextureRef* tRef, plMipmap* img, uint slice)
+{
+    if (img->IsCompressed()) {
+        /*
+         Some cubic assets have inconsistent mipmap sizes between their faces.
+         The DX pipeline maintains separate structures noting the expected
+         mipmap sizes, and ignores the actual face sizes. This hack
+         makes the Metal pipeline ignore the actual face sizes and behave
+         as if all face sizes are equivalent to the first face. It does this
+         by computing the expected mipmap sizes on the fly.
+         This hack could be disabled if cube maps in the assets were
+         fixed to be consistent.
+         */
+#define HACK_LEVEL_SIZE 1
+
+#if HACK_LEVEL_SIZE
+        NS::UInteger width = tRef->fTexture->width();
+        NS::UInteger height = tRef->fTexture->height();
+#endif
+
+        if (tRef->fLevels == -1) {
+            // hsAssert only fires on a false expression; the previous
+            // constant 1 made this check a no-op.
+            hsAssert(0, "Bad texture found");
+            return;
+        }
+
+        for (int lvl = 0; lvl <= tRef->fLevels; lvl++) {
+            img->SetCurrLevel(lvl);
+#if HACK_LEVEL_SIZE
+            // Each level halves the base size; an integer shift gives the
+            // same truncated result as dividing by exp2(lvl).
+            NS::UInteger levelWidth = width >> lvl;
+            NS::UInteger levelHeight = height >> lvl;
+#else
+            NS::UInteger levelWidth = img->GetCurrWidth();
+            NS::UInteger levelHeight = img->GetCurrHeight();
+#endif
+
+            switch (img->fDirectXInfo.fCompressionType) {
+                case plBitmap::DirectXInfo::kDXT1:
+                    tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, levelWidth, levelHeight), img->GetCurrLevel(), slice, img->GetCurrLevelPtr(), levelWidth * 2, 0);
+                    break;
+                case plBitmap::DirectXInfo::kDXT5:
+                    // NOTE(review): this path uses the image's own level size
+                    // rather than levelWidth/levelHeight, so the size hack
+                    // above only affects DXT1 - confirm that is intended.
+                    tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, img->GetCurrWidth(), img->GetCurrHeight()), img->GetCurrLevel(), slice, img->GetCurrLevelPtr(), img->GetCurrWidth() * 4, 0);
+                    break;
+            }
+        }
+    } else {
+        for (int lvl = 0; lvl <= tRef->fLevels; lvl++) {
+            img->SetCurrLevel(lvl);
+
+            if (img->GetCurrLevelPtr()) {
+                if
(img->fUncompressedInfo.fType == plBitmap::UncompressedInfo::kRGB4444) {
+                    // 4-bit-per-channel source pixels are expanded into a
+                    // temporary buffer before upload (the texture format for
+                    // kRGB4444 is 8 bits per channel).
+                    // NOTE(review): channel values are copied without
+                    // rescaling from 0..15 - confirm the destination element
+                    // type accounts for that.
+                    struct RGBA4444Component
+                    {
+                        unsigned r : 4;
+                        unsigned g : 4;
+                        unsigned b : 4;
+                        unsigned a : 4;
+                    };
+
+                    RGBA4444Component* in = (RGBA4444Component*)img->GetCurrLevelPtr();
+                    auto out = std::make_unique(img->GetCurrHeight() * img->GetCurrWidth());
+
+                    for (int i = 0; i < (img->GetCurrWidth() * img->GetCurrHeight()); i++) {
+                        out[i].r = in[i].r;
+                        out[i].g = in[i].g;
+                        out[i].b = in[i].b;
+                        out[i].a = in[i].a;
+                    }
+
+                    tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, img->GetCurrWidth(), img->GetCurrHeight()), img->GetCurrLevel(), slice, out.get(), img->GetCurrWidth() * 4, 0);
+                } else {
+                    // NOTE(review): bytes-per-row is width * 4 for every
+                    // uncompressed type - confirm this is right for the
+                    // formats that are not 4 bytes per pixel.
+                    tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, img->GetCurrWidth(), img->GetCurrHeight()), img->GetCurrLevel(), slice, img->GetCurrLevelPtr(), img->GetCurrWidth() * 4, 0);
+                }
+            } else {
+                hsAssert(0, "Texture with no image data?\n");
+            }
+        }
+    }
+
+    CFStringRef name = CFStringCreateWithSTString(img->GetKeyName());
+    tRef->fTexture->setLabel(reinterpret_cast(name));
+    CFRelease(name);
+    tRef->SetDirty(false);
+}
+
+/// (Re)creates the 2D Metal texture for tRef from img and uploads its mip
+/// levels. Does nothing when img has no image data.
+void plMetalDevice::MakeTextureRef(plMetalDevice::TextureRef* tRef, plMipmap* img)
+{
+    if (!img->GetImage()) {
+        return;
+    }
+
+    if (tRef->fTexture) {
+        tRef->fTexture->release();
+    }
+
+    tRef->fLevels = img->GetNumLevels() - 1;
+    // FIXME: Is this texture check actually needed
+    // if(!tRef->fTexture) {
+    ConfigureAllowedLevels(tRef, img);
+
+    // texture doesn't exist yet, create it
+    // Mipmaps are only usable when at least one level beyond the base
+    // survived ConfigureAllowedLevels (fLevels is -1 for an unusable texture).
+    const bool supportsMipMap = tRef->fLevels > 0;
+    MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::texture2DDescriptor(tRef->fFormat, img->GetWidth(), img->GetHeight(), supportsMipMap);
+    descriptor->setUsage(MTL::TextureUsageShaderRead);
+
+    // Metal gets mad if we set this with 0, only set it if we know there are mipmaps
+    if (supportsMipMap) {
+        descriptor->setMipmapLevelCount(tRef->fLevels + 1);
+    }
+
+    descriptor->setStorageMode(MTL::StorageModeManaged);
+
+    tRef->fTexture = fMetalDevice->newTexture(descriptor);
+    PopulateTexture(tRef, img, 0);
+    //}
+
+    tRef->SetDirty(false);
+}
+
+/// (Re)creates the cube Metal texture for tRef and uploads the six faces of
+/// img into the matching cube slices.
+void plMetalDevice::MakeCubicTextureRef(plMetalDevice::TextureRef* tRef, plCubicEnvironmap* img)
+{
+    // Release any texture from an earlier build of this ref, as
+    // MakeTextureRef does, so rebuilding does not leak it.
+    if (tRef->fTexture) {
+        tRef->fTexture->release();
+        tRef->fTexture = nullptr;
+    }
+
+    MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::textureCubeDescriptor(tRef->fFormat, img->GetFace(0)->GetWidth(), tRef->fLevels != 0);
+
+    if (tRef->fLevels != 0) {
+        descriptor->setMipmapLevelCount(tRef->fLevels + 1);
+    }
+    descriptor->setUsage(MTL::TextureUsageShaderRead);
+
+    tRef->fTexture = fMetalDevice->newTexture(descriptor);
+
+    // Plasma face index -> Metal cube slice
+    static constexpr uint kFaceMapping[] = {
+        1, // kLeftFace
+        0, // kRightFace
+        4, // kFrontFace
+        5, // kBackFace
+        2, // kTopFace
+        3  // kBottomFace
+    };
+    for (size_t i = 0; i < 6; i++) {
+        PopulateTexture(tRef, img->GetFace(i), kFaceMapping[i]);
+    }
+
+    tRef->SetDirty(false);
+}
+
+/// Stores the projection matrix in SIMD form.
+void plMetalDevice::SetProjectionMatrix(const hsMatrix44& src)
+{
+    hsMatrix2SIMD(src, &fMatrixProj);
+}
+
+/// Stores the world-to-camera matrix and its inverse (camera-to-world).
+void plMetalDevice::SetWorldToCameraMatrix(const hsMatrix44& src)
+{
+    hsMatrix44 inv;
+    src.GetInverse(&inv);
+
+    hsMatrix2SIMD(src, &fMatrixW2C);
+    hsMatrix2SIMD(inv, &fMatrixC2W);
+}
+
+/// Stores the local-to-world matrix and its inverse (world-to-local).
+void plMetalDevice::SetLocalToWorldMatrix(const hsMatrix44& src)
+{
+    hsMatrix44 inv;
+    src.GetInverse(&inv);
+
+    hsMatrix2SIMD(src, &fMatrixL2W);
+    hsMatrix2SIMD(inv, &fMatrixW2L);
+}
+
+void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable)
+{
+    fCurrentCommandBuffer = fCommandQueue->commandBuffer();
+    fCurrentCommandBuffer->retain();
+
+    SetFramebufferFormat(drawable->texture()->pixelFormat());
+
+    bool depthNeedsRebuild = fCurrentDrawableDepthTexture == nullptr;
+    depthNeedsRebuild |= drawable->texture()->width() != fCurrentDrawableDepthTexture->width() || drawable->texture()->height() != fCurrentDrawableDepthTexture->height();
+
+    // cache the depth buffer, we'll just clear it every time.
+ if (depthNeedsRebuild) { + if (fCurrentDrawableDepthTexture) { + fCurrentDrawableDepthTexture->release(); + fCurrentFragmentMSAAOutputTexture->release(); + } + + MTL::TextureDescriptor* depthTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatDepth32Float_Stencil8, + drawable->texture()->width(), + drawable->texture()->height(), + false); + if (fMetalDevice->supportsFamily(MTL::GPUFamilyApple1) && fSampleCount == 1) { + depthTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); + } else { + depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); + } + depthTextureDescriptor->setUsage(MTL::TextureUsageRenderTarget); + + if (fSampleCount != 1) { + // MSSA depth and color output + depthTextureDescriptor->setSampleCount(fSampleCount); + depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); + depthTextureDescriptor->setTextureType(MTL::TextureType2DMultisample); + if (fMetalDevice->supportsFamily(MTL::GPUFamilyApple1) && fSampleCount == 1) { + depthTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); + } else { + depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); + } + fCurrentDrawableDepthTexture = fMetalDevice->newTexture(depthTextureDescriptor); + + MTL::TextureDescriptor* msaaColorTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(drawable->texture()->pixelFormat(), + drawable->texture()->width(), + drawable->texture()->height(), + false); + msaaColorTextureDescriptor->setUsage(MTL::TextureUsageRenderTarget); + if (fMetalDevice->supportsFamily(MTL::GPUFamilyApple1) && fSampleCount == 1) { + msaaColorTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); + } else { + msaaColorTextureDescriptor->setStorageMode(MTL::StorageModePrivate); + } + msaaColorTextureDescriptor->setTextureType(MTL::TextureType2DMultisample); + msaaColorTextureDescriptor->setSampleCount(fSampleCount); + fCurrentFragmentMSAAOutputTexture = fMetalDevice->newTexture(msaaColorTextureDescriptor); + 
} else { + fCurrentDrawableDepthTexture = fMetalDevice->newTexture(depthTextureDescriptor); + } + } + + // Do we need to create a unprocessed output texture? + // If the depth needs to be rebuilt - we probably need to rebuild this one too + if ((fCurrentUnprocessedOutputTexture && depthNeedsRebuild) || (fCurrentUnprocessedOutputTexture == nullptr && NeedsPostprocessing())) { + MTL::TextureDescriptor* mainPassDescriptor = MTL::TextureDescriptor::texture2DDescriptor(drawable->texture()->pixelFormat(), drawable->texture()->width(), drawable->texture()->height(), false); + mainPassDescriptor->setStorageMode(MTL::StorageModePrivate); + mainPassDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageRenderTarget); + fCurrentUnprocessedOutputTexture->release(); + fCurrentUnprocessedOutputTexture = fMetalDevice->newTexture(mainPassDescriptor); + } + + fCurrentDrawable = drawable->retain(); +} + +void plMetalDevice::StartPipelineBuild(plMetalPipelineRecord& record, std::condition_variable** condOut) +{ + fConditionMap[record] = new std::condition_variable(); + if (condOut) { + *condOut = fConditionMap[record]; + } + + if (fNewPipelineStateMap[record] != nullptr) { + // The shader is already compiled. 
+ return; + } + + MTL::Library* library = fMetalDevice->newDefaultLibrary(); + + std::shared_ptr pipelineState = record.state; + + MTL::RenderPipelineDescriptor* descriptor = MTL::RenderPipelineDescriptor::alloc()->init(); + descriptor->setLabel(pipelineState->GetDescription()); + + const MTL::Function* vertexFunction = pipelineState->GetVertexFunction(library); + const MTL::Function* fragmentFunction = pipelineState->GetFragmentFunction(library); + descriptor->setVertexFunction(vertexFunction); + descriptor->setFragmentFunction(fragmentFunction); + + descriptor->colorAttachments()->object(0)->setBlendingEnabled(true); + pipelineState->ConfigureBlend(descriptor->colorAttachments()->object(0)); + + MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::vertexDescriptor(); + pipelineState->ConfigureVertexDescriptor(vertexDescriptor); + descriptor->setVertexDescriptor(vertexDescriptor); + descriptor->setDepthAttachmentPixelFormat(record.depthFormat); + descriptor->colorAttachments()->object(0)->setPixelFormat(record.colorFormat); + + descriptor->setSampleCount(record.sampleCount); + + NS::Error* error; + fMetalDevice->newRenderPipelineState(descriptor, ^(MTL::RenderPipelineState* pipelineState, NS::Error* error) { + if (error) { + // leave the condition in place for now, we don't want to + // retry if the shader is defective. 
the condition will + // prevent retries + hsAssert(0, error->localizedDescription()->cString(NS::UTF8StringEncoding)); + } else { + plMetalLinkedPipeline* linkedPipeline = new plMetalLinkedPipeline(); + linkedPipeline->pipelineState = pipelineState->retain(); + linkedPipeline->fragFunction = fragmentFunction; + linkedPipeline->vertexFunction = vertexFunction; + + fNewPipelineStateMap[record] = linkedPipeline; + // signal that we're done + fConditionMap[record]->notify_all(); + } + }); + + descriptor->release(); + library->release(); +} + +plMetalDevice::plMetalLinkedPipeline* plMetalDevice::PipelineState(plMetalPipelineState* pipelineState) +{ + MTL::PixelFormat depthFormat = fCurrentDepthFormat; + MTL::PixelFormat colorFormat = fCurrentFragmentOutputTexture->pixelFormat(); + + plMetalPipelineRecord record = { + depthFormat, + colorFormat, + CurrentTargetSampleCount()}; + + record.state = std::shared_ptr(pipelineState->Clone()); + + plMetalLinkedPipeline* renderState = fNewPipelineStateMap[record]; + + // if it exists, return it, we're done + if (renderState) { + return renderState; + } + + // check and see if we're already building it. If so, wait. + // Note: even if it already exists, this lock will be kept, and it will + // let us through. This is to prevent race conditions where the render state + // was null, but maybe in the time it took us to get here the state compiled. 
+ std::condition_variable* alreadyBuildingCondition = fConditionMap[record]; + if (alreadyBuildingCondition) { + std::unique_lock lock(fPipelineCreationMtx); + alreadyBuildingCondition->wait(lock); + + // should be returning the render state here, if not it failed to build + // we'll allow the null return + return fNewPipelineStateMap[record]; + } + + // it doesn't exist, start a build and wait + // only render thread is allowed to start builds, + // shouldn't be race conditions here + StartPipelineBuild(record, &alreadyBuildingCondition); + std::unique_lock lock(fPipelineCreationMtx); + alreadyBuildingCondition->wait(lock); + + // should be returning the render state here, if not it failed to build + // we'll allow the null return + return fNewPipelineStateMap[record]; +} + +std::condition_variable* plMetalDevice::PrewarmPipelineStateFor(plMetalPipelineState* pipelineState) +{ + MTL::PixelFormat depthFormat = fCurrentDepthFormat; + MTL::PixelFormat colorFormat = fCurrentFragmentOutputTexture->pixelFormat(); + + plMetalPipelineRecord record = { + depthFormat, + colorFormat, + CurrentTargetSampleCount()}; + + record.state = std::shared_ptr(pipelineState->Clone()); + // only render thread is allowed to prewarm, no race conditions around + // fConditionMap creation + if (!fNewPipelineStateMap[record] && fConditionMap[record]) { + std::condition_variable* condOut; + StartPipelineBuild(record, &condOut); + return condOut; + } + return nullptr; +} + +bool plMetalDevice::plMetalPipelineRecord::operator==(const plMetalPipelineRecord& p) const +{ + return depthFormat == p.depthFormat && + colorFormat == p.colorFormat && + sampleCount == p.sampleCount && + state->operator==(*p.state); +} + +MTL::CommandBuffer* plMetalDevice::GetCurrentCommandBuffer() +{ + if (fCurrentOffscreenCommandBuffer) { + return fCurrentOffscreenCommandBuffer; + } + return fCurrentCommandBuffer; +} + +void plMetalDevice::SubmitCommandBuffer() +{ + if (fBlitCommandEncoder) { + 
fBlitCommandEncoder->endEncoding(); + fBlitCommandBuffer->commit(); + + fBlitCommandBuffer->release(); + fBlitCommandEncoder->release(); + + fBlitCommandBuffer = nullptr; + fBlitCommandEncoder = nullptr; + } + + fCurrentRenderTargetCommandEncoder->endEncoding(); + fCurrentRenderTargetCommandEncoder->release(); + fCurrentRenderTargetCommandEncoder = nil; + + if (NeedsPostprocessing()) { + PostprocessIntoDrawable(); + } + + fCurrentCommandBuffer->presentDrawable(fCurrentDrawable); + fCurrentCommandBuffer->commit(); + fCurrentCommandBuffer->release(); + fCurrentCommandBuffer = nullptr; + + fCurrentDrawable->release(); + fCurrentDrawable = nullptr; + + // Reset the clear colors for the next pass + // Metal clears on framebuffer load - so don't cause a clear + // command in this pass to affect the next pass. + fClearRenderTargetColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); + fClearDrawableColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); + fShouldClearRenderTarget = false; + fShouldClearDrawable = false; + fClearRenderTargetDepth = 1.0; + fClearDrawableDepth = 1.0; +} + +MTL::SamplerState* plMetalDevice::SampleStateForClampFlags(hsGMatState::hsGMatClampFlags sampleState) const +{ + return fSamplerStates[sampleState]; +} + +void plMetalDevice::CreateGammaAdjustState() +{ + MTL::RenderPipelineDescriptor* gammaDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); + MTL::Library* library = fMetalDevice->newDefaultLibrary(); + + gammaDescriptor->setVertexFunction(library->newFunction(MTLSTR("gammaCorrectVertex"))->autorelease()); + gammaDescriptor->setFragmentFunction(library->newFunction(MTLSTR("gammaCorrectFragment"))->autorelease()); + + library->release(); + + gammaDescriptor->colorAttachments()->object(0)->setPixelFormat(fFramebufferFormat); + + NS::Error* error; + fGammaAdjustState->release(); + fGammaAdjustState = fMetalDevice->newRenderPipelineState(gammaDescriptor, &error); + gammaDescriptor->release(); +} + +void plMetalDevice::PostprocessIntoDrawable() 
+{ + if (!fGammaAdjustState) { + CreateGammaAdjustState(); + } + + // Gamma adjust + MTL::RenderPassDescriptor* gammaPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor(); + gammaPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionDontCare); + gammaPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentDrawable->texture()); + gammaPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionStore); + + MTL::RenderCommandEncoder* gammaAdjustEncoder = fCurrentCommandBuffer->renderCommandEncoder(gammaPassDescriptor); + + gammaAdjustEncoder->setRenderPipelineState(fGammaAdjustState); + + static const float fullFrameCoords[16] = { + // first pair is vertex, second pair is texture + -1, -1, 0, 1, + 1, -1, 1, 1, + -1, 1, 0, 0, + 1, 1, 1, 0 + }; + gammaAdjustEncoder->setVertexBytes(&fullFrameCoords, sizeof(fullFrameCoords), 0); + gammaAdjustEncoder->setFragmentTexture(fCurrentUnprocessedOutputTexture, 0); + gammaAdjustEncoder->setFragmentTexture(fGammaLUTTexture, 1); + gammaAdjustEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); + gammaAdjustEncoder->endEncoding(); +} + +size_t plMetalDevice::plMetalPipelineRecordHashFunction ::operator()(plMetalPipelineRecord const& s) const noexcept +{ + size_t value = std::hash()(s.depthFormat); + value ^= std::hash()(s.colorFormat); + value ^= std::hash()(*s.state); + value ^= std::hash()(s.sampleCount); + return value; +} + +MTL::RenderCommandEncoder* plMetalDevice::CurrentRenderCommandEncoder() +{ + // return the current render command encoder + // if a framebuffer wasn't set, assume screen, emulating GL + if (fCurrentRenderTargetCommandEncoder) { + return fCurrentRenderTargetCommandEncoder; + } + + if (!fCurrentRenderTargetCommandEncoder) { + BeginNewRenderPass(); + + if (fCurrentRenderTarget) { + fClearRenderTargetColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); + fShouldClearRenderTarget = false; + fClearRenderTargetDepth = 
1.0; + } else { + fClearDrawableColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); + fShouldClearDrawable = false; + fClearDrawableDepth = 1.0; + } + } + + return fCurrentRenderTargetCommandEncoder; +} + +CA::MetalDrawable* plMetalDevice::GetCurrentDrawable() const +{ + return fCurrentDrawable; +} + +void plMetalDevice::BlitTexture(MTL::Texture* src, MTL::Texture* dst) +{ + // FIXME: BlitTexture current unused - this used to create private GPU only textures through a copy from a CPU texture. + if (fBlitCommandEncoder == nullptr) { + fBlitCommandBuffer = fCommandQueue->commandBuffer()->retain(); + // enqueue so we go to the front of the line before render + fBlitCommandBuffer->enqueue(); + fBlitCommandEncoder = fBlitCommandBuffer->blitCommandEncoder()->retain(); + } + + fBlitCommandEncoder->copyFromTexture(src, 0, 0, MTL::Origin(0, 0, 0), MTL::Size(src->width(), src->height(), 0), dst, 0, 0, MTL::Origin(0, 0, 0)); +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h new file mode 100644 index 0000000000..b7d493659b --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -0,0 +1,270 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ +#ifndef _plMetalDevice_h_ +#define _plMetalDevice_h_ + +#include + +#include +#include +#include +#include + +#include "HeadSpin.h" +#include "hsGMatState.h" +#include "hsMatrix44.h" +#include "plMetalDeviceRef.h" +#include "plSurface/plShader.h" +#include "plSurface/plShaderTable.h" + +class plMetalPipeline; +class plRenderTarget; +class plBitmap; +class plMipmap; +class plCubicEnvironmap; +class plLayerInterface; +class plMetalPipelineState; + +// NOTE: Results of this will be row major +matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst); + +class plMetalDevice +{ + friend plMetalPipeline; + friend class plMetalMaterialShaderRef; + friend class plMetalPlateManager; + friend class plMetalPipelineState; + +public: + typedef plMetalVertexBufferRef VertexBufferRef; + typedef plMetalIndexBufferRef IndexBufferRef; + typedef plMetalTextureRef TextureRef; + +public: + plMetalPipeline* fPipeline; + + hsWindowHndl 
fDevice; + hsWindowHndl fWindow; + + const char* fErrorMsg; + + MTL::RenderCommandEncoder* CurrentRenderCommandEncoder(); + MTL::Device* fMetalDevice; + MTL::CommandQueue* fCommandQueue; + MTL::Buffer* fCurrentIndexBuffer; + + size_t fActiveThread; + matrix_float4x4 fMatrixProj; + matrix_float4x4 fMatrixL2W; + matrix_float4x4 fMatrixW2L; + matrix_float4x4 fMatrixW2C; + matrix_float4x4 fMatrixC2W; + +public: + struct plMetalLinkedPipeline + { + const MTL::RenderPipelineState* pipelineState; + const MTL::Function* fragFunction; + const MTL::Function* vertexFunction; + }; + + plMetalDevice(); + + bool InitDevice(); + + void Shutdown(); + + /** + * Set rendering to the specified render target. + * + * Null rendertarget is the primary. Invalidates the state as required by + * experience, not documentation. + */ + void SetRenderTarget(plRenderTarget* target); + + /** Translate our viewport into a GL viewport. */ + void SetViewport(); + + bool BeginRender(); + + /* Device Ref Functions **************************************************/ + void SetupVertexBufferRef(plGBufferGroup* owner, uint32_t idx, VertexBufferRef* vRef); + void CheckStaticVertexBuffer(VertexBufferRef* vRef, plGBufferGroup* owner, uint32_t idx); + void FillVertexBufferRef(VertexBufferRef* ref, plGBufferGroup* group, uint32_t idx); + void FillVolatileVertexBufferRef(VertexBufferRef* ref, plGBufferGroup* group, uint32_t idx); + void SetupIndexBufferRef(plGBufferGroup* owner, uint32_t idx, IndexBufferRef* iRef); + void CheckIndexBuffer(IndexBufferRef* iRef); + void FillIndexBufferRef(IndexBufferRef* iRef, plGBufferGroup* owner, uint32_t idx); + + void SetupTextureRef(plBitmap* img, TextureRef* tRef); + void CheckTexture(TextureRef* tRef); + void MakeTextureRef(TextureRef* tRef, plMipmap* img); + void MakeCubicTextureRef(TextureRef* tRef, plCubicEnvironmap* img); + + const char* GetErrorString() const { return fErrorMsg; } + + void SetProjectionMatrix(const hsMatrix44& src); + void 
SetWorldToCameraMatrix(const hsMatrix44& src); + void SetLocalToWorldMatrix(const hsMatrix44& src); + + void PopulateTexture(plMetalDevice::TextureRef* tRef, plMipmap* img, uint slice); + uint ConfigureAllowedLevels(plMetalDevice::TextureRef* tRef, plMipmap* mipmap); + + // stencil states are expensive to make, they should be cached + // FIXME: There should be a function to pair these with hsGMatState + MTL::DepthStencilState* fNoZReadStencilState; + MTL::DepthStencilState* fNoZWriteStencilState; + MTL::DepthStencilState* fNoZReadOrWriteStencilState; + MTL::DepthStencilState* fReverseZStencilState; + MTL::DepthStencilState* fDefaultStencilState; + uint8_t fSampleCount; + + /// Create a new command buffer to encode all the operations needed to draw a frame + // Currently requires a CA drawable and not a Metal drawable. In since CA drawable is only abstract implementation I know about, not sure where we would find others? + void CreateNewCommandBuffer(CA::MetalDrawable* drawable); + MTL::CommandBuffer* GetCurrentCommandBuffer(); + CA::MetalDrawable* GetCurrentDrawable() const; + /// Submit the command buffer to the GPU and draws all the render passes. Clears the current command buffer. 
+ void SubmitCommandBuffer(); + void Clear(bool shouldClearColor, simd_float4 clearColor, bool shouldClearDepth, float clearDepth); + + void SetMaxAnsiotropy(uint8_t maxAnsiotropy); + void SetMSAASampleCount(uint8_t sampleCount); + + MTL::SamplerState* SampleStateForClampFlags(hsGMatState::hsGMatClampFlags sampleState) const; + + NS::UInteger CurrentTargetSampleCount() const + { + if (fCurrentRenderTarget) { + return 1; + } else { + return fSampleCount; + } + } + + void BlitTexture(MTL::Texture* src, MTL::Texture* dst); + + void EncodeBlur(MTL::CommandBuffer* commandBuffer, MTL::Texture* texture, float sigma); + + MTL::PixelFormat GetFramebufferFormat() const { return fFramebufferFormat; }; + +private: + struct plMetalPipelineRecord + { + MTL::PixelFormat depthFormat; + MTL::PixelFormat colorFormat; + NS::UInteger sampleCount; + std::shared_ptr state; + + bool operator==(const plMetalPipelineRecord& p) const; + }; + + struct plMetalPipelineRecordHashFunction + { + std::size_t operator()(plMetalPipelineRecord const& s) const noexcept; + }; + + std::unordered_map fNewPipelineStateMap; + // the condition map allows consumers of pipeline states to wait until the pipeline state is ready + std::unordered_map fConditionMap; + std::mutex fPipelineCreationMtx; + void StartPipelineBuild(plMetalPipelineRecord& record, std::condition_variable** condOut); + std::condition_variable* PrewarmPipelineStateFor(plMetalPipelineState* pipelineState); + + void SetOutputLayer(CA::MetalLayer* layer) { fLayer = layer; } + CA::MetalLayer* GetOutputLayer() const { return fLayer; }; + +protected: + plMetalLinkedPipeline* PipelineState(plMetalPipelineState* pipelineState); + + MTL::Texture* fGammaLUTTexture; + + void SetFramebufferFormat(MTL::PixelFormat format); + +private: + MTL::PixelFormat fFramebufferFormat; + + // these are internal bits for backing the current render pass + // private because the functions should be used to keep a consistant + // render pass state + MTL::CommandBuffer* 
fCurrentCommandBuffer; + MTL::CommandBuffer* fCurrentOffscreenCommandBuffer; + MTL::RenderCommandEncoder* fCurrentRenderTargetCommandEncoder; + + CA::MetalLayer* fLayer; + + MTL::Texture* fCurrentDrawableDepthTexture; + MTL::Texture* fCurrentFragmentOutputTexture; + MTL::Texture* fCurrentUnprocessedOutputTexture; + MTL::Texture* fCurrentFragmentMSAAOutputTexture; + + CA::MetalDrawable* fCurrentDrawable; + MTL::PixelFormat fCurrentDepthFormat; + simd_float4 fClearRenderTargetColor; + simd_float4 fClearDrawableColor; + bool fShouldClearRenderTarget; + bool fShouldClearDrawable; + float fClearRenderTargetDepth; + float fClearDrawableDepth; + plRenderTarget* fCurrentRenderTarget; + MTL::SamplerState* fSamplerStates[4]; + + MTL::CommandBuffer* fBlitCommandBuffer; + MTL::BlitCommandEncoder* fBlitCommandEncoder; + + bool NeedsPostprocessing() const + { + return fGammaLUTTexture != nullptr; + } + void PostprocessIntoDrawable(); + void CreateGammaAdjustState(); + MTL::RenderPipelineState* fGammaAdjustState; + + void BeginNewRenderPass(); + void ReleaseSamplerStates(); + void ReleaseFramebufferObjects(); + + // Blur states + std::unordered_map fBlurShaders; +}; + +#endif diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm new file mode 100644 index 0000000000..666f1a7008 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm @@ -0,0 +1,102 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
/**
 * Encodes a Gaussian blur of `texture` with the given sigma into
 * `commandBuffer`.
 *
 * The current render pass is ended first. Blur kernels are created once per
 * sigma value and cached in fBlurShaders. The blur is attempted in place; if
 * Metal has to write to a freshly allocated texture instead, the result is
 * blitted back into `texture`.
 */
void plMetalDevice::EncodeBlur(MTL::CommandBuffer* commandBuffer, MTL::Texture* texture,
                               float sigma)
{
    // FIXME: Blurring currently ends a pass - and restarting a pass will possibly clear one or more
    // buffers. Technically shadow blurring only happens at the end of the render pass though...
    CurrentRenderCommandEncoder()->endEncoding();
    fCurrentRenderTargetCommandEncoder->release();
    fCurrentRenderTargetCommandEncoder = nil;

    // look up the shader by sigma value
    MPSImageGaussianBlur* blur = (MPSImageGaussianBlur*)fBlurShaders[sigma];

    // we don't have one, need to create one
    if (!blur) {
        blur = [[MPSImageGaussianBlur alloc] initWithDevice:(id<MTLDevice>)fMetalDevice
                                                      sigma:sigma];
        fBlurShaders[sigma] = (NS::Object*)blur;
    }

    // we'd like to do the blur in place, but Metal might not let us.
    // if it allocates a new texture, we'll have to blit that data back to the original
    id<MTLTexture> originalTexture = (id<MTLTexture>)texture;
    id<MTLTexture> destTexture = originalTexture;
    BOOL encoded =
        [blur encodeToCommandBuffer:(id<MTLCommandBuffer>)commandBuffer
                     inPlaceTexture:&destTexture
              fallbackCopyAllocator:^id<MTLTexture>(MPSKernel* kernel,
                                                    id<MTLCommandBuffer> allocatorCommandBuffer,
                                                    id<MTLTexture> sourceTexture) {
                  // this copy allocator will release the original texture - that texture is
                  // important, don't let it
                  [sourceTexture retain];
                  MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::texture2DDescriptor(
                      (MTL::PixelFormat)sourceTexture.pixelFormat, sourceTexture.width,
                      sourceTexture.height, false);
                  descriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite);
                  return (id<MTLTexture>)fMetalDevice->newTexture(descriptor)->autorelease();
              }];

    // did Metal change our original texture?
    if (encoded && destTexture != originalTexture) {
        // we'll need to blit the dest texture back to the source.
        // The blit goes into the same command buffer the blur was encoded into,
        // so it is ordered after the blur.
        id<MTLBlitCommandEncoder> blitEncoder =
            [(id<MTLCommandBuffer>)commandBuffer blitCommandEncoder];
        [blitEncoder copyFromTexture:destTexture
                         sourceSlice:0
                         sourceLevel:0
                        sourceOrigin:MTLOriginMake(0, 0, 0)
                          sourceSize:MTLSizeMake(destTexture.width, destTexture.height, 1)
                           toTexture:originalTexture
                    destinationSlice:0
                    destinationLevel:0
                   destinationOrigin:MTLOriginMake(0, 0, 0)];
        [blitEncoder endEncoding];
    }
}
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ +#ifndef _plMetalDeviceRef_inc_ +#define _plMetalDeviceRef_inc_ + +#include +#include + +#include "HeadSpin.h" +#include "hsGDeviceRef.h" + +class plGBufferGroup; +class plBitmap; +class plRenderTarget; + +class plMetalDeviceRef : public hsGDeviceRef +{ +protected: + plMetalDeviceRef* fNext; + plMetalDeviceRef** fBack; + +public: + void Unlink(); + void Link(plMetalDeviceRef** back); + plMetalDeviceRef* GetNext() const { return fNext; } + bool IsLinked() { return fBack != nullptr; } const + + bool HasFlag(uint32_t f) const { return 0 != (fFlags & f); } + void SetFlag(uint32_t f, bool on) + { + if (on) + fFlags |= f; + else + fFlags &= ~f; + } + + virtual void Release() = 0; + + plMetalDeviceRef(); + virtual ~plMetalDeviceRef(); +}; + +/* + The buffer pool stores and recycles buffers so that Plasma can encode GPU commands and render in + parallel. 
That means we can't touch buffers the GPU is using, and if a pass or frame rewrites a
+ buffer we have to make sure it's not stomping on something that is already attached to a frame.
+ Because Metal can triple buffer, the first dimension of caching is hard coded to 3. Some ages
+ will also rewrite buffers an unspecified number of times between render passes. For example: A
+ reflection render and a main render might have different index buffers. So the second dimension of
+ caching uses an unbounded vector that will hold enough buffers to render in any one age.
+
+ Buffer pools do not allocate buffers, they only store them. The outside caller is responsible for
+ allocating a buffer and then setting it. The buffer pool will retain any buffers within the pool,
+ and automatically release them when they are overwritten or the pool is deallocated.
+
+ Because buffers are only stored on write, and no allocations happen within the pool, overhead is
+ kept low for static buffers. Completely static buffers will never expand the pool if they only write once.
+ */
+class plMetalBufferPoolRef : public plMetalDeviceRef
+{
+public:
+    uint32_t fCurrentFrame;       // frame slot (0..2) that the current write targets
+    uint32_t fCurrentPass;        // pass index within the current frame slot
+    uint32_t fLastWriteFrameTime; // fFrameTime value seen by the last PrepareForWrite()
+
+    // Initializers are listed in declaration order, which is the order they run in.
+    plMetalBufferPoolRef() : plMetalDeviceRef(),
+                             fCurrentFrame(),
+                             fCurrentPass(),
+                             fLastWriteFrameTime(),
+                             fBuffer()
+    {
+    }
+
+    // The pool holds a retain on every stored buffer, so drop them when the pool is destroyed.
+    // This cannot be left to Release(): plMetalVertexBufferRef and plMetalIndexBufferRef
+    // override Release() and only mark themselves dirty.
+    virtual ~plMetalBufferPoolRef() { IReleaseBuffers(); }
+
+    // Prepare for write must be called anytime a new pass is going to write a buffer. It moves
+    // internal record keeping to reflect that either a new frame or new pass is about to write
+    // to the pool. Afterwards GetBuffer() returns the buffer already stored for this frame slot
+    // and pass, or nullptr if nothing has been stored there yet.
+    void PrepareForWrite()
+    {
+        if (fLastWriteFrameTime != fFrameTime) {
+            // First write since the frame time changed: restart pass counting and move on to
+            // the next frame slot.
+            fCurrentPass = 0;
+            fLastWriteFrameTime = fFrameTime;
+            fCurrentFrame = (fCurrentFrame + 1) % kNumFrameSlots;
+        } else {
+            fCurrentPass++;
+        }
+
+        // Focus the buffer stored for this slot/pass; if there is none, focus nullptr.
+        const std::vector<MTL::Buffer*>& passBuffers = fBuffers[fCurrentFrame];
+        fBuffer = (fCurrentPass < passBuffers.size()) ? passBuffers[fCurrentPass] : nullptr;
+    }
+
+    // Sets the frame time shared by every pool; PrepareForWrite() compares against it.
+    static void SetFrameTime(uint32_t frameTime) { fFrameTime = frameTime; }
+
+    // Returns the buffer focused by the last PrepareForWrite()/SetBuffer(), or nullptr.
+    MTL::Buffer* GetBuffer() const { return fBuffer; }
+
+    // Stores `buffer` for the current frame slot and pass. The pool takes its own retain on the
+    // buffer and releases whatever was stored in that position before.
+    void SetBuffer(MTL::Buffer* buffer)
+    {
+        std::vector<MTL::Buffer*>& passBuffers = fBuffers[fCurrentFrame];
+
+        // Grow far enough to hold fCurrentPass. Growing by a single element is not enough when
+        // earlier passes of this slot never stored a buffer.
+        if (fCurrentPass >= passBuffers.size()) {
+            passBuffers.resize(size_t(fCurrentPass) + 1, nullptr);
+        }
+
+        // Retain the incoming buffer before releasing the old one so that storing the buffer
+        // that is already in this position cannot drop its last reference.
+        MTL::Buffer* incoming = buffer ? buffer->retain() : nullptr;
+        if (MTL::Buffer* previous = passBuffers[fCurrentPass]) {
+            previous->release();
+        }
+
+        passBuffers[fCurrentPass] = incoming;
+        fBuffer = incoming;
+    }
+
+    // Releases every buffer held by the pool and empties it. Safe to call more than once.
+    void Release() override
+    {
+        IReleaseBuffers();
+    }
+
+private:
+    // One slot per frame that Metal may have in flight (see the comment above the class).
+    static constexpr uint32_t kNumFrameSlots = 3;
+
+    // Drops the pool's retain on every stored buffer. The vectors are cleared afterwards so a
+    // second call cannot release the same buffer twice.
+    void IReleaseBuffers()
+    {
+        for (std::vector<MTL::Buffer*>& passBuffers : fBuffers) {
+            for (MTL::Buffer* buffer : passBuffers) {
+                if (buffer) {
+                    buffer->release();
+                }
+            }
+            passBuffers.clear();
+        }
+        fBuffer = nullptr;
+    }
+
+    static uint32_t           fFrameTime;
+    MTL::Buffer*              fBuffer;
+    std::vector<MTL::Buffer*> fBuffers[kNumFrameSlots];
+};
+
+// Device ref for a vertex buffer; the Metal buffers themselves live in the inherited pool.
+class plMetalVertexBufferRef : public plMetalBufferPoolRef
+{
+public:
+    plGBufferGroup* fOwner;
+    uint32_t        fCount;
+    uint32_t        fIndex;
+    uint32_t        fVertexSize;
+    int32_t         fOffset;
+    uint8_t         fFormat;
+    uint8_t*        fData; // deleted by the destructor
+
+    uint32_t fRefTime;
+
+    enum
+    {
+        kRebuiltSinceUsed = 0x10, // kDirty = 0x1 is in hsGDeviceRef
+        kVolatile = 0x20,
+        kSkinned = 0x40
+    };
+
+    bool RebuiltSinceUsed() const { return HasFlag(kRebuiltSinceUsed); }
+    void SetRebuiltSinceUsed(bool b) { SetFlag(kRebuiltSinceUsed, b); }
+
+    bool Volatile() const { return HasFlag(kVolatile); }
+    void SetVolatile(bool b) { SetFlag(kVolatile, b); }
+
+    bool Skinned() const { return HasFlag(kSkinned); }
+    void SetSkinned(bool b) { SetFlag(kSkinned, b); }
+
+    // A volatile ref is expired when it is dirty or was last referenced at a time other than t.
+    bool Expired(uint32_t t) const { return Volatile() && (IsDirty() || (fRefTime != t)); }
+    void SetRefTime(uint32_t t) { fRefTime = t; }
+
+    // Initializers are listed in declaration order.
+    plMetalVertexBufferRef() : plMetalBufferPoolRef(),
+                               fOwner(),
+                               fCount(),
+                               fIndex(),
+                               fVertexSize(),
+                               fOffset(),
+                               fFormat(),
+                               fData(),
+                               fRefTime()
+    {
+    }
+
+    virtual ~plMetalVertexBufferRef();
+
+    void                          Link(plMetalVertexBufferRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); }
+    plMetalVertexBufferRef* const GetNext() const { return (plMetalVertexBufferRef*)fNext; }
+
+    void Release() override;
+};
+
+// Device ref for an index buffer; the Metal buffers themselves live in the inherited pool.
+class plMetalIndexBufferRef : public plMetalBufferPoolRef
+{
+public:
+    uint32_t        fCount;
+    uint32_t        fIndex;
+    plGBufferGroup* fOwner;
+    uint32_t        fRefTime;
+    // NOTE(review): this hides plMetalBufferPoolRef::fLastWriteFrameTime. PrepareForWrite()
+    // reads and writes the base member, not this one - confirm which one callers mean to use.
+    uint32_t fLastWriteFrameTime;
+
+    enum
+    {
+        kRebuiltSinceUsed = 0x10, // kDirty = 0x1 is in hsGDeviceRef
+        kVolatile = 0x20
+    };
+
+    bool RebuiltSinceUsed() const { return HasFlag(kRebuiltSinceUsed); }
+    void SetRebuiltSinceUsed(bool b) { SetFlag(kRebuiltSinceUsed, b); }
+
+    bool Volatile() const { return HasFlag(kVolatile); }
+    void SetVolatile(bool b) { SetFlag(kVolatile, b); }
+
+    // A volatile ref is expired when it is dirty or was last referenced at a time other than t.
+    bool Expired(uint32_t t) const { return Volatile() && (IsDirty() || (fRefTime != t)); }
+    void SetRefTime(uint32_t t) { fRefTime = t; }
+
+    void Release() override;
+
+    void                         Link(plMetalIndexBufferRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); }
+    plMetalIndexBufferRef* const GetNext() { return (plMetalIndexBufferRef*)fNext; }
+    virtual ~plMetalIndexBufferRef();
+
+    // Initializers are listed in declaration order.
+    plMetalIndexBufferRef() : plMetalBufferPoolRef(),
+                              fCount(),
+                              fIndex(),
+                              fOwner(),
+                              fRefTime(),
+                              fLastWriteFrameTime()
+    {
+    }
+};
+
+// Device ref for a texture. Release() drops the reference held in fTexture.
+class plMetalTextureRef : public plMetalDeviceRef
+{
+public:
+    plBitmap* fOwner;
+
+    int32_t          fLevels;
+    MTL::Texture*    fTexture;
+    MTL::PixelFormat fFormat;
+
+    void                     Link(plMetalTextureRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); }
+    plMetalTextureRef* const GetNext() { return (plMetalTextureRef*)fNext; }
+
+    // fFormat is value-initialized as well so it never holds an indeterminate value.
+    plMetalTextureRef() : plMetalDeviceRef(),
+                          fOwner(),
+                          fLevels(1),
+                          fTexture(),
+                          fFormat()
+    {
+    }
+
+    virtual ~plMetalTextureRef();
+
+    void Release() override;
+};
+
+// Texture ref that additionally carries a depth buffer; Release() drops both textures.
+class plMetalRenderTargetRef : public plMetalTextureRef
+{
+public:
+    MTL::Texture* fDepthBuffer;
+
+    void                    Link(plMetalRenderTargetRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); }
+    plMetalRenderTargetRef* GetNext() const { return (plMetalRenderTargetRef*)fNext; }
+
+    plMetalRenderTargetRef() : fDepthBuffer()
+    {
+    }
+
+    virtual ~plMetalRenderTargetRef();
+
+    void Release() override;
+
+    virtual void SetOwner(plRenderTarget* targ) { fOwner = (plBitmap*)targ; }
+};
+
+#endif // _plGLDeviceRef_inc_
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRefs.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRefs.cpp
new file mode 100644
index 0000000000..87d4f6181a
--- /dev/null
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRefs.cpp
@@ -0,0 +1,159 @@
+/*==LICENSE==*
+
+CyanWorlds.com Engine - MMOG client, server and tools
+Copyright (C) 2011 Cyan Worlds, Inc.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see .
+
+Additional permissions under GNU GPL version 3 section 7
+
+If you modify this Program, or any covered work, by linking or
+combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK,
+NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent
+JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK
+(or a modified version of those libraries),
+containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA,
+PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG
+JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the
+licensors of this Program grant you additional
+permission to convey the resulting work. Corresponding Source for a
+non-source form of such a combination shall include the source code for
+the parts of OpenSSL and IJG JPEG Library used as well as that of the covered
+work.
+
+You can contact Cyan Worlds, Inc. by email legal@cyan.com
+ or by snail mail at:
+      Cyan Worlds, Inc.
+      14617 N Newport Hwy
+      Mead, WA 99021
+
+*==LICENSE==*/
+#include "plMetalDeviceRef.h"
+#include "plMetalPipeline.h"
+#include "plPipeline/hsWinRef.h"
+#include "plProfile.h"
+#include "plStatusLog/plStatusLog.h"
+
+plProfile_CreateMemCounter("Vertices", "Memory", MemVertex);
+plProfile_CreateMemCounter("Indices", "Memory", MemIndex);
+plProfile_CreateMemCounter("Textures", "Memory", MemTexture);
+
+/*****************************************************************************
+ ** Generic plMetalDeviceRef Functions **
+ *****************************************************************************/
+
+// A new ref starts out unlinked: it has no successor and nothing points at it.
+plMetalDeviceRef::plMetalDeviceRef()
+    : fNext(),
+      fBack()
+{
+}
+
+// Removes the ref from its list, if it is in one, before it goes away.
+plMetalDeviceRef::~plMetalDeviceRef()
+{
+    if (fNext != nullptr || fBack != nullptr)
+        Unlink();
+}
+
+// Removes this ref from the intrusive list it is linked into.
+// fBack points at whichever pointer currently refers to this ref (the list head or the
+// previous ref's fNext), so unlinking means redirecting that pointer to our successor.
+void plMetalDeviceRef::Unlink()
+{
+    hsAssert(fBack, "plMetalDeviceRef not in list");
+
+    if (fNext)
+        fNext->fBack = fBack;
+    // The destructor calls Unlink() whenever either link is set, so do not rely on the
+    // assert above to protect this write.
+    if (fBack)
+        *fBack = fNext;
+
+    fBack = nullptr;
+    fNext = nullptr;
+}
+
+// Frame time shared by every buffer pool; updated through plMetalBufferPoolRef::SetFrameTime().
+uint32_t plMetalBufferPoolRef::fFrameTime(0);
+
+// Inserts this ref at the front of the intrusive list whose head pointer is *back.
+// The ref must not already be linked.
+void plMetalDeviceRef::Link(plMetalDeviceRef** back)
+{
+    hsAssert(fNext == nullptr && fBack == nullptr, "Trying to link a plMetalDeviceRef that's already linked");
+
+    plMetalDeviceRef* formerHead = *back;
+    if (formerHead != nullptr) {
+        // The old head is now reached through our fNext.
+        formerHead->fBack = &fNext;
+    }
+
+    fNext = formerHead;
+    fBack = back;
+    *back = this;
+}
+
+/*****************************************************************************
+ ** Vertex buffer cleanup Functions **
+ *****************************************************************************/
+
+plMetalVertexBufferRef::~plMetalVertexBufferRef()
+{
+    // NOTE(review): fData is a uint8_t*; if it is allocated with new[] this needs to be
+    // delete[] - confirm against the allocation site.
+    delete fData;
+    Release();
+}
+
+// Only marks the ref dirty.
+void plMetalVertexBufferRef::Release()
+{
+    SetDirty(true);
+}
+
+/*****************************************************************************
+ ** Index buffer cleanup Functions **
+ *****************************************************************************/
+
+plMetalIndexBufferRef::~plMetalIndexBufferRef()
+{
+    Release();
+}
+
+// Only marks the ref dirty.
+void plMetalIndexBufferRef::Release()
+{
+    SetDirty(true);
+}
+
+/*****************************************************************************
+ ** Texture cleanup Functions **
+ *****************************************************************************/
+
+// Drops the reference held in fTexture (if any) and marks the ref dirty.
+void plMetalTextureRef::Release()
+{
+    if (MTL::Texture* texture = fTexture) {
+        texture->release();
+        fTexture = nullptr;
+    }
+    SetDirty(true);
+}
+
+plMetalTextureRef::~plMetalTextureRef()
+{
+    Release();
+
+    const bool isLinked = (fNext != nullptr) || (fBack != nullptr);
+    if (isLinked)
+        Unlink();
+}
+
+/*****************************************************************************
+ ** FrameBuffer cleanup Functions **
+ *****************************************************************************/
+
+plMetalRenderTargetRef::~plMetalRenderTargetRef()
+{
+    Release();
+}
+
+// Drops the depth buffer, then releases the color texture through the base class.
+void plMetalRenderTargetRef::Release()
+{
+    if (MTL::Texture* depth = fDepthBuffer) {
+        depth->release();
+        fDepthBuffer = nullptr;
+    }
+    plMetalTextureRef::Release();
+    SetDirty(true);
+}
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.mm b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.mm
new file
mode 100644
index 0000000000..eaef5f157e
--- /dev/null
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.mm
@@ -0,0 +1,102 @@
+
+/*==LICENSE==*
+
+CyanWorlds.com Engine - MMOG client, server and tools
+Copyright (C) 2011 Cyan Worlds, Inc.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see .
+
+Additional permissions under GNU GPL version 3 section 7
+
+If you modify this Program, or any covered work, by linking or
+combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK,
+NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent
+JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK
+(or a modified version of those libraries),
+containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA,
+PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG
+JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the
+licensors of this Program grant you additional
+permission to convey the resulting work. Corresponding Source for a
+non-source form of such a combination shall include the source code for
+the parts of OpenSSL and IJG JPEG Library used as well as that of the covered
+work.
+
+You can contact Cyan Worlds, Inc. by email legal@cyan.com
+ or by snail mail at:
+      Cyan Worlds, Inc.
+      14617 N Newport Hwy
+      Mead, WA 99021
+
+*==LICENSE==*/
+
+#include "HeadSpin.h"
+
+#include
+
+#include
+
+#include
+#include "plMetalPipeline.h"
+
+// Describes the system default Metal device to the device selector by appending a single
+// hsG3DDeviceRecord to `records`. Nothing is appended if no Metal device is available.
+void plMetalEnumerate::Enumerate(std::vector<hsG3DDeviceRecord>& records)
+{
+    // For now - just use the default device. If there is a high power discrete device - this will
+    // spin it up. This will also automatically pin us to an eGPU if present and the user has
+    // configured us to use it.
+    MTL::Device* device = MTL::CreateSystemDefaultDevice();
+
+    if (device) {
+        hsG3DDeviceRecord devRec;
+        devRec.SetG3DDeviceType(hsG3DDeviceSelector::kDevTypeMetal);
+        devRec.SetDriverName("Metal");
+        devRec.SetDeviceDesc(device->name()->utf8String());
+        // Metal has ways to query capabilities, but doesn't expose a flat version
+        // Populate with the OS version
+        @autoreleasepool {
+            NSProcessInfo* processInfo = [NSProcessInfo processInfo];
+            NSOperatingSystemVersion version = processInfo.operatingSystemVersion;
+            NSString* versionString =
+                [NSString stringWithFormat:@"%li.%li.%li", (long)version.majorVersion,
+                                           (long)version.minorVersion, (long)version.patchVersion];
+            devRec.SetDriverVersion([versionString cStringUsingEncoding:NSUTF8StringEncoding]);
+        }
+        devRec.SetDriverDesc(device->name()->utf8String());
+
+        devRec.SetCap(hsG3DDeviceSelector::kCapsMipmap);
+        devRec.SetCap(hsG3DDeviceSelector::kCapsPerspective);
+        devRec.SetCap(hsG3DDeviceSelector::kCapsCompressTextures);
+        devRec.SetCap(hsG3DDeviceSelector::kCapsDoesSmallTextures);
+        devRec.SetCap(hsG3DDeviceSelector::kCapsPixelShader);
+        devRec.SetCap(hsG3DDeviceSelector::kCapsHardware);
+
+        devRec.SetLayersAtOnce(8);
+
+        // Just make a fake mode so the device selector will let it through
+        hsG3DDeviceMode devMode;
+        devMode.SetWidth(hsG3DDeviceSelector::kDefaultWidth);
+        devMode.SetHeight(hsG3DDeviceSelector::kDefaultHeight);
+        devMode.SetColorDepth(hsG3DDeviceSelector::kDefaultDepth);
+        devRec.GetModes().emplace_back(devMode);
+
+        records.emplace_back(devRec);
+
+        // CreateSystemDefaultDevice() returns a reference we own. Everything the record needs
+        // has been handed to it above, so release the device rather than leaking it.
+        // NOTE(review): assumes hsG3DDeviceRecord copies the strings passed to its setters - confirm.
+        device->release();
+    }
+}
diff --git
a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp
new file mode 100644
index 0000000000..8daad2b76e
--- /dev/null
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp
@@ -0,0 +1,84 @@
+/*==LICENSE==*
+
+CyanWorlds.com Engine - MMOG client, server and tools
+Copyright (C) 2011 Cyan Worlds, Inc.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see .
+
+Additional permissions under GNU GPL version 3 section 7
+
+If you modify this Program, or any covered work, by linking or
+combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK,
+NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent
+JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK
+(or a modified version of those libraries),
+containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA,
+PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG
+JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the
+licensors of this Program grant you additional
+permission to convey the resulting work. Corresponding Source for a
+non-source form of such a combination shall include the source code for
+the parts of OpenSSL and IJG JPEG Library used as well as that of the covered
+work.
+
+You can contact Cyan Worlds, Inc. by email legal@cyan.com
+ or by snail mail at:
+      Cyan Worlds, Inc.
+      14617 N Newport Hwy
+      Mead, WA 99021
+
+*==LICENSE==*/
+#include "plMetalFragmentShader.h"
+
+#include
+
+#include "HeadSpin.h"
+#include "hsWindows.h"
+#include "plDrawable/plGBufferGroup.h"
+#include "plMetalPipeline.h"
+#include "plSurface/plShader.h"
+
+// Forwards `owner` to the plMetalShader base.
+plMetalFragmentShader::plMetalFragmentShader(plShader* owner)
+    : plMetalShader(owner)
+{
+}
+
+plMetalFragmentShader::~plMetalFragmentShader()
+{
+    Release();
+}
+
+// Clears fPipe.
+void plMetalFragmentShader::Release()
+{
+    fPipe = nullptr;
+}
+
+// Pushes the owner's shader constants to the current render command encoder as fragment bytes.
+// Each constant is uploaded as one simd_float4. Always returns true.
+// NOTE(review): the buffer index is a VertexShaderArgument* value even though this is a
+// fragment upload - confirm that is the index the fragment functions expect.
+bool plMetalFragmentShader::ISetConstants(plMetalPipeline* pipe)
+{
+    if (!fOwner->GetNumConsts()) {
+        return true;
+    }
+
+    const size_t constantBytes = fOwner->GetNumConsts() * sizeof(simd_float4);
+    float* constants = (float*)fOwner->GetConstBasePtr();
+    auto encoder = pipe->GetMetalDevice()->CurrentRenderCommandEncoder();
+    encoder->setFragmentBytes(constants, constantBytes, VertexShaderArgumentMaterialShaderUniforms);
+    return true;
+}
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h
new file mode 100644
index 0000000000..c0b9c9ac1a
--- /dev/null
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h
@@ -0,0 +1,64 @@
+/*==LICENSE==*
+
+CyanWorlds.com Engine - MMOG client, server and tools
+Copyright (C) 2011 Cyan Worlds, Inc.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see .
+
+Additional permissions under GNU GPL version 3 section 7
+
+If you modify this Program, or any covered work, by linking or
+combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK,
+NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent
+JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK
+(or a modified version of those libraries),
+containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA,
+PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG
+JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the
+licensors of this Program grant you additional
+permission to convey the resulting work. Corresponding Source for a
+non-source form of such a combination shall include the source code for
+the parts of OpenSSL and IJG JPEG Library used as well as that of the covered
+work.
+
+You can contact Cyan Worlds, Inc. by email legal@cyan.com
+ or by snail mail at:
+      Cyan Worlds, Inc.
+      14617 N Newport Hwy
+      Mead, WA 99021
+
+*==LICENSE==*/
+
+#ifndef plMetalFragmentShader_inc
+#define plMetalFragmentShader_inc
+
+#include "plMetalShader.h"
+
+class plShader;
+class plMetalPipeline;
+
+// Fragment-stage shader ref. Construction forwards the owning plShader to plMetalShader;
+// the class itself only adds constant upload and typed list helpers.
+class plMetalFragmentShader : public plMetalShader
+{
+public:
+    plMetalFragmentShader(plShader* owner);
+    virtual ~plMetalFragmentShader();
+
+    // On error, sets error string.
+    bool ISetConstants(plMetalPipeline* pipe) override;
+    void Release() override;
+
+    // Typed wrappers around the plMetalDeviceRef list links.
+    void                         Link(plMetalFragmentShader** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); }
+    plMetalFragmentShader* const GetNext() const { return (plMetalFragmentShader*)fNext; }
+};
+
+#endif // plMetalFragmentShader_inc
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp
new file mode 100644
index 0000000000..725eb9eb74
--- /dev/null
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp
@@ -0,0 +1,539 @@
+/*==LICENSE==*
+
+CyanWorlds.com Engine - MMOG client, server and tools
+Copyright (C) 2011 Cyan Worlds, Inc.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see .
+
+Additional permissions under GNU GPL version 3 section 7
+
+If you modify this Program, or any covered work, by linking or
+combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK,
+NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent
+JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK
+(or a modified version of those libraries),
+containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA,
+PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG
+JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the
+licensors of this Program grant you additional
+permission to convey the resulting work. Corresponding Source for a
+non-source form of such a combination shall include the source code for
+the parts of OpenSSL and IJG JPEG Library used as well as that of the covered
+work.
+
+You can contact Cyan Worlds, Inc. by email legal@cyan.com
+ or by snail mail at:
+      Cyan Worlds, Inc.
+      14617 N Newport Hwy
+      Mead, WA 99021
+
+*==LICENSE==*/
+
+#include "plMetalMaterialShaderRef.h"
+
+#include "HeadSpin.h"
+#include "hsBitVector.h"
+#include "hsGMatState.inl"
+#include "plPipeline.h"
+
+#include "plMetalDevice.h"
+#include "plMetalPipeline.h"
+
+#include "plDrawable/plGBufferGroup.h"
+#include "plGImage/plCubicEnvironmap.h"
+#include "plGImage/plMipmap.h"
+#include "plPipeDebugFlags.h"
+#include "plPipeline/plCubicRenderTarget.h"
+#include "plPipeline/plRenderTarget.h"
+#include "plSurface/hsGMaterial.h"
+#include "plSurface/plLayerInterface.h"
+
+#include
+#include
+
+// Builds the per-pass state for `mat` right away by running CheckMateralRef().
+plMetalMaterialShaderRef::plMetalMaterialShaderRef(hsGMaterial* mat, plMetalPipeline* pipe)
+    : fPipeline(pipe),
+      fMaterial(mat),
+      fFragFunction(),
+      fNumPasses()
+{
+    fDevice = pipe->fDevice.fMetalDevice;
+    fFragFunction = pipe->fFragFunction;
+    CheckMateralRef();
+}
+
+plMetalMaterialShaderRef::~plMetalMaterialShaderRef()
+{
+    Release();
+}
+
+// Releases the per-pass argument buffers and forgets everything ILoopOverLayers() built.
+void plMetalMaterialShaderRef::Release()
+{
+    for (MTL::Buffer* buffer : fPassArgumentBuffers) {
+        buffer->release();
+    }
+    fPassArgumentBuffers.clear();
+
+    // All of these are appended to once per pass by ILoopOverLayers(). They have to be emptied
+    // together with the buffers, otherwise the next rebuild appends behind stale entries and
+    // pass N no longer lines up across the vectors.
+    fPassIndices.clear();
+    fPassLengths.clear();
+    fFragmentShaderDescriptions.clear();
+    fPasses.clear();
+
+    fNumPasses = 0;
+}
+
+// Rebuilds the cached pass state if the ref is dirty or has never been built, then clears the
+// dirty flag.
+void plMetalMaterialShaderRef::CheckMateralRef()
+{
+    if (IsDirty()) {
+        /*
+         Something (like avatars) might have modified our textures.
+         If we're dirty - clear all cached state.
+         */
+        // Qualified call: this also runs from the constructor, and only this class's state
+        // is meant to be reset here.
+        plMetalMaterialShaderRef::Release();
+    }
+    if (fNumPasses == 0) {
+        ILoopOverLayers();
+
+        for (size_t i = 0; i < fMaterial->GetNumLayers(); i++) {
+            plLayerInterface* layer = fMaterial->GetLayer(i);
+            if (!layer) {
+                continue;
+            }
+
+            fPipeline->CheckTextureRef(layer);
+        }
+    }
+    SetDirty(false);
+}
+
+// fast encode doesn't support piggybacks or push over layers, but it does use preloaded data on
+// the GPU so it's much faster. Use this encoder if there are no piggybacks or pushover layers
+void plMetalMaterialShaderRef::FastEncodeArguments(MTL::RenderCommandEncoder* encoder, VertexUniforms* vertexUniforms, uint pass)
+{
+    /*
+     NOTE:
+     This code is all that remains of the UBO path - which has slowly been cut down
+     by piggybacks interfering with UBOs, and a lot of uniforms moving into precompiled
+     sections of the shaders.
+
+     plMetalFragmentShaderArgumentBuffer literally just has one float left - which could
+     be factored out. The only reason this code hasn't been deleted is because plates
+     still relies on it - but plates also needs to be updated anyway.
+
+     UBOs in theory are more efficient. So we either need to figure out how to do UBOs
+     or finally delete this code for good.
+     */
+    const uint32_t firstLayer = GetPassIndex(pass);
+    const uint32_t layerCount = fPassLengths[pass];
+
+    // `offset` is the layer's position within the pass; it indexes both the UV transforms and
+    // the texture slots.
+    for (uint32_t offset = 0; offset < layerCount; offset++) {
+        plLayerInterface* layer = fMaterial->GetLayer(firstLayer + offset);
+
+        if (!layer) {
+            continue;
+        }
+
+        fPipeline->CheckTextureRef(layer);
+
+        // Layers whose texture is not a plBitmap are skipped.
+        plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture());
+
+        if (!img) {
+            continue;
+        }
+
+        EncodeTransform(layer, &vertexUniforms->uvTransforms[offset]);
+        IBuildLayerTexture(encoder, offset, layer);
+    }
+
+    encoder->setFragmentBuffer(fPassArgumentBuffers[pass], 0, FragmentShaderArgumentUniforms);
+}
+
+// Encodes the layers of `pass` plus any piggyback layers, running every layer through
+// preEncodeTransform/postEncodeTransform, and uploads the resulting uniforms as fragment bytes.
+// `passDescription` is filled in by IHandleMaterial().
+void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder* encoder,
+                                               VertexUniforms* vertexUniforms,
+                                               const uint pass,
+                                               plMetalFragmentShaderDescription* passDescription,
+                                               std::vector<plLayerInterface*>* piggyBacks,
+                                               const std::function<plLayerInterface*(plLayerInterface*, uint32_t)> preEncodeTransform,
+                                               const std::function<plLayerInterface*(plLayerInterface*, uint32_t)> postEncodeTransform)
+{
+    // Value-initialized so the upload below never sends indeterminate bytes when
+    // IHandleMaterial() bails out before writing the uniforms.
+    plMetalFragmentShaderArgumentBuffer uniforms{};
+
+    IHandleMaterial(
+        GetPassIndex(pass), passDescription, &uniforms, piggyBacks,
+        [this, &preEncodeTransform, &encoder, &vertexUniforms](plLayerInterface* layer, uint32_t index) {
+            layer = preEncodeTransform(layer, index);
+            IBuildLayerTexture(encoder, index, layer);
+            // `index` is already relative to the first layer of the pass.
+            EncodeTransform(layer, &vertexUniforms->uvTransforms[index]);
+
+            return layer;
+        },
+        [&postEncodeTransform](plLayerInterface* layer, uint32_t index) {
+            return postEncodeTransform(layer, index);
+        }
+    );
+
+    encoder->setFragmentBytes(&uniforms, sizeof(plMetalFragmentShaderArgumentBuffer), FragmentShaderArgumentUniforms);
+}
+
+// Copies the layer's UV transform and UVW source into `transform`.
+void plMetalMaterialShaderRef::EncodeTransform(const plLayerInterface* layer, UVOutDescriptor* transform)
+{
+    matrix_float4x4 tXfm;
+    hsMatrix2SIMD(layer->GetTransform(), &tXfm);
+    transform->transform = tXfm;
+    transform->UVWSrc = layer->GetUVWSrc();
+}
+
+// This is old - supporting the plate code.
+// FIXME: Replace the plate codes path to texturing
+void plMetalMaterialShaderRef::prepareTextures(MTL::RenderCommandEncoder* encoder, uint pass)
+{
+    plLayerInterface* layer = fMaterial->GetLayer(pass);
+    if (!layer) {
+        return;
+    }
+    fPipeline->CheckTextureRef(layer);
+
+    // Load the image
+    plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture());
+
+    if (!img) {
+        return;
+    }
+
+    plMetalTextureRef* texRef = (plMetalTextureRef*)img->GetDeviceRef();
+
+    // A bitmap can be without a device ref (IBuildLayerTexture() handles the same case).
+    if (!texRef || !texRef->fTexture) {
+        return;
+    }
+
+    // Cubic environment maps are deliberately not bound on this path.
+    if (plCubicEnvironmap::ConvertNoRef(layer->GetTexture()) != nullptr) {
+        return;
+    }
+
+    if (plMipmap::ConvertNoRef(layer->GetTexture()) != nullptr || plRenderTarget::ConvertNoRef(layer->GetTexture()) != nullptr) {
+        encoder->setFragmentTexture(texRef->fTexture, FragmentShaderArgumentTexture);
+    }
+}
+
+// Splits the material's layers into passes. For every pass this records the shader
+// description, the layers, the first layer index, the length and a "fast encode" buffer.
+void plMetalMaterialShaderRef::ILoopOverLayers()
+{
+    for (uint32_t j = 0; j < fMaterial->GetNumLayers();) {
+        const uint32_t currLayer = j;
+
+        // Create "fast encode" buffers
+        // Fast encode can be used when there are no piggybacks or pushover layers. We'll load as much of the
+        // base state of this layer as we can onto the GPU. Using fast encode, the renderer can avoid encoding
+        // a lot of the render state, it will be on the GPU already.
+        // I'd like to encode more data here, and use a heap. The heap hasn't happened yet because heaps are
+        // private memory, and we don't have a window yet for a blit phase into private memory.
+        MTL::Buffer* argumentBuffer = fDevice->newBuffer(sizeof(plMetalFragmentShaderArgumentBuffer), MTL::ResourceStorageModeManaged);
+        if (!argumentBuffer) {
+            // Without a buffer there is nowhere for IHandleMaterial() to write the uniforms.
+            break;
+        }
+
+        plMetalFragmentShaderArgumentBuffer* layerBuffer = (plMetalFragmentShaderArgumentBuffer*)argumentBuffer->contents();
+
+        plMetalFragmentShaderDescription passDescription;
+
+        j = IHandleMaterial(
+            currLayer, &passDescription, layerBuffer, nullptr,
+            [](plLayerInterface* layer, uint32_t index) {
+                return layer;
+            },
+            [](plLayerInterface* layer, uint32_t index) {
+                return layer;
+            }
+        );
+
+        if (j == uint32_t(-1)) {
+            // newBuffer() gave us an owning reference and nothing has taken it over yet.
+            argumentBuffer->release();
+            break;
+        }
+
+        passDescription.CacheHash();
+        fFragmentShaderDescriptions.push_back(passDescription);
+
+        const uint32_t passLength = j - currLayer;
+
+        // NOTE(review): the vector is sized by the absolute end index j rather than by
+        // passLength, so entries past passLength stay null. Kept as is because the users of
+        // GetLayersForPass() are not visible here.
+        std::vector<plLayerInterface*> layers(j);
+
+        // encode the colors for this pass into our buffer for fast rendering
+        for (uint32_t layerOffset = 0; layerOffset < passLength; layerOffset++) {
+            plLayerInterface* layer = fMaterial->GetLayer(currLayer + layerOffset);
+            layers[layerOffset] = layer;
+            IBuildLayerTexture(nullptr, layerOffset, layer);
+        }
+
+        fPasses.push_back(layers);
+
+        argumentBuffer->didModifyRange(NS::Range(0, argumentBuffer->length()));
+
+        fPassArgumentBuffers.push_back(argumentBuffer);
+
+        fPassIndices.push_back(currLayer);
+        fPassLengths.push_back(passLength);
+        fNumPasses++;
+
+#if 0
+        ISetFogParameters(fMaterial->GetLayer(iCurrMat));
+#endif
+    }
+}
+
+// Returns the layer's state composited with the pipeline's two material overrides.
+const hsGMatState plMetalMaterialShaderRef::ICompositeLayerState(const plLayerInterface* layer) const
+{
+    hsGMatState state;
+    state.Composite(layer->GetState(), fPipeline->GetMaterialOverride(true), fPipeline->GetMaterialOverride(false));
+    return state;
+}
+
+// Makes sure the layer's texture has a device ref and, when an encoder is given, binds the
+// texture and its sampler at `offsetFromRootLayer`.
+void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder* encoder, const uint32_t offsetFromRootLayer, plLayerInterface* layer)
+{
+    // Reminder: Encoder is allowed to be null when Plasma is precompiling pipeline states
+    // Metal needs to know if a shader is 2D or Cubic to compile shaders
+    // A null encoder signifies we should build the texture but not bind state
+
+    fPipeline->CheckTextureRef(layer);
+    plBitmap* texture = layer->GetTexture();
+
+    if (texture != nullptr && encoder) {
+        plMetalTextureRef* deviceTexture = (plMetalTextureRef*)texture->GetDeviceRef();
+        if (!deviceTexture) {
+            // FIXME: Better way to address missing textures than null pointers
+            encoder->setFragmentTexture(nullptr, FragmentShaderArgumentAttributeCubicTextures + offsetFromRootLayer);
+            encoder->setFragmentTexture(nullptr, FragmentShaderArgumentAttributeTextures + offsetFromRootLayer);
+            return;
+        }
+        hsAssert(offsetFromRootLayer <= 8, "Too many layers requested");
+        if (plCubicEnvironmap::ConvertNoRef(texture) != nullptr || plCubicRenderTarget::ConvertNoRef(texture) != nullptr) {
+            encoder->setFragmentTexture(deviceTexture->fTexture, FragmentShaderArgumentAttributeCubicTextures + offsetFromRootLayer);
+        } else if (plMipmap::ConvertNoRef(texture) != nullptr || plRenderTarget::ConvertNoRef(texture) != nullptr) {
+            encoder->setFragmentTexture(deviceTexture->fTexture, FragmentShaderArgumentAttributeTextures + offsetFromRootLayer);
+        }
+
+        // Only touch the sampler when the clamp mode differs from what the pipeline has cached.
+        if (fPipeline->fState.layerStates[offsetFromRootLayer].clampFlag != layer->GetClampFlags()) {
+            MTL::SamplerState* samplerState = fPipeline->fDevice.SampleStateForClampFlags(hsGMatState::hsGMatClampFlags(layer->GetClampFlags()));
+            encoder->setFragmentSamplerState(samplerState, offsetFromRootLayer);
+
+            fPipeline->fState.layerStates[offsetFromRootLayer].clampFlag = hsGMatState::hsGMatClampFlags(layer->GetClampFlags());
+        }
+    }
+}
+
+// Returns how many consecutive layers, starting at `which`, can be rendered in one pass.
+uint32_t plMetalMaterialShaderRef::ILayersAtOnce(uint32_t which)
+{
+    uint32_t currNumLayers = 1;
+
+    plLayerInterface* lay = fMaterial->GetLayer(which);
+
+    if (fPipeline->IsDebugFlagSet(plPipeDbg::kFlagNoMultitexture)) {
+        return currNumLayers;
+    }
+
+    if ((fPipeline->IsDebugFlagSet(plPipeDbg::kFlagBumpUV) || fPipeline->IsDebugFlagSet(plPipeDbg::kFlagBumpW)) && (lay->GetMiscFlags() & hsGMatState::kMiscBumpChans)) {
+        currNumLayers = 2;
+        return currNumLayers;
+    }
+
+    if ((lay->GetBlendFlags() & hsGMatState::kBlendNoColor) ||
+        (lay->GetMiscFlags() & hsGMatState::kMiscTroubledLoner)) {
+        return currNumLayers;
+    }
+
+    uint32_t maxLayers = 8;
+    if (which + maxLayers > fMaterial->GetNumLayers()) {
+        maxLayers = uint32_t(fMaterial->GetNumLayers()) - which;
+    }
+
+    for (uint32_t i = currNumLayers; i < maxLayers; i++) {
+        plLayerInterface* nextLay = fMaterial->GetLayer(which + i);
+
+        // A missing layer ends the pass; IHandleMaterial() rejects it when it starts the next one.
+        if (!nextLay) {
+            break;
+        }
+
+        // Ignoring max UVW limit
+
+        if ((nextLay->GetMiscFlags() & hsGMatState::kMiscBindNext) && (i + 1 >= maxLayers)) {
+            break;
+        }
+
+        if (nextLay->GetMiscFlags() & hsGMatState::kMiscRestartPassHere) {
+            break;
+        }
+
+        if (!(fMaterial->GetLayer(which + i - 1)->GetMiscFlags() & hsGMatState::kMiscBindNext) && !ICanEatLayer(nextLay)) {
+            break;
+        }
+
+        currNumLayers++;
+    }
+
+    return currNumLayers;
+}
+
+// Returns true if `lay` may be folded into the pass of the layer before it.
+bool plMetalMaterialShaderRef::ICanEatLayer(plLayerInterface* lay)
+{
+    if (!lay->GetTexture()) {
+        return false;
+    }
+
+    if ((lay->GetBlendFlags() & hsGMatState::kBlendNoColor) ||
+        (lay->GetBlendFlags() & hsGMatState::kBlendAddColorTimesAlpha) ||
+        (lay->GetMiscFlags() & hsGMatState::kMiscTroubledLoner)) {
+        return false;
+    }
+
+    if ((lay->GetBlendFlags() & hsGMatState::kBlendAlpha) && (lay->GetAmbientColor().a < 1.f)) {
+        return false;
+    }
+
+    if (!(lay->GetZFlags() & hsGMatState::kZNoZWrite)) {
+        return false;
+    }
+
+    return true;
+}
+
+// Describes the pass that starts at `layer`.
+// Fills `passDescription` for every layer of the pass (and every piggyback), writes the alpha
+// threshold into `uniforms`, and calls preEncodeTransform/postEncodeTransform around each layer.
+// Returns the index of the first layer after the pass, or uint32_t(-1) if no pass can be built.
+uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer,
+                                                   plMetalFragmentShaderDescription* passDescription,
+                                                   plMetalFragmentShaderArgumentBuffer* uniforms,
+                                                   std::vector<plLayerInterface*>* piggybacks,
+                                                   const std::function<plLayerInterface*(plLayerInterface*, uint32_t)>& preEncodeTransform,
+                                                   const std::function<plLayerInterface*(plLayerInterface*, uint32_t)>& postEncodeTransform)
+{
+    if (!fMaterial || layer >= fMaterial->GetNumLayers() || !fMaterial->GetLayer(layer)) {
+        return -1;
+    }
+
+    if (false /*ISkipBumpMap(fMaterial, layer)*/) {
+        return -1;
+    }
+
+    static_assert(std::is_trivial_v<plMetalFragmentShaderDescription>, "plMetalFragmentShaderDescription must be a POD type!");
+    memset(passDescription, 0, sizeof(plMetalFragmentShaderDescription));
+
+    // Ignoring the bit about ATI Radeon and UVW limits
+
+    if (fPipeline->IsDebugFlagSet(plPipeDbg::kFlagNoDecals) && (fMaterial->GetCompositeFlags() & hsGMaterial::kCompDecal)) {
+        return -1;
+    }
+
+    // Ignoring the bit about self-rendering cube maps
+
+    plLayerInterface* currLay = fMaterial->GetLayer(layer);
+    currLay = preEncodeTransform(currLay, 0);
+
+    if (fPipeline->IsDebugFlagSet(plPipeDbg::kFlagBumpW) && (currLay->GetMiscFlags() & hsGMatState::kMiscBumpDu)) {
+        currLay = fMaterial->GetLayer(++layer);
+        // The layer after a Du layer is not covered by the checks at the top.
+        if (!currLay) {
+            return -1;
+        }
+    }
+
+    // currLay = IPushOverAllLayer(currLay);
+
+    hsGMatState state = ICompositeLayerState(currLay);
+
+    if (fPipeline->IsDebugFlagSet(plPipeDbg::kFlagDisableSpecular)) {
+        state.fShadeFlags &= ~hsGMatState::kShadeSpecular;
+    }
+
+    // Stuff about ZInc
+
+    if (fPipeline->IsDebugFlagSet(plPipeDbg::kFlagNoAlphaBlending)) {
+        state.fBlendFlags &= ~hsGMatState::kBlendMask;
+    }
+
+    if ((fPipeline->IsDebugFlagSet(plPipeDbg::kFlagBumpUV) || fPipeline->IsDebugFlagSet(plPipeDbg::kFlagBumpW)) && (state.fMiscFlags & hsGMatState::kMiscBumpChans)) {
+        switch (state.fMiscFlags & hsGMatState::kMiscBumpChans) {
+            case hsGMatState::kMiscBumpDu:
+                break;
+            case hsGMatState::kMiscBumpDv:
+                if (!(fMaterial->GetLayer(layer - 2)->GetBlendFlags() & hsGMatState::kBlendAdd)) {
+                    state.fBlendFlags &= ~hsGMatState::kBlendMask;
+                    state.fBlendFlags |= hsGMatState::kBlendMADD;
+                }
+                break;
+            case hsGMatState::kMiscBumpDw:
+                if (!(fMaterial->GetLayer(layer - 1)->GetBlendFlags() & hsGMatState::kBlendAdd)) {
+                    state.fBlendFlags &= ~hsGMatState::kBlendMask;
+                    state.fBlendFlags |= hsGMatState::kBlendMADD;
+                }
+                break;
+            default:
+                break;
+        }
+    }
+
+    const uint32_t currNumLayers = ILayersAtOnce(layer);
+
+    if (state.fMiscFlags & (hsGMatState::kMiscBumpDu | hsGMatState::kMiscBumpDw)) {
+        // ISetBumpMatrices(currLay);
+    }
+
+    passDescription->Populate(currLay, 0);
+
+    postEncodeTransform(currLay, 0);
+
+    // `i` keeps counting after the loop so the piggybacks are numbered right behind the
+    // material's own layers.
+    uint32_t i = 1;
+    for (; i < currNumLayers; i++) {
+        plLayerInterface* layPtr = fMaterial->GetLayer(layer + i);
+        if (!layPtr) {
+            return -1;
+        }
+        layPtr = preEncodeTransform(layPtr, i);
+
+        passDescription->Populate(layPtr, i);
+
+        layPtr = postEncodeTransform(layPtr, i);
+    }
+
+    if (piggybacks) {
+        for (size_t currPiggyback = 0; currPiggyback < piggybacks->size(); currPiggyback++) {
+            plLayerInterface* layPtr = piggybacks->at(currPiggyback);
+            if (!layPtr) {
+                return -1;
+            }
+            const uint32_t slot = i + uint32_t(currPiggyback);
+            layPtr = preEncodeTransform(layPtr, slot);
+
+            passDescription->Populate(layPtr, slot);
+
+            layPtr = postEncodeTransform(layPtr, slot);
+        }
+    }
+
+    passDescription->fNumLayers = (piggybacks ? piggybacks->size() : 0) + currNumLayers;
+
+    if (state.fBlendFlags & (hsGMatState::kBlendTest | hsGMatState::kBlendAlpha | hsGMatState::kBlendAddColorTimesAlpha) &&
+        !(state.fBlendFlags & hsGMatState::kBlendAlphaAlways)) {
+        // AlphaTestHigh is used for reducing sort artifacts on textures that
+        // are mostly opaque or transparent, but have regions of translucency
+        // in transition. Like a texture for a bush billboard. It lets there be
+        // some transparency falloff, but quit drawing before it gets so
+        // transparent that draw order problems (halos) become apparent.
+        if (state.fBlendFlags & hsGMatState::kBlendAlphaTestHigh) {
+            uniforms->alphaThreshold = 64.f / 255.f;
+        } else {
+            uniforms->alphaThreshold = 1.f / 255.f;
+        }
+    } else {
+        uniforms->alphaThreshold = 0.f;
+    }
+
+    return layer + currNumLayers;
+}
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h
new file mode 100644
index 0000000000..ae643e56cb
--- /dev/null
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h
@@ -0,0 +1,128 @@
+/*==LICENSE==*
+
+CyanWorlds.com Engine - MMOG client, server and tools
+Copyright (C) 2011 Cyan Worlds, Inc.
+ +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ +#ifndef _plMetalMaterialShaderRef_inc_ +#define _plMetalMaterialShaderRef_inc_ + +#include +#include + +#include "ShaderTypes.h" +#include "hsGMatState.h" +#include "plMetalDeviceRef.h" +#include "plMetalPipelineState.h" + +class hsGMaterial; +class plMetalPipeline; +class plLayerInterface; + +class plMetalMaterialShaderRef : public plMetalDeviceRef +{ +protected: + plMetalPipeline* fPipeline; + hsGMaterial* fMaterial; + // temporary holder for the fragment shader to use, we don't own this reference + MTL::Function* fFragFunction; + +private: + std::vector fPassIndices; + // FIXME: This should be retained/released + MTL::Device* fDevice; + std::vector fPassArgumentBuffers; + +public: + void Link(plMetalMaterialShaderRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } + plMetalMaterialShaderRef* GetNext() const { return (plMetalMaterialShaderRef*)fNext; } + + plMetalMaterialShaderRef(hsGMaterial* mat, plMetalPipeline* pipe); + ~plMetalMaterialShaderRef(); + + void Release() override; + void CheckMateralRef(); + + uint32_t GetNumPasses() const { return fNumPasses; } + + uint32_t GetPassIndex(const size_t which) const { return fPassIndices[which]; } + const std::vector GetLayersForPass(size_t pass) const { return fPasses[pass]; } + + void EncodeArguments(MTL::RenderCommandEncoder* encoder, + VertexUniforms* vertexUniforms, + const uint pass, + plMetalFragmentShaderDescription* passDescription, + std::vector* piggyBacks, + const std::function preEncodeTransform, + const std::function postEncodeTransform); + void FastEncodeArguments(MTL::RenderCommandEncoder* encoder, VertexUniforms* vertexUniforms, uint pass); + // probably not a good idea to call prepareTextures directly + // mostly just a hack to keep plates working for now + void prepareTextures(MTL::RenderCommandEncoder* encoder, uint pass); + std::vector fPassLengths; + + // Set the current Plasma state based on the input layer state and the 
material overrides. + // fMatOverOn overrides to set a state bit whether it is set in the layer or not. + // fMatOverOff overrides to clear a state bit whether it is set in the layer or not.s + const hsGMatState ICompositeLayerState(const plLayerInterface* layer) const; + + const struct plMetalFragmentShaderDescription GetFragmentShaderDescription(size_t which) const + { + return fFragmentShaderDescriptions[which]; + } + +private: + void ILoopOverLayers(); + + uint32_t fNumPasses; + uint32_t IHandleMaterial(uint32_t layer, + plMetalFragmentShaderDescription* passDescription, + plMetalFragmentShaderArgumentBuffer* uniforms, + std::vector* piggybacks, + const std::function& preEncodeTransform, + const std::function& postEncodeTransform); + bool ICanEatLayer(plLayerInterface* lay); + uint32_t ILayersAtOnce(uint32_t which); + + void IBuildLayerTexture(MTL::RenderCommandEncoder* encoder, const uint32_t offsetFromRootLayer, plLayerInterface* layer); + void EncodeTransform(const plLayerInterface* layer, UVOutDescriptor *transform); + std::vector> fPasses; + std::vector fFragmentShaderDescriptions; +}; + +#endif diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp new file mode 100644 index 0000000000..59aebe384a --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -0,0 +1,4382 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ +#include "plMetalPipeline.h" + +#import + +#include +#include + +#include "HeadSpin.h" +#include "hsGMatState.inl" +#include "hsTimer.h" + +#include "pfCamera/plVirtualCamNeu.h" +#include "plAvatar/plAvatarClothing.h" +#include "plDrawable/plAuxSpan.h" +#include "plDrawable/plDrawableSpans.h" +#include "plDrawable/plGBufferGroup.h" +#include "plGImage/plCubicEnvironmap.h" +#include "plGImage/plMipmap.h" +#include "plGLight/plLightInfo.h" +#include "plGLight/plShadowCaster.h" +#include "plGLight/plShadowSlave.h" +#include "plMessage/plDeviceRecreateMsg.h" +#include "plMetalFragmentShader.h" +#include "plMetalMaterialShaderRef.h" +#include "plMetalPipelineState.h" +#include "plMetalPlateManager.h" +#include "plMetalTextFont.h" +#include "plMetalVertexShader.h" +#include "plPipeDebugFlags.h" +#include "plPipeResReq.h" +#include "plPipeline/plCubicRenderTarget.h" +#include "plPipeline/plDebugText.h" +#include "plPipeline/plDynamicEnvMap.h" +#include "plProfile.h" +#include "plQuality.h" +#include "plScene/plRenderRequest.h" +#include "plSurface/hsGMaterial.h" +#include "plSurface/plLayer.h" +#include "plSurface/plLayerShadowBase.h" +#include "plTweak.h" +#include "plgDispatch.h" + +#include "pnMessage/plPipeResMakeMsg.h" +#include "pnNetCommon/plNetApp.h" // for dbg logging + +uint32_t fDbgSetupInitFlags; // HACK temp only + +plProfile_CreateCounter("Feed Triangles", "Draw", DrawFeedTriangles); +plProfile_CreateCounter("Draw Prim Static", "Draw", DrawPrimStatic); +plProfile_CreateMemCounter("Total Texture Size", "Draw", TotalTexSize); +plProfile_CreateCounter("Layer Change", "Draw", LayChange); +plProfile_Extern(DrawTriangles); +plProfile_Extern(MatChange); + +plProfile_CreateTimer("PrepShadows", "PipeT", PrepShadows); +plProfile_CreateTimer("PrepDrawable", "PipeT", PrepDrawable); +plProfile_CreateTimer(" Skin", "PipeT", Skin); +plProfile_CreateTimer("RenderSpan", "PipeT", RenderSpan); +plProfile_CreateTimer(" 
MergeCheck", "PipeT", MergeCheck); +plProfile_CreateTimer(" MergeSpan", "PipeT", MergeSpan); +plProfile_CreateTimer(" SpanTransforms", "PipeT", SpanTransforms); +plProfile_CreateTimer(" SpanFog", "PipeT", SpanFog); +plProfile_CreateTimer(" SelectLights", "PipeT", SelectLights); +plProfile_CreateTimer(" SelectProj", "PipeT", SelectProj); +plProfile_CreateTimer(" CheckDyn", "PipeT", CheckDyn); +plProfile_CreateTimer(" CheckStat", "PipeT", CheckStat); +plProfile_CreateTimer(" RenderBuff", "PipeT", RenderBuff); +plProfile_CreateTimer(" RenderPrim", "PipeT", RenderPrim); +plProfile_CreateTimer("PlateMgr", "PipeT", PlateMgr); +plProfile_CreateTimer("DebugText", "PipeT", DebugText); +plProfile_CreateTimer("Reset", "PipeT", Reset); + +plProfile_CreateCounterNoReset("Reload", "PipeC", PipeReload); +plProfile_CreateCounter("AvRTPoolUsed", "PipeC", AvRTPoolUsed); +plProfile_CreateCounter("AvRTPoolCount", "PipeC", AvRTPoolCount); +plProfile_CreateCounter("AvRTPoolRes", "PipeC", AvRTPoolRes); +plProfile_CreateCounter("AvRTShrinkTime", "PipeC", AvRTShrinkTime); +plProfile_CreateCounter("NumSkin", "PipeC", NumSkin); + +plMetalEnumerate plMetalPipeline::enumerator; + +class plRenderTriListFunc : public plRenderPrimFunc +{ +protected: + plMetalDevice* fDevice; + int fBaseVertexIndex; + int fVStart; + int fVLength; + int fIStart; + int fNumTris; + +public: + plRenderTriListFunc(plMetalDevice* device, int baseVertexIndex, + int vStart, int vLength, int iStart, int iNumTris) + : fDevice(device), + fBaseVertexIndex(baseVertexIndex), + fVStart(vStart), + fVLength(vLength), + fIStart(iStart), + fNumTris(iNumTris) {} + + bool RenderPrims() const override; +}; + +bool plRenderTriListFunc::RenderPrims() const +{ + plProfile_IncCount(DrawFeedTriangles, fNumTris); + plProfile_IncCount(DrawTriangles, fNumTris); + plProfile_Inc(DrawPrimStatic); + + size_t uniformsSize = offsetof(VertexUniforms, uvTransforms) + sizeof(UVOutDescriptor) * fDevice->fPipeline->fCurrNumLayers; + 
fDevice->CurrentRenderCommandEncoder()->setVertexBytes(fDevice->fPipeline->fCurrentRenderPassUniforms, sizeof(VertexUniforms), VertexShaderArgumentFixedFunctionUniforms); + + plMetalLights* lights = &fDevice->fPipeline->fLights; + size_t lightSize = offsetof(plMetalLights, lampSources) + (sizeof(plMetalShaderLightSource) * lights->count); + + fDevice->CurrentRenderCommandEncoder()->setVertexBytes(lights, sizeof(plMetalLights), VertexShaderArgumentLights); + fDevice->CurrentRenderCommandEncoder()->drawIndexedPrimitives(MTL::PrimitiveTypeTriangle, fNumTris, MTL::IndexTypeUInt16, fDevice->fCurrentIndexBuffer, (sizeof(uint16_t) * fIStart)); +} + +plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, const hsG3DDeviceModeRecord* devMode) : pl3DPipeline(devMode), + fRenderTargetRefList(), + fMatRefList(), + fCurrentRenderPassUniforms(), + fFragFunction(), + fVShaderRefList(), + fPShaderRefList(), + fULutTextureRef(), + fCurrRenderLayer() +{ + fTextureRefList = nullptr; + fVtxBuffRefList = nullptr; + fIdxBuffRefList = nullptr; + fMatRefList = nullptr; + fTextFontRefList = nullptr; + + fCurrLayerIdx = 0; + fDevice.fPipeline = this; + + fMaxLayersAtOnce = devMode->GetDevice()->GetLayersAtOnce(); + + fIsFullscreen = !fInitialPipeParams.Windowed; + + fDevice.SetOutputLayer(static_cast(window)); + // For now - set this once at startup. If the underlying device is allow to change on + // the fly (eGPU, display change, etc) - revisit. + fDevice.GetOutputLayer()->setDevice(fDevice.fMetalDevice); + + // Default our output format to 8 bit BGRA. Client may immediately change this to + // the actual framebuffer format. + SetFramebufferFormat(MTL::PixelFormatBGRA8Unorm); + + // Alloc half our simultaneous textures to piggybacks. + // Won't hurt us unless we try to many things at once. 
+ fMaxPiggyBacks = fMaxLayersAtOnce >> 1; + + // Metal is always PS3 capable + plQuality::SetCapability(plQuality::kPS_3); + + fDevice.SetMaxAnsiotropy(fInitialPipeParams.AnisotropicLevel); + fDevice.SetMSAASampleCount(fInitialPipeParams.AntiAliasingAmount); + + fCurrentRenderPassUniforms = (VertexUniforms*)calloc(sizeof(VertexUniforms), sizeof(char)); + + // RenderTarget pools are shared for our shadow generation algorithm. + // Different sizes for different resolutions. + ICreateDeviceObjects(); + ICreateDynDeviceObjects(); + IMakeRenderTargetPools(); +} + +plMetalPipeline::~plMetalPipeline() +{ + if (plMetalPlateManager* pm = static_cast(fPlateMgr)) { + pm->IReleaseGeometry(); + } +} + +void plMetalPipeline::ICreateDeviceObjects() +{ + fPlateMgr = new plMetalPlateManager(this); +} + +bool plMetalPipeline::PreRender(plDrawable* drawable, std::vector& visList, plVisMgr* visMgr) +{ + plDrawableSpans* ds = plDrawableSpans::ConvertNoRef(drawable); + if (!ds) { + return false; + } + + if ((ds->GetType() & fView.GetDrawableTypeMask()) == 0) { + return false; + } + + fView.GetVisibleSpans(ds, visList, visMgr); + + return visList.size() > 0; +} + +bool plMetalPipeline::PrepForRender(plDrawable* drawable, std::vector& visList, plVisMgr* visMgr) +{ + plProfile_BeginTiming(PrepDrawable); + + plDrawableSpans* ice = plDrawableSpans::ConvertNoRef(drawable); + if (!ice) { + plProfile_EndTiming(PrepDrawable); + return false; + } + + // Find our lights + ICheckLighting(ice, visList, visMgr); + + // Sort our faces + if (ice->GetNativeProperty(plDrawable::kPropSortFaces)) { + ice->SortVisibleSpans(visList, this); + } + + // Prep for render. This is gives the drawable a chance to + // do any last minute updates for its buffers, including + // generating particle tri lists. 
+ ice->PrepForRender(this); + + // Any skinning necessary + if (!ISoftwareVertexBlend(ice, visList)) { + plProfile_EndTiming(PrepDrawable); + return false; + } + + plProfile_EndTiming(PrepDrawable); + + return true; +} + +plTextFont* plMetalPipeline::MakeTextFont(ST::string face, uint16_t size) +{ + plTextFont* font = new plMetalTextFont(this, &fDevice); + font->Create(face, size); + font->Link(&fTextFontRefList); + return font; +} + +bool plMetalPipeline::OpenAccess(plAccessSpan& dst, plDrawableSpans* d, const plVertexSpan* span, bool readOnly) +{ + // FIXME: What's this? + // Hoikas: It's for runtime reading/writing the vertices, mostly used by stuff like dynamic decals. + return false; +} + +bool plMetalPipeline::CloseAccess(plAccessSpan& acc) { return false; } + +void plMetalPipeline::PushRenderRequest(plRenderRequest* req) +{ + // Save these, since we want to copy them to our current view + hsMatrix44 l2w = fView.GetLocalToWorld(); + hsMatrix44 w2l = fView.GetWorldToLocal(); + + plFogEnvironment defFog = fView.GetDefaultFog(); + + fViewStack.push(fView); + + SetViewTransform(req->GetViewTransform()); + + PushRenderTarget(req->GetRenderTarget()); + fView.fRenderState = req->GetRenderState(); + + fView.fRenderRequest = req; + hsRefCnt_SafeRef(fView.fRenderRequest); + + SetDrawableTypeMask(req->GetDrawableMask()); + SetSubDrawableTypeMask(req->GetSubDrawableMask()); + + float depth = req->GetClearDepth(); + fView.SetClear(&req->GetClearColor(), &depth); + + if (req->GetOverrideMat()) { + PushOverrideMaterial(req->GetOverrideMat()); + } + + // Set from our saved ones... 
+ fView.SetWorldToLocal(w2l); + fView.SetLocalToWorld(l2w); + + RefreshMatrices(); + + if (req->GetIgnoreOccluders()) { + fView.SetMaxCullNodes(0); + } + + fState.Reset(); +} + +void plMetalPipeline::PopRenderRequest(plRenderRequest* req) +{ + if (req->GetOverrideMat()) { + PopOverrideMaterial(nil); + } + + // new render target means we can't use the previous pipeline state + // it won't be set yet on the new target + // in theory we could have a stack of these so when we unwind we + // could get the state back. + fState.Reset(); + + hsRefCnt_SafeUnRef(fView.fRenderRequest); + fView = fViewStack.top(); + fViewStack.pop(); + + PopRenderTarget(); + fView.fXformResetFlags = fView.kResetProjection | fView.kResetCamera; +} + +plRenderTarget* plMetalPipeline::PopRenderTarget() +{ + pl3DPipeline::PopRenderTarget(); + fState.Reset(); +} + +void plMetalPipeline::ClearRenderTarget(plDrawable* d) +{ + plDrawableSpans* src = plDrawableSpans::ConvertNoRef(d); + + if (!src) { + ClearRenderTarget(); + return; + } + + Draw(d); +} + +void plMetalPipeline::ClearRenderTarget(const hsColorRGBA* col, const float* depth) +{ + if (fView.fRenderState & (kRenderClearColor | kRenderClearDepth)) { + hsColorRGBA clearColor = col ? *col : GetClearColor(); + float clearDepth = depth ? 
*depth : fView.GetClearDepth(); + fDevice.Clear(fView.fRenderState & kRenderClearColor, { clearColor.r, clearColor.g, clearColor.b, clearColor.a }, fView.fRenderState & kRenderClearDepth, 1.0); + fState.Reset(); + } +} + +hsGDeviceRef* plMetalPipeline::MakeRenderTargetRef(plRenderTarget* owner) +{ + plMetalRenderTargetRef* ref = nullptr; + MTL::Texture* depthBuffer = nullptr; + plCubicRenderTarget* cubicRT; + + // If we have Shader Model 3 and support non-POT textures, let's make reflections the pipe size + plDynamicCamMap* camMap = plDynamicCamMap::ConvertNoRef(owner); + if (camMap && camMap->IsReflection()) { + camMap->ResizeViewport(IGetViewTransform()); + } + + /// Check--is this renderTarget really a child of a cubicRenderTarget? + if (owner->GetParent()) { + /// This'll create the deviceRefs for all of its children as well + MakeRenderTargetRef(owner->GetParent()); + return owner->GetDeviceRef(); + } + + // If we already have a rendertargetref, we just need it filled out with D3D resources. + if (owner->GetDeviceRef()) + ref = (plMetalRenderTargetRef*)owner->GetDeviceRef(); + + /// Create the render target now + // Start with the depth surface. + // Note that we only ever give a cubic rendertarget a single shared depth buffer, + // since we only render one face at a time. If we were rendering part of face X, then part + // of face Y, then more of face X, then they would all need their own depth buffers. 
+ if (owner->GetZDepth() && (owner->GetFlags() & (plRenderTarget::kIsTexture | plRenderTarget::kIsOffscreen))) { + MTL::TextureDescriptor* depthTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatDepth32Float_Stencil8, + owner->GetWidth(), + owner->GetHeight(), + false); + if (fDevice.fMetalDevice->supportsFamily(MTL::GPUFamilyApple1)) { + // on Apple Silicon GPUs - don't allocate memory to back the render target + // this assumes the render target only needs to survive this render pass + // FIXME: Do we need to promise the output survives the render pass? + depthTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); + } else { + depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); + } + depthTextureDescriptor->setUsage(MTL::TextureUsageRenderTarget); + depthBuffer = fDevice.fMetalDevice->newTexture(depthTextureDescriptor); + } + + // See if it's a cubic render target. + // Primary consumer here is the vertex/pixel shader water. + cubicRT = plCubicRenderTarget::ConvertNoRef(owner); + if (cubicRT) { + if (!ref) + ref = new plMetalRenderTargetRef(); + + MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::textureCubeDescriptor(MTL::PixelFormatBGRA8Unorm, owner->GetWidth(), false); + textureDescriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead | MTL::TextureUsagePixelFormatView); + textureDescriptor->setStorageMode(MTL::StorageModePrivate); + + plMetalDeviceRef* device = (plMetalDeviceRef*)owner->GetDeviceRef(); + MTL::Texture* texture = fDevice.fMetalDevice->newTexture(textureDescriptor); + + /// Create a CUBIC texture + for (int i = 0; i < 6; i++) { + plRenderTarget* face = cubicRT->GetFace(i); + plMetalRenderTargetRef* fRef; + + if (face->GetDeviceRef() != nullptr) { + fRef = (plMetalRenderTargetRef*)face->GetDeviceRef(); + if (!fRef->IsLinked()) + fRef->Link(&fRenderTargetRefList); + } else { + fRef = new plMetalRenderTargetRef(); + + face->SetDeviceRef(fRef); + 
((plMetalRenderTargetRef*)face->GetDeviceRef())->Link(&fRenderTargetRefList); + // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least) + hsRefCnt_SafeUnRef(face->GetDeviceRef()); + } + + // in since the root texture has changed reload all the face textures + static const uint kFaceMapping[] = { + 1, // kLeftFace + 0, // kRightFace + 4, // kFrontFace + 5, // kBackFace + 2, // kTopFace + 3 // kBottomFace + }; + + if (fRef->fTexture) { + fRef->fTexture->release(); + fRef->fTexture = nullptr; + } + + if (fRef->fDepthBuffer) { + fRef->fDepthBuffer->release(); + fRef->fDepthBuffer = nullptr; + } + + fRef->fTexture = texture->newTextureView(MTL::PixelFormatBGRA8Unorm, MTL::TextureType2D, NS::Range::Make(0, 1), NS::Range::Make(kFaceMapping[i], 1)); + // in since the depth buffer is shared each render target gets their own retain + fRef->fDepthBuffer = depthBuffer->retain(); + fRef->SetDirty(false); + } + + // if the ref already has an old texture, release it + if (ref->fTexture) + ref->fTexture->release(); + if (ref->fDepthBuffer) + ref->fDepthBuffer->release(); + ref->fTexture = texture; + ref->fDepthBuffer = depthBuffer; + ref->fOwner = owner; + + // Keep it in a linked list for ready destruction. 
+ if (owner->GetDeviceRef() != ref) { + owner->SetDeviceRef(ref); + // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least) + hsRefCnt_SafeUnRef(ref); + if (ref != nullptr && !ref->IsLinked()) + ref->Link(&fRenderTargetRefList); + } else { + if (ref != nullptr && !ref->IsLinked()) + ref->Link(&fRenderTargetRefList); + } + ref->SetDirty(false); + + return ref; + } else if (owner->GetFlags() & plRenderTarget::kIsTexture) { + if (!ref) + ref = new plMetalRenderTargetRef(); + + MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); + textureDescriptor->setWidth(owner->GetWidth()); + textureDescriptor->setHeight(owner->GetHeight()); + textureDescriptor->setPixelFormat(MTL::PixelFormatBGRA8Unorm); + textureDescriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); + textureDescriptor->setStorageMode(MTL::StorageModePrivate); + + plMetalDeviceRef* device = (plMetalDeviceRef*)owner->GetDeviceRef(); + MTL::Texture* texture = fDevice.fMetalDevice->newTexture(textureDescriptor); + textureDescriptor->release(); + + // if the ref already has an old texture, release it + if (ref->fTexture) + ref->fTexture->release(); + if (ref->fDepthBuffer) + ref->fDepthBuffer->release(); + ref->fTexture = texture; + ref->fDepthBuffer = depthBuffer; + ref->fOwner = owner; + + // Keep it in a linked list for ready destruction. + if (owner->GetDeviceRef() != ref) { + owner->SetDeviceRef(ref); + // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least) + hsRefCnt_SafeUnRef(ref); + if (ref != nullptr && !ref->IsLinked()) + ref->Link(&fRenderTargetRefList); + } else { + if (ref != nullptr && !ref->IsLinked()) + ref->Link(&fRenderTargetRefList); + } + + return ref; + } + + // Not a texture either, must be a plain offscreen. + // Offscreen isn't currently used for anything. 
+ else if (owner->GetFlags() & plRenderTarget::kIsOffscreen) { + /// Create a blank surface + + if (!ref) + ref = new plMetalRenderTargetRef(); + + MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); + textureDescriptor->setWidth(owner->GetWidth()); + textureDescriptor->setHeight(owner->GetHeight()); + textureDescriptor->setPixelFormat(MTL::PixelFormatBGRA8Unorm); + textureDescriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead); + textureDescriptor->setStorageMode(MTL::StorageModeManaged); + + plMetalDeviceRef* device = (plMetalDeviceRef*)owner->GetDeviceRef(); + MTL::Texture* texture = fDevice.fMetalDevice->newTexture(textureDescriptor); + textureDescriptor->release(); + + // if the ref already has an old texture, release it + if (ref->fTexture) + ref->fTexture->release(); + if (ref->fDepthBuffer) + ref->fDepthBuffer->release(); + ref->fTexture = texture; + ref->fDepthBuffer = depthBuffer; + ref->fOwner = owner; + + // Keep it in a linked list for ready destruction. + if (owner->GetDeviceRef() != ref) { + owner->SetDeviceRef(ref); + // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least) + hsRefCnt_SafeUnRef(ref); + if (ref != nullptr && !ref->IsLinked()) + ref->Link(&fRenderTargetRefList); + } else { + if (ref != nullptr && !ref->IsLinked()) + ref->Link(&fRenderTargetRefList); + } + + return ref; + } + + // Keep it in a linked list for ready destruction. 
+ if (owner->GetDeviceRef() != ref) { + owner->SetDeviceRef(ref); + // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least) + hsRefCnt_SafeUnRef(ref); + if (ref != nullptr && !ref->IsLinked()) + ref->Link(&fRenderTargetRefList); + } else { + if (ref != nullptr && !ref->IsLinked()) + ref->Link(&fRenderTargetRefList); + } + + // Mark as not dirty so it doesn't get re-created + if (ref != nullptr) + ref->SetDirty(false); + + return ref; +} + +bool plMetalPipeline::BeginRender() +{ + // leaking is bad - create an autorelease pool to dispose + // of autoreleased Metal resources at the end of the pass + fCurrentPool = NS::AutoreleasePool::alloc()->init(); + // offset transform + RefreshScreenMatrices(); + + fState.Reset(); + + // offset transform + RefreshScreenMatrices(); + + // If this is the primary BeginRender, make sure we're really ready. + if (fInSceneDepth++ == 0) { + fDevice.BeginRender(); + + fVtxRefTime++; + plMetalBufferPoolRef::SetFrameTime(fVtxRefTime); + + // Render any shadow maps that have been submitted for this frame. + IPreprocessShadows(); + IPreprocessAvatarTextures(); + + CA::MetalLayer* outputLayer = fDevice.GetOutputLayer(); + + CA::MetalDrawable* drawable = fDevice.GetOutputLayer()->nextDrawable()->retain(); + if (!drawable) { + // no framebuffer available - abort + EndRender(); + return true; + } + fDevice.CreateNewCommandBuffer(drawable); + drawable->release(); + + /// If we have a renderTarget active, use its viewport + // FIXME: New drawables should inherit existing viewport + // fDevice.SetViewport(); + } + + fRenderCnt++; + + // Would probably rather this be an input. 
+ fTime = hsTimer::GetSysSeconds(); + + return false; +} + +bool plMetalPipeline::EndRender() +{ + bool retVal = false; + fState.Reset(); + + if (--fInSceneDepth == 0) { + fDevice.SubmitCommandBuffer(); + + IClearShadowSlaves(); + } + + // Do this last, after we've drawn everything + // Just letting go of things we're done with for the frame. + hsRefCnt_SafeUnRef(fCurrMaterial); + fCurrMaterial = nullptr; + + for (int i = 0; i < 8; i++) { + if (fLayerRef[i]) { + hsRefCnt_SafeUnRef(fLayerRef[i]); + fLayerRef[i] = nullptr; + } + } + fCurrentPool->release(); + fCurrentPool = nullptr; + + return retVal; +} + +void plMetalPipeline::RenderScreenElements() +{ + bool reset = false; + + if (fView.HasCullProxy()) { + Draw(fView.GetCullProxy()); + } + + hsGMatState tHack = PushMaterialOverride(hsGMatState::kMisc, hsGMatState::kMiscWireFrame, false); + hsGMatState ambHack = PushMaterialOverride(hsGMatState::kShade, hsGMatState::kShadeWhite, true); + + plProfile_BeginTiming(PlateMgr); + // Plates + if (fPlateMgr) { + fPlateMgr->DrawToDevice(this); + reset = true; + } + plProfile_EndTiming(PlateMgr); + + PopMaterialOverride(ambHack, true); + PopMaterialOverride(tHack, false); + + plProfile_BeginTiming(DebugText); + /// Debug text + if (fDebugTextMgr && plDebugText::Instance().IsEnabled()) { + fDebugTextMgr->DrawToDevice(this); + reset = true; + } + plProfile_EndTiming(DebugText); + + plProfile_BeginTiming(Reset); + if (reset) { + fView.fXformResetFlags = fView.kResetAll; // Text destroys view transforms + } + plProfile_EndTiming(Reset); +} + +bool plMetalPipeline::IsFullScreen() const { return fIsFullscreen; } + +void plMetalPipeline::Resize(uint32_t width, uint32_t height) +{ + /* + Resize had a bunch of notes on the DX version about how it was an old function, replaced by ResetDisplayDevice. I'll implement it for now, but consider moving over to ResetDisplayDevice. + + This function is cheaper than resetting the entire display device though. 
// Releases the pipeline's device objects: the dynamic device objects first,
// then the plate manager.
void plMetalPipeline::IReleaseDeviceObjects()
{
    IReleaseDynDeviceObjects();

    // ICreateDeviceObjects allocates a fresh plate manager, so the old one is
    // destroyed here and the pointer cleared to avoid a dangling reference.
    delete fPlateMgr;
    fPlateMgr = nullptr;
}
+ plPipeGeoMakeMsg* defMake = new plPipeGeoMakeMsg(this, true); + defMake->Send(); + + // This can be a bit of a mem hog and will use more mem if available, so + // keep it last in the POOL_DEFAULT allocs. + IFillAvRTPool(); + + // Force a create of all our static vertex buffers. + plPipeGeoMakeMsg* manMake = new plPipeGeoMakeMsg(this, false); + manMake->Send(); + + // Okay, we've done it, clear the request. + plPipeResReq::Clear(); + + plProfile_IncCount(PipeReload, 1); + + hsStatusMessageF("End Device Reload t=%f", hsTimer::GetSeconds()); + plNetClientApp::StaticDebugMsg("End Device Reload"); +} + +bool plMetalPipeline::SetGamma(float eR, float eG, float eB) +{ + uint16_t tabR[256]; + uint16_t tabG[256]; + uint16_t tabB[256]; + + tabR[0] = tabG[0] = tabB[0] = 0L; + + plConst(float) kMinE(0.1f); + if (eR > kMinE) + eR = 1.f / eR; + else + eR = 1.f / kMinE; + if (eG > kMinE) + eG = 1.f / eG; + else + eG = 1.f / kMinE; + if (eB > kMinE) + eB = 1.f / eB; + else + eB = 1.f / kMinE; + + int i; + for (i = 1; i < 256; i++) { + float orig = float(i) / 255.f; + + float gamm; + gamm = pow(orig, eR); + gamm *= float(uint16_t(-1)); + tabR[i] = uint16_t(gamm); + + gamm = pow(orig, eG); + gamm *= float(uint16_t(-1)); + tabG[i] = uint16_t(gamm); + + gamm = pow(orig, eB); + gamm *= float(uint16_t(-1)); + tabB[i] = uint16_t(gamm); + } + + SetGamma(tabR, tabG, tabB); + + return true; +} + +bool plMetalPipeline::SetGamma(const uint16_t* const tabR, const uint16_t* const tabG, const uint16_t* const tabB) +{ + // allocate a new buffer every time so we don't cause problems with a running render pass + if (fDevice.fGammaLUTTexture) { + fDevice.fGammaLUTTexture->release(); + fDevice.fGammaLUTTexture = nullptr; + } + + /* + Plasma has multiple types of gamma corrections it can do - and the engine reserves + the right to create any color correct LUT. Ugh. Load the LUT into a texture as 8 bit + per channel data. 
The Metal renderer supports up to 10 bit colors - but it can subsample + the texture to interpolate the colors in between what the LUT defines. + */ + constexpr size_t numLuts = 256; + + MTL::TextureDescriptor* texDescriptor = MTL::TextureDescriptor::alloc()->init()->autorelease(); + texDescriptor->setTextureType(MTL::TextureType1DArray); + texDescriptor->setWidth(numLuts); + texDescriptor->setPixelFormat(MTL::PixelFormatR16Uint); + texDescriptor->setArrayLength(3); + + fDevice.fGammaLUTTexture = fDevice.fMetalDevice->newTexture(texDescriptor); + + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, numLuts), 0, 0, tabR, 0, 0); + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, numLuts), 0, 1, tabG, 0, 0); + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, numLuts), 0, 2, tabB, 0, 0); + + return true; +} + +bool plMetalPipeline::SetGamma10(const uint16_t* const tabR, const uint16_t* const tabG, const uint16_t* const tabB) +{ + // allocate a new buffer every time so we don't cause problems with a running render pass + if (fDevice.fGammaLUTTexture) { + fDevice.fGammaLUTTexture->release(); + fDevice.fGammaLUTTexture = nullptr; + } + + /* + Loads in a real 10 bit color LUT for fancy displays. This LUT contains + way more data - but the shader doesn't care. The shader does an x lookup + by normalized co-ordinate - not value. So the width of the texture can + vary. 
+ */ + constexpr size_t numLuts = 1024; + + MTL::TextureDescriptor* texDescriptor = MTL::TextureDescriptor::alloc()->init()->autorelease(); + texDescriptor->setTextureType(MTL::TextureType1DArray); + texDescriptor->setWidth(numLuts); + texDescriptor->setPixelFormat(MTL::PixelFormatR16Uint); + texDescriptor->setArrayLength(3); + + fDevice.fGammaLUTTexture = fDevice.fMetalDevice->newTexture(texDescriptor); + + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, numLuts), 0, 0, tabR, 0, 0); + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, numLuts), 0, 1, tabG, 0, 0); + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, numLuts), 0, 2, tabB, 0, 0); + + return true; +} + +bool plMetalPipeline::CaptureScreen(plMipmap* dest, bool flipVertical, uint16_t desiredWidth, uint16_t desiredHeight) +{ + // FIXME: Screen capture + // FIXME: Double fix me - wasn't this working? + return false; +} + +plMipmap* plMetalPipeline::ExtractMipMap(plRenderTarget* targ) +{ + if (plCubicRenderTarget::ConvertNoRef(targ)) + return nullptr; + + if (targ->GetPixelSize() != 32) { + hsAssert(false, "Only RGBA8888 currently implemented"); + return nullptr; + } + + plMetalRenderTargetRef* ref = (plMetalRenderTargetRef*)targ->GetDeviceRef(); + if (!ref) + return nullptr; + + const int width = targ->GetWidth(); + const int height = targ->GetHeight(); + + plMipmap* mipMap = new plMipmap(width, height, plMipmap::kARGB32Config, 1); + + uint8_t* ptr = (uint8_t*)(ref->fTexture->buffer()->contents()); + const NS::UInteger pitch = ref->fTexture->width() * 4; + + ref->fTexture->getBytes(mipMap->GetAddr32(0, 0), pitch, MTL::Region(0, 0, width, height), 0); + + const uint32_t blackOpaque = 0xff000000; + int y; + for (y = 0; y < height; y++) { + uint32_t* destPtr = mipMap->GetAddr32(0, y); + uint32_t* srcPtr = (uint32_t*)destPtr; + int x; + for (x = 0; x < width; x++) { + destPtr[x] = srcPtr[x] | blackOpaque; + } + ptr += pitch; + } + + return mipMap; +} + +void 
plMetalPipeline::GetSupportedDisplayModes(std::vector<plDisplayMode>* res, int ColorDepth)
+{
+    /*
+     There are decisions to make here.
+
+     Modern macOS does not support "display modes." Your panel runs at native resolution at all times,
+     and you can over-render or under-render. But you never set the display mode of the panel, or get
+     the display mode of the panel. Most games have a "scale slider."
+
+     Note: There are legacy APIs for display modes for compatibility with older software. Since
+     we're here writing a new renderer, let's do things the right way. The display mode APIs also have
+     trouble with density. I.E. a 4k display might be reported as a 2k display if the window manager is
+     running in a higher DPI mode.
+
+     The basic approach should be to render at whatever the resolution of our output surface is. We're
+     mostly doing that now (aspect ratio doesn't adjust.)
+
+     Ideally we should support some sort of scaling/semi dynamic renderbuffer resolution thing. But don't
+     mess with the window server's framebuffer size. macOS has accelerated resolution scaling like consoles
+     do. Use that.
+     */
+
+    // Report exactly one mode: the output layer's current drawable size at
+    // 32 bits. The ColorDepth argument is not consulted.
+    CA::MetalLayer* layer = fDevice.GetOutputLayer();
+    CGSize drawableSize = layer->drawableSize();
+
+    std::vector<plDisplayMode> supported;
+    supported.emplace_back();
+    supported.back().Width = drawableSize.width;
+    supported.back().Height = drawableSize.height;
+    supported.back().ColorDepth = 32;
+
+    *res = supported;
+}
+
+// Returns the maximum anisotropic sample count reported to the engine.
+int plMetalPipeline::GetMaxAnisotropicSamples()
+{
+    // Metal always supports 16. There is no device check (as far as I know.)
+    return 16;
+}
+
+// Returns the largest of 8, 4 or 2 that the device accepts as a texture sample
+// count, or 1 when none is accepted. The arguments are not consulted.
+int plMetalPipeline::GetMaxAntiAlias(int Width, int Height, int ColorDepth)
+{
+    // Metal devices may not support the full antialias range
+    // return the max and we'll work it out later
+    const int candidates[] = {8, 4, 2};
+    for (int sampleCount : candidates) {
+        if (fDevice.fMetalDevice->supportsTextureSampleCount(sampleCount)) {
+            return sampleCount;
+        }
+    }
+    return 1;
+}
+
+// Records the windowed/fullscreen state, resizes to Width x Height and forwards
+// the anisotropy limit to the device. ColorDepth, NumAASamples and vSync are
+// not used here.
+void plMetalPipeline::ResetDisplayDevice(int Width, int Height, int ColorDepth, bool Windowed, int NumAASamples, int MaxAnisotropicSamples, bool vSync)
+{
+    fIsFullscreen = !Windowed;
+    Resize(Width, Height);
+    fDevice.SetMaxAnsiotropy(MaxAnisotropicSamples);
+}
+
+// RenderSpans /////////////////////////////////////////////////////////////////
+// Walks the visible span list of a drawable, merges runs of spans that can be
+// merged, and renders each merged run with its material (or the override
+// material when one is set). Runs whose material is null are skipped.
+void plMetalPipeline::RenderSpans(plDrawableSpans* ice, const std::vector<int16_t>& visList)
+{
+    plProfile_BeginTiming(RenderSpan);
+
+    hsMatrix44 lastL2W;
+    size_t i, j;
+    hsGMaterial* material;
+    const std::vector<plSpan*>& spans = ice->GetSpanArray();
+
+    // plProfile_IncCount(EmptyList, !visList.GetCount());
+
+    /// Set this (*before* we do our TestVisibleWorld stuff...)
+    lastL2W.Reset();
+    ISetLocalToWorld(lastL2W, lastL2W); // This is necessary; otherwise, we have to test for
+                                        // the first transform set, since this'll be identity
+                                        // but the actual device transform won't be (unless
+                                        // we do this)
+
+    /// Loop through our spans, combining them when possible
+    for (i = 0; i < visList.size();) {
+        if (GetOverrideMaterial() != nullptr) {
+            material = GetOverrideMaterial();
+        } else {
+            material = ice->GetMaterial(spans[visList[i]]->fMaterialIdx);
+        }
+
+        /// It's an icicle--do our icicle merge loop
+        plIcicle tempIce(*((plIcicle*)spans[visList[i]]));
+
+        // Start at i + 1, look for as many spans as we can add to tempIce
+        for (j = i + 1; j < visList.size(); j++) {
+            if (GetOverrideMaterial()) {
+                tempIce.fMaterialIdx = spans[visList[j]]->fMaterialIdx;
+            }
+
+            plProfile_BeginTiming(MergeCheck);
+            if (!spans[visList[j]]->CanMergeInto(&tempIce)) {
+                plProfile_EndTiming(MergeCheck);
+                break;
+            }
+            plProfile_EndTiming(MergeCheck);
+            // plProfile_Inc(SpanMerge);
+
+            plProfile_BeginTiming(MergeSpan);
+            spans[visList[j]]->MergeInto(&tempIce);
+            plProfile_EndTiming(MergeSpan);
+        }
+
+        if (material != nullptr) {
+            // First, do we have a device ref at this index?
+            plMetalMaterialShaderRef* mRef = static_cast<plMetalMaterialShaderRef*>(material->GetDeviceRef());
+
+            if (mRef == nullptr) {
+                mRef = new plMetalMaterialShaderRef(material, this);
+                material->SetDeviceRef(mRef);
+            }
+
+            if (!mRef->IsLinked()) {
+                mRef->Link(&fMatRefList);
+            }
+
+            // What do we change?
+
+            plProfile_BeginTiming(SpanTransforms);
+            ISetupTransforms(ice, tempIce, lastL2W);
+            plProfile_EndTiming(SpanTransforms);
+
+            // Check that the underlying buffers are ready to go.
+            plGBufferGroup* grp = ice->GetBufferGroup(tempIce.fGroupIdx);
+
+            plProfile_BeginTiming(CheckDyn);
+            ICheckDynBuffers(ice, grp, &tempIce);
+            plProfile_EndTiming(CheckDyn);
+
+            plProfile_BeginTiming(CheckStat);
+            CheckVertexBufferRef(grp, tempIce.fVBufferIdx);
+            CheckIndexBufferRef(grp, tempIce.fIBufferIdx);
+            plProfile_EndTiming(CheckStat);
+
+            // Draw this span now. The buffer refs are looked up only after the
+            // checks above have run, so a ref that did not exist before the
+            // checks is not handed down as null.
+            IRenderBufferSpan(tempIce,
+                              ice->GetVertexRef(tempIce.fGroupIdx, tempIce.fVBufferIdx),
+                              ice->GetIndexRef(tempIce.fGroupIdx, tempIce.fIBufferIdx),
+                              material,
+                              tempIce.fVStartIdx, tempIce.fVLength, // These are used as our accumulated range
+                              tempIce.fIPackedIdx, tempIce.fILength);
+        }
+
+        // Restart our search...
+        i = j;
+    }
+
+    plProfile_EndTiming(RenderSpan);
+    /// All done!
+}
+
+// ISetupTransforms ////////////////////////////////////////////////////////////
+// Sets the local-to-world transform for a span (identity when the span carries
+// more than two matrices), uploads the second blend matrix when the span has
+// exactly two, and copies the device matrices into the render pass uniforms.
+//  lastL2W - in/out: the local-to-world most recently set, used to skip
+//            redundant updates for spans without matrices.
+void plMetalPipeline::ISetupTransforms(plDrawableSpans* drawable, const plSpan& span, hsMatrix44& lastL2W)
+{
+    if (span.fNumMatrices) {
+        if (span.fNumMatrices <= 2) {
+            ISetLocalToWorld(span.fLocalToWorld, span.fWorldToLocal);
+            lastL2W = span.fLocalToWorld;
+        } else {
+            lastL2W.Reset();
+            ISetLocalToWorld(lastL2W, lastL2W);
+            fView.fLocalToWorldLeftHanded = span.fLocalToWorld.GetParity();
+        }
+    } else if (lastL2W != span.fLocalToWorld) {
+        ISetLocalToWorld(span.fLocalToWorld, span.fWorldToLocal);
+        lastL2W = span.fLocalToWorld;
+    } else {
+        fView.fLocalToWorldLeftHanded = lastL2W.GetParity();
+    }
+
+    if (span.fNumMatrices == 2) {
+        matrix_float4x4 mat;
+        hsMatrix2SIMD(drawable->GetPaletteMatrix(span.fBaseMatrix + 1), &mat);
+        fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&mat, sizeof(matrix_float4x4), VertexShaderArgumentBlendMatrix1);
+    }
+
+    fCurrentRenderPassUniforms->projectionMatrix = fDevice.fMatrixProj;
+    fCurrentRenderPassUniforms->worldToCameraMatrix = fDevice.fMatrixW2C;
+    fCurrentRenderPassUniforms->cameraToWorldMatrix = fDevice.fMatrixC2W;
+    fCurrentRenderPassUniforms->localToWorldMatrix = fDevice.fMatrixL2W;
+}
+
+// IRenderBufferSpan ///////////////////////////////////////////////////////////
+// Renders one (possibly merged) span: every material pass, the per-pass
+// projections, any aux spans, the after-all-passes projections and the shadows.
+//  vb, ib           - device refs for the vertex and index buffers; the call
+//                     asserts and returns if either has no buffer.
+//  vStart, vLength  - vertex range of the span.
+//  iStart, iLength  - index range of the span; nothing happens if iLength is 0.
+void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb,
+                                        hsGDeviceRef* ib, hsGMaterial* material,
+                                        uint32_t vStart, uint32_t vLength,
+                                        uint32_t iStart, uint32_t iLength)
+{
+    if (iLength == 0) {
+        return;
+    }
+
+    plProfile_BeginTiming(RenderBuff);
+
+    plMetalVertexBufferRef* vRef = static_cast<plMetalVertexBufferRef*>(vb);
+    plMetalIndexBufferRef* iRef = static_cast<plMetalIndexBufferRef*>(ib);
+    plMetalMaterialShaderRef* mRef = static_cast<plMetalMaterialShaderRef*>(material->GetDeviceRef());
+    mRef->CheckMateralRef();
+
+    if (!vRef || !iRef || !vRef->GetBuffer() || !iRef->GetBuffer()) {
+        plProfile_EndTiming(RenderBuff);
+
+        hsAssert(false, ST::format("Trying to render a nil buffer pair! (Mat: {})", material->GetKeyName()).c_str());
+        return;
+    }
+
+    /* Index Buffer stuff and drawing */
+
+    plRenderTriListFunc render(&fDevice, 0, vStart, vLength, iStart, iLength);
+
+    plProfile_EndTiming(RenderBuff);
+
+    // Turn on this spans lights and turn off the rest.
+    ISelectLights(&span, mRef);
+
+#ifdef HS_DEBUGGING
+    fDevice.CurrentRenderCommandEncoder()->pushDebugGroup(NS::String::string(material->GetKeyName().c_str(), NS::UTF8StringEncoding));
+#endif
+
+    /* Vertex Buffer stuff */
+    if (fState.fCurrentVertexBuffer != vRef->GetBuffer()) {
+        fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0);
+        fState.fCurrentVertexBuffer = vRef->GetBuffer();
+    }
+    fDevice.fCurrentIndexBuffer = iRef->GetBuffer();
+
+    IPushPiggyBacks(material);
+    hsRefCnt_SafeAssign(fCurrMaterial, material);
+    for (uint32_t pass = 0; pass < mRef->GetNumPasses(); pass++) {
+        if (IHandleMaterialPass(material, pass, &span, vRef)) {
+            render.RenderPrims();
+        }
+
+        // Projection wants to do its own lighting, push the current lighting state
+        // so we can keep the same light calculations on the next pass
+        PushCurrentLightSources();
+
+        plProfile_BeginTiming(SelectProj);
+        ISelectLights(&span, mRef, true);
+        plProfile_EndTiming(SelectProj);
+
+        // Take care of projections that get applied to each pass.
+        if (fProjEach.size() && !(fView.fRenderState & kRenderNoProjection)) {
+#ifdef HS_DEBUGGING
+            fDevice.CurrentRenderCommandEncoder()->pushDebugGroup(NS::String::string("Render projections", NS::UTF8StringEncoding));
+#endif
+            IRenderProjectionEach(render, material, pass, span, vRef);
+#ifdef HS_DEBUGGING
+            fDevice.CurrentRenderCommandEncoder()->popDebugGroup();
+#endif
+        }
+        // Revert the light state back to what we had before projections
+        PopCurrentLightSources();
+
+        // Debug aid: stop after the first pass.
+        if (IsDebugFlagSet(plPipeDbg::kFlagNoUpperLayers))
+            break;
+    }
+
+    IPopPiggyBacks();
+
+    // Render any aux spans associated.
+    if (span.GetNumAuxSpans()) {
+        IRenderAuxSpans(span);
+
+        // aux spans will change the current vertex buffer, put ours back
+        fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0);
+    }
+
+    // If we had to render aux spans, we probably changed the vertex and index
+    // buffer; reset those before the projection and shadow draws.
+    fState.fCurrentVertexBuffer = vRef->GetBuffer();
+    fDevice.fCurrentIndexBuffer = iRef->GetBuffer();
+
+    // Projections that get applied to the frame buffer (after all passes).
+    if (fProjAll.size() && !(fView.fRenderState & kRenderNoProjection)) {
+#ifdef HS_DEBUGGING
+        fDevice.CurrentRenderCommandEncoder()->pushDebugGroup(MTLSTR("Render All Projections"));
+#endif
+        IRenderProjections(render, vRef);
+#ifdef HS_DEBUGGING
+        fDevice.CurrentRenderCommandEncoder()->popDebugGroup();
+#endif
+    }
+
+    // Handle render of shadows onto geometry.
+    if (fShadows.size()) {
+        IRenderShadowsOntoSpan(render, &span, material, vRef);
+    }
+
+#ifdef HS_DEBUGGING
+    fDevice.CurrentRenderCommandEncoder()->popDebugGroup();
+#endif
+}
+
+// IRenderProjections ///////////////////////////////////////////////////////////
+// Render any projected lights that want to be rendered a single time after
+// all passes on the object are complete.
+void plMetalPipeline::IRenderProjections(const plRenderPrimFunc& render, const plMetalVertexBufferRef* vRef)
+{
+    PushCurrentLightSources();
+    IDisableLightsForShadow();
+    for (plLightInfo* li : fProjAll) {
+        IRenderProjection(render, li, vRef);
+    }
+    PopCurrentLightSources();
+}
+
+// IRenderProjection //////////////////////////////////////////////////////////////
+// Render this light's projection onto the frame buffer. Does nothing when the
+// light has no projection layer, or the layer has no texture with a device ref.
+void plMetalPipeline::IRenderProjection(const plRenderPrimFunc& render, plLightInfo* li, const plMetalVertexBufferRef* vRef)
+{
+    plLayerInterface* proj = li->GetProjection();
+    hsAssert(proj, "A projector with no texture to project?");
+    if (!proj)
+        return;
+
+    // Enable the projecting light only.
+    IEnableLight(0, li);
+    fLights.count = 1;
+
+    CheckTextureRef(proj);
+    if (!proj->GetTexture())
+        return;
+    plMetalTextureRef* tex = (plMetalTextureRef*)proj->GetTexture()->GetDeviceRef();
+    if (!tex)
+        return;
+
+    IScaleLight(0, true);
+
+    fCurrentRenderPassUniforms->ambientSrc = 1;
+    fCurrentRenderPassUniforms->diffuseSrc = 1;
+    fCurrentRenderPassUniforms->emissiveSrc = 1;
+    fCurrentRenderPassUniforms->specularSrc = 1;
+    fCurrentRenderPassUniforms->globalAmb = {1.f, 1.f, 1.f};
+    fCurrentRenderPassUniforms->ambientCol = {0.f, 0.f, 0.f};
+    fCurrentRenderPassUniforms->emissiveCol = {0.f, 0.f, 0.f};
+    fCurrentRenderPassUniforms->specularCol = {0.f, 0.f, 0.f};
+    fCurrentRenderPassUniforms->fogColor = {0.f, 0.f, 0.f};
+    fCurrentRenderPassUniforms->diffuseCol = {1.f, 1.f, 1.f, 1.f};
+
+    matrix_float4x4 tXfm;
+    hsMatrix2SIMD(proj->GetTransform(), &tXfm);
+    fCurrentRenderPassUniforms->uvTransforms[0].transform = tXfm;
+    fCurrentRenderPassUniforms->uvTransforms[0].UVWSrc = proj->GetUVWSrc();
+
+    fCurrNumLayers = 1;
+    // We should have put ZNoZWrite on during export, but we didn't.
+    IHandleZMode(hsGMatState::kZNoZWrite);
+
+    // This is a bit weird - since this isn't a material we need to build a query for the right Metal program ourselves
+    plMetalFragmentShaderDescription description{};
+    description.fNumLayers = fCurrNumLayers;
+
+    description.Populate(proj, 0);
+    // DX sets the color invert when the final color should be inverted. Not sure why!
+    if (proj->GetBlendFlags() & hsGMatState::kBlendInvertFinalColor) {
+        description.fBlendModes[0] |= hsGMatState::kBlendInvertColor;
+    }
+
+    plMetalMaterialPassPipelineState materialShaderState(&fDevice, vRef, description);
+    plMetalDevice::plMetalLinkedPipeline* linkedPipeline = materialShaderState.GetRenderPipelineState();
+
+    fState.fCurrentPipelineState = linkedPipeline->pipelineState;
+    fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState);
+    fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(tex->fTexture, 0);
+
+    const hsGMatState::hsGMatClampFlags clampFlags = hsGMatState::hsGMatClampFlags(proj->GetClampFlags());
+    MTL::SamplerState* samplerState = fDevice.SampleStateForClampFlags(clampFlags);
+    fDevice.CurrentRenderCommandEncoder()->setFragmentSamplerState(samplerState, 0);
+    fState.layerStates[0].clampFlag = clampFlags;
+
+    // Okay, render it already.
+
+    render.RenderPrims();
+}
+
+// IRenderProjectionEach ///////////////////////////////////////////////////////////////////////////////////////
+// Render any lights that are to be projected onto each pass of the object.
+// Projectors without a projection layer are skipped.
+void plMetalPipeline::IRenderProjectionEach(const plRenderPrimFunc& render, hsGMaterial* material, int iPass, const plSpan& span, const plMetalVertexBufferRef* vRef)
+{
+    // If this is a bump map pass, forget it, we've already "done" per-pixel lighting.
+    // if( fLayerState[iPass].fMiscFlags & (hsGMatState::kMiscBumpLayer | hsGMatState::kMiscBumpChans) )
+    //     return;
+
+    // Push the LayerShadowBase override. This sets the blend
+    // to framebuffer as Add/ZNoWrite and AmbientColor = 0.
+    static plLayerLightBase layLightBase;
+
+    // For each projector:
+    for (size_t k = 0; k < fProjEach.size(); k++) {
+        // Push its projected texture as a piggyback.
+        plLightInfo* li = fProjEach[k];
+
+        plLayerInterface* proj = li->GetProjection();
+        hsAssert(proj, "A projector with no texture to project?");
+        if (!proj)
+            continue;
+        IPushProjPiggyBack(proj);
+
+        // Enable the projecting light only.
+        IEnableLight(0, li);
+        fLights.count = 1;
+
+        AppendLayerInterface(&layLightBase, false);
+
+        // Shaders are disallowed for this pass (allowShaders = false).
+        IHandleMaterialPass(material, iPass, &span, vRef, false);
+
+        IScaleLight(0, true);
+
+        // Do the render with projection.
+        render.RenderPrims();
+
+        RemoveLayerInterface(&layLightBase, false);
+
+        // Pop its projected texture off piggyback
+        IPopProjPiggyBacks();
+    }
+}
+
+// ICheckAuxBuffers ///////////////////////////////////////////////////////////////////////
+// The AuxBuffers are associated with drawables for things to be drawn right after that
+// drawable's contents. In particular, see the plDynaDecal, which includes things like
+// water ripples, bullet hits, and footprints.
+// This function just makes sure they are ready to be rendered, called right before
+// the rendering.
+// Returns true on error (a missing vertex or index buffer ref), false otherwise.
+bool plMetalPipeline::ICheckAuxBuffers(const plAuxSpan* span)
+{
+    plGBufferGroup* group = span->fGroup;
+
+    plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)group->GetVertexBufferRef(span->fVBufferIdx);
+    if (!vRef)
+        return true;
+
+    plMetalIndexBufferRef* iRef = (plMetalIndexBufferRef*)group->GetIndexBufferRef(span->fIBufferIdx);
+    if (!iRef)
+        return true;
+
+    // If our vertex buffer ref is volatile and the timestamp is off
+    // then it needs to be refilled
+    if (vRef->Expired(fVtxRefTime)) {
+        IRefreshDynVertices(group, vRef);
+    }
+
+    return false; // No error
+}
+
+// IRenderAuxSpans ////////////////////////////////////////////////////////////////////////////
+// Save and restore render state around calls to IRenderAuxSpan. This lets
+// a list of aux spans get rendered with only one save/restore state.
+void plMetalPipeline::IRenderAuxSpans(const plSpan& span)
+{
+    if (IsDebugFlagSet(plPipeDbg::kFlagNoAuxSpans))
+        return;
+
+    // Aux spans are drawn with an identity local-to-world transform.
+    ISetLocalToWorld(hsMatrix44::IdentityMatrix(), hsMatrix44::IdentityMatrix());
+
+    const auto numAuxSpans = span.GetNumAuxSpans();
+    for (decltype(span.GetNumAuxSpans()) i = 0; i < numAuxSpans; i++)
+        IRenderAuxSpan(span, span.GetAuxSpan(i));
+
+    // Put the owning span's transform back.
+    ISetLocalToWorld(span.fLocalToWorld, span.fWorldToLocal);
+}
+
+// IRenderAuxSpan //////////////////////////////////////////////////////////
+// Aux spans (auxilliary) are geometry rendered immediately after, and therefore dependent, on
+// other normal geometry. They don't have SceneObjects, Drawables, DrawInterfaces or
+// any of that, and therefore don't correspond to any object in the scene.
+// They are dynamic procedural decals. See plDynaDecal.cpp and plDynaDecalMgr.cpp.
+// This is wrapped by IRenderAuxSpans, which makes sure state is restored to resume
+// normal rendering after the AuxSpan is rendered.
+// A pass is drawn only when IHandleMaterialPass accepts it, matching what
+// IRenderBufferSpan does for normal spans.
+void plMetalPipeline::IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux)
+{
+    // Make sure the underlying resources are created and filled in with current data.
+    CheckVertexBufferRef(aux->fGroup, aux->fVBufferIdx);
+    CheckIndexBufferRef(aux->fGroup, aux->fIBufferIdx);
+    ICheckAuxBuffers(aux);
+
+    // Set to render from the aux spans buffers.
+    plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)aux->fGroup->GetVertexBufferRef(aux->fVBufferIdx);
+
+    if (!vRef)
+        return;
+
+    plMetalIndexBufferRef* iRef = (plMetalIndexBufferRef*)aux->fGroup->GetIndexBufferRef(aux->fIBufferIdx);
+
+    if (!iRef)
+        return;
+
+    // Now just loop through the aux material, rendering in as many passes as it takes.
+    hsGMaterial* material = aux->fMaterial;
+    plMetalMaterialShaderRef* mRef = static_cast<plMetalMaterialShaderRef*>(material->GetDeviceRef());
+
+    if (mRef == nullptr) {
+        mRef = new plMetalMaterialShaderRef(material, this);
+        material->SetDeviceRef(mRef);
+    }
+
+    // Keep the ref on the pipeline's material list, as RenderSpans does.
+    if (!mRef->IsLinked()) {
+        mRef->Link(&fMatRefList);
+    }
+
+    /* Vertex Buffer stuff */
+    if (!vRef->GetBuffer()) {
+        return;
+    }
+    fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0);
+    fState.fCurrentVertexBuffer = vRef->GetBuffer();
+    fDevice.fCurrentIndexBuffer = iRef->GetBuffer();
+
+    plRenderTriListFunc render(&fDevice, 0, aux->fVStartIdx, aux->fVLength, aux->fIStartIdx, aux->fILength);
+
+    for (int32_t pass = 0; pass < mRef->GetNumPasses(); pass++) {
+        // A rejected pass has not had its pipeline state or arguments encoded,
+        // so drawing it would reuse whatever the previous draw left bound.
+        if (!IHandleMaterialPass(material, pass, &span, vRef)) {
+            continue;
+        }
+
+        if (aux->fFlags & plAuxSpan::kOverrideLiteModel) {
+            fCurrentRenderPassUniforms->ambientCol = {1.0f, 1.0f, 1.0f};
+
+            fCurrentRenderPassUniforms->diffuseSrc = 1.0;
+            fCurrentRenderPassUniforms->ambientSrc = 1.0;
+            fCurrentRenderPassUniforms->emissiveSrc = 0.0;
+            fCurrentRenderPassUniforms->specularSrc = 1.0;
+        }
+
+        render.RenderPrims();
+    }
+}
+
+// IHandleMaterialPass /////////////////////////////////////////////////////////
+// Encodes everything the given material pass needs onto the current render
+// command encoder: Z mode, blend mode, cull mode, then either the shader path
+// (when the pass's layer has a vertex shader and allowShaders is true) or the
+// "fixed" path (lighting, arguments and pipeline state).
+//  material     - material being rendered; its device ref must already exist.
+//  pass         - pass index within the material's device ref.
+//  currSpan     - span being rendered; used for lighting on the fixed path.
+//  vRef         - vertex buffer ref the pipeline state is built against.
+//  allowShaders - false forces the fixed path even if the layer has shaders.
+// Returns true when the caller should draw, false when the pass must be
+// skipped. The override layer push/pop is balanced on every return path.
+bool plMetalPipeline::IHandleMaterialPass(hsGMaterial* material, uint32_t pass, const plSpan* currSpan, const plMetalVertexBufferRef* vRef, const bool allowShaders)
+{
+    plMetalMaterialShaderRef* mRef = static_cast<plMetalMaterialShaderRef*>(material->GetDeviceRef());
+
+    fCurrLayerIdx = mRef->GetPassIndex(pass);
+    plLayerInterface* lay = material->GetLayer(fCurrLayerIdx);
+
+    hsGMatState s;
+    s.Composite(lay->GetState(), fMatOverOn, fMatOverOff);
+
+    ISetLayer((s.fZFlags & hsGMatState::kZIncLayer) ? 1 : 0);
+
+    IHandleZMode(s);
+    IHandleBlendMode(s);
+
+    if (s.fMiscFlags & hsGMatState::kMiscTwoSided) {
+        if (fState.fCurrentCullMode != MTL::CullModeNone) {
+            fDevice.CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone);
+            fState.fCurrentCullMode = MTL::CullModeNone;
+        }
+    } else {
+        ISetCullMode();
+    }
+
+    // Some build passes don't allow shaders. Render the geometry and the provided material, but don't allow the
+    // shader path if instructed to. In the DX source, this would be done by the render phase setting the shaders
+    // to null after calling this. That won't work here since our pipeline state has to know the shaders.
+    if (lay->GetVertexShader() && allowShaders) {
+        lay = IPushOverBaseLayer(lay);
+        lay = IPushOverAllLayer(lay);
+
+        // pure shader path
+        plShader* vertexShader = lay->GetVertexShader();
+        plShader* fragShader = lay->GetPixelShader();
+
+        fCurrLay = lay;
+        fCurrNumLayers = mRef->fPassLengths[pass];
+
+        ISetShaders(vRef, s, vertexShader, fragShader);
+
+        // FIXME: Programmable pipeline does not implement the full feature set
+        /*
+         The programmable pipeline doesn't do things like set the texture transform matrices,
+         In practice, the transforms aren't set and used. Does it matter that the Metal
+         implementation doesn't implement the full inputs the DX version gets?
+
+         If it is implemented, the same checks the DX version does should be also implemented.
+         DX will set texture transforms, but then turn them off in the pipeline and manually
+         manipulate texture coords in the shader.
+
+         Texture setting should also _maybe_ be reconciled with the "fixed" pipeline. But
+         the fixed pipeline uses indirect textures mapped to a buffer. That approach could
+         work for the programmable pipeline too, but I'm planning changes to the fixed pipeline
+         and the way it stores textures. So maybe things should be reconciled after that
+         work is done.
+         */
+
+        // Bind every layer's texture. Any layer that cannot supply one aborts
+        // the pass, but only after the override layers have been popped below.
+        bool texturesBound = true;
+        for (size_t i = 0; i < material->GetNumLayers(); i++) {
+            plLayerInterface* layer = material->GetLayer(i);
+            if (!layer) {
+                texturesBound = false;
+                break;
+            }
+
+            CheckTextureRef(layer);
+
+            plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture());
+            if (!img) {
+                texturesBound = false;
+                break;
+            }
+
+            plMetalTextureRef* texRef = (plMetalTextureRef*)img->GetDeviceRef();
+            if (!texRef || !texRef->fTexture) {
+                texturesBound = false;
+                break;
+            }
+
+            size_t idOffset = 0;
+            // Metal doesn't like mixing 2D and cubic textures. If this is a cubic texture, make sure it lands in the right ID range.
+            if (plCubicRenderTarget::ConvertNoRef(img)) {
+                idOffset = FragmentShaderArgumentAttributeCubicTextures;
+            }
+
+            fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(texRef->fTexture, i + idOffset);
+        }
+
+        lay = IPopOverAllLayer(lay);
+        lay = IPopOverBaseLayer(lay);
+
+        if (!texturesBound) {
+            return false;
+        }
+    } else {
+        //"Fixed" path
+
+        /*
+         To compute correct lighting we need to add the pushover layers.
+         The actual renderer will do its own add and remove, so remove the
+         pushover layer before we get to the actual layer loop.
+         */
+        lay = IPushOverBaseLayer(lay);
+        lay = IPushOverAllLayer(lay);
+        ICalcLighting(mRef, lay, currSpan);
+
+        s.Composite(lay->GetState(), fMatOverOn, fMatOverOff);
+
+        /*
+         If the layer opacity is 0, don't draw it. This prevents it from contributing to the Z buffer.
+         This can happen with some models like the fire marbles in the neighborhood that have some models
+         for physics only, and then can block other rendering in the Z buffer.
+         DX pipeline does this in ILoopOverLayers.
+         */
+        const bool invisible = (s.fBlendFlags & hsGMatState::kBlendAlpha) &&
+                               lay->GetOpacity() <= 0 &&
+                               (fCurrLightingMethod != plSpan::kLiteVtxPreshaded);
+
+        fCurrentRenderPassUniforms->invVtxAlpha = !invisible && (s.fBlendFlags & hsGMatState::kBlendInvertVtxAlpha) != 0
+                                                      ? true
+                                                      : (invisible ? fCurrentRenderPassUniforms->invVtxAlpha : false);
+
+        size_t numActivePiggyBacks = 0;
+        if (!(s.fMiscFlags & hsGMatState::kMiscBumpChans) && !(s.fShadeFlags & hsGMatState::kShadeEmissive)) {
+            /// Tack lightmap onto last stage if we have one
+            numActivePiggyBacks = fActivePiggyBacks;
+        }
+
+        lay = IPopOverAllLayer(lay);
+        lay = IPopOverBaseLayer(lay);
+
+        // The override layers are popped on both outcomes before leaving.
+        if (invisible) {
+            return false;
+        }
+
+        plMetalFragmentShaderDescription fragmentShaderDescription;
+
+        if (numActivePiggyBacks == 0 && fOverBaseLayer == nullptr && fOverAllLayer == nullptr) {
+            // Nothing modifies the material's layers, so the arguments
+            // prepared by the material ref can be used as they are.
+            mRef->FastEncodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass);
+
+            fragmentShaderDescription = mRef->GetFragmentShaderDescription(pass);
+        } else {
+            // Plasma pulls piggybacks from the rear first, pull the number of active piggybacks
+            auto firstPiggyback = fPiggyBackStack.end() - numActivePiggyBacks;
+            auto lastPiggyback = fPiggyBackStack.end();
+
+            std::vector<plLayerInterface*> subPiggybacks(firstPiggyback, lastPiggyback);
+
+            // Applied to each layer before it is encoded: the base override
+            // only wraps layer 0, the all-layer override wraps every layer.
+            auto preEncodeTransform = [this](plLayerInterface* layer, uint32_t index) {
+                if (index == 0) {
+                    layer = IPushOverBaseLayer(layer);
+                }
+                layer = IPushOverAllLayer(layer);
+
+                return layer;
+            };
+
+            // Undoes preEncodeTransform, in reverse order.
+            auto postEncodeTransform = [this](plLayerInterface* layer, uint32_t index) {
+                layer = IPopOverAllLayer(layer);
+                if (index == 0)
+                    layer = IPopOverBaseLayer(layer);
+                return layer;
+            };
+
+            mRef->EncodeArguments(fDevice.CurrentRenderCommandEncoder(),
+                                  fCurrentRenderPassUniforms,
+                                  pass,
+                                  &fragmentShaderDescription,
+                                  &subPiggybacks,
+                                  preEncodeTransform,
+                                  postEncodeTransform);
+        }
+
+        plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalMaterialPassPipelineState(&fDevice, vRef, fragmentShaderDescription).GetRenderPipelineState();
+        const MTL::RenderPipelineState* pipelineState = linkedPipeline->pipelineState;
+
+        // Skip the encoder call when this pipeline state is already bound.
+        if (fState.fCurrentPipelineState != pipelineState) {
+            fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pipelineState);
+            fState.fCurrentPipelineState = pipelineState;
+        }
+    }
+
+    return true;
+}
+
+// ISetPipeConsts //////////////////////////////////////////////////////////////////
+// A shader can request that the pipeline fill in certain constants that are indeterminate
+// until the pipeline is about to render the object the shader is applied to. For example,
+// the object's local to world. A single shader may be used on multiple objects with
+// multiple local to world transforms. This ensures the pipeline will shove the proper
+// local to world into the shader immediately before the render.
+// See plShader.h for the list of available pipe constants.
+// Note that the lighting pipe constants are NOT implemented.
+void plMetalPipeline::ISetPipeConsts(plShader* shader)
+{
+    // Shared guard for the kTex*x4_N constants: reports (and asserts on) a texture stage
+    // the current pass cannot supply.
+    // NOTE(review): the comparison is deliberately kept as `>`. Stages are zero-based, so this
+    // still admits stage == fCurrNumLayers; confirm whether `>=` is intended before tightening.
+    auto stageInRange = [this](int stage) {
+        if (stage > fCurrNumLayers) {
+            // Ooops. This is bad, means the shader is expecting more layers than
+            // we actually have (or is just bogus). Assert and quietly continue.
+            hsAssert(false, "Shader asking for higher stage transform than we have");
+            return false;
+        }
+        return true;
+    };
+
+    const size_t numConsts = shader->GetNumPipeConsts();
+    for (size_t i = 0; i < numConsts; i++) {
+        const plPipeConst& pc = shader->GetPipeConst(i);
+        switch (pc.fType) {
+            case plPipeConst::kFogSet: {
+                // FIXME: Fog broken in dynamic pipeline
+                // IGetVSFogSet(set);
+                // shader->SetFloat4(pc.fReg, set);
+            } break;
+            case plPipeConst::kLayAmbient: {
+                hsColorRGBA col = fCurrLay->GetAmbientColor();
+                shader->SetColor(pc.fReg, col);
+            } break;
+            case plPipeConst::kLayRuntime: {
+                // Runtime colour carries the layer opacity in its alpha channel.
+                hsColorRGBA col = fCurrLay->GetRuntimeColor();
+                col.a = fCurrLay->GetOpacity();
+                shader->SetColor(pc.fReg, col);
+            } break;
+            case plPipeConst::kLaySpecular: {
+                hsColorRGBA col = fCurrLay->GetSpecularColor();
+                shader->SetColor(pc.fReg, col);
+            } break;
+
+            // Layer texture transforms, uploaded as 3x4, 2x4 or 1x4 depending on the constant.
+            // The stage is the constant's offset from the first constant of its family.
+            case plPipeConst::kTex3x4_0:
+            case plPipeConst::kTex3x4_1:
+            case plPipeConst::kTex3x4_2:
+            case plPipeConst::kTex3x4_3:
+            case plPipeConst::kTex3x4_4:
+            case plPipeConst::kTex3x4_5:
+            case plPipeConst::kTex3x4_6:
+            case plPipeConst::kTex3x4_7: {
+                int stage = pc.fType - plPipeConst::kTex3x4_0;
+                if (!stageInRange(stage))
+                    continue;
+                const hsMatrix44& xfm = fCurrMaterial->GetLayer(fCurrLayerIdx + stage)->GetTransform();
+
+                shader->SetMatrix34(pc.fReg, xfm);
+            } break;
+            case plPipeConst::kTex2x4_0:
+            case plPipeConst::kTex2x4_1:
+            case plPipeConst::kTex2x4_2:
+            case plPipeConst::kTex2x4_3:
+            case plPipeConst::kTex2x4_4:
+            case plPipeConst::kTex2x4_5:
+            case plPipeConst::kTex2x4_6:
+            case plPipeConst::kTex2x4_7: {
+                int stage = pc.fType - plPipeConst::kTex2x4_0;
+                if (!stageInRange(stage))
+                    continue;
+                const hsMatrix44& xfm = fCurrMaterial->GetLayer(fCurrLayerIdx + stage)->GetTransform();
+
+                shader->SetMatrix24(pc.fReg, xfm);
+            } break;
+            case plPipeConst::kTex1x4_0:
+            case plPipeConst::kTex1x4_1:
+            case plPipeConst::kTex1x4_2:
+            case plPipeConst::kTex1x4_3:
+            case plPipeConst::kTex1x4_4:
+            case plPipeConst::kTex1x4_5:
+            case plPipeConst::kTex1x4_6:
+            case plPipeConst::kTex1x4_7: {
+                int stage = pc.fType - plPipeConst::kTex1x4_0;
+                if (!stageInRange(stage))
+                    continue;
+                const hsMatrix44& xfm = fCurrMaterial->GetLayer(fCurrLayerIdx + stage)->GetTransform();
+
+                // Only the first row of the transform is uploaded.
+                shader->SetFloat4(pc.fReg, xfm.fMap[0]);
+            } break;
+
+            // Transforms into NDC are full 4x4 matrices.
+            case plPipeConst::kLocalToNDC: {
+                hsMatrix44 cam2ndc = IGetCameraToNDC();
+                hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera();
+
+                hsMatrix44 local2ndc = cam2ndc * world2cam * GetLocalToWorld();
+
+                shader->SetMatrix44(pc.fReg, local2ndc);
+            } break;
+
+            case plPipeConst::kCameraToNDC: {
+                hsMatrix44 cam2ndc = IGetCameraToNDC();
+
+                shader->SetMatrix44(pc.fReg, cam2ndc);
+            } break;
+
+            case plPipeConst::kWorldToNDC: {
+                hsMatrix44 cam2ndc = IGetCameraToNDC();
+                hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera();
+
+                hsMatrix44 world2ndc = cam2ndc * world2cam;
+
+                shader->SetMatrix44(pc.fReg, world2ndc);
+            } break;
+
+            // The remaining space-to-space transforms are uploaded as 3x4.
+            case plPipeConst::kLocalToWorld:
+                shader->SetMatrix34(pc.fReg, GetLocalToWorld());
+                break;
+
+            case plPipeConst::kWorldToLocal:
+                shader->SetMatrix34(pc.fReg, GetWorldToLocal());
+                break;
+
+            case plPipeConst::kWorldToCamera: {
+                hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera();
+
+                shader->SetMatrix34(pc.fReg, world2cam);
+            } break;
+
+            case plPipeConst::kCameraToWorld: {
+                hsMatrix44 cam2world = GetViewTransform().GetCameraToWorld();
+
+                shader->SetMatrix34(pc.fReg, cam2world);
+            } break;
+
+            case plPipeConst::kLocalToCamera: {
+                hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera();
+
+                hsMatrix44 local2cam = world2cam * GetLocalToWorld();
+
+                shader->SetMatrix34(pc.fReg, local2cam);
+            } break;
+
+            case plPipeConst::kCameraToLocal: {
+                hsMatrix44 cam2world = GetViewTransform().GetCameraToWorld();
+
+                hsMatrix44 cam2local = GetWorldToLocal() * cam2world;
+
+                shader->SetMatrix34(pc.fReg, cam2local);
+            } break;
+
+            // Positions are uploaded with w = 1.
+            case plPipeConst::kCamPosWorld: {
+                shader->SetVectorW(pc.fReg, GetViewTransform().GetCameraToWorld().GetTranslate(), 1.f);
+            } break;
+
+            case plPipeConst::kCamPosLocal: {
+                hsPoint3 localCam = GetWorldToLocal() * GetViewTransform().GetCameraToWorld().GetTranslate();
+
+                shader->SetVectorW(pc.fReg, localCam, 1.f);
+            } break;
+
+            case plPipeConst::kObjPosWorld: {
+                shader->SetVectorW(pc.fReg, GetLocalToWorld().GetTranslate(), 1.f);
+            } break;
+
+            // UNIMPLEMENTED
+            case plPipeConst::kDirLight1:
+            case plPipeConst::kDirLight2:
+            case plPipeConst::kDirLight3:
+            case plPipeConst::kDirLight4:
+            case plPipeConst::kPointLight1:
+            case plPipeConst::kPointLight2:
+            case plPipeConst::kPointLight3:
+            case plPipeConst::kPointLight4:
+            case plPipeConst::kColorFilter:
+            case plPipeConst::kMaxType: {
+                hsAssert(0, "Unimplemented uniform passed to shader");
+            } break;
+        }
+    }
+}
+
+// ISetShaders /////////////////////////////////////////////////////////////////////////////////////
+// Setup to render using the input vertex and pixel shader. Either or both may
+// be nil, in which case the fixed function pipeline is indicated.
+// Any Pipe Constants the non-FFP shader wants will be set here.
+// Lastly, all constants will be set (as a block) for any non-FFP vertex or pixel shader.
+bool plMetalPipeline::ISetShaders(const plMetalVertexBufferRef* vRef, const hsGMatState blendMode, plShader* vShader, plShader* pShader)
+{
+    // Both stages are required on this path and are dereferenced immediately below.
+    hsAssert(vShader, "Can't handle programmable passes without vShader");
+    hsAssert(pShader, "Can't handle programmable passes without pShader");
+    const plShaderID::ID vertexShaderID = vShader->GetDecl()->GetID();
+    const plShaderID::ID fragmentShaderID = pShader->GetDecl()->GetID();
+
+    // Bind the pipeline state for this shader pair unless it is already the current one.
+    plMetalDevice::plMetalLinkedPipeline* pipeline = plMetalDynamicMaterialPipelineState(&fDevice, vRef, blendMode.fBlendFlags, vertexShaderID, fragmentShaderID).GetRenderPipelineState();
+    if (fState.fCurrentPipelineState != pipeline->pipelineState) {
+        fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pipeline->pipelineState);
+        fState.fCurrentPipelineState = pipeline->pipelineState;
+    }
+
+    // Vertex stage: fill in pipe constants, make sure a device ref exists and is linked, then upload.
+    hsAssert(vShader->IsVertexShader(), "Wrong type shader as vertex shader");
+    ISetPipeConsts(vShader);
+
+    // Named vsRef so it does not shadow the vertex buffer parameter vRef.
+    plMetalVertexShader* vsRef = (plMetalVertexShader*)vShader->GetDeviceRef();
+    if (!vsRef) {
+        vsRef = new plMetalVertexShader(vShader);
+        // NOTE(review): presumably the shader holds its own reference once the ref is
+        // constructed, so the creation reference is dropped here - confirm.
+        hsRefCnt_SafeUnRef(vsRef);
+    }
+    if (!vsRef->IsLinked())
+        vsRef->Link(&fVShaderRefList);
+
+    vsRef->ISetConstants(this);
+
+    // Fragment stage: same sequence.
+    hsAssert(pShader->IsPixelShader(), "Wrong type shader as pixel shader");
+    ISetPipeConsts(pShader);
+
+    plMetalFragmentShader* psRef = (plMetalFragmentShader*)pShader->GetDeviceRef();
+    if (!psRef) {
+        psRef = new plMetalFragmentShader(pShader);
+        hsRefCnt_SafeUnRef(psRef);
+    }
+    if (!psRef->IsLinked())
+        psRef->Link(&fPShaderRefList);
+
+    psRef->ISetConstants(this);
+
+    // Handle cull mode here, because current cullmode is dependent on
+    // the handedness of the LocalToCamera AND whether we are twosided.
+    ISetCullMode();
+
+    return true;
+}
+
+// ICheckDynBuffers ////////////////////////////////////////////////////////////
+// Refreshes the vertex and index buffers backing a span if they are out of date.
+// Returns true when the span's vertex or index buffer ref is missing; false otherwise,
+// including for spans that are not vertex/icicle spans.
+bool plMetalPipeline::ICheckDynBuffers(plDrawableSpans* drawable, plGBufferGroup* group, const plSpan* spanBase)
+{
+    if (!(spanBase->fTypeMask & plSpan::kVertexSpan))
+        return false;
+    // If we aren't a trilist, we're toast.
+    if (!(spanBase->fTypeMask & plSpan::kIcicleSpan))
+        return false;
+
+    plIcicle* span = (plIcicle*)spanBase;
+
+    plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)group->GetVertexBufferRef(span->fVBufferIdx);
+    if (!vRef)
+        return true;
+
+    plMetalIndexBufferRef* iRef = (plMetalIndexBufferRef*)group->GetIndexBufferRef(span->fIBufferIdx);
+    if (!iRef)
+        return true;
+
+    // If our vertex buffer ref is volatile and the timestamp is off
+    // then it needs to be refilled
+    if (vRef->Expired(fVtxRefTime)) {
+        IRefreshDynVertices(group, vRef);
+    }
+
+    if (iRef->IsDirty()) {
+        fDevice.FillIndexBufferRef(iRef, group, span->fIBufferIdx);
+        iRef->SetRebuiltSinceUsed(true);
+    }
+
+    return false; // No error
+}
+
+// IRefreshDynVertices /////////////////////////////////////////////////////////
+// Copies the group's current vertex data for vRef into its Metal buffer, allocating a new
+// buffer when none exists or the existing one is too small. Always returns false (no error).
+bool plMetalPipeline::IRefreshDynVertices(plGBufferGroup* group, plMetalVertexBufferRef* vRef)
+{
+    ptrdiff_t size = (group->GetVertBufferEnd(vRef->fIndex) - group->GetVertBufferStart(vRef->fIndex)) * vRef->fVertexSize;
+    if (!size)
+        return false; // No error, just nothing to do.
+
+    hsAssert(size > 0, "Bad start and end counts in a group");
+
+    // Prefer the ref's own staging data; otherwise read straight from the group's vertex storage.
+    uint8_t* vData;
+    if (vRef->fData)
+        vData = vRef->fData;
+    else
+        vData = group->GetVertBufferData(vRef->fIndex) + group->GetVertBufferStart(vRef->fIndex) * vRef->fVertexSize;
+
+    vRef->PrepareForWrite();
+
+    MTL::Buffer* vertexBuffer = vRef->GetBuffer();
+    if (!vertexBuffer || vertexBuffer->length() < static_cast<NS::UInteger>(size)) {
+        // Plasma will present different length buffers at different times
+        vertexBuffer = fDevice.fMetalDevice->newBuffer(vData, size, MTL::ResourceStorageModeManaged)->autorelease();
+        if (vRef->Volatile()) {
+            // Placeholder completion handler: marking the buffer purgeable is currently disabled.
+            fDevice.GetCurrentCommandBuffer()->addCompletedHandler(^(MTL::CommandBuffer* buffer){
+                // vRef->fVertexBuffer->setPurgeableState(MTL::PurgeableStateVolatile);
+            });
+        }
+        vRef->SetBuffer(vertexBuffer);
+    } else {
+        // Managed storage: after a CPU write the modified range must be flagged for the GPU copy.
+        memcpy(vertexBuffer->contents(), vData, size);
+        vertexBuffer->didModifyRange(NS::Range(0, size));
+    }
+
+    vRef->fRefTime = fVtxRefTime;
+    vRef->SetDirty(false);
+
+    return false;
+}
+
+// IHandleZMode ////////////////////////////////////////////////////////////////
+// Selects the depth/stencil state matching the material's Z flags and binds it if it
+// differs from the cached one.
+void plMetalPipeline::IHandleZMode(hsGMatState flags)
+{
+    // Metal is very particular that if there is no depth buffer we need to explicitly disable z read and write
+    if (fDevice.fCurrentDepthFormat == MTL::PixelFormatInvalid) {
+        fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fNoZReadOrWriteStencilState);
+        return;
+    }
+
+    // nullptr means "no state selected": the clear-only case and the illegal combinations
+    // leave whatever is bound untouched instead of comparing against an uninitialized pointer.
+    MTL::DepthStencilState* newDepthState = nullptr;
+    switch (flags.fZFlags & hsGMatState::kZMask) {
+        case hsGMatState::kZClearZ:
+            fDevice.Clear(false, {0.0f, 0.0f, 0.0f, 0.0f}, true, 0.0);
+            break;
+        case hsGMatState::kZNoZRead:
+            newDepthState = fDevice.fNoZReadStencilState;
+            break;
+        case hsGMatState::kZNoZWrite:
+            newDepthState = fDevice.fNoZWriteStencilState;
+            break;
+        case hsGMatState::kZNoZRead | hsGMatState::kZClearZ:
+            newDepthState = fDevice.fNoZReadStencilState;
+            break;
+        case hsGMatState::kZNoZRead | hsGMatState::kZNoZWrite:
+            newDepthState = fDevice.fNoZReadOrWriteStencilState;
+            break;
+        case 0:
+            newDepthState = fDevice.fDefaultStencilState;
+            break;
+        case hsGMatState::kZClearZ | hsGMatState::kZNoZWrite:
+        case hsGMatState::kZClearZ | hsGMatState::kZNoZWrite | hsGMatState::kZNoZRead:
+            hsAssert(false, "Illegal combination of Z Buffer modes (Clear but don't write)");
+            break;
+    }
+
+    if (newDepthState && fState.fCurrentDepthStencilState != newDepthState) {
+        fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(newDepthState);
+        fState.fCurrentDepthStencilState = newDepthState;
+    }
+}
+
+//// ISetLayer ////////////////////////////////////////////////////////////////
+// Sets whether we're rendering a base layer or upper layer. Upper layer has
+// a Z bias to avoid Z fighting.
+void plMetalPipeline::ISetLayer(uint32_t lay)
+{
+    if (lay) {
+        if (fCurrRenderLayer != lay) {
+            fCurrRenderLayer = lay;
+
+            // Bias parameters are function-level statics marked [[gnu::used]].
+            static float mult [[gnu::used]] = -8.0f;
+            static float constBias [[gnu::used]] = -0.0f;
+            static float max [[gnu::used]] = -0.00001f;
+            fDevice.CurrentRenderCommandEncoder()->setDepthBias(constBias, mult, max);
+        }
+    } else if (fCurrRenderLayer != 0) {
+        fCurrRenderLayer = 0;
+        fDevice.CurrentRenderCommandEncoder()->setDepthBias(0.0f, 0.0f, 0.0f);
+    }
+}
+
+// IHandleBlendMode ////////////////////////////////////////////////////////////
+// Validates the material's blend flags; repairs layers that specify too many blend modes.
+void plMetalPipeline::IHandleBlendMode(hsGMatState flags)
+{
+    // This function is a weird leftover of CPU side blend mode setting.
+    // We need the error case, but nothing else? In Metal this is all
+    // done GPU side - but the GPU can't write an error state on a CPU
+    // side buffer.
+    if (flags.fBlendFlags & hsGMatState::kBlendNoColor) {
+        // flags is passed by value, so this only changes the local copy.
+        flags.fBlendFlags |= hsGMatState::kBlendAlphaPremultiplied;
+        return;
+    }
+
+    switch (flags.fBlendFlags & hsGMatState::kBlendMask) {
+        case hsGMatState::kBlendDetail:
+        case hsGMatState::kBlendAlpha:
+        case hsGMatState::kBlendMult:
+        case hsGMatState::kBlendAdd:
+        case hsGMatState::kBlendMADD:
+        case hsGMatState::kBlendAddColorTimesAlpha:
+        case 0:
+            break;
+
+        default: {
+            hsAssert(false, "Too many blend modes specified in material");
+            // Collapse the layer to a single blend mode: alpha if it was requested, add otherwise.
+            plLayer* lay = plLayer::ConvertNoRef(fCurrMaterial->GetLayer(fCurrLayerIdx)->BottomOfStack());
+            if (lay) {
+                if (lay->GetBlendFlags() & hsGMatState::kBlendAlpha) {
+                    lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAlpha);
+                } else {
+                    lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAdd);
+                }
+            }
+            // layer state needs to be synchronized to the GPU
+            plMetalMaterialShaderRef* matRef = static_cast<plMetalMaterialShaderRef*>(fCurrMaterial->GetDeviceRef());
+            if (matRef) {
+                matRef->SetDirty(true);
+            }
+        } break;
+    }
+}
+
+// ICalcLighting ///////////////////////////////////////////////////////////////
+// Fills the material colour, colour-source and fog fields of fCurrentRenderPassUniforms
+// for the given layer and span. With kFlagAllBright set, everything is forced to white
+// and the fog fields are left untouched.
+// NOTE(review): the static_cast target type was lost when this text was extracted; `half` is
+// assumed because only the non-constant float colour channels are cast - confirm against the
+// uniform struct declarations.
+void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLayerInterface* currLayer, const plSpan* currSpan)
+{
+    // plProfile_Inc(MatLightState);
+
+    if (IsDebugFlagSet(plPipeDbg::kFlagAllBright)) {
+        fCurrentRenderPassUniforms->globalAmb = {1.f, 1.f, 1.f, 1.f};
+
+        fCurrentRenderPassUniforms->ambientCol = {1.f, 1.f, 1.f};
+        fCurrentRenderPassUniforms->diffuseCol = {1.f, 1.f, 1.f, 1.f};
+        fCurrentRenderPassUniforms->emissiveCol = {1.f, 1.f, 1.f};
+        fCurrentRenderPassUniforms->specularCol = {1.f, 1.f, 1.f};
+
+        fCurrentRenderPassUniforms->ambientSrc = 1;
+        fCurrentRenderPassUniforms->diffuseSrc = 1;
+        fCurrentRenderPassUniforms->emissiveSrc = 1;
+        fCurrentRenderPassUniforms->specularSrc = 1;
+
+        return;
+    }
+
+    hsGMatState state;
+    state.Composite(currLayer->GetState(), fMatOverOn, fMatOverOff);
+
+    uint32_t mode = (currSpan != nullptr) ? (currSpan->fProps & plSpan::kLiteMask) : plSpan::kLiteMaterial;
+
+    // Bump-channel layers are always material-lit, unshaded and white.
+    if (state.fMiscFlags & hsGMatState::kMiscBumpChans) {
+        mode = plSpan::kLiteMaterial;
+        state.fShadeFlags |= hsGMatState::kShadeNoShade | hsGMatState::kShadeWhite;
+    }
+
+    // Emissive comes from the layer's ambient colour (material and non-preshaded vertex modes).
+    auto applyEmissive = [&]() {
+        hsColorRGBA em = currLayer->GetAmbientColor();
+        fCurrentRenderPassUniforms->emissiveCol = {static_cast<half>(em.r), static_cast<half>(em.g), static_cast<half>(em.b)};
+    };
+
+    // Specular colour is only forwarded when the layer is flagged specular.
+    // TODO: specular power (currLayer->GetSpecularPower()) is not forwarded yet.
+    auto applySpecular = [&]() {
+        if (state.fShadeFlags & hsGMatState::kShadeSpecular) {
+            hsColorRGBA spec = currLayer->GetSpecularColor();
+            fCurrentRenderPassUniforms->specularCol = {static_cast<half>(spec.r), static_cast<half>(spec.g), static_cast<half>(spec.b)};
+        } else {
+            fCurrentRenderPassUniforms->specularCol = {0.f, 0.f, 0.f};
+        }
+    };
+
+    /// Select one of our three lighting methods
+    switch (mode) {
+        case plSpan::kLiteMaterial: // Material shading
+        {
+            if (state.fShadeFlags & hsGMatState::kShadeWhite) {
+                fCurrentRenderPassUniforms->globalAmb = {1.f, 1.f, 1.f, 1.f};
+                fCurrentRenderPassUniforms->ambientCol = {1.f, 1.f, 1.f};
+            } else if (IsDebugFlagSet(plPipeDbg::kFlagNoPreShade)) {
+                fCurrentRenderPassUniforms->globalAmb = {0.f, 0.f, 0.f, 1.f};
+                fCurrentRenderPassUniforms->ambientCol = {0.f, 0.f, 0.f};
+            } else {
+                hsColorRGBA amb = currLayer->GetPreshadeColor();
+                fCurrentRenderPassUniforms->globalAmb = {static_cast<half>(amb.r), static_cast<half>(amb.g), static_cast<half>(amb.b), 1.f};
+                fCurrentRenderPassUniforms->ambientCol = {static_cast<half>(amb.r), static_cast<half>(amb.g), static_cast<half>(amb.b)};
+            }
+
+            // Diffuse alpha carries the layer opacity.
+            hsColorRGBA dif = currLayer->GetRuntimeColor();
+            fCurrentRenderPassUniforms->diffuseCol = {static_cast<half>(dif.r), static_cast<half>(dif.g), static_cast<half>(dif.b), static_cast<half>(currLayer->GetOpacity())};
+
+            applyEmissive();
+            applySpecular();
+
+            fCurrentRenderPassUniforms->diffuseSrc = 1.f;
+            fCurrentRenderPassUniforms->emissiveSrc = 1.f;
+            fCurrentRenderPassUniforms->specularSrc = 1.f;
+
+            if (state.fShadeFlags & hsGMatState::kShadeNoShade) {
+                fCurrentRenderPassUniforms->ambientSrc = 1.f;
+            } else {
+                fCurrentRenderPassUniforms->ambientSrc = 0.f;
+            }
+            fCurrLightingMethod = plSpan::kLiteMaterial;
+
+            break;
+        }
+
+        case plSpan::kLiteVtxPreshaded: // Vtx preshaded
+        {
+            fCurrentRenderPassUniforms->globalAmb = {0.f, 0.f, 0.f, 0.f};
+            fCurrentRenderPassUniforms->ambientCol = {0.f, 0.f, 0.f};
+            fCurrentRenderPassUniforms->diffuseCol = {0.f, 0.f, 0.f, 0.f};
+            fCurrentRenderPassUniforms->emissiveCol = {0.f, 0.f, 0.f};
+            fCurrentRenderPassUniforms->specularCol = {0.f, 0.f, 0.f};
+
+            fCurrentRenderPassUniforms->diffuseSrc = 0.f;
+            fCurrentRenderPassUniforms->ambientSrc = 1.f;
+            fCurrentRenderPassUniforms->specularSrc = 1.f;
+
+            if (state.fShadeFlags & hsGMatState::kShadeEmissive) {
+                fCurrentRenderPassUniforms->emissiveSrc = 0.f;
+            } else {
+                fCurrentRenderPassUniforms->emissiveSrc = 1.f;
+            }
+
+            fCurrLightingMethod = plSpan::kLiteVtxPreshaded;
+            break;
+        }
+
+        case plSpan::kLiteVtxNonPreshaded: // Vtx non-preshaded
+        {
+            fCurrentRenderPassUniforms->ambientCol = {0.f, 0.f, 0.f};
+            fCurrentRenderPassUniforms->diffuseCol = {0.f, 0.f, 0.f, 0.f};
+
+            applyEmissive();
+            applySpecular();
+
+            hsColorRGBA amb = currLayer->GetPreshadeColor();
+            fCurrentRenderPassUniforms->globalAmb = {static_cast<half>(amb.r), static_cast<half>(amb.g), static_cast<half>(amb.b), static_cast<half>(amb.a)};
+
+            fCurrentRenderPassUniforms->ambientSrc = 0;
+            fCurrentRenderPassUniforms->diffuseSrc = 0;
+            fCurrentRenderPassUniforms->emissiveSrc = 1;
+            fCurrentRenderPassUniforms->specularSrc = 1;
+
+            fCurrLightingMethod = plSpan::kLiteVtxNonPreshaded;
+            break;
+        }
+    }
+
+    // Piggy-back some temporary fog stuff on the lighting...
+    // No span means no fog; otherwise use the span's fog environment or the view default.
+    const plFogEnvironment* fog = (currSpan ? (currSpan->fFogEnvironment ? currSpan->fFogEnvironment : &fView.GetDefaultFog()) : nullptr);
+
+    // currLayer has already been dereferenced above, so it is not re-checked here.
+    if ((currLayer->GetShadeFlags() & hsGMatState::kShadeReallyNoFog) && !(fMatOverOff.fShadeFlags & hsGMatState::kShadeReallyNoFog))
+        fog = nullptr;
+
+    uint8_t type = fog ? fog->GetType() : plFogEnvironment::kNoFog;
+    hsColorRGBA color;
+
+    switch (type) {
+        case plFogEnvironment::kLinearFog: {
+            float start, end;
+            fog->GetPipelineParams(&start, &end, &color);
+
+            fCurrentRenderPassUniforms->fogExponential = 0;
+            fCurrentRenderPassUniforms->fogValues = {start, end};
+            fCurrentRenderPassUniforms->fogColor = {static_cast<half>(color.r), static_cast<half>(color.g), static_cast<half>(color.b)};
+            break;
+        }
+        case plFogEnvironment::kExpFog:
+        case plFogEnvironment::kExp2Fog: {
+            float density;
+            float power = (type == plFogEnvironment::kExp2Fog) ? 2.0f : 1.0f;
+            fog->GetPipelineParams(&density, &color);
+
+            fCurrentRenderPassUniforms->fogExponential = 1;
+            fCurrentRenderPassUniforms->fogValues = {power, density};
+            fCurrentRenderPassUniforms->fogColor = {static_cast<half>(color.r), static_cast<half>(color.g), static_cast<half>(color.b)};
+            break;
+        }
+        default:
+            fCurrentRenderPassUniforms->fogExponential = 0;
+            fCurrentRenderPassUniforms->fogValues = {0.f, 0.f};
+            fCurrentRenderPassUniforms->fogColor = {0.f, 0.f, 0.f};
+            break;
+    }
+
+    // Additive blend modes get a black fog colour.
+    if (currLayer->GetBlendFlags() & (hsGMatState::kBlendAdd | hsGMatState::kBlendMADD | hsGMatState::kBlendAddColorTimesAlpha)) {
+        fCurrentRenderPassUniforms->fogColor = {0.f, 0.f, 0.f};
+    }
+}
+
+// ISelectLights ///////////////////////////////////////////////////////////////
+// Find the strongest numLights lights to illuminate the span with.
+// Weaker lights are faded out in effect so they won't pop when the
+// strongest N changes membership.
+void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef* mRef, bool proj)
+{
+    const size_t numLights = kMetalMaxLightCount;
+    int32_t i = 0;
+    int32_t startScale;
+    float threshhold;
+    float overHold = 0.3;
+    float scale;
+    // Scratch list reused across calls; the element type follows how the entries are used below.
+    static std::vector<plLightInfo*> onLights;
+    onLights.clear();
+
+    if (!IsDebugFlagSet(plPipeDbg::kFlagNoRuntimeLights) &&
+        !(IsDebugFlagSet(plPipeDbg::kFlagNoApplyProjLights) && proj) &&
+        !(IsDebugFlagSet(plPipeDbg::kFlagOnlyApplyProjLights) && !proj)) {
+        auto& spanLights = span->GetLightList(proj);
+
+        fLights.count = 0;
+        for (i = 0; static_cast<size_t>(i) < spanLights.size() && static_cast<size_t>(i) < numLights; i++) {
+            // If these are non-projected lights, go ahead and enable them.
+            if (!proj) {
+                IEnableLight(fLights.count, spanLights[i]);
+                fLights.count++;
+            }
+            onLights.emplace_back(spanLights[i]);
+        }
+        startScale = i;
+
+        /// Attempt #2: Take some of the n strongest lights (below a given threshhold) and
+        /// fade them out to nothing as they get closer to the bottom. This way, they fade
+        /// out of existence instead of pop out.
+
+        // Only applies when at least two lights were left out of the selection.
+        if (i > 0 && static_cast<size_t>(i) + 1 < spanLights.size()) {
+            threshhold = span->GetLightStrength(i, proj);
+            i--;
+            overHold = threshhold * 1.5f;
+
+            if (overHold > span->GetLightStrength(0, proj)) {
+                overHold = span->GetLightStrength(0, proj);
+            }
+
+            for (; i > 0 && span->GetLightStrength(i, proj) < overHold; i--) {
+                scale = (overHold - span->GetLightStrength(i, proj)) / (overHold - threshhold);
+
+                IScaleLight(i, (1 - scale) * span->GetLightScale(i, proj));
+            }
+            startScale = i + 1;
+        }
+
+        /// Make sure those lights that aren't scaled....aren't
+        for (i = 0; i < startScale; i++) {
+            IScaleLight(i, span->GetLightScale(i, proj));
+        }
+    }
+
+    // For the projected lights, don't enable, just remember who they are.
+    if (proj) {
+        fProjAll.clear();
+        fProjEach.clear();
+        for (plLightInfo* light : onLights) {
+            if (light->OverAll())
+                fProjAll.emplace_back(light);
+            else
+                fProjEach.emplace_back(light);
+        }
+        onLights.clear();
+    }
+}
+
+// IEnableLight ////////////////////////////////////////////////////////////////
+// Writes the colours, position/direction and attenuation of `light` into lamp slot i.
+// Lights that are neither directional nor omni/spot are written as disabled.
+// NOTE(review): the static_cast target type was lost when this text was extracted; `half` is
+// assumed because only the float colour channels are cast - confirm against the lamp struct.
+void plMetalPipeline::IEnableLight(size_t i, plLightInfo* light)
+{
+    hsColorRGBA amb = light->GetAmbient();
+    fLights.lampSources[i].ambient = {static_cast<half>(amb.r), static_cast<half>(amb.g), static_cast<half>(amb.b), static_cast<half>(amb.a)};
+
+    hsColorRGBA diff = light->GetDiffuse();
+    fLights.lampSources[i].diffuse = {static_cast<half>(diff.r), static_cast<half>(diff.g), static_cast<half>(diff.b), static_cast<half>(diff.a)};
+
+    hsColorRGBA spec = light->GetSpecular();
+    fLights.lampSources[i].specular = {static_cast<half>(spec.r), static_cast<half>(spec.g), static_cast<half>(spec.b), static_cast<half>(spec.a)};
+
+    plDirectionalLightInfo* dirLight = nullptr;
+    plOmniLightInfo* omniLight = nullptr;
+    plSpotLightInfo* spotLight = nullptr;
+
+    if ((dirLight = plDirectionalLightInfo::ConvertNoRef(light)) != nullptr) {
+        // Directional: position.w is 0 and the position slot carries the direction.
+        hsVector3 lightDir = dirLight->GetWorldDirection();
+        fLights.lampSources[i].position = {lightDir.fX, lightDir.fY, lightDir.fZ, 0.0};
+        fLights.lampSources[i].direction = {lightDir.fX, lightDir.fY, lightDir.fZ};
+
+        fLights.lampSources[i].constAtten = 1.0f;
+        fLights.lampSources[i].linAtten = 0.0f;
+        fLights.lampSources[i].quadAtten = 0.0f;
+    } else if ((omniLight = plOmniLightInfo::ConvertNoRef(light)) != nullptr) {
+        // Omni (and spot): position.w is 1.
+        hsPoint3 pos = omniLight->GetWorldPosition();
+        fLights.lampSources[i].position = {pos.fX, pos.fY, pos.fZ, 1.0};
+
+        // TODO: Maximum Range
+
+        fLights.lampSources[i].constAtten = omniLight->GetConstantAttenuation();
+        fLights.lampSources[i].linAtten = omniLight->GetLinearAttenuation();
+        fLights.lampSources[i].quadAtten = omniLight->GetQuadraticAttenuation();
+
+        if (!omniLight->GetProjection() && (spotLight = plSpotLightInfo::ConvertNoRef(omniLight)) != nullptr) {
+            hsVector3 lightDir = spotLight->GetWorldDirection();
+            fLights.lampSources[i].direction = {lightDir.fX, lightDir.fY, lightDir.fZ};
+
+            // spotProps = (falloff, cos(inner angle), cos(outer angle))
+            float falloff = spotLight->GetFalloff();
+            float gamma = cosf(spotLight->GetSpotInner());
+            float phi = cosf(spotLight->GetProjection() ? hsConstants::half_pi : spotLight->GetSpotOuter());
+
+            fLights.lampSources[i].spotProps = {falloff, gamma, phi};
+        } else {
+            fLights.lampSources[i].spotProps = {0.0f, 0.0f, 0.0f};
+        }
+    } else {
+        IDisableLight(i);
+    }
+}
+
+// IDisableLight ///////////////////////////////////////////////////////////////
+// Zeroes lamp slot i's position, colours and scale, and resets its attenuation to constant 1.
+void plMetalPipeline::IDisableLight(size_t i)
+{
+    fLights.lampSources[i].position = {0.0f, 0.0f, 0.0f, 0.0f};
+    fLights.lampSources[i].ambient = {0.0f, 0.0f, 0.0f, 0.0f};
+    fLights.lampSources[i].diffuse = {0.0f, 0.0f, 0.0f, 0.0f};
+    fLights.lampSources[i].specular = {0.0f, 0.0f, 0.0f, 0.0f};
+    fLights.lampSources[i].constAtten = 1.0f;
+    fLights.lampSources[i].linAtten = 0.0f;
+    fLights.lampSources[i].quadAtten = 0.0f;
+    fLights.lampSources[i].scale = 0.0f;
+}
+
+// IScaleLight /////////////////////////////////////////////////////////////////
+// Stores the scale for lamp slot i, truncated to one decimal place.
+void plMetalPipeline::IScaleLight(size_t i, float scale)
+{
+    scale = int(scale * 1.e1f) * 1.e-1f;
+    fLights.lampSources[i].scale = scale;
+}
+
+// IDrawPlate //////////////////////////////////////////////////////////////////
+// Encodes the draw for a single visible plate using the plate pipeline state and the
+// plate manager's depth state.
+void plMetalPipeline::IDrawPlate(plPlate* plate)
+{
+    if (!plate->IsVisible()) {
+        return;
+    }
+    hsGMaterial* material = plate->GetMaterial();
+
+    plLayerInterface* lay = material->GetLayer(0);
+    hsGMatState s;
+    s.Composite(lay->GetState(), fMatOverOn, fMatOverOff);
+
+    IHandleZMode(s);
+    IHandleBlendMode(s);
+    fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fNoZReadOrWriteStencilState);
+    fState.fCurrentDepthStencilState = fDevice.fNoZReadOrWriteStencilState;
+
+    /// Set up the transform directly
+    fDevice.SetLocalToWorldMatrix(plate->GetTransform());
+
+    IPushPiggyBacks(material);
+
+    // First, do we have a device ref at this index?
+    plMetalMaterialShaderRef* mRef = static_cast<plMetalMaterialShaderRef*>(material->GetDeviceRef());
+
+    if (mRef == nullptr) {
+        mRef = new plMetalMaterialShaderRef(material, this);
+        material->SetDeviceRef(mRef);
+    }
+
+    if (!mRef->IsLinked()) {
+        mRef->Link(&fMatRefList);
+    }
+
+    plMetalPlateManager* pm = (plMetalPlateManager*)fPlateMgr;
+
+    plMetalPlatePipelineState state(&fDevice);
+    plMetalDevice::plMetalLinkedPipeline* linkedPipeline = state.GetRenderPipelineState();
+
+    if (fState.fCurrentPipelineState != linkedPipeline->pipelineState) {
+        fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState);
+        fState.fCurrentPipelineState = linkedPipeline->pipelineState;
+    }
+    float alpha = material->GetLayer(0)->GetOpacity();
+    fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&alpha, sizeof(float), 6);
+    fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(pm->fDepthState);
+    // Keep the cached depth state in step with what is actually bound, so a later
+    // IHandleZMode does not skip a bind it needs.
+    fState.fCurrentDepthStencilState = pm->fDepthState;
+    fDevice.CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone);
+
+    // Value-initialized so no indeterminate bytes are copied to the GPU.
+    VertexUniforms uniforms{};
+    uniforms.projectionMatrix = matrix_identity_float4x4;
+    uniforms.worldToCameraMatrix = matrix_identity_float4x4;
+    uniforms.uvTransforms[0].UVWSrc = 0;
+
+    // flip world to camera, it's upside down
+    matrix_float4x4 flip = matrix_identity_float4x4;
+    flip.columns[1][1] = -1.0f;
+
+    uniforms.localToWorldMatrix = matrix_multiply(flip, fDevice.fMatrixL2W);
+
+    mRef->FastEncodeArguments(fDevice.CurrentRenderCommandEncoder(), &uniforms, 0);
+    // FIXME: Hacking the old texture drawing into the plate path
+    mRef->prepareTextures(fDevice.CurrentRenderCommandEncoder(), 0);
+
+    fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&uniforms, sizeof(VertexUniforms), VertexShaderArgumentFixedFunctionUniforms);
+
+    pm->EncodeDraw(fDevice.CurrentRenderCommandEncoder());
+
+    IPopPiggyBacks();
+}
+
+// Push and pop light sources
+// The DX version would just keep a giant pool of lights
+// that could be claimed by different parts of the pipeline.
+// In Metal, when a part of the pipeline wants to own lights
+// we'll just let them push/pop the current state.
+void plMetalPipeline::PushCurrentLightSources()
+{
+    // Snapshot the current light block; PopCurrentLightSources restores and frees it.
+    plMetalLights* lightSources = new plMetalLights();
+    memcpy(lightSources, &fLights, sizeof(plMetalLights));
+    fLightSourceStack.emplace_back(lightSources);
+}
+
+void plMetalPipeline::PopCurrentLightSources()
+{
+    // Calling back() on an empty vector is undefined, so bail out even when asserts are compiled out.
+    if (fLightSourceStack.empty()) {
+        hsAssert(false, "Asked to pop light sources but none on stack");
+        return;
+    }
+    plMetalLights* lightSources = fLightSourceStack.back();
+    fLightSourceStack.pop_back();
+    memcpy(&fLights, lightSources, sizeof(plMetalLights));
+    delete lightSources;
+}
+
+// Special effects /////////////////////////////////////////////////////////////
+
+// IPushOverBaseLayer /////////////////////////////////////////////////////////
+// Sets fOverBaseLayer (if any) as a wrapper on top of input layer.
+// This allows the OverBaseLayer to intercept and modify queries of
+// the real current layer's properties (e.g. color or state).
+// fOverBaseLayer is set to only get applied to the base layer during
+// multitexturing.
+// Must be matched with call to IPopOverBaseLayer.
+plLayerInterface* plMetalPipeline::IPushOverBaseLayer(plLayerInterface* li)
+{
+    if (!li)
+        return nullptr;
+
+    fOverLayerStack.emplace_back(li);
+
+    // First override: it simply becomes the base override, with no attach or eval.
+    if (!fOverBaseLayer)
+        return fOverBaseLayer = li;
+
+    fForceMatHandle = true;
+    fOverBaseLayer = fOverBaseLayer->Attach(li);
+    fOverBaseLayer->Eval(fTime, fFrame, 0);
+    return fOverBaseLayer;
+}
+
+// IPopOverBaseLayer /////////////////////////////////////////////////////////
+// Removes fOverBaseLayer as wrapper on top of input layer.
+// Should match calls to IPushOverBaseLayer.
+plLayerInterface* plMetalPipeline::IPopOverBaseLayer(plLayerInterface* li)
+{
+    // A null layer was never pushed, so there is nothing to pop.
+    if (li == nullptr) {
+        return nullptr;
+    }
+
+    fForceMatHandle = true;
+
+    // Take the most recent override off the stack and detach it from the base chain.
+    plLayerInterface* topLayer = fOverLayerStack.back();
+    fOverLayerStack.pop_back();
+    fOverBaseLayer = fOverBaseLayer->Detach(topLayer);
+
+    return topLayer;
+}
+
+// IPushOverAllLayer ///////////////////////////////////////////////////
+// Push fOverAllLayer (if any) as wrapper around the input layer.
+// fOverAllLayer is set to be applied to each layer during multitexturing.
+// Must be matched by call to IPopOverAllLayer
+plLayerInterface* plMetalPipeline::IPushOverAllLayer(plLayerInterface* li)
+{
+    if (li == nullptr) {
+        return nullptr;
+    }
+
+    fOverLayerStack.push_back(li);
+
+    if (fOverAllLayer) {
+        // Stack the new layer on the existing override chain and force a material re-handle.
+        fForceMatHandle = true;
+        fOverAllLayer = fOverAllLayer->Attach(li);
+    } else {
+        // First override: it becomes the chain on its own.
+        fOverAllLayer = li;
+    }
+
+    // Either way the resulting chain is evaluated for the current time and frame.
+    fOverAllLayer->Eval(fTime, fFrame, 0);
+    return fOverAllLayer;
+}
+
+// IPopOverAllLayer //////////////////////////////////////////////////
+// Remove fOverAllLayer as wrapper on top of input layer.
+// Should match calls to IPushOverAllLayer.
+plLayerInterface* plMetalPipeline::IPopOverAllLayer(plLayerInterface* li)
+{
+    if (li == nullptr) {
+        return nullptr;
+    }
+
+    fForceMatHandle = true;
+
+    plLayerInterface* topLayer = fOverLayerStack.back();
+    fOverLayerStack.pop_back();
+    fOverAllLayer = fOverAllLayer->Detach(topLayer);
+
+    return topLayer;
+}
+
+// IPushProjPiggyBack //////////////////////////////////////////////////
+// Push a projected texture on as a piggy back.
+void plMetalPipeline::IPushProjPiggyBack(plLayerInterface* li)
+{
+    const bool piggyBacksDisabled = (fView.fRenderState & plPipeline::kRenderNoPiggyBacks) != 0;
+    if (piggyBacksDisabled) {
+        return;
+    }
+
+    fPiggyBackStack.push_back(li);
+    // Everything above the material's own piggy backs counts as active.
+    fActivePiggyBacks = uint32_t(fPiggyBackStack.size()) - fMatPiggyBacks;
+    fForceMatHandle = true;
+}
+
+// IPopProjPiggyBacks /////////////////////////////////////////////////
+// Remove a projected texture from use as a piggy back.
+void plMetalPipeline::IPopProjPiggyBacks()
+{
+    const bool piggyBacksDisabled = (fView.fRenderState & plPipeline::kRenderNoPiggyBacks) != 0;
+    if (piggyBacksDisabled) {
+        return;
+    }
+
+    // Drop everything above the material's own piggy backs.
+    fPiggyBackStack.resize(fMatPiggyBacks);
+    fForceMatHandle = true;
+    ISetNumActivePiggyBacks();
+}
+
+// IPushPiggyBacks ////////////////////////////////////////////////////
+// Push any piggy backs associated with a material, presumed to
+// be a light map because that's all they are used for.
+// Matched with IPopPiggyBacks
+void plMetalPipeline::IPushPiggyBacks(hsGMaterial* mat)
+{
+    hsAssert(!fMatPiggyBacks, "Push/Pop Piggy mismatch");
+
+    const bool piggyBacksDisabled = (fView.fRenderState & plPipeline::kRenderNoPiggyBacks) != 0;
+    if (piggyBacksDisabled) {
+        return;
+    }
+
+    for (int idx = 0; idx < mat->GetNumPiggyBacks(); idx++) {
+        auto piggy = mat->GetPiggyBack(idx);
+
+        // Empty slots are skipped.
+        if (!piggy) {
+            continue;
+        }
+
+        // Light maps are skipped while the no-lightmaps debug flag is set.
+        const bool isLightMap = (piggy->GetMiscFlags() & hsGMatState::kMiscLightMap) != 0;
+        if (isLightMap && IsDebugFlagSet(plPipeDbg::kFlagNoLightmaps)) {
+            continue;
+        }
+
+        fPiggyBackStack.push_back(piggy);
+        fMatPiggyBacks++;
+    }
+
+    fForceMatHandle = true;
+    ISetNumActivePiggyBacks();
+}
+
+// IPopPiggyBacks ///////////////////////////////////////////////////////
+// Pop any current piggy backs set from IPushPiggyBacks.
+// Matches IPushPiggyBacks.
+void plMetalPipeline::IPopPiggyBacks()
+{
+    const bool piggyBacksDisabled = (fView.fRenderState & plPipeline::kRenderNoPiggyBacks) != 0;
+    if (piggyBacksDisabled) {
+        return;
+    }
+
+    // Remove exactly the entries counted by IPushPiggyBacks, then reset the count.
+    const auto remaining = fPiggyBackStack.size() - fMatPiggyBacks;
+    fPiggyBackStack.resize(remaining);
+    fMatPiggyBacks = 0;
+
+    fForceMatHandle = true;
+    ISetNumActivePiggyBacks();
+}
+
+// PiggyBacks - used in techniques like projective lighting.
+// PiggyBacks are layers appended to each drawprimitive pass.
+// For example, if a material has 3 layers which will be drawn
+// in 2 passes,
+// pass0: layer0+layer1
+// pass1: layer2
+// Then if a piggyback layer layerPB is active, the actual rendering would be
+// pass0: layer0+layer1+layerPB
+// pass1: layer2 + layerPB
+
+// ISetNumActivePiggyBacks /////////////////////////////////////////////
+// Calculate the number of active piggy backs.
+size_t plMetalPipeline::ISetNumActivePiggyBacks()
+{
+    // Active piggy backs are capped by fMaxPiggyBacks.
+    return fActivePiggyBacks = std::min(fMaxPiggyBacks, uint32_t(fPiggyBackStack.size()));
+}
+
+// Vertex layout used when compositing clothing textures: a 2D position
+// followed by a 2D texture coordinate, matching the vertex descriptor
+// built in IPreprocessAvatarTextures (two float2 attributes in buffer 0).
+struct plAVTexVert
+{
+    float fPos[2];
+    float fUv[2];
+};
+
+// IPreprocessAvatarTextures ///////////////////////////////////////////
+// Composite the clothing item textures of every submitted outfit into that
+// outfit's render target. Outfits that still have a render target from the
+// previous frame and no dirty items are skipped.
+void plMetalPipeline::IPreprocessAvatarTextures()
+{
+    plProfile_Set(AvRTPoolUsed, fClothingOutfits.size());
+    plProfile_Set(AvRTPoolCount, fAvRTPool.size());
+    plProfile_Set(AvRTPoolRes, fAvRTWidth);
+    plProfile_Set(AvRTShrinkTime, uint32_t(hsTimer::GetSysSeconds() - fAvRTShrinkValidSince));
+
+    // Frees anyone used last frame that we don't need this frame
+    IClearClothingOutfits(&fPrevClothingOutfits);
+
+    if (fClothingOutfits.size() == 0)
+        return;
+
+    plMipmap* itemBufferTex = nullptr;
+
+    for (size_t oIdx = 0; oIdx < fClothingOutfits.size(); oIdx++) {
+        plClothingOutfit* co = fClothingOutfits[oIdx];
+        if (co->fBase == nullptr || co->fBase->fBaseTexture == nullptr)
+            continue;
+
+        plRenderTarget* rt = plRenderTarget::ConvertNoRef(co->fTargetLayer->GetTexture());
+        if (rt != nullptr && co->fDirtyItems.Empty())
+            // we've still got our valid RT from last frame and we have nothing to do.
+            continue;
+
+        if (rt == nullptr) {
+            rt = IGetNextAvRT();
+            // we're about to add a texture that wasn't there before
+            // mark the material as dirty
+            plMetalMaterialShaderRef* ref = static_cast<plMetalMaterialShaderRef*>(co->fMaterial->GetDeviceRef());
+            if (ref) {
+                ref->SetDirty(true);
+            }
+            co->fTargetLayer->SetTexture(rt);
+        }
+
+        PushRenderTarget(rt);
+        fDevice.CurrentRenderCommandEncoder()->setViewport({0, 0, static_cast<double>(rt->GetWidth()), static_cast<double>(rt->GetHeight()), 0.f, 1.f});
+
+        // Created lazily on first use and kept for the lifetime of the process.
+        static MTL::RenderPipelineState* baseAvatarRenderState = nullptr;
+        static MTL::RenderPipelineState* avatarRenderState = nullptr;
+
+        if (!baseAvatarRenderState) {
+            // This is a bit of a hack, this really should be part of the plMetalDevice's function map.
+            // But that hash map assumes that it follows the vertex arrangement of the models.
+            // After a refactor, this function creation should go there.
+            MTL::RenderPipelineDescriptor* descriptor = MTL::RenderPipelineDescriptor::alloc()->init()->autorelease();
+            MTL::Library* library = fDevice.fMetalDevice->newDefaultLibrary()->autorelease();
+
+            MTL::Function* vertFunction = library->newFunction(MTLSTR("PreprocessAvatarVertexShader"))->autorelease();
+            MTL::Function* fragFunction = library->newFunction(MTLSTR("PreprocessAvatarFragmentShader"))->autorelease();
+
+            descriptor->setVertexFunction(vertFunction);
+            descriptor->setFragmentFunction(fragFunction);
+
+            // Two float2 attributes (position, uv) interleaved in buffer 0; see plAVTexVert.
+            MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::vertexDescriptor();
+            vertexDescriptor->attributes()->object(0)->setFormat(MTL::VertexFormatFloat2);
+            vertexDescriptor->attributes()->object(0)->setBufferIndex(0);
+            vertexDescriptor->attributes()->object(0)->setOffset(0);
+            vertexDescriptor->attributes()->object(1)->setFormat(MTL::VertexFormatFloat2);
+            vertexDescriptor->attributes()->object(1)->setBufferIndex(0);
+            vertexDescriptor->attributes()->object(1)->setOffset(sizeof(simd_float2));
+
+            vertexDescriptor->layouts()->object(0)->setStride(sizeof(simd_float2) * 2);
+
+            descriptor->setVertexDescriptor(vertexDescriptor);
+
+            // Base layer: opaque copy, no blending.
+            descriptor->colorAttachments()->object(0)->setBlendingEnabled(false);
+            descriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatBGRA8Unorm);
+            NS::Error* error = nullptr;
+            baseAvatarRenderState = fDevice.fMetalDevice->newRenderPipelineState(descriptor, &error);
+
+            // Remaining layers: alpha blend RGB over the target, keep destination alpha.
+            descriptor->colorAttachments()->object(0)->setBlendingEnabled(true);
+            descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha);
+            descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha);
+            descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorZero);
+            descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorOne);
+            avatarRenderState = fDevice.fMetalDevice->newRenderPipelineState(descriptor, &error);
+
+            // newRenderPipelineState returns null on failure; surface that here
+            // instead of handing a null state to the encoder below.
+            hsAssert(baseAvatarRenderState && avatarRenderState, "Failed to create avatar texture preprocessing pipeline states");
+        }
+
+        // Half-texel offsets for the target.
+        float uOff = 0.5f / rt->GetWidth();
+        float vOff = 0.5f / rt->GetHeight();
+
+        plClothingLayout* layout = plClothingMgr::GetClothingMgr()->GetLayout(co->fBase->fLayoutName);
+
+        for (plClothingItem* item : co->fItems) {
+            for (size_t j = 0; j < item->fElements.size(); j++) {
+                for (int k = 0; k < plClothingElement::kLayerMax; k++) {
+                    if (item->fTextures[j][k] == nullptr)
+                        continue;
+
+                    itemBufferTex = item->fTextures[j][k];
+                    hsColorRGBA tint = co->GetItemTint(item, k);
+                    if (k >= plClothingElement::kLayerSkinBlend1 && k <= plClothingElement::kLayerSkinLast)
+                        tint.a = co->fSkinBlends[k - plClothingElement::kLayerSkinBlend1];
+
+                    if (k == plClothingElement::kLayerBase) {
+                        if (fState.fCurrentPipelineState != baseAvatarRenderState) {
+                            fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(baseAvatarRenderState);
+                            fState.fCurrentPipelineState = baseAvatarRenderState;
+                        }
+                    } else {
+                        if (fState.fCurrentPipelineState != avatarRenderState) {
+                            fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(avatarRenderState);
+                            fState.fCurrentPipelineState = avatarRenderState;
+                        }
+                    }
+                    fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&tint, sizeof(hsColorRGBA), 0);
+
+                    // Map the element rectangle from layout space into [-1, 1] clip space.
+                    // NOTE(review): every axis is divided by fOrigWidth, which presumably
+                    // assumes a square layout -- confirm.
+                    float screenW = (float)item->fElements[j]->fWidth / layout->fOrigWidth * 2.f;
+                    float screenH = (float)item->fElements[j]->fHeight / layout->fOrigWidth * 2.f;
+                    float screenX = (float)item->fElements[j]->fXPos / layout->fOrigWidth * 2.f - 1.f;
+                    float screenY = (1.f - (float)item->fElements[j]->fYPos / layout->fOrigWidth) * 2.f - 1.f - screenH;
+                    IDrawClothingQuad(screenX, screenY, screenW, screenH, uOff, vOff, itemBufferTex);
+                }
+            }
+        }
+
+        PopRenderTarget();
+        co->fDirtyItems.Clear();
+    }
+
+    fView.fXformResetFlags = fView.kResetAll;
+
+    fClothingOutfits.swap(fPrevClothingOutfits);
+}
+
+// IDrawClothingQuad ///////////////////////////////////////////////////
+// Draw one textured quad into the current render command encoder.
+// (x, y) is the corner the quad starts from and (w, h) its extent;
+// uOff/vOff are added to the texture coordinates. The texture's device
+// ref is created or reloaded first if needed.
+void plMetalPipeline::IDrawClothingQuad(float x, float y, float w, float h,
+                                        float uOff, float vOff, plMipmap* tex)
+{
+    plMetalTextureRef* ref = (plMetalTextureRef*)tex->GetDeviceRef();
+    if (!ref || ref->IsDirty()) {
+        CheckTextureRef(tex);
+        ref = (plMetalTextureRef*)tex->GetDeviceRef();
+    }
+    if (!ref) {
+        // Still no device ref after CheckTextureRef: nothing to bind, skip the quad
+        // rather than dereference a null pointer.
+        hsAssert(false, "Clothing texture has no device ref");
+        return;
+    }
+    if (!ref->fTexture) {
+        IReloadTexture(tex, ref);
+    }
+    hsRefCnt_SafeAssign(fLayerRef[0], ref);
+    fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(ref->fTexture, 0);
+
+    plAVTexVert ptr[4];
+    plAVTexVert vert;
+    vert.fPos[0] = x;
+    vert.fPos[1] = y;
+    vert.fUv[0] = uOff;
+    vert.fUv[1] = 1.f + vOff;
+
+    // The corners are stored in the order required by the triangle strip below.
+    // P0
+    ptr[2] = vert;
+
+    // P1
+    ptr[0] = vert;
+    ptr[0].fPos[0] += w;
+    ptr[0].fUv[0] += 1.f;
+
+    // P2
+    ptr[1] = vert;
+    ptr[1].fPos[0] += w;
+    ptr[1].fUv[0] += 1.f;
+    ptr[1].fPos[1] += h;
+    ptr[1].fUv[1] -= 1.f;
+
+    // P3
+    ptr[3] = vert;
+    ptr[3].fPos[1] += h;
+    ptr[3].fUv[1] -= 1.f;
+
+    fDevice.CurrentRenderCommandEncoder()->setVertexBytes(ptr, sizeof(ptr), 0);
+    fDevice.CurrentRenderCommandEncoder()->drawPrimitives(MTL::PrimitiveType::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4));
+}
+
+// FindFragFunction ////////////////////////////////////////////////////
+// Look up "pipelineFragmentShader" in the default library, specialized for
+// one UV channel and one layer, and store it in fFragFunction.
+void plMetalPipeline::FindFragFunction()
+{
+    MTL::Library* library = fDevice.fMetalDevice->newDefaultLibrary();
+
+    NS::Error* error = nullptr;
+
+    MTL::FunctionConstantValues* functionContents = MTL::FunctionConstantValues::alloc()->init();
+    // Declared as DataTypeUShort below, so use an unsigned 16 bit value.
+    uint16_t numUVs = 1;
+    functionContents->setConstantValue(&numUVs, MTL::DataTypeUShort, FunctionConstantNumUVs);
+    functionContents->setConstantValue(&numUVs, MTL::DataTypeUShort, FunctionConstantNumLayers);
+
+    MTL::Function* fragFunction = library->newFunction(
+        NS::String::string("pipelineFragmentShader", NS::ASCIIStringEncoding),
+        functionContents,
+        &error);
+    hsAssert(fragFunction, "Failed to create pipelineFragmentShader function");
+    fFragFunction = fragFunction;
+
+    functionContents->release();
+    library->release();
+}
+
+/*plPipeline* plPipelineCreate::ICreateMetalPipeline(hsWindowHndl disp, hsWindowHndl hWnd, const hsG3DDeviceModeRecord* devMode)
+{
+    plMetalPipeline* pipe = new plMetalPipeline(disp, hWnd, devMode);
+    return pipe;
+}*/
+
+// 
IClearShadowSlaves /////////////////////////////////////////////////////////////////////////// +// At EndRender(), we need to clear our list of shadow slaves. They are only valid for one frame. +void plMetalPipeline::IClearShadowSlaves() +{ + int i; + for (i = 0; i < fShadows.size(); i++) { + const plShadowCaster* caster = fShadows[i]->fCaster; + caster->GetKey()->UnRefObject(); + } + fShadows.clear(); +} + +// Create all our video memory consuming D3D objects. +bool plMetalPipeline::ICreateDynDeviceObjects() +{ + // Front/Back/Depth buffers + // if( ICreateNormalSurfaces() ) + // return true; + + // RenderTarget pools are shared for our shadow generation algorithm. + // Different sizes for different resolutions. + IMakeRenderTargetPools(); + + // Create device-specific stuff + fDebugTextMgr = new plDebugTextManager(); + if (fDebugTextMgr == nullptr) + return true; + + // Vertex buffers, index buffers, textures, etc. + LoadResources(); + + return false; +} + +// IReleaseDynDeviceObjects ////////////////////////////////////////////// +// Make sure we aren't holding on to anything, and release all of +// the D3D resources that we normally hang on to forever. Meaning things +// that persist through unloading one age and loading the next. +void plMetalPipeline::IReleaseDynDeviceObjects() +{ + // We should do this earlier, but the textFont objects don't remove + // themselves from their parent objects yet + delete fDebugTextMgr; + fDebugTextMgr = nullptr; + + while (fTextFontRefList) + delete fTextFontRefList; + + while (fRenderTargetRefList) { + plMetalRenderTargetRef* rtRef = fRenderTargetRefList; + rtRef->Release(); + rtRef->Unlink(); + } + + // The shared dynamic vertex buffers used by things like objects skinned on CPU, or + // particle systems. + IReleaseDynamicBuffers(); + // IReleaseAvRTPool(); + IReleaseRenderTargetPools(); +} + +// IReleaseDynamicBuffers ///////////////////////////////////////////////// +// Release everything we've created in POOL_DEFAULT. 
+// This is called on shutdown or when we lose the device. Search for D3DERR_DEVICELOST. +void plMetalPipeline::IReleaseDynamicBuffers() +{ + // PlateMgr has a POOL_DEFAULT vertex buffer for drawing quads. + if (plMetalPlateManager* pm = static_cast(fPlateMgr)) + pm->IReleaseGeometry(); +} + +// IReleaseRenderTargetPools ////////////////////////////////////////////////// +// Free up all resources assosiated with our pools of rendertargets of varying +// sizes. Primary user of these pools is the shadow generation. +void plMetalPipeline::IReleaseRenderTargetPools() +{ + int i; + + for (i = 0; i < fRenderTargetPool512.size(); i++) { + delete fRenderTargetPool512[i]; + fRenderTargetPool512[i] = nullptr; + } + fRenderTargetPool512.clear(); + + for (i = 0; i < fRenderTargetPool256.size(); i++) { + delete fRenderTargetPool256[i]; + fRenderTargetPool256[i] = nullptr; + } + fRenderTargetPool256.clear(); + + for (i = 0; i < fRenderTargetPool128.size(); i++) { + delete fRenderTargetPool128[i]; + fRenderTargetPool128[i] = nullptr; + } + fRenderTargetPool128.clear(); + + for (i = 0; i < fRenderTargetPool64.size(); i++) { + delete fRenderTargetPool64[i]; + fRenderTargetPool64[i] = nullptr; + } + fRenderTargetPool64.clear(); + + for (i = 0; i < fRenderTargetPool32.size(); i++) { + delete fRenderTargetPool32[i]; + fRenderTargetPool32[i] = nullptr; + } + fRenderTargetPool32.clear(); + + for (i = 0; i < kMaxRenderTargetNext; i++) { + fRenderTargetNext[i] = 0; + // Blur is implemented in Metal through MPS. + // If we need a hand written blur algorithm implement here. 
+ // fBlurScratchRTs[i] = nil; + // fBlurDestRTs[i] = nil; + } + +#ifdef MF_ENABLE_HACKOFF + hackOffscreens.Reset(); +#endif // MF_ENABLE_HACKOFF +} + +/////////////////////////////////////////////////////////////////////////////// +//// ShadowSection +//// Shadow specific internal functions +/////////////////////////////////////////////////////////////////////////////// +// See plGLight/plShadowMaster.cpp for more notes. + +float blurScale = -1.f; +static const int kL2NumSamples = 3; // Log2(4) + +// IPrepShadowCaster //////////////////////////////////////////////////////////////////////// +// Make sure all the geometry in this shadow caster is ready to be rendered. +// Keep in mind the single shadow caster may be multiple spans possibly in +// multiple drawables. +// The tricky part here is that we need to prep each drawable involved, +// but only prep it once. Say the caster is composed of: +// drawableA, span0 +// drawableA, span1 +// drawableB, span0 +// Then we need to call plDrawable::PrepForRender() ONCE on drawableA, +// and once on drawableB. Further, we need to do any necessary CPU +// skinning with ISofwareVertexBlend(drawableA, visList={0,1}) and +// ISofwareVertexBlend(drawableB, visList={1}). +bool plMetalPipeline::IPrepShadowCaster(const plShadowCaster* caster) +{ + static hsBitVector done; + done.Clear(); + const std::vector& castSpans = caster->Spans(); + + int i; + for (i = 0; i < castSpans.size(); i++) { + if (!done.IsBitSet(i)) { + // We haven't already done this castSpan + + plDrawableSpans* drawable = castSpans[i].fDraw; + + // Start a visList with this index. + static std::vector visList; + visList.clear(); + visList.push_back((int16_t)(castSpans[i].fIndex)); + + // We're about to have done this castSpan. + done.SetBit(i); + + // Look forward through castSpans for any other spans + // with the same drawable, and add them to visList. + // We'll handle all the spans from this drawable at once. 
+ int j; + for (j = i + 1; j < castSpans.size(); j++) { + if (!done.IsBitSet(j) && (castSpans[j].fDraw == drawable)) { + // Add to list + visList.push_back((int16_t)(castSpans[j].fIndex)); + + // We're about to have done this castSpan. + done.SetBit(j); + } + } + // That's all, prep the drawable. + drawable->PrepForRender(this); + + // Do any software skinning. + if (!ISoftwareVertexBlend(drawable, visList)) + return false; + } + } + + return true; +} + +// IRenderShadowCaster //////////////////////////////////////////////// +// Render the shadow caster into the slave's render target, creating a shadow map. +bool plMetalPipeline::IRenderShadowCaster(plShadowSlave* slave) +{ + const plShadowCaster* caster = slave->fCaster; + + // Setup to render into the slave's render target. + if (!IPushShadowCastState(slave)) + return false; + + // Get the shadow caster ready to render. + if (!IPrepShadowCaster(slave->fCaster)) + return false; + + // for each shadowCaster.fSpans + int iSpan; + for (iSpan = 0; iSpan < caster->Spans().size(); iSpan++) { + plDrawableSpans* dr = caster->Spans()[iSpan].fDraw; + const plSpan* sp = caster->Spans()[iSpan].fSpan; + uint32_t spIdx = caster->Spans()[iSpan].fIndex; + + hsAssert(sp->fTypeMask & plSpan::kIcicleSpan, "Shadow casting from non-trimeshes not currently supported"); + + // render shadowcaster.fSpans[i] to rendertarget + if (!(sp->fProps & plSpan::kPropNoShadowCast)) + IRenderShadowCasterSpan(slave, dr, *(const plIcicle*)sp); + + // Keep track of which shadow slaves this span was rendered into. + // If self-shadowing is off, we use that to determine not to + // project the shadow map onto its source geometry. + sp->SetShadowBit(slave->fIndex); // index set in SubmitShadowSlave + } + + // Debug only. + if (blurScale >= 0.f) + slave->fBlurScale = blurScale; + + // If this shadow requests being blurred, do it. 
+ if (slave->fBlurScale > 0.f) + fDevice.EncodeBlur(fDevice.GetCurrentCommandBuffer(), fDevice.fCurrentFragmentOutputTexture, slave->fBlurScale); + + // Finished up, restore previous state. + IPopShadowCastState(slave); + +#if MCN_BOUNDS_SPANS + if (IsDebugFlagSet(plPipeDbg::kFlagShowShadowBounds)) { + /// Add a span to our boundsIce to show this + IAddBoundsSpan(fBoundsSpans, &slave->fWorldBounds); + } +#endif // MCN_BOUNDS_SPANS + + return true; +} + +// We have a (possibly empty) list of shadows submitted for this frame. +// At BeginRender, we need to accomplish: +// Find render targets for each shadow request of the requested size. +// Render the associated spans into the render targets. Something like the following: +void plMetalPipeline::IPreprocessShadows() +{ + plProfile_BeginTiming(PrepShadows); + + // Mark our shared resources as free to be used. + IResetRenderTargetPools(); + + // Some board (possibly the Parhelia) freaked if anistropic filtering + // was enabled when rendering to a render target. We never need it for + // shadow maps, and it is slower, so we just kill it here. + // ISetAnisotropy(false); + + // Generate a shadow map for each submitted shadow slave. + // Shadow slave corresponds to one shadow caster paired + // with one shadow light that affects it. So a single caster + // may be in multiple slaves (from different lights), or a + // single light may be in different slaves (affecting different + // casters). The overall number is low in spite of the possible + // permutation explosion, because a slave is only generated + // for a caster being affected (in range etc.) by a light. + int iSlave; + for (iSlave = 0; iSlave < fShadows.size(); iSlave++) { + plShadowSlave* slave = fShadows[iSlave]; + + // Any trouble, remove it from the list for this frame. 
+ if (!IRenderShadowCaster(slave)) { + fShadows.erase(fShadows.begin() + iSlave); + iSlave--; + continue; + } + } + + // Restore + // ISetAnisotropy(true); + + plProfile_EndTiming(PrepShadows); +} + +// IPushShadowCastState //////////////////////////////////////////////////////////////////////////////// +// Push all the state necessary to start rendering this shadow map, but independent of the +// actual shadow caster to be rendered into the map. +bool plMetalPipeline::IPushShadowCastState(plShadowSlave* slave) +{ + plRenderTarget* renderTarg = IFindRenderTarget(slave->fWidth, slave->fHeight, slave->fView.GetOrthogonal()); + if (!renderTarg) + return false; + + // Let the slave setup the transforms, viewport, etc. necessary to render it's shadow + // map. This just goes into a plViewTransform, we translate that into D3D state ourselves below. + if (!slave->SetupViewTransform(this)) + return false; + + // Set texture to U_LUT + fCurrentRenderPassUniforms->specularSrc = 0.0; + + // if( !ref->fTexture ) + //{ + // if( ref->fData ) + // IReloadTexture( ref ); + // } + // fDevice.SetRenderTarget(ref->fTexture); + + // Push the shadow slave's view transform as our current render state. + fViewStack.push(fView); + fView.SetMaxCullNodes(0); + SetViewTransform(slave->fView); + IProjectionMatrixToDevice(); + + // Push the shadow map as the current render target + PushRenderTarget(renderTarg); + + // We'll be rendering the light space distance to the span fragment into + // alpha (color is white), so our camera space position, transformed into light space + // and then converted to [0..255] via our ULut. + + // METAL NOTE: D3DTSS_TCI_CAMERASPACEPOSITION and D3DTTFF_COUNT3 are hardcoded into the shader + + // Set texture transform to slave's lut transform. See plShadowMaster::IComputeLUT(). 
+ hsMatrix44 castLUT = slave->fCastLUT; + if (slave->fFlags & plShadowSlave::kCastInCameraSpace) { + hsMatrix44 c2w = GetCameraToWorld(); + + castLUT = castLUT * c2w; + } + + simd_float4x4 tXfm; + hsMatrix2SIMD(castLUT, &tXfm); + + fCurrentRenderPassUniforms->uvTransforms[0].transform = tXfm; + fCurrentRenderPassUniforms->uvTransforms[0].UVWSrc = plLayerInterface::kUVWPosition; + + /*DWORD clearColor = 0xff000000L; +// const int l2NumSamples = kL2NumSamples; // HACKSAMPLE + const int l2NumSamples = mfCurrentTest > 101 ? 3 : 2; + DWORD intens; + if( slave->fBlurScale > 0 ) + { + const int kNumSamples = mfCurrentTest > 101 ? 8 : 4; + int nPasses = (int)ceil(float(kNumSamples) / fMaxLayersAtOnce); + int nSamplesPerPass = kNumSamples / nPasses; + DWORD k = int(128.f / float(nSamplesPerPass)); + intens = (0xff << 24) + | ((128 + k) << 16) + | ((128 + k) << 8) + | ((128 + k) << 0); + clearColor = (0xff << 24) + | ((128 - k) << 16) + | ((128 - k) << 8) + | ((128 - k) << 0); + } + else + intens = 0xffffffff;*/ + + // Note that we discard the shadow caster's alpha here, although we don't + // need to. Even on a 2 texture stage system, we could include the diffuse + // alpha and the texture alpha from the base texture. But we don't. + + // Set color to white. We could accomplish this easier by making the color + // in our ULut white. 
+ /*fD3DDevice->SetRenderState(D3DRS_TEXTUREFACTOR, intens); + + fSettings.fVeryAnnoyingTextureInvalidFlag = true; + fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG1, D3DTA_TFACTOR); + fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP, D3DTOP_SELECTARG1); + + fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAARG1, D3DTA_TEXTURE); + fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAOP, D3DTOP_SELECTARG1); + fLayerState[0].fBlendFlags = uint32_t(-1); + + // For stage 1 - disable + fLastEndingStage = 1; + fD3DDevice->SetTextureStageState(1, D3DTSS_COLOROP, D3DTOP_DISABLE); + fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAOP, D3DTOP_DISABLE); + fLayerState[1].fBlendFlags = uint32_t(-1);*/ + + // fD3DDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, TRUE); + // fD3DDevice->SetRenderState(D3DRS_SRCBLEND, D3DBLEND_ONE); + // fD3DDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ZERO); + + // fD3DDevice->SetRenderState(D3DRS_ALPHAFUNC, D3DCMP_ALWAYS); + + slave->fPipeData = renderTarg; + + // Enable ZBuffering w/ write + // fD3DDevice->SetRenderState(D3DRS_ZWRITEENABLE, TRUE); + // fLayerState[0].fZFlags &= ~hsGMatState::kZMask; + + // Clear the render target: + // alpha to white ensures no shadow where there's no caster + // color to black in case we ever get blurring going + // Z to 1 + // Stencil ignored + if (slave->ReverseZ()) { + fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fReverseZStencilState); + // fD3DDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_GREATEREQUAL); + // fD3DDevice->Clear(0, nil, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, clearColor, 0.0f, 0L); + } else { + fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fDefaultStencilState); + } + + // Bring the viewport in (AFTER THE CLEAR) to protect the alpha boundary. 
+ fView.GetViewTransform().SetViewPort(1, 1, (float)(slave->fWidth - 2), (float)(slave->fHeight - 2), false); + fDevice.SetViewport(); + + // inlEnsureLightingOff(); + + return true; +} + +// ISetupShadowState ////////////////////////////////////////////////////////////////// +// We use the shadow light to modulate the shadow effect in two ways while +// projecting the shadow map onto the scene. +// First, the intensity of the shadow follows the N dot L of the light on +// the surface being projected onto. So on a sphere, the darkening effect +// of the shadow will fall off as the normals go from pointing to the light to +// pointing 90 degrees off. +// Second, we attenuate the whole shadow effect through the lights diffuse color. +// We attenuate for different reasons, like the intensity of the light, or +// to fade out a shadow as it gets too far in the distance to matter. +void plMetalPipeline::ISetupShadowState(plShadowSlave* slave, plShadowState& shadowState) +{ + shadowState.power = slave->fPower; + + slave->fSelfShadowOn = false; + + if (slave->Positional()) { + hsPoint3 position = slave->fLightPos; + shadowState.lightPosition.x = position.fX; + shadowState.lightPosition.y = position.fY; + shadowState.lightPosition.z = position.fZ; + + shadowState.directional = false; + } else { + hsVector3 dir = slave->fLightDir; + shadowState.lightDirection.x = dir.fX; + shadowState.lightDirection.y = dir.fY; + shadowState.lightDirection.z = dir.fZ; + + shadowState.directional = true; + } +} + +// IFindRenderTarget ////////////////////////////////////////////////////////////////// +// Find a matching render target from the pools. We prefer the requested size, but +// will look for a smaller size if there isn't one available. +// Param ortho indicates whether it will be used for orthogonal projection as opposed +// to perspective (directional light vs. point light), but is no longer used. 
+plRenderTarget* plMetalPipeline::IFindRenderTarget(uint32_t& width, uint32_t& height, bool ortho) +{ + std::vector* pool = nil; + uint32_t* iNext = nil; + // NOT CURRENTLY SUPPORTING NON-SQUARE SHADOWS. IF WE DO, CHANGE THIS. + switch (height) { + case 512: + pool = &fRenderTargetPool512; + iNext = &fRenderTargetNext[9]; + break; + case 256: + pool = &fRenderTargetPool256; + iNext = &fRenderTargetNext[8]; + break; + case 128: + pool = &fRenderTargetPool128; + iNext = &fRenderTargetNext[7]; + break; + case 64: + pool = &fRenderTargetPool64; + iNext = &fRenderTargetNext[6]; + break; + case 32: + pool = &fRenderTargetPool32; + iNext = &fRenderTargetNext[5]; + break; + default: + return nil; + } + plRenderTarget* rt = (*pool)[*iNext]; + if (!rt) { + // We didn't find one, try again the next size down. + if (height > 32) + return IFindRenderTarget(width >>= 1, height >>= 1, ortho); + + // We must be totally out. Oh well. + return nil; + } + (*iNext)++; + + return rt; +} + +//// SharedRenderTargetRef ////////////////////////////////////////////////////// +// Same as MakeRenderTargetRef, except specialized for the shadow map generation. +// The shadow map pools of a given dimension (called RenderTargetPool) all share +// a single depth buffer of that size. This allows sharing on NVidia hardware +// that wants the depth buffer dimensions to match the color buffer size. +// It may be that NVidia hardware doesn't care any more. Contact Matthias +// about that. +hsGDeviceRef* plMetalPipeline::SharedRenderTargetRef(plRenderTarget* share, plRenderTarget* owner) +{ + plMetalRenderTargetRef* ref = nullptr; + MTL::Texture* depthSurface = nullptr; + MTL::Texture* texture = nullptr; + MTL::Texture* cTexture = nullptr; + int i; + plCubicRenderTarget* cubicRT; + uint16_t width, height; + + // If we don't already have one to share from, start from scratch. 
+ if (!share) + return MakeRenderTargetRef(owner); + + // hsAssert(!fManagedAlloced, "Allocating non-managed resource with managed resources alloc'd"); + +#ifdef HS_DEBUGGING + // Check out the validity of the match. Debug only. + hsAssert(!owner->GetParent() == !share->GetParent(), "Mismatch on shared render target"); + hsAssert(owner->GetWidth() == share->GetWidth(), "Mismatch on shared render target"); + hsAssert(owner->GetHeight() == share->GetHeight(), "Mismatch on shared render target"); + hsAssert(owner->GetZDepth() == share->GetZDepth(), "Mismatch on shared render target"); + hsAssert(owner->GetStencilDepth() == share->GetStencilDepth(), "Mismatch on shared render target"); +#endif // HS_DEBUGGING + + /// Check--is this renderTarget really a child of a cubicRenderTarget? + if (owner->GetParent() != nullptr) { + /// This'll create the deviceRefs for all of its children as well + SharedRenderTargetRef(share->GetParent(), owner->GetParent()); + return owner->GetDeviceRef(); + } + + if (owner->GetDeviceRef() != nullptr) + ref = (plMetalRenderTargetRef*)owner->GetDeviceRef(); + + // Look for a good format of matching color and depth size. + // FIXME: we're hardcoded for a certain tier and we aren't trying to create matching render buffers for efficiency + // if( !IFindRenderTargetInfo(owner, surfFormat, resType) ) + //{ + // hsAssert( false, "Error getting renderTarget info" ); + // return nullptr; + //} + + /// Create the render target now + // Start with the depth. We're just going to share the depth surface on the + // input shareRef. + plMetalRenderTargetRef* shareRef = (plMetalRenderTargetRef*)share->GetDeviceRef(); + hsAssert(shareRef, "Trying to share from a render target with no ref"); + depthSurface = shareRef->fDepthBuffer; + + // FIXME: Add the usage to these textures, they're only accessed by the GPU + // Check for Cubic. This is unlikely, since this function is currently only + // used for the shadow map pools. 
+ cubicRT = plCubicRenderTarget::ConvertNoRef(owner); + if (cubicRT != nullptr) { + /// And create the ref (it'll know how to set all the flags) + if (ref != nullptr) + ref->SetOwner(owner); + else { + ref = new plMetalRenderTargetRef(); + ref->SetOwner(owner); + } + + MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::textureCubeDescriptor(MTL::PixelFormatRGBA8Uint, owner->GetWidth(), false); + MTL::Texture* cubeTexture = fDevice.fMetalDevice->newTexture(textureDescriptor); + + // hsAssert(!fManagedAlloced, "Alloc default with managed alloc'd"); + if (cubeTexture) { + /// Create a CUBIC texture + for (i = 0; i < 6; i++) { + plRenderTarget* face = cubicRT->GetFace(i); + plMetalRenderTargetRef* fRef; + + if (face->GetDeviceRef() != nullptr) { + fRef = (plMetalRenderTargetRef*)face->GetDeviceRef(); + fRef->SetOwner(face); + if (!fRef->IsLinked()) + fRef->Link(&fRenderTargetRefList); + } else { + plMetalRenderTargetRef* targetRef = new plMetalRenderTargetRef(); + targetRef->SetOwner(face); + face->SetDeviceRef(targetRef); + ((plMetalRenderTargetRef*)face->GetDeviceRef())->Link(&fRenderTargetRefList); + // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least) + hsRefCnt_SafeUnRef(face->GetDeviceRef()); + } + } + + ref->fTexture = cubeTexture; + } else { + hsRefCnt_SafeUnRef(ref); + ref = nullptr; + } + } + // Is it a texture render target? Probably, since shadow maps are all we use this for. + else if (owner->GetFlags() & plRenderTarget::kIsTexture || owner->GetFlags() & plRenderTarget::kIsOffscreen) { + // DX seperated the onscreen and offscreen types. Metal doesn't care. All render targets are textures. 
+ /// Create a normal texture + if (ref != nullptr) + ref->SetOwner(owner); + else { + ref = new plMetalRenderTargetRef(); + ref->SetOwner(owner); + } + + MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatBGRA8Unorm, owner->GetWidth(), owner->GetHeight(), false); + // Give compute shader write access + textureDescriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); + MTL::Texture* texture = fDevice.fMetalDevice->newTexture(textureDescriptor); + if (texture) { + ref->fTexture = texture; + } else { + hsRefCnt_SafeUnRef(ref); + ref = nullptr; + } + + if (owner->GetZDepth() && (owner->GetFlags() & (plRenderTarget::kIsTexture | plRenderTarget::kIsOffscreen))) { + MTL::TextureDescriptor* depthTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatDepth32Float_Stencil8, + owner->GetWidth(), + owner->GetHeight(), + false); + + if (fDevice.fMetalDevice->supportsFamily(MTL::GPUFamilyApple1)) { + depthTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); + } else { + depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); + } + depthTextureDescriptor->setUsage(MTL::TextureUsageRenderTarget); + MTL::Texture* depthBuffer = fDevice.fMetalDevice->newTexture(depthTextureDescriptor); + ref->fDepthBuffer = depthBuffer; + } + } + + if (owner->GetDeviceRef() != ref) { + owner->SetDeviceRef(ref); + // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least) + hsRefCnt_SafeUnRef(ref); + if (ref != nullptr && !ref->IsLinked()) + ref->Link(&fRenderTargetRefList); + } else { + if (ref != nullptr && !ref->IsLinked()) + ref->Link(&fRenderTargetRefList); + } + + if (ref != nullptr) { + ref->SetDirty(false); + } + + return ref; +} + +// IMakeRenderTargetPools ///////////////////////////////////////////////////////////// +// These are actually only used as shadow map pools, but they could be used for other +// 
render targets.
+// All these are created here in a single call because they go in POOL_DEFAULT, so they
+// must be created before we start creating things in POOL_MANAGED.
+void plMetalPipeline::IMakeRenderTargetPools()
+{
+    // FIXME: We should probably have a release function for the render target pools
+    // IReleaseRenderTargetPools(); // Just to be sure.
+
+    // Numbers of render targets to be created for each size.
+    // These numbers were set with multi-player in mind, so should be reconsidered.
+    // But do keep in mind that there are many things in production assets that cast
+    // shadows besides the avatar.
+    plConst(float) kCount[kMaxRenderTargetNext] = {
+        0,  // 1x1
+        0,  // 2x2
+        0,  // 4x4
+        0,  // 8x8
+        0,  // 16x16
+        32, // 32x32
+        16, // 64x64
+        8,  // 128x128
+        4,  // 256x256
+        0   // 512x512
+    };
+    int i;
+    for (i = 0; i < kMaxRenderTargetNext; i++) {
+        // i is log2 of the render target dimension; only 32x32 and up have pools.
+        std::vector<plRenderTarget*>* pool = nullptr;
+        switch (i) {
+            default:
+            case 0:
+            case 1:
+            case 2:
+            case 3:
+            case 4:
+                break;
+
+            case 5:
+                pool = &fRenderTargetPool32;
+                break;
+            case 6:
+                pool = &fRenderTargetPool64;
+                break;
+            case 7:
+                pool = &fRenderTargetPool128;
+                break;
+            case 8:
+                pool = &fRenderTargetPool256;
+                break;
+            case 9:
+                pool = &fRenderTargetPool512;
+                break;
+        }
+        if (pool) {
+            // One extra slot: the pool always ends with a null entry, which
+            // IFindRenderTarget reads as "nothing left at this size".
+            pool->resize(kCount[i] + 1);
+            (*pool)[0] = nullptr;
+            (*pool)[(int)(kCount[i])] = nullptr;
+            int j;
+            for (j = 0; j < kCount[i]; j++) {
+                uint16_t flags = plRenderTarget::kIsTexture | plRenderTarget::kIsProjected;
+                uint8_t  bitDepth = 32;
+                uint8_t  zDepth = 24;
+                uint8_t  stencilDepth = 0;
+
+                // If we ever allow non-square shadows, change this.
+                int width = 1 << i;
+                int height = width;
+
+                plRenderTarget* rt = new plRenderTarget(flags, width, height, bitDepth, zDepth, stencilDepth);
+
+                // If we've failed to create our render target ref, we're probably out of
+                // video memory. We'll return nil, and this guy just doesn't get a shadow
+                // until more video memory turns up (not likely).
+                if (!SharedRenderTargetRef((*pool)[0], rt)) {
+                    delete rt;
+                    // Shrink the pool to what was created, keeping the null terminator.
+                    pool->resize(j + 1);
+                    (*pool)[j] = nullptr;
+                    break;
+                }
+                (*pool)[j] = rt;
+            }
+        }
+    }
+}
+
+// IPopShadowCastState ///////////////////////////////////////////////////
+// Pop the state set to render this shadow caster, so we're ready to render
+// a different shadow caster, or go on to our main render.
+// Always returns true.
+bool plMetalPipeline::IPopShadowCastState(plShadowSlave* slave)
+{
+    fView = fViewStack.top();
+    fViewStack.pop();
+
+    PopRenderTarget();
+    fView.fXformResetFlags = fView.kResetProjection | fView.kResetCamera;
+
+    // Unbind fragment texture slot 16.
+    fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(nullptr, 16);
+
+    return true;
+}
+
+// IResetRenderTargetPools /////////////////////////////////////////////////////////////////
+// No release of resources, this just resets for the start of a frame. So if a shadow
+// slave gets a render target from a pool, once this is called (conceptually at the
+// end of the frame), the slave no longer owns that render target.
+void plMetalPipeline::IResetRenderTargetPools()
+{
+    int i;
+    for (i = 0; i < kMaxRenderTargetNext; i++) {
+        fRenderTargetNext[i] = 0;
+        // fBlurScratchRTs[i] = nullptr;
+        // fBlurDestRTs[i] = nullptr;
+    }
+
+    // fLights.fNextShadowLight = 0;
+}
+
+// IRenderShadowCasterSpan //////////////////////////////////////////////////////////////////////
+// Render the span into a rendertarget of the correct size, generating
+// a depth map from this light to that span.
+void plMetalPipeline::IRenderShadowCasterSpan(plShadowSlave* slave, plDrawableSpans* drawable, const plIcicle& span)
+{
+    // Check that it's ready to render.
+    plProfile_BeginTiming(CheckDyn);
+    ICheckDynBuffers(drawable, drawable->GetBufferGroup(span.fGroupIdx), &span);
+    plProfile_EndTiming(CheckDyn);
+
+    plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)drawable->GetVertexRef(span.fGroupIdx, span.fVBufferIdx);
+    plMetalIndexBufferRef*  iRef = (plMetalIndexBufferRef*)drawable->GetIndexRef(span.fGroupIdx, span.fIBufferIdx);
+
+    // Bail out if either ref, or the buffer behind it, is missing.
+    if (vRef == nullptr || iRef == nullptr || vRef->GetBuffer() == nullptr || iRef->GetBuffer() == nullptr) {
+        hsAssert(false, "Trying to render a nil buffer pair!");
+        return;
+    }
+
+    /// Switch to the vertex buffer we want
+    plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalRenderShadowCasterPipelineState(&fDevice, vRef).GetRenderPipelineState();
+    if (fState.fCurrentPipelineState != linkedPipeline->pipelineState) {
+        fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState);
+        fState.fCurrentPipelineState = linkedPipeline->pipelineState;
+    }
+
+    // Only rebind the vertex buffer when it differs from the cached one.
+    if (fState.fCurrentVertexBuffer != vRef->GetBuffer()) {
+        fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0);
+        fState.fCurrentVertexBuffer = vRef->GetBuffer();
+    }
+
+    fDevice.fCurrentIndexBuffer = iRef->GetBuffer();
+    fState.fCurrentCullMode = MTL::CullModeNone;
+    fDevice.CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone);
+
+    uint32_t vStart = span.fVStartIdx;
+    uint32_t vLength = span.fVLength;
+    uint32_t iStart = span.fIPackedIdx;
+    uint32_t iLength = span.fILength;
+
+    plRenderTriListFunc render(&fDevice, 0, vStart, vLength, iStart, iLength);
+
+    static hsMatrix44 emptyMatrix;
+    hsMatrix44        m = emptyMatrix;
+
+    ISetupTransforms(drawable, span, m);
+
+    bool flip = slave->ReverseCull();
+    ISetCullMode(flip);
+
+    render.RenderPrims();
+}
+
+// IRenderShadowsOntoSpan /////////////////////////////////////////////////////////////////////
+// After doing the usual render for a span (all passes), we call the following.
+// If the span accepts shadows, this will loop over all the shadows active this +// frame, and apply the ones that intersect this spans bounds. See below for details. +void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, const plSpan* span, hsGMaterial* mat, plMetalVertexBufferRef* vRef) +{ + // We've already computed which shadows affect this span. That's recorded in slaveBits. + const hsBitVector& slaveBits = span->GetShadowSlaves(); + + bool first = true; + + for (size_t i = 0; i < fShadows.size(); i++) { + if (slaveBits.IsBitSet(fShadows[i]->fIndex)) { + // This slave affects this span. + if (first) { + // On the first, we do all the setup that is independent of + // the shadow slave, so state that needs to get set once before + // projecting any number of shadow maps. + ISetupShadowRcvTextureStages(mat); + + first = false; + } + + // Now setup any state specific to this shadow slave. + ISetupShadowSlaveTextures(fShadows[i]); + + // See ISetupShadowLight below for how the shadow light is used. + // The shadow light isn't used in generating the shadow map, it's used + // in projecting the shadow map onto the scene. + plShadowState shadowState; + ISetupShadowState(fShadows[i], shadowState); + + struct plMetalFragmentShaderDescription passDescription{}; + + passDescription.fNumLayers = fCurrNumLayers = 3; + + /* + Things get a wee bit complicated here. + + The texture we want to alpha blend with is already bound to texture 0 or texture 1. + However - the texture co-ords we want are in position 2 in the FVF vertex buffer. (stage 3) + + Build the shader with texture descriptions set properly for textures 0 and 1, + but put the instructions on how to treat the UVW for textures 0 or 1 into + the third stage. + + The shadow cast shader will automatically look in textures 0 and 1 when doing + the third stage blend. This saves us a texture bind. 
+ */ + + passDescription.PopulateTextureInfo(mat->GetLayer(0), 0); + passDescription.Populate(mat->GetLayer(0), 2); + + if (mat->GetNumLayers() > 1) { + passDescription.PopulateTextureInfo(mat->GetLayer(1), 1); + passDescription.Populate(mat->GetLayer(1), 2); + } + // There's no texture for the third stage if we're reusing the textures + // for the first and second stages from the last render. + passDescription.fPassTypes[2] = PassTypeColor; + + plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalRenderShadowPipelineState(&fDevice, vRef, passDescription).GetRenderPipelineState(); + if (fState.fCurrentPipelineState != linkedPipeline->pipelineState) { + fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); + fState.fCurrentPipelineState = linkedPipeline->pipelineState; + } + + int selfShadowNow = span->IsShadowBitSet(fShadows[i]->fIndex); + + // We vary the shadow intensity when self shadowing (see below), + // so we cache whether the shadow light is set for regular or + // self shadowing intensity. If what we're doing now is different + // than what we're currently set for, set it again. + if (selfShadowNow != fShadows[i]->fSelfShadowOn) { + // We lower the power on self shadowing, because the artists like to + // crank up the shadow strength to huge values to get a darker shadow + // on the environment, which causes the shadow on the avatar to get + // way too dark. Another way to look at it is when self shadowing, + // the surface being projected onto is going to be very close to + // the surface casting the shadow (because they are the same object). + if (selfShadowNow) { + plConst(float) kMaxSelfPower = 0.3f; + float power = (float)fShadows[i]->fPower > kMaxSelfPower ? (float)kMaxSelfPower : ((float)fShadows[i]->fPower); + shadowState.power = power; + } else { + shadowState.power = fShadows[i]->fPower; + } + + // record which our intensity is now set for. 
+ fShadows[i]->fSelfShadowOn = selfShadowNow; + } + + fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&shadowState, sizeof(shadowState), VertexShaderArgumentShadowState); + +#ifndef PLASMA_EXTERNAL_RELEASE + if (!IsDebugFlagSet(plPipeDbg::kFlagNoShadowApply)) +#endif // PLASMA_EXTERNAL_RELEASE + render.RenderPrims(); + } + } +} + +// ISetupShadowRcvTextureStages //////////////////////////////////////////// +// Set the generic stage states. We'll fill in the specific textures +// for each slave later. +void plMetalPipeline::ISetupShadowRcvTextureStages(hsGMaterial* mat) +{ + // Do this first, this normally stomps all over our uniforms + // FIXME: Way to encode layers without stomping all over uniforms? + plMetalMaterialShaderRef* matShader = static_cast(mat->GetDeviceRef()); + // matShader->encodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, 0, 0, nullptr); + + // We're whacking about with renderstate independent of current material, + // so make sure the next span processes it's material, even if it's the + // same one. + fForceMatHandle = true; + + // Set the D3D lighting/material model + ISetShadowLightState(mat); + + // Zbuffering on read-only + + if (fState.fCurrentDepthStencilState != fDevice.fNoZWriteStencilState) { + fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fNoZWriteStencilState); + fState.fCurrentDepthStencilState = fDevice.fNoZWriteStencilState; + } + + int layerIndex = -1; + // If mat's base layer is alpha'd, and we have > 3 TMU's factor + // in the base layer's alpha. + if ((fMaxLayersAtOnce > 3) && mat->GetLayer(0)->GetTexture() && (mat->GetLayer(0)->GetBlendFlags() & hsGMatState::kBlendAlpha)) { + plLayerInterface* layer = mat->GetLayer(0); + layerIndex = 0; + + // If the following conditions are met, it means that layer 1 is a better choice to + // get the transparency from. 
The specific case we're looking for is vertex alpha + // simulated by an invisible second layer alpha LUT (known as the alpha hack). + if ((layer->GetMiscFlags() & hsGMatState::kMiscBindNext) && mat->GetLayer(1) && !(mat->GetLayer(1)->GetMiscFlags() & hsGMatState::kMiscNoShadowAlpha) && !(mat->GetLayer(1)->GetBlendFlags() & hsGMatState::kBlendNoTexAlpha) && mat->GetLayer(1)->GetTexture()) { + layer = mat->GetLayer(1); + layerIndex = 1; + } + + // Normal UVW source. + uint32_t uvwSrc = layer->GetUVWSrc(); + + // Normal UVW source. + fCurrentRenderPassUniforms->uvTransforms[2].UVWSrc = uvwSrc; + // MiscFlags to layer's misc flags + matrix_float4x4 tXfm; + hsMatrix2SIMD(layer->GetTransform(), &tXfm); + fCurrentRenderPassUniforms->uvTransforms[2].transform = tXfm; + } + + fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&layerIndex, sizeof(int), FragmentShaderArgumentShadowCastAlphaSrc); +} + +// ISetShadowLightState ////////////////////////////////////////////////////////////////// +// Set the D3D lighting/material model for projecting the shadow map onto this material. 
+void plMetalPipeline::ISetShadowLightState(hsGMaterial* mat) +{ + fCurrLightingMethod = plSpan::kLiteShadow; + + if (mat && mat->GetNumLayers() && mat->GetLayer(0)) + fCurrentRenderPassUniforms->diffuseCol.r = fCurrentRenderPassUniforms->diffuseCol.g = fCurrentRenderPassUniforms->diffuseCol.b = mat->GetLayer(0)->GetOpacity(); + else + fCurrentRenderPassUniforms->diffuseCol.r = fCurrentRenderPassUniforms->diffuseCol.g = fCurrentRenderPassUniforms->diffuseCol.b = 1.f; + fCurrentRenderPassUniforms->diffuseCol.a = 1.f; + + fCurrentRenderPassUniforms->diffuseSrc = 1.0f; + fCurrentRenderPassUniforms->emissiveSrc = 1.0f; + fCurrentRenderPassUniforms->emissiveCol = 0.0f; + fCurrentRenderPassUniforms->specularSrc = 0.0f; + fCurrentRenderPassUniforms->ambientSrc = 0.0f; + fCurrentRenderPassUniforms->globalAmb = 0.0f; +} + +// IDisableLightsForShadow /////////////////////////////////////////////////////////// +// Disable any lights that are enabled. We'll only want the shadow light illuminating +// the surface. +void plMetalPipeline::IDisableLightsForShadow() +{ + // FIXME: Planned for removal - but used by projections. New light code will obsolete. + int i; + for (i = 0; i < 8; i++) { + IDisableLight(i); + } + fLights.count = 0; +} + +// ISetupShadowSlaveTextures ////////////////////////////////////////////// +// Set any state specific to this shadow slave for projecting the slave's +// shadow map onto the surface. +void plMetalPipeline::ISetupShadowSlaveTextures(plShadowSlave* slave) +{ + // D3DMATRIX tXfm; + + hsMatrix44 c2w = GetCameraToWorld(); + + // Stage 0: + // Set Stage 0's texture to the slave's rendertarget. 
+ // Set texture transform to slave's camera to texture transform + plRenderTarget* renderTarg = (plRenderTarget*)slave->fPipeData; + hsAssert(renderTarg, "Processing a slave that hasn't been rendered"); + if (!renderTarg) + return; + plMetalTextureRef* ref = (plMetalTextureRef*)renderTarg->GetDeviceRef(); + hsAssert(ref, "Shadow map ref should have been made when it was rendered"); + if (!ref) + return; + + hsRefCnt_SafeAssign(fLayerRef[0], ref); + fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(ref->fTexture, 16); + + plMetalShadowCastFragmentShaderArgumentBuffer uniforms; + uniforms.pointLightCast = slave->fView.GetOrthogonal() ? false : true; + fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&uniforms, sizeof(plMetalShadowCastFragmentShaderArgumentBuffer), FragmentShaderArgumentShadowCastUniforms); + + hsMatrix44 cameraToTexture = slave->fWorldToTexture * c2w; + simd_float4x4 tXfm; + hsMatrix2SIMD(cameraToTexture, &tXfm); + + fCurrentRenderPassUniforms->uvTransforms[0].UVWSrc = plLayerInterface::kUVWPosition; + fCurrentRenderPassUniforms->uvTransforms[0].transform = tXfm; + + // Stage 1: the lut + // Set the texture transform to slave's fRcvLUT + hsMatrix44 cameraToLut = slave->fRcvLUT * c2w; + hsMatrix2SIMD(cameraToLut, &tXfm); + + fCurrentRenderPassUniforms->uvTransforms[1].UVWSrc = plLayerInterface::kUVWPosition; + fCurrentRenderPassUniforms->uvTransforms[1].transform = tXfm; +} + +/////////////////////////////////////////////////////////////////////////////// +//// View Stuff /////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// + +//// IIsViewLeftHanded //////////////////////////////////////////////////////// +// Returns true if the combination of the local2world and world2camera +// matrices is left-handed. 
+ +bool plMetalPipeline::IIsViewLeftHanded() +{ + return fView.GetViewTransform().GetOrthogonal() ^ (fView.fLocalToWorldLeftHanded ^ fView.fWorldToCamLeftHanded) ? true : false; +} + +//// ISetCullMode ///////////////////////////////////////////////////////////// +// Tests and sets the current winding order cull mode (CW, CCW, or none). +// Will reverse the cull mode as necessary for left handed camera or local to world +// transforms. +void plMetalPipeline::ISetCullMode(bool flip) +{ + MTL::CullMode newCullMode = !IIsViewLeftHanded() ^ !flip ? MTL::CullModeFront : MTL::CullModeBack; + if (fState.fCurrentCullMode != newCullMode) { + fDevice.CurrentRenderCommandEncoder()->setCullMode(newCullMode); + fState.fCurrentCullMode = newCullMode; + } +} + +plMetalDevice* plMetalPipeline::GetMetalDevice() const +{ + return &fDevice; +} + +//// Local Static Stuff /////////////////////////////////////////////////////// + +// FIXME: CPU avatar stuff that should be evaluated once this moves onto the GPU. 
+ +template +static inline void inlCopy(uint8_t*& src, uint8_t*& dst) +{ + T* src_ptr = reinterpret_cast(src); + T* dst_ptr = reinterpret_cast(dst); + *dst_ptr = *src_ptr; + src += sizeof(T); + dst += sizeof(T); +} + +template +static inline const uint8_t* inlExtract(const uint8_t* src, T* val) +{ + const T* ptr = reinterpret_cast(src); + *val = *ptr++; + return reinterpret_cast(ptr); +} + +template <> +inline const uint8_t* inlExtract(const uint8_t* src, hsPoint3* val) +{ + const float* src_ptr = reinterpret_cast(src); + float* dst_ptr = reinterpret_cast(val); + *dst_ptr++ = *src_ptr++; + *dst_ptr++ = *src_ptr++; + *dst_ptr++ = *src_ptr++; + *dst_ptr = 1.f; + return reinterpret_cast(src_ptr); +} + +template <> +inline const uint8_t* inlExtract(const uint8_t* src, hsVector3* val) +{ + const float* src_ptr = reinterpret_cast(src); + float* dst_ptr = reinterpret_cast(val); + *dst_ptr++ = *src_ptr++; + *dst_ptr++ = *src_ptr++; + *dst_ptr++ = *src_ptr++; + *dst_ptr = 0.f; + return reinterpret_cast(src_ptr); +} + +template +static inline void inlSkip(uint8_t*& src) +{ + src += sizeof(T) * N; +} + +template +static inline uint8_t* inlStuff(uint8_t* dst, const T* val) +{ + T* ptr = reinterpret_cast(dst); + *ptr++ = *val; + return reinterpret_cast(ptr); +} + +//// ISoftwareVertexBlend /////////////////////////////////////////////////////// +// Emulate matrix palette operations in software. The big difference between the hardware +// and software versions is we only want to lock the vertex buffer once and blend all the +// verts we're going to in software, so the vertex blend happens once for an entire drawable. +// In hardware, we want the opposite, to break it into managable chunks, manageable meaning +// few enough matrices to fit into hardware registers. So for hardware version, we set up +// our palette, draw a span or few, setup our matrix palette with new matrices, draw, repeat. 
+bool plMetalPipeline::ISoftwareVertexBlend(plDrawableSpans* drawable, const std::vector& visList) +{ + if (IsDebugFlagSet(plPipeDbg::kFlagNoSkinning)) + return true; + + if (drawable->GetSkinTime() == fRenderCnt) + return true; + + const hsBitVector& blendBits = drawable->GetBlendingSpanVector(); + + if (drawable->GetBlendingSpanVector().Empty()) { + // This sucker doesn't have any skinning spans anyway. Just return + drawable->SetSkinTime(fRenderCnt); + return true; + } + + plProfile_BeginTiming(Skin); + + // lock the data buffer + + // First, figure out which buffers we need to blend. + const int kMaxBufferGroups = 20; + const int kMaxVertexBuffers = 20; + static char blendBuffers[kMaxBufferGroups][kMaxVertexBuffers]; + memset(blendBuffers, 0, kMaxBufferGroups * kMaxVertexBuffers * sizeof(**blendBuffers)); + + hsAssert(kMaxBufferGroups >= drawable->GetNumBufferGroups(), "Bigger than we counted on num groups skin."); + + const std::vector& spans = drawable->GetSpanArray(); + int i; + for (i = 0; i < visList.size(); i++) { + if (blendBits.IsBitSet(visList[i])) { + const plVertexSpan& vSpan = *(plVertexSpan*)spans[visList[i]]; + hsAssert(kMaxVertexBuffers > vSpan.fVBufferIdx, "Bigger than we counted on num buffers skin."); + + blendBuffers[vSpan.fGroupIdx][vSpan.fVBufferIdx] = 1; + drawable->SetBlendingSpanVectorBit(visList[i], false); + } + } + + // Now go through each of the group/buffer (= a real vertex buffer) pairs we found, + // and blend into it. We'll lock the buffer once, and then for each span that + // uses it, set the matrix palette and and then do the blend for that span. + // When we've done all the spans for a group/buffer, we unlock it and move on. + int j; + for (i = 0; i < kMaxBufferGroups; i++) { + for (j = 0; j < kMaxVertexBuffers; j++) { + if (blendBuffers[i][j]) { + // Found one. Do the lock. 
+ plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)drawable->GetVertexRef(i, j); + + hsAssert(vRef->fData, "Going into skinning with no place to put results!"); + + uint8_t* destPtr = vRef->fData; + + int k; + for (k = 0; k < visList.size(); k++) { + const plIcicle& span = *(plIcicle*)spans[visList[k]]; + if (span.fGroupIdx == i && span.fVBufferIdx == j) { + plProfile_Inc(NumSkin); + + hsMatrix44* matrixPalette = drawable->GetMatrixPalette(span.fBaseMatrix); + matrixPalette[0] = span.fLocalToWorld; + + uint8_t* ptr = vRef->fOwner->GetVertBufferData(vRef->fIndex); + ptr += span.fVStartIdx * vRef->fOwner->GetVertexSize(); + IBlendVertBuffer((plSpan*)&span, + matrixPalette, span.fNumMatrices, + ptr, + vRef->fOwner->GetVertexFormat(), + vRef->fOwner->GetVertexSize(), + destPtr + span.fVStartIdx * vRef->fVertexSize, + vRef->fVertexSize, + span.fVLength, + span.fLocalUVWChans); + vRef->SetDirty(true); + } + } + // Unlock and move on. + } + } + } + + plProfile_EndTiming(Skin); + + if (drawable->GetBlendingSpanVector().Empty()) { + // Only do this if we've blended ALL of the spans. Thus, this becomes a trivial + // rejection for all the skinning flags being cleared + drawable->SetSkinTime(fRenderCnt); + } + + return true; +} + +//// IBlendVertsIntoBuffer //////////////////////////////////////////////////// +// Given a pointer into a buffer of verts that have blending data in the D3D +// format, blends them into the destination buffer given without the blending +// info. 
+ +void plMetalPipeline::IBlendVertBuffer(plSpan* span, hsMatrix44* matrixPalette, int numMatrices, + const uint8_t* src, uint8_t format, uint32_t srcStride, + uint8_t* dest, uint32_t destStride, uint32_t count, + uint16_t localUVWChans) +{ + float pt_buf[] = {0.f, 0.f, 0.f, 1.f}; + float vec_buf[] = {0.f, 0.f, 0.f, 0.f}; + hsPoint3* pt = reinterpret_cast(pt_buf); + hsVector3* vec = reinterpret_cast(vec_buf); + + uint32_t indices; + float weights[4]; + + // Dropped support for localUVWChans at templatization of code + hsAssert(localUVWChans == 0, "support for skinned UVWs dropped. reimplement me?"); + const size_t uvChanSize = plGBufferGroup::CalcNumUVs(format) * sizeof(float) * 3; + uint8_t numWeights = (format & plGBufferGroup::kSkinWeightMask) >> 4; + + for (uint32_t i = 0; i < count; ++i) { + // Extract data + src = inlExtract(src, pt); + + float weightSum = 0.f; + for (uint8_t j = 0; j < numWeights; ++j) { + src = inlExtract(src, &weights[j]); + weightSum += weights[j]; + } + weights[numWeights] = 1.f - weightSum; + + if (format & plGBufferGroup::kSkinIndices) + src = inlExtract(src, &indices); + else + indices = 1 << 8; + src = inlExtract(src, vec); + + // Destination buffers (float4 for SSE alignment) + simd_float4 destNorm_buf = (simd_float4){0.f, 0.f, 0.f, 0.f}; + simd_float4 destPt_buf = (simd_float4){0.f, 0.f, 0.f, 1.f}; + + simd_float4x4 simdMatrix; + + // Blend + for (uint32_t j = 0; j < numWeights + 1; ++j) { + hsMatrix2SIMD(matrixPalette[indices & 0xFF], &simdMatrix); + if (weights[j]) { + // Note: This bit is different than GL/DirectX. It's using acclerate so this is also accelerated on ARM through NEON or maybe even the Neural Engine. 
+ destPt_buf += simd_mul(*(simd_float4*)pt_buf, simdMatrix) * weights[j]; + destNorm_buf += simd_mul(*(simd_float4*)vec_buf, simdMatrix) * weights[j]; + } + // ISkinVertexSSE41(matrixPalette[indices & 0xFF], weights[j], pt_buf, destPt_buf, vec_buf, destNorm_buf); + indices >>= 8; + } + // Probably don't really need to renormalize this. There errors are + // going to be subtle and "smooth". + /* hsFastMath::NormalizeAppr(destNorm); */ + + // Slam data into position now + dest = inlStuff(dest, reinterpret_cast(&destPt_buf)); + dest = inlStuff(dest, reinterpret_cast(&destNorm_buf)); + + // Jump past colors and UVws + dest += sizeof(uint32_t) * 2 + uvChanSize; + src += sizeof(uint32_t) * 2 + uvChanSize; + } +} + +// Resource checking + +// CheckTextureRef ////////////////////////////////////////////////////// +// Make sure the given layer's texture has background D3D resources allocated. +void plMetalPipeline::CheckTextureRef(plLayerInterface* layer) +{ + plBitmap* bitmap = layer->GetTexture(); + + if (bitmap) { + CheckTextureRef(bitmap); + } +} + +void plMetalPipeline::CheckTextureRef(plBitmap* bitmap) +{ + plMetalTextureRef* tRef = static_cast(bitmap->GetDeviceRef()); + + if (!tRef) { + tRef = static_cast(MakeTextureRef(bitmap)); + } + + // If it's dirty, refill it. + if (tRef->IsDirty()) { + IReloadTexture(bitmap, tRef); + } +} + +hsGDeviceRef* plMetalPipeline::MakeTextureRef(plBitmap* bitmap) +{ + plMetalTextureRef* tRef = static_cast(bitmap->GetDeviceRef()); + + if (!tRef) { + tRef = new plMetalTextureRef(); + + fDevice.SetupTextureRef(bitmap, tRef); + } + + if (!tRef->IsLinked()) { + tRef->Link(&fTextureRefList); + } + + // Make sure it has all resources created. + fDevice.CheckTexture(tRef); + + // If it's dirty, refill it. 
+ if (tRef->IsDirty()) { + IReloadTexture(bitmap, tRef); + } + return tRef; +} + +void plMetalPipeline::IReloadTexture(plBitmap* bitmap, plMetalTextureRef* ref) +{ + plMipmap* mip = plMipmap::ConvertNoRef(bitmap); + if (mip) { + fDevice.MakeTextureRef(ref, mip); + return; + } + + plCubicEnvironmap* cubic = plCubicEnvironmap::ConvertNoRef(bitmap); + if (cubic) { + fDevice.MakeCubicTextureRef(ref, cubic); + return; + } +} + +// CheckVertexBufferRef ///////////////////////////////////////////////////// +// Make sure the buffer group has a valid buffer ref and that it is up to date. +void plMetalPipeline::CheckVertexBufferRef(plGBufferGroup* owner, uint32_t idx) +{ + // First, do we have a device ref at this index? + plMetalVertexBufferRef* vRef = static_cast(owner->GetVertexBufferRef(idx)); + + // If not + if (!vRef) { + // Make the blank ref + vRef = new plMetalVertexBufferRef(); + + fDevice.SetupVertexBufferRef(owner, idx, vRef); + } + + if (!vRef->IsLinked()) { + vRef->Link(&fVtxBuffRefList); + } + + // One way or another, we now have a vbufferref[idx] in owner. + // Now, does it need to be (re)filled? + // If the owner is volatile, then we hold off. It might not + // be visible, and we might need to refill it again if we + // have an overrun of our dynamic buffer. + if (!vRef->Volatile()) { + // If it's a static buffer, allocate a vertex buffer for it. + fDevice.CheckStaticVertexBuffer(vRef, owner, idx); + + // Might want to remove this assert, and replace it with a dirty check + // if we have static buffers that change very seldom rather than never. + hsAssert(!vRef->IsDirty(), "Non-volatile vertex buffers should never get dirty"); + } else { + // Make sure we're going to be ready to fill it. 
+ if (!vRef->fData && (vRef->fFormat != owner->GetVertexFormat())) { + vRef->fData = new uint8_t[vRef->fCount * vRef->fVertexSize]; + fDevice.FillVolatileVertexBufferRef(vRef, owner, idx); + } + } +} + +// CheckIndexBufferRef ///////////////////////////////////////////////////// +// Make sure the buffer group has an index buffer ref and that its data is current. +void plMetalPipeline::CheckIndexBufferRef(plGBufferGroup* owner, uint32_t idx) +{ + plMetalIndexBufferRef* iRef = static_cast(owner->GetIndexBufferRef(idx)); + + if (!iRef) { + // Create one from scratch. + iRef = new plMetalIndexBufferRef(); + + fDevice.SetupIndexBufferRef(owner, idx, iRef); + } + + if (!iRef->IsLinked()) { + iRef->Link(&fIdxBuffRefList); + } + + // Make sure it has all resources created. + fDevice.CheckIndexBuffer(iRef); + + // If it's dirty, refill it. + if (iRef->IsDirty()) { + fDevice.FillIndexBufferRef(iRef, owner, idx); + } +} + +//// IGetBufferFormatSize ///////////////////////////////////////////////////// +// Calculate the vertex stride from the given format. 
+uint32_t plMetalPipeline::IGetBufferFormatSize(uint8_t format) const +{ + uint32_t size = sizeof(float) * 6 + sizeof(uint32_t) * 2; // Position and normal, and two packed colors + + switch (format & plGBufferGroup::kSkinWeightMask) { + case plGBufferGroup::kSkinNoWeights: + break; + case plGBufferGroup::kSkin1Weight: + size += sizeof(float); + break; + default: + hsAssert(false, "Invalid skin weight value in IGetBufferFormatSize()"); + } + + size += sizeof(float) * 3 * plGBufferGroup::CalcNumUVs(format); + + return size; +} + +void plMetalPipeline::plMetalPipelineCurrentState::Reset() +{ + fCurrentPipelineState = nullptr; + fCurrentDepthStencilState = nullptr; + fCurrentVertexBuffer = nullptr; + fCurrentCullMode.reset(); + + for (auto& layer : layerStates) { + layer.clampFlag = hsGMatState::hsGMatClampFlags(-1); + } +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h new file mode 100644 index 0000000000..4d819490c7 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -0,0 +1,295 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ +#ifndef _plMetalPipeline_inc_ +#define _plMetalPipeline_inc_ + +#include +#include +#include + +#include "ShaderTypes.h" +#include "plMetalDevice.h" +#include "plPipeline/hsG3DDeviceSelector.h" +#include "plPipeline/pl3DPipeline.h" + +class plIcicle; +class plPlate; +class plMetalMaterialShaderRef; +class plAuxSpan; +class plMetalVertexShader; +class plMetalFragmentShader; +class plShadowCaster; + +const uint kMaxSkinWeightsPerMaterial = 3; + +class plMetalEnumerate +{ +public: + plMetalEnumerate() + { + hsG3DDeviceSelector::AddDeviceEnumerator(&plMetalEnumerate::Enumerate); + } + +private: + static void Enumerate(std::vector& records); +}; + +//// Helper Classes /////////////////////////////////////////////////////////// + +//// The RenderPrimFunc lets you have one function which does a lot of stuff +// around the actual call to render whatever type of primitives you have, instead +// of duplicating everything because the one line to 
render is different. +class plRenderPrimFunc +{ +public: + virtual bool RenderPrims() const = 0; // return true on error +}; + +class plMetalPipeline : public pl3DPipeline +{ +public: + // caching the frag function here so that the shader compiler can quickly access it + MTL::Function* fFragFunction; + +protected: + friend class plMetalDevice; + friend class plMetalPlateManager; + friend class plMetalMaterialShaderRef; + friend class plRenderTriListFunc; + friend class plMetalTextFont; + + plMetalMaterialShaderRef* fMatRefList; + plMetalRenderTargetRef* fRenderTargetRefList; + +public: + plMetalPipeline(hsWindowHndl display, hsWindowHndl window, const hsG3DDeviceModeRecord* devMode); + ~plMetalPipeline(); + + CLASSNAME_REGISTER(plMetalPipeline); + GETINTERFACE_ANY(plMetalPipeline, plPipeline); + + /* All of these virtual methods are not implemented by pl3DPipeline and + * need to be re-implemented here! + */ + + /*** VIRTUAL METHODS ***/ + bool PreRender(plDrawable* drawable, std::vector& visList, plVisMgr* visMgr = nullptr) override; + bool PrepForRender(plDrawable* drawable, std::vector& visList, plVisMgr* visMgr = nullptr) override; + plTextFont* MakeTextFont(ST::string face, uint16_t size) override; + bool OpenAccess(plAccessSpan& dst, plDrawableSpans* d, const plVertexSpan* span, bool readOnly) override; + bool CloseAccess(plAccessSpan& acc) override; + void PushRenderRequest(plRenderRequest* req) override; + void PopRenderRequest(plRenderRequest* req) override; + void ClearRenderTarget(plDrawable* d) override; + void ClearRenderTarget(const hsColorRGBA* col = nullptr, const float* depth = nullptr) override; + hsGDeviceRef* MakeRenderTargetRef(plRenderTarget* owner) override; + bool BeginRender() override; + bool EndRender() override; + void RenderScreenElements() override; + bool IsFullScreen() const override; + void Resize(uint32_t width, uint32_t height) override; + void LoadResources() override; + bool SetGamma(float eR, float eG, float eB) override; + 
bool SetGamma(const uint16_t* const tabR, const uint16_t* const tabG, const uint16_t* const tabB) override; + bool SetGamma10(const uint16_t* const tabR, const uint16_t* const tabG, const uint16_t* const tabB) override; + bool Supports10BitGamma() const override { return true; }; + bool CaptureScreen(plMipmap* dest, bool flipVertical = false, uint16_t desiredWidth = 0, uint16_t desiredHeight = 0) override; + plMipmap* ExtractMipMap(plRenderTarget* targ) override; + void GetSupportedDisplayModes(std::vector* res, int ColorDepth = 32) override; + int GetMaxAnisotropicSamples() override; + int GetMaxAntiAlias(int Width, int Height, int ColorDepth) override; + void ResetDisplayDevice(int Width, int Height, int ColorDepth, bool Windowed, int NumAASamples, int MaxAnisotropicSamples, bool vSync = false) override; + void RenderSpans(plDrawableSpans* ice, const std::vector& visList) override; + void ISetupTransforms(plDrawableSpans* drawable, const plSpan& span, hsMatrix44& lastL2W); + bool ICheckDynBuffers(plDrawableSpans* drawable, plGBufferGroup* group, const plSpan* spanBase); + bool IRefreshDynVertices(plGBufferGroup* group, plMetalVertexBufferRef* vRef); + void IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, + hsGDeviceRef* ib, hsGMaterial* material, + uint32_t vStart, uint32_t vLength, + uint32_t iStart, uint32_t iLength); + void IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux); + void IRenderAuxSpans(const plSpan& span); + bool IHandleMaterialPass(hsGMaterial* material, uint32_t pass, const plSpan* currSpan, const plMetalVertexBufferRef* vRef, const bool allowShaders = true); + plMetalDevice* GetMetalDevice() const; + + // Create and/or Refresh geometry buffers + void CheckVertexBufferRef(plGBufferGroup* owner, uint32_t idx) override; + void CheckIndexBufferRef(plGBufferGroup* owner, uint32_t idx) override; + void CheckTextureRef(plLayerInterface* lay) override; + void CheckTextureRef(plBitmap* bitmap); + hsGDeviceRef* MakeTextureRef(plBitmap* 
bitmap); + void IReloadTexture(plBitmap* bitmap, plMetalTextureRef* ref); + + uint32_t IGetBufferFormatSize(uint8_t format) const; + + plRenderTarget* PopRenderTarget() override; + + MTL::PixelFormat GetFramebufferFormat() { return fDevice.GetFramebufferFormat(); }; + void SetFramebufferFormat(MTL::PixelFormat format) { fDevice.SetFramebufferFormat(format); }; + +private: + VertexUniforms* fCurrentRenderPassUniforms; + + bool fIsFullscreen; + + void FindFragFunction(); + + void ISelectLights(const plSpan* span, plMetalMaterialShaderRef* mRef, bool proj = false); + void IEnableLight(size_t i, plLightInfo* light); + void IDisableLight(size_t i); + void IScaleLight(size_t i, float scale); + void ICalcLighting(plMetalMaterialShaderRef* mRef, const plLayerInterface* currLayer, const plSpan* currSpan); + void IHandleBlendMode(hsGMatState flags); + void IHandleZMode(hsGMatState flags); + + void IDrawPlate(plPlate* plate); + void IPreprocessAvatarTextures(); + void IDrawClothingQuad(float x, float y, float w, float h, + float uOff, float vOff, plMipmap* tex); + void IClearShadowSlaves(); + + void ICreateDeviceObjects(); + void IReleaseDynDeviceObjects(); + bool ICreateDynDeviceObjects(); + void IReleaseDynamicBuffers(); + void IReleaseDeviceObjects(); + + bool IIsViewLeftHanded(); + void ISetCullMode(bool flip = false); + + plLayerInterface* IPushOverBaseLayer(plLayerInterface* li); + plLayerInterface* IPopOverBaseLayer(plLayerInterface* li); + plLayerInterface* IPushOverAllLayer(plLayerInterface* li); + plLayerInterface* IPopOverAllLayer(plLayerInterface* li); + + void IPushPiggyBacks(hsGMaterial* mat); + void IPopPiggyBacks(); + void IPushProjPiggyBack(plLayerInterface* li); + void IPopProjPiggyBacks(); + size_t ISetNumActivePiggyBacks(); + bool ICheckAuxBuffers(const plAuxSpan* span); + + void ISetPipeConsts(plShader* shader); + bool ISetShaders(const plMetalVertexBufferRef* vRef, const hsGMatState blendMode, plShader* vShader, plShader* pShader); + + bool 
ISoftwareVertexBlend(plDrawableSpans* drawable, const std::vector& visList); + void IBlendVertBuffer(plSpan* span, hsMatrix44* matrixPalette, int numMatrices, + const uint8_t* src, uint8_t format, uint32_t srcStride, + uint8_t* dest, uint32_t destStride, uint32_t count, + uint16_t localUVWChans); + + plMetalVertexShader* fVShaderRefList; + plMetalFragmentShader* fPShaderRefList; + bool IPrepShadowCaster(const plShadowCaster* caster); + bool IRenderShadowCaster(plShadowSlave* slave); + void IPreprocessShadows(); + bool IPushShadowCastState(plShadowSlave* slave); + plRenderTarget* IFindRenderTarget(uint32_t& width, uint32_t& height, bool ortho); + bool IPopShadowCastState(plShadowSlave* slave); + void IResetRenderTargetPools(); + void IRenderShadowCasterSpan(plShadowSlave* slave, plDrawableSpans* drawable, const plIcicle& span); + plMetalTextureRef* fULutTextureRef; + void IMakeRenderTargetPools(); + hsGDeviceRef* SharedRenderTargetRef(plRenderTarget* share, plRenderTarget* owner); + void IRenderShadowsOntoSpan(const plRenderPrimFunc& render, const plSpan* span, hsGMaterial* mat, plMetalVertexBufferRef* vRef); + void ISetupShadowRcvTextureStages(hsGMaterial* mat); + void ISetupShadowSlaveTextures(plShadowSlave* slave); + void ISetShadowLightState(hsGMaterial* mat); + void ISetupShadowState(plShadowSlave* slave, plShadowState& shadowState); + void IDisableLightsForShadow(); + void IReleaseRenderTargetPools(); + void IRenderProjectionEach(const plRenderPrimFunc& render, hsGMaterial* material, int iPass, const plSpan& span, const plMetalVertexBufferRef* vRef); + void IRenderProjections(const plRenderPrimFunc& render, const plMetalVertexBufferRef* vRef); + void IRenderProjection(const plRenderPrimFunc& render, plLightInfo* li, const plMetalVertexBufferRef* vRef); + + void ISetLayer(uint32_t lay); + + // Shadows + std::vector fRenderTargetPool512; + std::vector fRenderTargetPool256; + std::vector fRenderTargetPool128; + std::vector fRenderTargetPool64; + std::vector 
fRenderTargetPool32;
+    enum
+    {
+        kMaxRenderTargetNext = 10
+    };
+    uint32_t fRenderTargetNext[kMaxRenderTargetNext];
+
+    std::vector fProjEach;
+    std::vector fProjAll;
+
+    uint32_t fCurrRenderLayer;
+
+    void PushCurrentLightSources();
+    void PopCurrentLightSources();
+    plMetalLights fLights;
+    std::vector fLightSourceStack;
+
+    static plMetalEnumerate enumerator;
+
+    plTextFont* fTextFontRefList;
+
+    NS::AutoreleasePool* fCurrentPool;
+
+    /// Describes the state for the "fixed function" shader.
+    struct plMetalPipelineCurrentState
+    {
+        // notes state of a given layer for a draw pass
+        // index is the offset from the current root layer
+        // for the draw pass, not the overall index in the
+        // material
+        struct plMetalPipelineLayerState
+        {
+            hsGMatState::hsGMatClampFlags clampFlag;
+        } layerStates[8];
+
+        std::optional fCurrentCullMode;
+        const MTL::RenderPipelineState* fCurrentPipelineState;
+        MTL::Buffer* fCurrentVertexBuffer;
+        MTL::DepthStencilState* fCurrentDepthStencilState;
+
+        void Reset();
+    } fState;
+};
+
+#endif // _plMetalPipeline_inc_
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp
new file mode 100644
index 0000000000..1225f4c452
--- /dev/null
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp
@@ -0,0 +1,487 @@
+/*==LICENSE==*
+
+CyanWorlds.com Engine - MMOG client, server and tools
+Copyright (C) 2011 Cyan Worlds, Inc.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+ +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include "plMetalPipelineState.h" + +#include "plDrawable/plGBufferGroup.h" +#include "plGImage/plCubicEnvironmap.h" +#include "plGImage/plMipmap.h" +#include "plMetalDevice.h" +#include "plMetalMaterialShaderRef.h" +#include "plPipeline/plCubicRenderTarget.h" +#include "plPipeline/plRenderTarget.h" +#include "plSurface/hsGMaterial.h" +#include "plSurface/plLayerInterface.h" + +size_t plMetalPipelineState::GetHash() const +{ + return std::hash()(GetID()); +} + +plMetalPipelineState::plMetalPipelineState(plMetalDevice* device) + : fDevice(device) +{ +} + +plMetalRenderSpanPipelineState::plMetalRenderSpanPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef) + : plMetalPipelineState(device) +{ + fNumUVs = plGBufferGroup::CalcNumUVs(vRef->fFormat); + fNumWeights = (vRef->fFormat & plGBufferGroup::kSkinWeightMask) >> 4; + fHasSkinIndices = (vRef->fFormat & plGBufferGroup::kSkinIndices); +} + +void plMetalRenderSpanPipelineState::GetFunctionConstants(MTL::FunctionConstantValues* constants) const +{ + ushort numUVs = fNumUVs; + constants->setConstantValue(&numUVs, MTL::DataTypeUShort, FunctionConstantNumUVs); + constants->setConstantValue(&fNumWeights, MTL::DataTypeUChar, FunctionConstantNumWeights); +} + +size_t plMetalRenderSpanPipelineState::GetHash() const +{ + size_t h1 = std::hash()(fNumUVs); + size_t h2 = std::hash()(fNumWeights); + size_t h3 = std::hash()(fHasSkinIndices); + + return h1 ^ h2 ^ h3 ^ plMetalPipelineState::GetHash(); +} + +plMetalDevice::plMetalLinkedPipeline* plMetalPipelineState::GetRenderPipelineState() +{ + return fDevice->PipelineState(this); +} + +void plMetalPipelineState::PrewarmRenderPipelineState() +{ + fDevice->PrewarmPipelineStateFor(this); +} + +plMetalMaterialPassPipelineState::plMetalMaterialPassPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef, const plMetalFragmentShaderDescription& description) + : 
plMetalRenderSpanPipelineState(device, vRef) +{ + fFragmentShaderDescription = description; + fFragmentShaderDescription.CacheHash(); +} + +void plMetalMaterialPassPipelineState::GetFunctionConstants(MTL::FunctionConstantValues* constants) const +{ + plMetalRenderSpanPipelineState::GetFunctionConstants(constants); + constants->setConstantValue(&fFragmentShaderDescription.fNumLayers, MTL::DataTypeUChar, FunctionConstantNumLayers); + constants->setConstantValues(&fFragmentShaderDescription.fPassTypes, MTL::DataTypeUChar, NS::Range(FunctionConstantSources, 8)); + constants->setConstantValues(&fFragmentShaderDescription.fBlendModes, MTL::DataTypeUInt, NS::Range(FunctionConstantBlendModes, 8)); + constants->setConstantValues(&fFragmentShaderDescription.fMiscFlags, MTL::DataTypeUInt, NS::Range(FunctionConstantLayerFlags, 8)); +} + +size_t plMetalMaterialPassPipelineState::GetHash() const +{ + size_t value = plMetalRenderSpanPipelineState::GetHash(); + value ^= fFragmentShaderDescription.GetHash(); + + return value; +} + +void plMetalRenderSpanPipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) +{ + int vertOffset = 0; + int skinWeightOffset = vertOffset + sizeof(hsPoint3); + if (fHasSkinIndices) { + skinWeightOffset += sizeof(uint32_t); + } + int normOffset = skinWeightOffset + (sizeof(float) * fNumWeights); + int colorOffset = normOffset + sizeof(hsPoint3); + int baseUvOffset = colorOffset + (sizeof(uint32_t) * 2); + int stride = baseUvOffset + sizeof(hsPoint3) * fNumUVs; + + vertexDescriptor->attributes()->object(VertexAttributePosition)->setFormat(MTL::VertexFormatFloat3); + vertexDescriptor->attributes()->object(VertexAttributePosition)->setBufferIndex(0); + vertexDescriptor->attributes()->object(VertexAttributePosition)->setOffset(vertOffset); + + vertexDescriptor->attributes()->object(VertexAttributeNormal)->setFormat(MTL::VertexFormatFloat3); + vertexDescriptor->attributes()->object(VertexAttributeNormal)->setBufferIndex(0); + 
vertexDescriptor->attributes()->object(VertexAttributeNormal)->setOffset(normOffset); + + if (fNumWeights > 0) { + int weightOneOffset = skinWeightOffset; + + vertexDescriptor->attributes()->object(VertexAttributeWeights)->setFormat(MTL::VertexFormatFloat); + vertexDescriptor->attributes()->object(VertexAttributeWeights)->setBufferIndex(0); + vertexDescriptor->attributes()->object(VertexAttributeWeights)->setOffset(weightOneOffset); + } + + for (int i = 0; i < fNumUVs; i++) { + vertexDescriptor->attributes()->object(VertexAttributeTexcoord + i)->setFormat(MTL::VertexFormatFloat3); + vertexDescriptor->attributes()->object(VertexAttributeTexcoord + i)->setBufferIndex(0); + vertexDescriptor->attributes()->object(VertexAttributeTexcoord + i)->setOffset(baseUvOffset + (i * sizeof(hsPoint3))); + } + + vertexDescriptor->attributes()->object(VertexAttributeColor)->setFormat(MTL::VertexFormatUChar4); + vertexDescriptor->attributes()->object(VertexAttributeColor)->setBufferIndex(0); + vertexDescriptor->attributes()->object(VertexAttributeColor)->setOffset(colorOffset); + + vertexDescriptor->layouts()->object(VertexAttributePosition)->setStride(stride); +} + +void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode, MTL::RenderPipelineColorAttachmentDescriptor* descriptor) +{ + if (blendMode & hsGMatState::kBlendNoColor) { + // printf("glBlendFunc(GL_ZERO, GL_ONE);\n"); + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorZero); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorZero); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); + return; + } + switch (blendMode & hsGMatState::kBlendMask) { + // Detail is just a special case of alpha, handled in construction of the texture + // mip chain by making higher levels of the chain more transparent. 
+ case hsGMatState::kBlendDetail: + case hsGMatState::kBlendAlpha: + if (blendMode & hsGMatState::kBlendInvertFinalAlpha) { + if (blendMode & hsGMatState::kBlendAlphaPremultiplied) { + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); + } else { + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); + } + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorSourceAlpha); + } else { + if (blendMode & hsGMatState::kBlendAlphaPremultiplied) { + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); + } else { + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); + } + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); + } + break; + + // Multiply the final color onto the frame buffer. + case hsGMatState::kBlendMult: + if (blendMode & hsGMatState::kBlendInvertFinalColor) { + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorZero); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorZero); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceColor); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorOneMinusSourceColor); + } else { + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorZero); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorZero); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorSourceColor); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorSourceColor); + } + break; + + // Add final color to FB. + case hsGMatState::kBlendAdd: + descriptor->setRgbBlendOperation(MTL::BlendOperationAdd); + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); + break; + + // Multiply final color by FB color and add it into the FB. + case hsGMatState::kBlendMADD: + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorDestinationColor); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); + break; + + // Final color times final alpha, added into the FB. 
+ case hsGMatState::kBlendAddColorTimesAlpha: + if (blendMode & hsGMatState::kBlendInvertFinalAlpha) { + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); + } else { + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorSourceAlpha); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); + } + break; + + // Overwrite final color onto FB + case 0: + descriptor->setRgbBlendOperation(MTL::BlendOperationAdd); + descriptor->setAlphaBlendOperation(MTL::BlendOperationAdd); + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorZero); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorZero); + break; + + default: { + /*hsAssert(false, "Too many blend modes specified in material"); + plLayer* lay = plLayer::ConvertNoRef(fCurrMaterial->GetLayer(fCurrLayerIdx)->BottomOfStack()); + if( lay ) + { + if( lay->GetBlendFlags() & hsGMatState::kBlendAlpha ) + { + lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAlpha); + } + else + { + lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAdd); + } + }*/ + } break; + } +} + +MTL::Function* plMetalMaterialPassPipelineState::GetVertexFunction(MTL::Library* library) +{ + NS::Error* error = nullptr; + MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init()->autorelease(); + GetFunctionConstants(constants); + MTL::Function* function = library->newFunction( + NS::String::string("pipelineVertexShader", NS::ASCIIStringEncoding), 
+ MakeFunctionConstants(), + &error) + ->autorelease(); + return function; +} + +MTL::Function* plMetalMaterialPassPipelineState::GetFragmentFunction(MTL::Library* library) +{ + return library->newFunction( + NS::String::string("pipelineFragmentShader", NS::ASCIIStringEncoding), + MakeFunctionConstants(), + (NS::Error**)nullptr) + ->autorelease(); +} + +plMetalMaterialPassPipelineState::~plMetalMaterialPassPipelineState() +{ +} + +const NS::String* plMetalMaterialPassPipelineState::GetDescription() +{ + return MTLSTR("Material Pipeline"); +} + +void plMetalMaterialPassPipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) +{ + uint32_t blendMode = fFragmentShaderDescription.fBlendModes[0]; + ConfigureBlendMode(blendMode, descriptor); +} + +void plMetalFragmentShaderDescription::Populate(const plLayerInterface* layPtr, const uint8_t index) +{ + fBlendModes[index] = layPtr->GetBlendFlags(); + fMiscFlags[index] = layPtr->GetMiscFlags(); + PopulateTextureInfo(layPtr, index); +} + +void plMetalFragmentShaderDescription::PopulateTextureInfo(const plLayerInterface* layPtr, const uint8_t index) +{ + plBitmap* texture = layPtr->GetTexture(); + if (texture != nullptr) { + if (plCubicEnvironmap::ConvertNoRef(texture) != nullptr || plCubicRenderTarget::ConvertNoRef(texture) != nullptr) { + fPassTypes[index] = PassTypeCubicTexture; + } else if (plMipmap::ConvertNoRef(texture) != nullptr || plRenderTarget::ConvertNoRef(texture) != nullptr) { + fPassTypes[index] = PassTypeTexture; + } else { + fPassTypes[index] = PassTypeColor; + } + + } else { + fPassTypes[index] = PassTypeColor; + } +} + +bool plMetalMaterialPassPipelineState::IsEqual(const plMetalPipelineState& p) const +{ + return plMetalRenderSpanPipelineState::IsEqual(p) && static_cast(&p)->fFragmentShaderDescription == fFragmentShaderDescription; +} + +MTL::Function* plMetalRenderShadowPipelineState::GetVertexFunction(MTL::Library* library) +{ + return library->newFunction( + 
NS::String::string("shadowCastVertexShader", NS::ASCIIStringEncoding), + MakeFunctionConstants(), + (NS::Error**)nullptr) + ->autorelease(); +} + +MTL::Function* plMetalRenderShadowPipelineState::GetFragmentFunction(MTL::Library* library) +{ + return library->newFunction( + NS::String::string("shadowCastFragmentShader", NS::ASCIIStringEncoding), + MakeFunctionConstants(), + (NS::Error**)nullptr) + ->autorelease(); +} + +void plMetalRenderShadowPipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) +{ + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorZero); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceColor); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorZero); +} + +const MTL::Function* plMetalRenderShadowCasterPipelineState::GetVertexFunction(MTL::Library* library) +{ + NS::Error* error = nullptr; + MTL::Function* function = library->newFunction( + MTLSTR("shadowVertexShader"), + MakeFunctionConstants(), + &error) + ->autorelease(); + return function; +} + +const MTL::Function* plMetalRenderShadowCasterPipelineState::GetFragmentFunction(MTL::Library* library) +{ + NS::Error* error = nullptr; + MTL::Function* function = library->newFunction( + MTLSTR("shadowFragmentShader"), + MakeFunctionConstants(), + &error) + ->autorelease(); + return function; +} + +const MTL::Function* plMetalDynamicMaterialPipelineState::GetVertexFunction(MTL::Library* library) +{ + MTL::FunctionConstantValues* functionConstants = MakeFunctionConstants(); + MTL::Function* vertFunction; + // map the original engine vertex shader id to the pixel shader function + switch (fVertexShaderID) { + case plShaderID::vs_WaveFixedFin7: + vertFunction = library->newFunction( + NS::String::string("vs_WaveFixedFin7", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); + break; + case plShaderID::vs_CompCosines: + vertFunction = 
library->newFunction( + NS::String::string("vs_CompCosines", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); + break; + case plShaderID::vs_BiasNormals: + vertFunction = library->newFunction( + NS::String::string("vs_BiasNormals", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); + break; + case plShaderID::vs_GrassShader: + vertFunction = library->newFunction( + NS::String::string("vs_GrassShader", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); + break; + case plShaderID::vs_WaveDecEnv_7: + vertFunction = library->newFunction( + NS::String::string("vs_WaveDecEnv_7", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); + break; + case plShaderID::vs_WaveDec1Lay_7: + vertFunction = library->newFunction( + NS::String::string("vs_WaveDec1Lay_7", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); + break; + case plShaderID::vs_WaveRip7: + vertFunction = library->newFunction( + NS::String::string("vs_WaveRip7", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); + break; + default: + hsAssert(0, "unknown shader requested"); + } + return vertFunction; +} + +const MTL::Function* plMetalDynamicMaterialPipelineState::GetFragmentFunction(MTL::Library* library) +{ + MTL::FunctionConstantValues* functionConstants = MakeFunctionConstants(); + MTL::Function* fragFunction; + // map the original engine pixel shader id to the pixel shader function + switch (fFragmentShaderID) { + case plShaderID::ps_WaveFixed: + fragFunction = library->newFunction( + NS::String::string("ps_WaveFixed", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); + break; + case plShaderID::ps_MoreCosines: + fragFunction = library->newFunction( + NS::String::string("ps_CompCosines", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); + break; + case plShaderID::ps_BiasNormals: + fragFunction = library->newFunction( + 
NS::String::string("ps_BiasNormals", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); + break; + case plShaderID::ps_GrassShader: + fragFunction = library->newFunction( + NS::String::string("ps_GrassShader", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); + break; + case plShaderID::ps_WaveDecEnv: + fragFunction = library->newFunction( + NS::String::string("ps_WaveDecEnv", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); + break; + case plShaderID::ps_CbaseAbase: + fragFunction = library->newFunction( + NS::String::string("ps_CbaseAbase", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); + break; + case plShaderID::ps_WaveRip: + fragFunction = library->newFunction( + NS::String::string("ps_WaveRip", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); + break; + default: + hsAssert(0, "unknown shader requested"); + } + return fragFunction; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h new file mode 100644 index 0000000000..0f5cf4114d --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -0,0 +1,429 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#ifndef plMetalPipelineState_h +#define plMetalPipelineState_h + +#include + +#include + +#include "plMetalDevice.h" +#include "plSurface/plShaderTable.h" + +enum plMetalPipelineType +{ + // Unknown is for abstract types, don't use it + Unknown = 0, + MaterialShader, + ShadowCaster, + ShadowRender, + Clear, + Dynamic, + Text, + Plate +}; + +//MARK: Base pipeline state + +class plMetalPipelineState +{ +public: + plMetalPipelineState(plMetalDevice* device); + virtual ~plMetalPipelineState() = default; + + plMetalDevice::plMetalLinkedPipeline* GetRenderPipelineState(); + void PrewarmRenderPipelineState(); + bool operator==(const plMetalPipelineState& p) const + { + if ((&p)->GetID() != GetID()) { + return false; + } else { + return IsEqual(p); + } + } + virtual size_t GetHash() const; + virtual bool IsEqual(const plMetalPipelineState& p) const = 0; + virtual uint16_t GetID() const { return plMetalPipelineType::Unknown; }; + virtual 
plMetalPipelineState* Clone() = 0; + + // + virtual const MTL::Function* GetVertexFunction(MTL::Library* library) = 0; + virtual const MTL::Function* GetFragmentFunction(MTL::Library* library) = 0; + virtual const NS::String* GetDescription() = 0; + + virtual void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) = 0; + virtual void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) = 0; + +protected: + plMetalDevice* fDevice; + virtual void GetFunctionConstants(MTL::FunctionConstantValues*) const = 0; + MTL::FunctionConstantValues* MakeFunctionConstants() const + { + MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init()->autorelease(); + GetFunctionConstants(constants); + return constants; + } +}; + +//MARK: Abstract FVF vertex shader program parent type + +class plMetalRenderSpanPipelineState : public plMetalPipelineState +{ +public: + plMetalRenderSpanPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef); + bool IsEqual(const plMetalPipelineState& p) const override + { + const plMetalRenderSpanPipelineState* renderSpanPipelineSate = static_cast(&p); + if (!renderSpanPipelineSate) { + return false; + } + return renderSpanPipelineSate->fNumUVs == fNumUVs && renderSpanPipelineSate->fNumWeights == fNumWeights && renderSpanPipelineSate->fHasSkinIndices == fHasSkinIndices; + }; + size_t GetHash() const override; + + void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override = 0; + void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) override; + + void ConfigureBlendMode(const uint32_t blendMode, MTL::RenderPipelineColorAttachmentDescriptor* descriptor); + +protected: + uint8_t fNumUVs; + uint8_t fNumWeights; + bool fHasSkinIndices; + void GetFunctionConstants(MTL::FunctionConstantValues*) const override; + MTL::FunctionConstantValues* MakeFunctionConstants() + { + MTL::FunctionConstantValues* constants = 
MTL::FunctionConstantValues::alloc()->init()->autorelease(); + GetFunctionConstants(constants); + return constants; + } +}; + +//MARK: Fixed function emulating material program + +struct plMetalFragmentShaderDescription +{ + uint8_t fPassTypes[8]; + uint32_t fBlendModes[8]; + uint32_t fMiscFlags[8]; + uint8_t fNumLayers; + + size_t hash; + + bool operator==(const plMetalFragmentShaderDescription& p) const + { + bool match = fNumLayers == p.fNumLayers && memcmp(fPassTypes, p.fPassTypes, sizeof(fPassTypes)) == 0 && memcmp(fBlendModes, p.fBlendModes, sizeof(fBlendModes)) == 0 && memcmp(fMiscFlags, p.fMiscFlags, sizeof(fMiscFlags)) == 0; + return match; + } + + void CacheHash() + { + if (!hash) + hash = GetHash(); + } + + size_t GetHash() const + { + if (hash) + return hash; + + std::size_t value = std::hash()(fNumLayers); + value ^= std::hash()(fNumLayers); + + for (int i = 0; i < 8; i++) { + value ^= std::hash()(fBlendModes[i]); + } + + for (int i = 0; i < 8; i++) { + value ^= std::hash()(fMiscFlags[i]); + } + + for (int i = 0; i < 8; i++) { + value ^= std::hash()(fPassTypes[i]); + } + + return value; + } + + void Populate(const plLayerInterface* layPtr, const uint8_t index); + void PopulateTextureInfo(const plLayerInterface* layPtr, const uint8_t index); +}; + +template <> +struct std::hash +{ + size_t operator()(plMetalFragmentShaderDescription const& s) const noexcept + { + return s.GetHash(); + } +}; + +class plMetalMaterialPassPipelineState : public plMetalRenderSpanPipelineState +{ +public: + plMetalMaterialPassPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef, const plMetalFragmentShaderDescription& description); + size_t GetHash() const override; + MTL::Function* GetVertexFunction(MTL::Library* library) override; + MTL::Function* GetFragmentFunction(MTL::Library* library) override; + + const NS::String* GetDescription() override; + + void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override; + + bool 
IsEqual(const plMetalPipelineState& p) const override;
+
+    uint16_t GetID() const override { return plMetalPipelineType::MaterialShader; };
+
+    plMetalPipelineState* Clone() override
+    {
+        return new plMetalMaterialPassPipelineState(*this);
+    }
+    ~plMetalMaterialPassPipelineState();
+    void GetFunctionConstants(MTL::FunctionConstantValues*) const override;
+
+protected:
+    plMetalFragmentShaderDescription fFragmentShaderDescription;
+};
+
+//MARK: Shadow casting program
+
+// Pipeline state used when rendering a span as a shadow caster.
+// The vertex and fragment functions are defined out of line.
+class plMetalRenderShadowCasterPipelineState : public plMetalRenderSpanPipelineState
+{
+public:
+    plMetalRenderShadowCasterPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef)
+        : plMetalRenderSpanPipelineState(device, vRef)
+    {
+    }
+    const MTL::Function* GetVertexFunction(MTL::Library* library) override;
+    const MTL::Function* GetFragmentFunction(MTL::Library* library) override;
+
+    const NS::String* GetDescription() override
+    {
+        return MTLSTR("Shadow Caster Pipeline");
+    };
+
+    // Source RGB factor is One, destination RGB factor is the source alpha.
+    void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override
+    {
+        descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne);
+        descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorSourceAlpha);
+    };
+    uint16_t GetID() const override { return plMetalPipelineType::ShadowCaster; };
+
+    plMetalPipelineState* Clone() override
+    {
+        return new plMetalRenderShadowCasterPipelineState(*this);
+    }
+};
+
+//MARK: Shadow rendering program
+
+// Pipeline state used when applying a shadow to a span. It reuses the
+// material pass state and overrides the shader functions and blending.
+class plMetalRenderShadowPipelineState : public plMetalMaterialPassPipelineState
+{
+public:
+    plMetalRenderShadowPipelineState(plMetalDevice* device, plMetalVertexBufferRef* vRef, const plMetalFragmentShaderDescription& description)
+        : plMetalMaterialPassPipelineState(device, vRef, description)
+    {
+    }
+
+    const NS::String* GetDescription() override
+    {
+        return MTLSTR("Shadow Span Render Pipeline");
+    };
+    MTL::Function* GetVertexFunction(MTL::Library* library) override;
+    MTL::Function* GetFragmentFunction(MTL::Library* library) override;
+    void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override;
+    uint16_t GetID() const override { return plMetalPipelineType::ShadowRender; };
+
+    plMetalPipelineState* Clone() override
+    {
+        return new plMetalRenderShadowPipelineState(*this);
+    }
+};
+
+//MARK: Shader based render programs
+
+// Pipeline state for spans drawn with an explicit vertex/fragment shader
+// pair (identified by plShaderID) and a blend mode.
+class plMetalDynamicMaterialPipelineState : public plMetalRenderSpanPipelineState
+{
+public:
+    plMetalDynamicMaterialPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef, uint32_t blendMode, plShaderID::ID vertexShaderID, plShaderID::ID fragmentShaderID)
+        : plMetalRenderSpanPipelineState(device, vRef),
+          fVertexShaderID(vertexShaderID),
+          fFragmentShaderID(fragmentShaderID),
+          fBlendMode(blendMode)
+    {
+    };
+
+    uint16_t GetID() const override { return plMetalPipelineType::Dynamic; };
+
+    plMetalPipelineState* Clone() override
+    {
+        return new plMetalDynamicMaterialPipelineState(*this);
+    }
+
+    // Two dynamic states are equal when the span state matches and both
+    // shader IDs and the blend mode are identical.
+    bool IsEqual(const plMetalPipelineState& p) const override
+    {
+        // static_cast on the address of a reference can never produce
+        // nullptr, so the type tag is checked before the downcast instead.
+        if (p.GetID() != GetID()) {
+            return false;
+        }
+        const plMetalDynamicMaterialPipelineState* dynamicState = static_cast<const plMetalDynamicMaterialPipelineState*>(&p);
+        return plMetalRenderSpanPipelineState::IsEqual(p) && dynamicState->fFragmentShaderID == fFragmentShaderID && dynamicState->fVertexShaderID == fVertexShaderID && dynamicState->fBlendMode == fBlendMode;
+    }
+
+    // Combines every field compared by IsEqual. Each field is mixed in
+    // exactly once: XOR-ing the same hash twice would cancel it out.
+    size_t GetHash() const override
+    {
+        std::size_t value = std::hash<plShaderID::ID>()(fFragmentShaderID);
+        value ^= std::hash<plShaderID::ID>()(fVertexShaderID);
+        value ^= std::hash<uint32_t>()(fBlendMode);
+
+        return value ^ plMetalRenderSpanPipelineState::GetHash();
+    }
+
+    const MTL::Function* GetVertexFunction(MTL::Library* library) override;
+    const MTL::Function* GetFragmentFunction(MTL::Library* library) override;
+
+    const NS::String* GetDescription() override
+    {
+        return MTLSTR("Dynamic Shader");
+    }
+
+    void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override
+    {
+        ConfigureBlendMode(fBlendMode, descriptor);
+    }
+
+protected:
+    plShaderID::ID fVertexShaderID;
+    plShaderID::ID fFragmentShaderID;
+    uint32_t       fBlendMode;
+};
+
+// Lets pipeline states be used as keys in unordered containers by
+// forwarding to the virtual GetHash().
+template <>
+struct std::hash<plMetalPipelineState>
+{
+    std::size_t operator()(plMetalPipelineState const& s) const noexcept
+    {
+        return s.GetHash();
+    }
+};
+
+//MARK: Clear buffer program
+
+// Pipeline state for the "clearVertex"/"clearFragment" shader pair. The two
+// flags are passed to the fragment function as function constants 0 and 1.
+class plMetalClearPipelineState : public plMetalPipelineState
+{
+public:
+    plMetalClearPipelineState(plMetalDevice* device, bool shouldClearColor, bool shouldClearDepth)
+        : plMetalPipelineState(device),
+          fShouldClearColor(shouldClearColor),
+          fShouldClearDepth(shouldClearDepth)
+    {
+    }
+
+    bool IsEqual(const plMetalPipelineState& p) const override
+    {
+        // See plMetalDynamicMaterialPipelineState::IsEqual: the downcast
+        // cannot fail at runtime, so the type tag is verified first.
+        if (p.GetID() != GetID()) {
+            return false;
+        }
+        const plMetalClearPipelineState* clearState = static_cast<const plMetalClearPipelineState*>(&p);
+        return clearState->fShouldClearDepth == fShouldClearDepth && fShouldClearColor == clearState->fShouldClearColor;
+    };
+
+    uint16_t GetID() const override { return plMetalPipelineType::Clear; };
+    plMetalPipelineState* Clone() override
+    {
+        return new plMetalClearPipelineState(*this);
+    };
+
+    const MTL::Function* GetVertexFunction(MTL::Library* library) override
+    {
+        // newFunction returns an owned (+1) object; autorelease it so the
+        // ownership matches GetFragmentFunction below.
+        return library->newFunction(MTLSTR("clearVertex"))->autorelease();
+    };
+
+    const MTL::Function* GetFragmentFunction(MTL::Library* library) override
+    {
+        return library->newFunction(MTLSTR("clearFragment"),
+                                    MakeFunctionConstants(),
+                                    (NS::Error**)nullptr)
+            ->autorelease();
+    };
+    const NS::String* GetDescription() override
+    {
+        return MTLSTR("Clear");
+    };
+
+    void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override
+    {
+        descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne);
+        descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorZero);
+    };
+
+    // One float2 attribute at offset 0 of buffer 0, 8-byte stride, stepped per vertex.
+    void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) override
+    {
+        vertexDescriptor->attributes()->object(0)->setFormat(MTL::VertexFormatFloat2);
+        vertexDescriptor->attributes()->object(0)->setOffset(0);
+        vertexDescriptor->attributes()->object(0)->setBufferIndex(0);
+        vertexDescriptor->layouts()->object(0)->setStride(8);
+        vertexDescriptor->layouts()->object(0)->setStepFunction(MTL::VertexStepFunctionPerVertex);
+        vertexDescriptor->layouts()->object(0)->setStepRate(1);
+    };
+
+    void GetFunctionConstants(MTL::FunctionConstantValues* values) const override
+    {
+        values->setConstantValue(&fShouldClearDepth, MTL::DataTypeBool, NS::UInteger(0));
+        values->setConstantValue(&fShouldClearColor, MTL::DataTypeBool, NS::UInteger(1));
+    }
+
+    size_t GetHash() const override
+    {
+        size_t value = plMetalPipelineState::GetHash();
+        value ^= std::hash<bool>()(fShouldClearColor);
+        // Shift the second flag so (true, true) and (false, false) do not
+        // hash to the same value.
+        value ^= std::hash<bool>()(fShouldClearDepth) << 1;
+
+        return value;
+    }
+
+private:
+    bool fShouldClearColor;
+    bool fShouldClearDepth;
+};
+
+#endif /* plMetalPipelineState_h */
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp
new file mode 100644
index 0000000000..9f6b1a387b
--- /dev/null
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp
@@ -0,0 +1,166 @@
+/*==LICENSE==*
+
+CyanWorlds.com Engine - MMOG client, server and tools
+Copyright (C) 2011 Cyan Worlds, Inc.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Additional permissions under GNU GPL version 3 section 7
+
+If you modify this Program, or any covered work, by linking or
+combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK,
+NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent
+JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK
+(or a modified version of those libraries),
+containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA,
+PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG
+JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the
+licensors of this Program grant you additional
+permission to convey the resulting work. Corresponding Source for a
+non-source form of such a combination shall include the source code for
+the parts of OpenSSL and IJG JPEG Library used as well as that of the covered
+work.
+
+You can contact Cyan Worlds, Inc. by email legal@cyan.com
+ or by snail mail at:
+      Cyan Worlds, Inc.
+      14617 N Newport Hwy
+      Mead, WA 99021
+
+*==LICENSE==*/
+
+#include "plMetalPlateManager.h"
+
+// NOTE(review): the system header name on the next line was lost when this
+// patch was extracted (angle-bracket content stripped) -- restore it from
+// the original commit.
+#include 
+
+#include "ShaderTypes.h"
+#include "plMetalPipeline.h"
+
+// Creates the depth-stencil state used for plates: the depth test always
+// passes and depth writes are disabled. Geometry buffers are created
+// later by ICreateGeometry().
+plMetalPlateManager::plMetalPlateManager(plMetalPipeline* pipe)
+    : plPlateManager(pipe),
+      fVtxBuffer(),
+      idxBuffer()
+{
+    MTL::DepthStencilDescriptor* depthDescriptor = MTL::DepthStencilDescriptor::alloc()->init();
+    depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionAlways);
+    depthDescriptor->setDepthWriteEnabled(false);
+    fDepthState = pipe->fDevice.fMetalDevice->newDepthStencilState(depthDescriptor);
+    depthDescriptor->release();
+}
+
+// Builds the unit quad shared by every plate: four corner positions
+// followed by four UVs in one buffer, plus a six-entry 16-bit index buffer
+// describing two triangles. Does nothing if the vertex buffer exists.
+void plMetalPlateManager::ICreateGeometry()
+{
+    if (fVtxBuffer) {
+        return;
+    }
+
+    plMetalPipeline* pipeline = (plMetalPipeline*)fOwner;
+    MTL::Device*     device = pipeline->fDevice.fMetalDevice;
+
+    plateVertexBuffer vertexBuffer;
+    vertexBuffer.vertices[0].Set(-0.5f, -0.5f);
+    vertexBuffer.uv[0].Set(0.0f, 0.0f);
+
+    vertexBuffer.vertices[1].Set(-0.5f, 0.5f);
+    vertexBuffer.uv[1].Set(0.0f, 1.0f);
+
+    vertexBuffer.vertices[2].Set(0.5f, -0.5f);
+    vertexBuffer.uv[2].Set(1.0f, 0.0f);
+
+    vertexBuffer.vertices[3].Set(0.5f, 0.5f);
+    vertexBuffer.uv[3].Set(1.0f, 1.0f);
+
+    uint16_t indices[6] = {0, 1, 2, 1, 2, 3};
+
+    // NOTE(review): newBuffer takes MTL::ResourceOptions, but a
+    // MTL::StorageMode enumerator is passed here; the numeric values of the
+    // two enums differ. Confirm whether MTL::ResourceStorageModeManaged
+    // (or Shared) was intended.
+    //
+    // newBuffer already returns an owned (+1) object, so no extra retain is
+    // taken: the single release in IReleaseGeometry() balances it.
+    fVtxBuffer = device->newBuffer(&vertexBuffer, sizeof(plateVertexBuffer), MTL::StorageModeManaged);
+    idxBuffer = device->newBuffer(&indices, sizeof(indices), MTL::StorageModeManaged);
+}
+
+// Binds the quad buffer twice (positions at offset 0, UVs at the offset of
+// the uv array) and issues one indexed draw of six indices.
+void plMetalPlateManager::EncodeDraw(MTL::RenderCommandEncoder* encoder)
+{
+    encoder->setVertexBuffer(fVtxBuffer, 0, VertexAttributePosition);
+    encoder->setVertexBuffer(fVtxBuffer, offsetof(plateVertexBuffer, uv), VertexAttributeTexcoord);
+
+    encoder->drawIndexedPrimitives(MTL::PrimitiveTypeTriangle, 6, MTL::IndexTypeUInt16, idxBuffer, 0);
+}
+
+// Releases both geometry buffers so that a later ICreateGeometry() call
+// rebuilds them without leaking the previous index buffer.
+void plMetalPlateManager::IReleaseGeometry()
+{
+    if (fVtxBuffer) {
+        fVtxBuffer->release();
+        fVtxBuffer = nullptr;
+    }
+    if (idxBuffer) {
+        idxBuffer->release();
+        idxBuffer = nullptr;
+    }
+}
+
+// Walks the plate list and asks the pipeline to draw every visible plate.
+void plMetalPlateManager::IDrawToDevice(plPipeline* pipe)
+{
+    plMetalPipeline* pipeline = (plMetalPipeline*)pipe;
+
+    for (plPlate* plate = fPlates; plate != nullptr; plate = plate->GetNext()) {
+        if (plate->IsVisible()) {
+            pipeline->IDrawPlate(plate);
+        }
+    }
+}
+
+plMetalPlateManager::~plMetalPlateManager()
+{
+    IReleaseGeometry();
+
+    // The depth-stencil state was created with a new* call in the
+    // constructor and is owned by this object.
+    if (fDepthState) {
+        fDepthState->release();
+        fDepthState = nullptr;
+    }
+}
+
+// The plate pipeline state has no configurable fields, so any two
+// instances are interchangeable.
+bool plMetalPlatePipelineState::IsEqual(const plMetalPipelineState& p) const
+{
+    return true;
+}
+
+plMetalPipelineState* plMetalPlatePipelineState::Clone()
+{
+    return new plMetalPlatePipelineState(fDevice);
+}
+
+const MTL::Function* plMetalPlatePipelineState::GetVertexFunction(MTL::Library* library)
+{
+    // newFunction returns an owned (+1) object; hand it back autoreleased,
+    // as plMetalClearPipelineState::GetFragmentFunction does.
+    return library->newFunction(MTLSTR("plateVertexShader"))->autorelease();
+}
+
+const MTL::Function* plMetalPlatePipelineState::GetFragmentFunction(MTL::Library* library)
+{
+    return library->newFunction(MTLSTR("fragmentShader"))->autorelease();
+}
+
+const NS::String* plMetalPlatePipelineState::GetDescription()
+{
+    return MTLSTR("Plate Pipeline State");
+}
+
+// Enables blending with source-alpha / one-minus-source-alpha RGB factors.
+void plMetalPlatePipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor)
+{
+    descriptor->setBlendingEnabled(true);
+    descriptor->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha);
+    descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha);
+}
+
+// Attribute 0 (position) and attribute 1 (texcoord) are both float2 values
+// read from offset 0 of their own buffer binding with a float2 stride;
+// EncodeDraw() supplies the per-binding offsets into the shared buffer.
+void plMetalPlatePipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor)
+{
+    vertexDescriptor->attributes()->object(0)->setFormat(MTL::VertexFormatFloat2);
+    vertexDescriptor->attributes()->object(0)->setBufferIndex(VertexAttributePosition);
+    vertexDescriptor->attributes()->object(0)->setOffset(0);
+    vertexDescriptor->attributes()->object(1)->setFormat(MTL::VertexFormatFloat2);
+    vertexDescriptor->attributes()->object(1)->setBufferIndex(VertexAttributeTexcoord);
+    vertexDescriptor->attributes()->object(1)->setOffset(0);
+
+    vertexDescriptor->layouts()->object(0)->setStride(sizeof(simd_float2));
+    vertexDescriptor->layouts()->object(1)->setStride(sizeof(simd_float2));
+}
+
+// The plate shaders take no function constants.
+void plMetalPlatePipelineState::GetFunctionConstants(MTL::FunctionConstantValues*) const
+{
+}
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h
new file mode 100644
index 0000000000..490ae31388
--- /dev/null
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h
@@ -0,0 +1,99 @@
+/*==LICENSE==*
+
+CyanWorlds.com Engine - MMOG client, server and tools
+Copyright (C) 2011 Cyan Worlds, Inc.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+ +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+      14617 N Newport Hwy
+      Mead, WA 99021
+
+*==LICENSE==*/
+
+#ifndef plMetalPlateManager_hpp
+#define plMetalPlateManager_hpp
+
+// NOTE(review): the three system header names below were lost when this
+// patch was extracted (angle-bracket content stripped) -- restore them
+// from the original commit.
+#include 
+#include 
+
+#include 
+
+#include "hsPoint2.h"
+#include "plMetalPipelineState.h"
+#include "plPipeline/plPlates.h"
+
+class plMetalPipeline;
+class plMetalDevice;
+
+// Render pipeline state used for drawing plates. It carries no state of
+// its own beyond the device pointer held by the base class.
+class plMetalPlatePipelineState : public plMetalPipelineState
+{
+public:
+    plMetalPlatePipelineState(plMetalDevice* device) : plMetalPipelineState(device){};
+    bool IsEqual(const plMetalPipelineState& p) const override;
+    // Type tag identifying this state as the plate pipeline.
+    uint16_t GetID() const override { return plMetalPipelineType::Plate; }
+    plMetalPipelineState* Clone() override;
+    const MTL::Function * GetVertexFunction(MTL::Library* library) override;
+    const MTL::Function * GetFragmentFunction(MTL::Library* library) override;
+    const NS::String* GetDescription() override;
+
+    void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override;
+
+    void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) override;
+
+    void GetFunctionConstants(MTL::FunctionConstantValues*) const override;
+};
+
+// Metal implementation of the plate manager: owns the shared quad
+// geometry and the depth-stencil state used while drawing plates.
+// plMetalPipeline is a friend and may access the private members directly.
+class plMetalPlateManager : public plPlateManager
+{
+    friend class plMetalPipeline;
+
+public:
+    plMetalPlateManager(plMetalPipeline* pipe);
+    // Draws every visible plate through the given pipeline.
+    void IDrawToDevice(plPipeline* pipe) override;
+    // Creates the vertex and index buffers if they do not exist yet.
+    void ICreateGeometry();
+    // Releases the geometry created by ICreateGeometry().
+    void IReleaseGeometry();
+    // Binds the quad geometry and encodes one indexed draw call.
+    void EncodeDraw(MTL::RenderCommandEncoder* encoder);
+    ~plMetalPlateManager();
+
+private:
+    // CPU-side layout of the quad buffer: four positions followed by four UVs.
+    struct plateVertexBuffer
+    {
+        hsPoint2 vertices[4];
+        hsPoint2 uv[4];
+    };
+    MTL::Buffer* fVtxBuffer;             // quad positions and UVs
+    MTL::Buffer* idxBuffer;              // six 16-bit indices (two triangles)
+    MTL::DepthStencilState* fDepthState; // created in the constructor
+};
+
+#endif /* plMetalPlateManager_hpp */
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.cpp
new file mode 100644
index 0000000000..a82c6f03e3
--- /dev/null
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.cpp
@@ -0,0 +1,67 @@
+/*==LICENSE==*
+
+CyanWorlds.com Engine - MMOG client, server and
tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+      14617 N Newport Hwy
+      Mead, WA 99021
+
+*==LICENSE==*/
+#include "plMetalShader.h"
+
+#include "HeadSpin.h"
+#include "plMetalPipeline.h"
+#include "plSurface/plShader.h"
+
+// Binds this device ref to its owning plShader. fFunction starts out null
+// so GetShader() never returns an indeterminate pointer before a function
+// has been assigned.
+plMetalShader::plMetalShader(plShader* owner)
+    : fOwner(owner),
+      fPipe(),
+      fFunction()
+{
+    owner->SetDeviceRef(this);
+}
+
+plMetalShader::~plMetalShader()
+{
+    fPipe = nullptr;
+}
+
+// Re-targets this device ref at a different plShader. The current state is
+// released first; nothing happens when the owner is unchanged.
+void plMetalShader::SetOwner(plShader* owner)
+{
+    if (owner != fOwner) {
+        Release();
+        fOwner = owner;
+        // Tolerate detaching (owner == nullptr) instead of dereferencing null.
+        if (owner) {
+            owner->SetDeviceRef(this);
+        }
+    }
+}
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.h
new file mode 100644
index 0000000000..c934bf9467
--- /dev/null
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.h
@@ -0,0 +1,71 @@
+/*==LICENSE==*
+
+CyanWorlds.com Engine - MMOG client, server and tools
+Copyright (C) 2011 Cyan Worlds, Inc.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Additional permissions under GNU GPL version 3 section 7
+
+If you modify this Program, or any covered work, by linking or
+combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK,
+NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent
+JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK
+(or a modified version of those libraries),
+containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA,
+PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG
+JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the
+licensors of this Program grant you additional
+permission to convey the resulting work. Corresponding Source for a
+non-source form of such a combination shall include the source code for
+the parts of OpenSSL and IJG JPEG Library used as well as that of the covered
+work.
+
+You can contact Cyan Worlds, Inc. by email legal@cyan.com
+ or by snail mail at:
+      Cyan Worlds, Inc.
+      14617 N Newport Hwy
+      Mead, WA 99021
+
+*==LICENSE==*/
+
+// The include guard is named after this header; the previous name
+// (plDXShader_inc) was copied from the DirectX shader header and would
+// collide with it if both were included in one translation unit.
+#ifndef plMetalShader_inc
+#define plMetalShader_inc
+
+// NOTE(review): the two system header names below were lost when this
+// patch was extracted (angle-bracket content stripped) -- restore them
+// from the original commit.
+#include 
+#include 
+
+#include "plMetalDeviceRef.h"
+
+class plShader;
+class plMetalPipeline;
+
+// Device-side counterpart of a plShader for the Metal pipeline. Concrete
+// subclasses implement ISetConstants() to upload the shader constants.
+class plMetalShader : public plMetalDeviceRef
+{
+protected:
+    plShader*        fOwner;    // shader this device ref belongs to
+    plMetalPipeline* fPipe;
+    MTL::Function*   fFunction; // returned as-is by GetShader()
+
+    virtual bool ISetConstants(plMetalPipeline* pipe) = 0; // On error, sets error string.
+
+public:
+    plMetalShader(plShader* owner);
+    virtual ~plMetalShader();
+
+    // Points this device ref at a different owning shader.
+    void SetOwner(plShader* owner);
+    // Returns the stored Metal function; the pipe argument is unused.
+    MTL::Function* GetShader(plMetalPipeline* pipe) const { return fFunction; };
+};
+
+#endif // plMetalShader_inc
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp
new file mode 100644
index 0000000000..3f8a412ca4
--- /dev/null
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp
@@ -0,0 +1,243 @@
+/*==LICENSE==*
+
+CyanWorlds.com Engine - MMOG client, server and tools
+Copyright (C) 2011 Cyan Worlds, Inc.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Additional permissions under GNU GPL version 3 section 7
+
+If you modify this Program, or any covered work, by linking or
+combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK,
+NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent
+JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK
+(or a modified version of those libraries),
+containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA,
+PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG
+JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the
+licensors of this Program grant you additional
+permission to convey the resulting work.
Corresponding Source for a
+non-source form of such a combination shall include the source code for
+the parts of OpenSSL and IJG JPEG Library used as well as that of the covered
+work.
+
+You can contact Cyan Worlds, Inc. by email legal@cyan.com
+ or by snail mail at:
+      Cyan Worlds, Inc.
+      14617 N Newport Hwy
+      Mead, WA 99021
+
+*==LICENSE==*/
+
+#include "plMetalTextFont.h"
+
+#include "HeadSpin.h"
+#include "hsWindows.h"
+#include "plMetalPipeline.h"
+#include "plPipeline/hsWinRef.h"
+
+// Following number needs to be at least: 64 chars max in plTextFont drawn at any one time
+//                                        * 4 primitives per char max (for bold text)
+//                                        * 3 verts per primitive
+
+constexpr uint32_t kNumVertsInBuffer = 4608;
+
+// Upper bound used when splitting vertex data across setVertexBytes calls;
+// Metal documents that call for data smaller than 4 KB.
+constexpr size_t kMaxInlineVertexBytes = 4096;
+
+uint32_t plMetalTextFont::fBufferCursor = 0;
+
+// Builds the matrix mapping pixel coordinates (origin top-left, y down) to
+// clip space: x is scaled by 2/width and shifted by -1, y is scaled by
+// -2/height and shifted by +1.
+static matrix_float4x4 IMakeScreenToClipMatrix(float width, float height)
+{
+    matrix_float4x4 mat = matrix_identity_float4x4;
+    mat.columns[0][0] = 2.0f / width;
+    mat.columns[1][1] = -2.0f / height;
+    mat.columns[3][0] = -1.0;
+    mat.columns[3][1] = 1.0;
+    return mat;
+}
+
+//// Constructor & Destructor /////////////////////////////////////////////////
+
+plMetalTextFont::plMetalTextFont(plPipeline* pipe, plMetalDevice* device) : plTextFont(pipe),
+                                                                          fTexture()
+{
+    fDevice = device;
+    fPipeline = (plMetalPipeline*)pipe;
+    CreateShared(&(fPipeline->fDevice));
+}
+
+plMetalTextFont::~plMetalTextFont()
+{
+    DestroyObjects();
+}
+
+//// ICreateTexture ///////////////////////////////////////////////////////////
+// Converts the 16-bit-per-pixel font bitmap (four 4-bit channels) into an
+// RGBA8 texture of fTextureWidth x fTextureHeight pixels.
+
+void plMetalTextFont::ICreateTexture(uint16_t* data)
+{
+    MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatRGBA8Unorm, fTextureWidth, fTextureHeight, false);
+
+    // Drop any texture left over from a previous initialisation.
+    if (fTexture) {
+        fTexture->release();
+    }
+    fTexture = fDevice->fMetalDevice->newTexture(descriptor);
+    fTexture->setLabel(MTLSTR("Font texture"));
+
+    // NOTE(review): the order in which these bit-fields map onto the nibbles
+    // of each 16-bit source pixel is implementation-defined; confirm it
+    // matches the channel order produced by plTextFont.
+    struct InDataValues
+    {
+        uint8_t a : 4;
+        uint8_t r : 4;
+        uint8_t g : 4;
+        uint8_t b : 4;
+    };
+
+    struct OutDataValues
+    {
+        uint8_t r;
+        uint8_t g;
+        uint8_t b;
+        uint8_t a;
+    };
+
+    const size_t pixelCount = size_t(fTextureWidth) * size_t(fTextureHeight);
+
+    // One 32-bit RGBA8 word per output pixel.
+    auto outData = std::make_unique<uint32_t[]>(pixelCount);
+    for (size_t i = 0; i < pixelCount; i++) {
+        InDataValues*  in = (InDataValues*)(data + i);
+        OutDataValues* out = (OutDataValues*)(outData.get() + i);
+
+        // Expand 4-bit channels (0..15) to 8 bits (0..255). 255 / 15 == 17;
+        // multiplying by 255 overflowed uint8_t (15 * 255 wrapped to 241).
+        out->r = in->r * 17;
+        out->b = in->b * 17;
+        out->g = in->g * 17;
+        out->a = in->a * 17;
+    }
+
+    fTexture->replaceRegion(MTL::Region(0, 0, fTextureWidth, fTextureHeight), 0, outData.get(), 4 * fTextureWidth);
+}
+
+void plMetalTextFont::CreateShared(plMetalDevice* device)
+{
+}
+
+void plMetalTextFont::ReleaseShared(MTL::Device* device)
+{
+}
+
+//// IInitStateBlocks /////////////////////////////////////////////////////////
+
+void plMetalTextFont::IInitStateBlocks()
+{
+}
+
+//// DestroyObjects ///////////////////////////////////////////////////////////
+// Releases the font texture (created with a new* call, so owned here) and
+// marks the font as uninitialised.
+
+void plMetalTextFont::DestroyObjects()
+{
+    if (fTexture) {
+        fTexture->release();
+        fTexture = nullptr;
+    }
+    fInitialized = false;
+}
+
+//// IDrawPrimitive ///////////////////////////////////////////////////////////
+// Draws `count` triangles (three vertices each) from `array`, split into
+// batches so each setVertexBytes call stays within kMaxInlineVertexBytes.
+
+void plMetalTextFont::IDrawPrimitive(uint32_t count, plFontVertex* array)
+{
+    plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalTextFontPipelineState(fDevice).GetRenderPipelineState();
+
+    fPipeline->fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState);
+    constexpr size_t maxCount = kMaxInlineVertexBytes / (sizeof(plFontVertex) * 3);
+
+    uint32_t drawn = 0;
+    while (count > 0) {
+        const uint32_t drawCount = count < maxCount ? count : uint32_t(maxCount);
+        fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(array + (drawn * 3), drawCount * 3 * sizeof(plFontVertex), 0);
+
+        fPipeline->fDevice.CurrentRenderCommandEncoder()->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), drawCount * 3);
+
+        count -= drawCount;
+        drawn += drawCount;
+    }
+}
+
+//// IDrawLines ///////////////////////////////////////////////////////////////
+// Draws `count` line segments (two vertices each) from `array`. The data is
+// uploaded in batches, like IDrawPrimitive, so a large line count does not
+// exceed the size accepted by setVertexBytes.
+
+void plMetalTextFont::IDrawLines(uint32_t count, plFontVertex* array)
+{
+    plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalTextFontPipelineState(fDevice).GetRenderPipelineState();
+
+    fPipeline->fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState);
+
+    matrix_float4x4 mat = IMakeScreenToClipMatrix((float)fPipe->Width(), (float)fPipe->Height());
+    fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&mat, sizeof(matrix_float4x4), 1);
+    fPipeline->fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(fTexture, 0);
+
+    constexpr size_t maxCount = kMaxInlineVertexBytes / (sizeof(plFontVertex) * 2);
+
+    uint32_t drawn = 0;
+    while (count > 0) {
+        const uint32_t drawCount = count < maxCount ? count : uint32_t(maxCount);
+        fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(array + (drawn * 2), drawCount * 2 * sizeof(plFontVertex), 0);
+
+        fPipeline->fDevice.CurrentRenderCommandEncoder()->drawPrimitives(MTL::PrimitiveTypeLine, NS::UInteger(0), drawCount * 2);
+
+        count -= drawCount;
+        drawn += drawCount;
+    }
+}
+
+//// FlushDraws ///////////////////////////////////////////////////////////////
+// Flushes out and finishes any drawing left to be done.
+
+void plMetalTextFont::FlushDraws()
+{
+    // Metal don't flush
+}
+
+//// SaveStates ///////////////////////////////////////////////////////////////
+// Binds the screen-to-clip matrix (vertex buffer index 1) and the font
+// texture (fragment texture index 0) on the current render encoder.
+
+void plMetalTextFont::SaveStates()
+{
+    matrix_float4x4 mat = IMakeScreenToClipMatrix((float)fPipe->Width(), (float)fPipe->Height());
+    fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&mat, sizeof(matrix_float4x4), 1);
+    fPipeline->fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(fTexture, 0);
+}
+
+//// RestoreStates ////////////////////////////////////////////////////////////
+
+void plMetalTextFont::RestoreStates()
+{
+}
+
+// The text pipeline state has no configurable fields, so any two instances
+// are interchangeable.
+bool plMetalTextFontPipelineState::IsEqual(const plMetalPipelineState& p) const
+{
+    return true;
+}
+
+plMetalPipelineState* plMetalTextFontPipelineState::Clone()
+{
+    return new plMetalTextFontPipelineState(fDevice);
+}
+
+const MTL::Function* plMetalTextFontPipelineState::GetVertexFunction(MTL::Library* library)
+{
+    // newFunction returns an owned (+1) object; hand it back autoreleased,
+    // as plMetalClearPipelineState::GetFragmentFunction does.
+    return library->newFunction(MTLSTR("textFontVertexShader"))->autorelease();
+}
+
+const MTL::Function* plMetalTextFontPipelineState::GetFragmentFunction(MTL::Library* library)
+{
+    return library->newFunction(MTLSTR("textFontFragmentShader"))->autorelease();
+}
+
+const NS::String* plMetalTextFontPipelineState::GetDescription()
+{
+    return MTLSTR("Font Rendering");
+}
+
+// Source-alpha / one-minus-source-alpha RGB blend factors.
+void plMetalTextFontPipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor)
+{
+    descriptor->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha);
+    descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha);
+}
+
+// Vertex data is supplied with setVertexBytes; no vertex descriptor
+// configuration is performed.
+void plMetalTextFontPipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor)
+{
+    return;
+}
+
+// The text shaders take no function constants.
+void plMetalTextFontPipelineState::GetFunctionConstants(MTL::FunctionConstantValues*) const
+{
+    return;
+}
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h
new file mode 100644
index 0000000000..7ae2e4b63f
--- /dev/null
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h
@@ -0,0 +1,102 @@
+/*==LICENSE==*
+
+CyanWorlds.com Engine - MMOG client, server and tools
+Copyright (C) 2011 Cyan Worlds, Inc.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Additional permissions under GNU GPL version 3 section 7
+
+If you modify this Program, or any covered work, by linking or
+combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK,
+NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent
+JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK
+(or a modified version of those libraries),
+containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA,
+PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG
+JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the
+licensors of this Program grant you additional
+permission to convey the resulting work. Corresponding Source for a
+non-source form of such a combination shall include the source code for
+the parts of OpenSSL and IJG JPEG Library used as well as that of the covered
+work.
+
+You can contact Cyan Worlds, Inc. by email legal@cyan.com
+ or by snail mail at:
+      Cyan Worlds, Inc.
+      14617 N Newport Hwy
+      Mead, WA 99021
+
+*==LICENSE==*/
+#ifndef _plMetalTextFont_h
+#define _plMetalTextFont_h
+
+// NOTE(review): the system header name below was lost when this patch was
+// extracted (angle-bracket content stripped) -- restore it from the
+// original commit.
+#include 
+
+#include "plMetalPipeline.h"
+#include "plMetalPipelineState.h"
+#include "plPipeline/plTextFont.h"
+
+//// plMetalTextFont Class Definition /////////////////////////////////////////
+
+class plPipeline;
+class plMetalDevice;
+
+// Render pipeline state used for debug text. It carries no state of its
+// own beyond the device pointer held by the base class.
+class plMetalTextFontPipelineState : public plMetalPipelineState
+{
+public:
+    plMetalTextFontPipelineState(plMetalDevice* device) : plMetalPipelineState(device){}
+    bool IsEqual(const plMetalPipelineState& p) const override;
+    // Type tag identifying this state as the text pipeline.
+    uint16_t GetID() const override { return plMetalPipelineType::Text; }
+    plMetalPipelineState* Clone() override;
+    const MTL::Function* GetVertexFunction(MTL::Library* library) override;
+    const MTL::Function* GetFragmentFunction(MTL::Library* library) override;
+    const NS::String* GetDescription() override;
+
+    void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override;
+
+    void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) override;
+
+    void GetFunctionConstants(MTL::FunctionConstantValues*) const override;
+};
+
+// Metal implementation of plTextFont: owns the font texture and encodes
+// text triangles and lines on the pipeline's current render encoder.
+class plMetalTextFont : public plTextFont
+{
+protected:
+    static uint32_t fBufferCursor;
+
+    // Builds fTexture from the font bitmap supplied by plTextFont.
+    void ICreateTexture(uint16_t* data) override;
+    void IInitStateBlocks() override;
+    // Draws `count` triangles (three vertices each) from `array`.
+    void IDrawPrimitive(uint32_t count, plFontVertex* array) override;
+    // Draws `count` line segments (two vertices each) from `array`.
+    void IDrawLines(uint32_t count, plFontVertex* array) override;
+
+    MTL::Texture* fTexture;  // font atlas texture
+    plMetalDevice* fDevice;  // device passed to the constructor
+
+    plMetalPipeline* fPipeline; // the owning pipeline, downcast from plPipeline
+
+public:
+    plMetalTextFont(plPipeline* pipe, plMetalDevice* device);
+    ~plMetalTextFont();
+
+    static void CreateShared(plMetalDevice* device);
+    static void ReleaseShared(MTL::Device* device);
+
+    void FlushDraws() override;
+    // Binds the screen-space matrix and font texture before text is drawn.
+    void SaveStates() override;
+    void RestoreStates() override;
+    void DestroyObjects() override;
+};
+
+#endif // _plMetalTextFont_h
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp
new file mode 100644
index 0000000000..f2303bf89b
--- /dev/null
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp
@@ -0,0 +1,75 @@
+/*==LICENSE==*
+
+CyanWorlds.com Engine - MMOG client, server and tools
+Copyright (C) 2011 Cyan Worlds, Inc.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ +#include "plMetalVertexShader.h" + +#include "HeadSpin.h" + +#include "plDrawable/plGBufferGroup.h" +#include "plMetalPipeline.h" +#include "plSurface/plShader.h" + +#include + +plMetalVertexShader::plMetalVertexShader(plShader* owner) + : plMetalShader(owner) +{ +} + +plMetalVertexShader::~plMetalVertexShader() +{ + Release(); +} + +void plMetalVertexShader::Release() +{ + fPipe = nullptr; +} + +bool plMetalVertexShader::ISetConstants(plMetalPipeline* pipe) +{ + if (fOwner->GetNumConsts()) { + float* ptr = (float*)fOwner->GetConstBasePtr(); + pipe->GetMetalDevice()->CurrentRenderCommandEncoder()->setVertexBytes(ptr, fOwner->GetNumConsts() * sizeof(simd_float4), VertexShaderArgumentMaterialShaderUniforms); + } + + return true; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h new file mode 100644 index 0000000000..41b39923d6 --- /dev/null +++ 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h @@ -0,0 +1,65 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc.
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#ifndef plMetalVertexShader_inc +#define plMetalVertexShader_inc + +#include "plMetalShader.h" + +class plShader; +class plMetalPipeline; + +class plMetalVertexShader : public plMetalShader +{ +protected: +public: + bool ISetConstants(plMetalPipeline* pipe) override; // On error, sets error string. + plMetalVertexShader(plShader* owner); + ~plMetalVertexShader() override; + + void Link(plMetalVertexShader** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } + plMetalVertexShader* GetNext() const { return (plMetalVertexShader*)fNext; } + + void Release() override; +}; + +#endif // plMetalVertexShader_inc diff --git a/Sources/Plasma/NucleusLib/inc/plCreatableIndex.h b/Sources/Plasma/NucleusLib/inc/plCreatableIndex.h index e9d97517f1..54f4c37e9f 100644 --- a/Sources/Plasma/NucleusLib/inc/plCreatableIndex.h +++ b/Sources/Plasma/NucleusLib/inc/plCreatableIndex.h @@ -961,6 +961,7 @@ CLASS_INDEX_LIST_START CLASS_INDEX(plLocalizedConfirmationMsg), CLASS_INDEX(plSubtitleMsg), CLASS_INDEX(plDisplayScaleChangedMsg), + CLASS_INDEX(plMetalPipeline), CLASS_INDEX_LIST_END #endif // plCreatableIndex_inc diff --git a/Sources/Plasma/PubUtilLib/plAvatar/plAvatarClothing.h b/Sources/Plasma/PubUtilLib/plAvatar/plAvatarClothing.h index f420197347..e5599bd0f5 100644 --- a/Sources/Plasma/PubUtilLib/plAvatar/plAvatarClothing.h +++ b/Sources/Plasma/PubUtilLib/plAvatar/plAvatarClothing.h @@ -64,6 +64,7 @@ class plArmatureMod; class plSharedMesh; class plStateDataRecord; class plDXPipeline; +class plMetalPipeline; struct plClothingItemOptions { @@ -163,6 +164,7 @@ class plClothingBase : public hsKeyedObject class plClothingOutfit : public plSynchedObject { friend class plDXPipeline; + friend class plMetalPipeline; public: plArmatureMod *fAvatar; diff --git a/Sources/Plasma/PubUtilLib/plPipeline/hsG3DDeviceSelector.cpp b/Sources/Plasma/PubUtilLib/plPipeline/hsG3DDeviceSelector.cpp index 75d78c0baf..402f2cea42 100644 --- 
a/Sources/Plasma/PubUtilLib/plPipeline/hsG3DDeviceSelector.cpp +++ b/Sources/Plasma/PubUtilLib/plPipeline/hsG3DDeviceSelector.cpp @@ -368,6 +368,7 @@ bool hsG3DDeviceSelector::GetRequested(hsG3DDeviceModeRecord *dmr, uint32_t devT hsG3DDeviceRecord* iTnL = nullptr; hsG3DDeviceRecord* iD3D = nullptr; + hsG3DDeviceRecord* iMetal = nullptr; hsG3DDeviceRecord* iOpenGL = nullptr; hsG3DDeviceRecord* device = nullptr; @@ -396,14 +397,21 @@ bool hsG3DDeviceSelector::GetRequested(hsG3DDeviceModeRecord *dmr, uint32_t devT if (iOpenGL == nullptr || force) iOpenGL = &record; break; + + case kDevTypeMetal: + if (iMetal == nullptr || force) + iMetal = &record; + break; } } - // Pick a default device (Priority D3D T&L, D3D HAL, OpenGL) + // Pick a default device (Priority D3D T&L, D3D HAL, Metal, OpenGL) if (iTnL != nullptr) device = iTnL; else if (iD3D != nullptr) device = iD3D; + else if (iMetal != nullptr) + device = iMetal; else if (iOpenGL != nullptr) device = iOpenGL; else diff --git a/Sources/Plasma/PubUtilLib/plPipeline/hsG3DDeviceSelector.h b/Sources/Plasma/PubUtilLib/plPipeline/hsG3DDeviceSelector.h index ccd6ee77c8..e0760deca5 100644 --- a/Sources/Plasma/PubUtilLib/plPipeline/hsG3DDeviceSelector.h +++ b/Sources/Plasma/PubUtilLib/plPipeline/hsG3DDeviceSelector.h @@ -269,6 +269,7 @@ class hsG3DDeviceSelector : public hsRefCnt kDevTypeUnknown = 0, kDevTypeDirect3D, kDevTypeOpenGL, + kDevTypeMetal, kNumDevTypes }; diff --git a/Sources/Plasma/PubUtilLib/plPipeline/pl3DPipeline.h b/Sources/Plasma/PubUtilLib/plPipeline/pl3DPipeline.h index db1f395006..4ff9486fdb 100644 --- a/Sources/Plasma/PubUtilLib/plPipeline/pl3DPipeline.h +++ b/Sources/Plasma/PubUtilLib/plPipeline/pl3DPipeline.h @@ -110,7 +110,7 @@ template class pl3DPipeline : public plPipeline { protected: - DeviceType fDevice; + mutable DeviceType fDevice; plPipelineViewSettings fView; std::stack fViewStack; diff --git a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp 
b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp index a88391cb32..b22bfb3866 100644 --- a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp +++ b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp @@ -40,8 +40,8 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com *==LICENSE==*/ -#include "hsGMaterial.h" #include +#include "hsGMaterial.h" #include "HeadSpin.h" #include "plProfile.h" @@ -64,7 +64,8 @@ hsGMaterial::hsGMaterial() : fLOD(0), fCompFlags(0), fLoadFlags(0), -fLastUpdateTime(0) +fLastUpdateTime(0), +fDeviceRef() { } @@ -202,7 +203,6 @@ void hsGMaterial::SetLayer(plLayerInterface* layer, int32_t which, bool insert, } } - void hsGMaterial::Write(hsStream* s) { s->WriteLE32(fLoadFlags); diff --git a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.h b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.h index 08ef49fde0..2f411d1b4a 100644 --- a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.h +++ b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.h @@ -45,6 +45,7 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include #include "hsColorRGBA.h" +#include "hsGDeviceRef.h" #include "hsGMatState.h" #include "pnNetCommon/plSynchedObject.h" @@ -91,6 +92,8 @@ class hsGMaterial : public plSynchedObject uint32_t fLoadFlags; float fLastUpdateTime; + + hsGDeviceRef* fDeviceRef; void IClearLayers(); size_t IMakeExtraLayer(); @@ -128,6 +131,10 @@ class hsGMaterial : public plSynchedObject bool IsDynamic() const { return (fCompFlags & kCompDynamic); } bool IsDecal() const { return (fCompFlags & kCompDecal); } bool NeedsBlendChannel() { return (fCompFlags & kCompNeedsBlendChannel); } + + + void SetDeviceRef(hsGDeviceRef* ref) { hsRefCnt_SafeAssign(fDeviceRef, ref); } + hsGDeviceRef* GetDeviceRef() const { return fDeviceRef; } virtual void Read(hsStream* s); virtual void Write(hsStream* s);