diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index b74f7e406f..333d443d40 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -265,9 +265,9 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], half3 LAmbient = half3(0.0, 0.0, 0.0); half3 LDiffuse = half3(0.0, 0.0, 0.0); - const float3 Ndirection = normalize(uniforms.localToWorldMatrix * float4(in.normal, 0.0)).xyz; + const float3 Ndirection = normalize(float4(in.normal, 0.0) * uniforms.localToWorldMatrix).xyz; - float4 position = (uniforms.localToWorldMatrix * float4(in.position, 1.0)); + float4 position = (float4(in.position, 1.0) * uniforms.localToWorldMatrix); if(temp_hasOnlyWeight1) { const float4 position2 = blendMatrix1 * float4(in.position, 1.0); position = (in.weight1 * position) + ((1.0f - in.weight1) * position2); @@ -317,7 +317,7 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], abs(uniforms.invVtxAlpha - MDiffuse.a)); out.vtxColor = half4(material.rgb, abs(uniforms.invVtxAlpha - MDiffuse.a)); - const float4 vCamPosition = uniforms.worldToCameraMatrix * position; + const float4 vCamPosition = position * uniforms.worldToCameraMatrix; //out.vCamNormal = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.position, 0.0)); //Fog @@ -333,13 +333,13 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], } out.fogColor.rgb = uniforms.fogColor; - const float4 normal = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.normal, 0.0)); + const float4 normal = (uniforms.localToWorldMatrix * float4(in.normal, 0.0)) * uniforms.worldToCameraMatrix; for(size_t layer=0; layercolumns[0][0] = src.fMap[0][0]; - dst->columns[1][0] = src.fMap[0][1]; - dst->columns[2][0] = src.fMap[0][2]; - dst->columns[3][0] = src.fMap[0][3]; - - dst->columns[0][1] = src.fMap[1][0]; - dst->columns[1][1] = src.fMap[1][1]; - dst->columns[2][1] = src.fMap[1][2]; - dst->columns[3][1] = src.fMap[1][3]; - - dst->columns[0][2] = src.fMap[2][0]; - dst->columns[1][2] = src.fMap[2][1]; - dst->columns[2][2] = src.fMap[2][2]; - dst->columns[3][2] = src.fMap[2][3]; - - dst->columns[0][3] = src.fMap[3][0]; - dst->columns[1][3] = src.fMap[3][1]; - dst->columns[2][3] = src.fMap[3][2]; - dst->columns[3][3] = src.fMap[3][3]; - } else { - memcpy(dst, &src.fMap, sizeof(matrix_float4x4)); - } + memcpy(dst, &src.fMap, sizeof(matrix_float4x4)); } return dst; @@ -957,13 +933,13 @@ void plMetalDevice::SetWorldToCameraMatrix(const hsMatrix44& src) hsMatrix2SIMD(inv, &fMatrixC2W); } -void plMetalDevice::SetLocalToWorldMatrix(const hsMatrix44& src, bool swapOrder) +void plMetalDevice::SetLocalToWorldMatrix(const hsMatrix44& src) { hsMatrix44 inv; src.GetInverse(&inv); - hsMatrix2SIMD(src, &fMatrixL2W, swapOrder); - hsMatrix2SIMD(inv, &fMatrixW2L, swapOrder); + hsMatrix2SIMD(src, &fMatrixL2W); + hsMatrix2SIMD(inv, &fMatrixW2L); } void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h index 4574810f67..089ac3bc61 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -67,7 +67,8 @@ class plCubicEnvironmap; class plLayerInterface; class plMetalPipelineState; -matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst, bool swapOrder = true); +//NOTE: Results of this will be row major +matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst); class plMetalDevice { @@ -149,7 +150,7 @@ class plMetalDevice void SetProjectionMatrix(const hsMatrix44& src); void SetWorldToCameraMatrix(const hsMatrix44& src); - void SetLocalToWorldMatrix(const hsMatrix44& src, bool swapOrder = true); + void SetLocalToWorldMatrix(const hsMatrix44& src); void PopulateTexture(plMetalDevice::TextureRef *tRef, plMipmap *img, uint slice); uint ConfigureAllowedLevels(plMetalDevice::TextureRef *tRef, plMipmap *mipmap); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index b2b3792630..86f34a269f 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -2554,15 +2554,10 @@ void plMetalPipeline::IDrawPlate(plPlate* plate) fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fNoZReadOrWriteStencilState); fState.fCurrentDepthStencilState = fDevice.fNoZReadOrWriteStencilState; - //column major layout simd_float4x4 projMat = matrix_identity_float4x4; - //projMat.columns[2][3] = 1.0f; - //projMat.columns[3][1] = -0.5f; - projMat.columns[3][2] = 0.0f; - projMat.columns[1][1] = 1.0f; /// Set up the transform directly - fDevice.SetLocalToWorldMatrix(plate->GetTransform(), false); + fDevice.SetLocalToWorldMatrix(plate->GetTransform()); IPushPiggyBacks(material); @@ -4440,8 +4435,8 @@ void plMetalPipeline::IBlendVertBuffer(plSpan* span, hsMatrix44* matrixPalette, hsMatrix2SIMD(matrixPalette[indices & 0xFF], &simdMatrix); if (weights[j]) { //Note: This bit is different than GL/DirectX. It's using acclerate so this is also accelerated on ARM through NEON or maybe even the Neural Engine. - destPt_buf += weights[j] * simd_mul(simdMatrix, *(simd_float4 *)pt_buf); - destNorm_buf += weights[j] * simd_mul(simdMatrix, *(simd_float4 *)vec_buf); + destPt_buf += simd_mul(*(simd_float4 *)pt_buf, simdMatrix) * weights[j]; + destNorm_buf += simd_mul(*(simd_float4 *)vec_buf, simdMatrix) * weights[j]; } //ISkinVertexSSE41(matrixPalette[indices & 0xFF], weights[j], pt_buf, destPt_buf, vec_buf, destNorm_buf); indices >>= 8;