Skip to content

Commit

Permalink
Support for row major hsMatrix
Browse files Browse the repository at this point in the history
Swaps the matrix multiplication order in the shaders to support row-major hsMatrix44 data, and eliminates the swap (transpose) step in hsMatrix2SIMD.

This change hasn’t filtered down to the dynamic effects, which get their uniforms through a giant buffer provided by the engine itself.
  • Loading branch information
colincornaby committed Aug 15, 2023
1 parent 53bdefb commit 614e2b1
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 75 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -265,9 +265,9 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]],
half3 LAmbient = half3(0.0, 0.0, 0.0);
half3 LDiffuse = half3(0.0, 0.0, 0.0);

const float3 Ndirection = normalize(uniforms.localToWorldMatrix * float4(in.normal, 0.0)).xyz;
const float3 Ndirection = normalize(float4(in.normal, 0.0) * uniforms.localToWorldMatrix).xyz;

float4 position = (uniforms.localToWorldMatrix * float4(in.position, 1.0));
float4 position = (float4(in.position, 1.0) * uniforms.localToWorldMatrix);
if(temp_hasOnlyWeight1) {
const float4 position2 = blendMatrix1 * float4(in.position, 1.0);
position = (in.weight1 * position) + ((1.0f - in.weight1) * position2);
Expand Down Expand Up @@ -317,7 +317,7 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]],
abs(uniforms.invVtxAlpha - MDiffuse.a));

out.vtxColor = half4(material.rgb, abs(uniforms.invVtxAlpha - MDiffuse.a));
const float4 vCamPosition = uniforms.worldToCameraMatrix * position;
const float4 vCamPosition = position * uniforms.worldToCameraMatrix;
//out.vCamNormal = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.position, 0.0));

//Fog
Expand All @@ -333,13 +333,13 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]],
}
out.fogColor.rgb = uniforms.fogColor;

const float4 normal = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.normal, 0.0));
const float4 normal = (uniforms.localToWorldMatrix * float4(in.normal, 0.0)) * uniforms.worldToCameraMatrix;

for(size_t layer=0; layer<num_layers; layer++) {
(&out.texCoord1)[layer] = uniforms.sampleLocation(layer, &in.texCoord1, normal, vCamPosition);
}

out.position = uniforms.projectionMatrix * vCamPosition;
out.position = vCamPosition * uniforms.projectionMatrix;

return out;
}
Expand All @@ -354,35 +354,35 @@ float3 VertexUniforms::sampleLocation(size_t index, thread float3 *texCoords, co
//Note: If we want to require newer versions of Metal/newer hardware we could pass function pointers instead of doing these ifs.
if (flags & (kMiscUseReflectionXform | kMiscUseRefractionXform)) {
matrix = cameraToWorldMatrix;
matrix[3][0] = matrix[3][1] = matrix[3][2] = 0;
matrix[0][3] = matrix[1][3] = matrix[2][3] = 0;

// This is just a rotation about X of Pi/2 (y = z, z = -y),
// followed by flipping Z to reflect back towards us (z = -z).

// swap mat[1][0] and mat[2][0]
float temp;
temp = matrix[0][1];
matrix[0][1] = matrix[0][2];
matrix[0][2] = temp;
temp = matrix[1][0];
matrix[1][0] = matrix[2][0];
matrix[2][0] = temp;

// swap mat[1][1] and mat[2][1]
temp = matrix[1][1];
matrix[1][1] = matrix[1][2];
matrix[1][2] = temp;
matrix[1][1] = matrix[2][1];
matrix[2][1] = temp;

// swap mat[1][2] and mat[2][2]
temp = matrix[2][1];
matrix[2][1] = matrix[2][2];
temp = matrix[1][2];
matrix[1][2] = matrix[2][2];
matrix[2][2] = temp;

if (flags & kMiscUseRefractionXform) {
// Same as reflection, but then matrix = matrix * scaleMatNegateZ.

// mat[0][2] = -mat[0][2];
matrix[2][0] = -matrix[2][0];
matrix[0][2] = -matrix[0][2];

// mat[1][2] = -mat[1][2];
matrix[2][1] = -matrix[2][1];
matrix[1][2] = -matrix[1][2];

// mat[2][2] = -mat[2][2];
matrix[2][2] = -matrix[2][2];
Expand All @@ -398,10 +398,10 @@ float3 VertexUniforms::sampleLocation(size_t index, thread float3 *texCoords, co
matrix_float4x4 scaleMatrix = matrix_float4x4(1.0);

// hsVector3 camTrans(0.5f, 0.5f, 0.f);
scaleMatrix[3][0] = 0.5f;
scaleMatrix[3][1] = -0.5f;
scaleMatrix[0][3] = 0.5f;
scaleMatrix[1][3] = -0.5f;

matrix = scaleMatrix * translationMatrix;
matrix = translationMatrix * scaleMatrix;

// The scale and trans move us from NDC to Screen space. We need to swap
// the Z and W coordinates so that the texture projection will divide by W
Expand All @@ -410,50 +410,50 @@ float3 VertexUniforms::sampleLocation(size_t index, thread float3 *texCoords, co

// swap mat[2][2] and mat[3][2]
temp = matrix[2][2];
matrix[2][2] = matrix[2][3];
matrix[2][3] = temp;
matrix[2][2] = matrix[3][2];
matrix[3][2] = temp;

// swap mat[2][3] and mat[3][3]
temp = matrix[3][2];
matrix[3][2] = matrix[3][3];
temp = matrix[2][3];
matrix[2][3] = matrix[3][3];
matrix[3][3] = temp;

// Multiply by the projection matrix
matrix = matrix * projectionMatrix;
matrix = projectionMatrix * matrix;
} else if (flags & kMiscProjection) {
matrix_float4x4 cam2World = cameraToWorldMatrix;
if( !(UVWSrc & kUVWPosition) ) {
cam2World.columns[3][0] = 0;
cam2World.columns[3][1] = 0;
cam2World.columns[3][2] = 0;
cam2World.columns[0][3] = 0;
cam2World.columns[1][3] = 0;
cam2World.columns[2][3] = 0;
}

matrix = matrix * cam2World;
matrix = cam2World * matrix;
}

float4 sampleCoord;

switch (UVWSrc) {
case kUVWNormal:
{
sampleCoord = matrix * normal;
sampleCoord = normal * matrix;
}
break;
case kUVWPosition:
{
sampleCoord = matrix * camPosition;
sampleCoord = camPosition * matrix;
}
break;
case kUVWReflect:
{
sampleCoord = matrix * reflect(normalize(camPosition), normalize(normal));
sampleCoord = reflect(normalize(camPosition), normalize(normal)) * matrix;
}
break;
default:
{
const int index = UVWSrc & 0x0F;
if (index < num_uvs) {
sampleCoord = matrix * float4(texCoords[index], 1.0);
sampleCoord = float4(texCoords[index], 1.0) * matrix;
} else {
//The DX engine will use a UV co-ord of 0,0 if the index is out of range
sampleCoord = float4(0.0);
Expand Down Expand Up @@ -649,13 +649,13 @@ vertex ShadowCasterInOut shadowVertexShader(Vertex in [[stage_in]],
{
ShadowCasterInOut out;

const float4 vCamPosition = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.position, 1.0));
const float4 vCamPosition = (float4(in.position, 1.0) * uniforms.localToWorldMatrix) * uniforms.worldToCameraMatrix;

const float4x4 matrix = uniforms.uvTransforms[0].transform;

out.texCoord1 = (matrix * vCamPosition).xyz;
out.texCoord1 = (vCamPosition * matrix).xyz;

out.position = uniforms.projectionMatrix * vCamPosition;
out.position = vCamPosition * uniforms.projectionMatrix;

return out;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,11 @@ vertex ColorInOut plateVertexShader(PlateVertex in [[stage_in]],
ColorInOut out;

float4 position = float4(in.position, 0.0, 1.0);
position = uniforms.projectionMatrix * position;
out.position = (uniforms.localToWorldMatrix * position);
position = position * uniforms.projectionMatrix;
out.position = ( position * uniforms.localToWorldMatrix);
out.position.y *= -1.0f;
out.texCoord = (float4(in.texCoord, 1.0) * uniforms.uvTransforms[0].transform).xyz;
out.texCoord.y = 1.0 - out.texCoord.y;
out.normal = float4(0.0, 0.0, 1.0, 0.0);

return out;
Expand Down
34 changes: 5 additions & 29 deletions Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,39 +62,15 @@ You can contact Cyan Worlds, Inc. by email [email protected]

#include "plMetalPipelineState.h"

matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst, bool swapOrder)
matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst)
{
if (src.fFlags & hsMatrix44::kIsIdent)
{
memcpy(dst, &matrix_identity_float4x4, sizeof(float) * 16);
}
else
{
//SIMD is column major, hsMatrix44 is row major.
//We need to flip.
if(swapOrder) {
dst->columns[0][0] = src.fMap[0][0];
dst->columns[1][0] = src.fMap[0][1];
dst->columns[2][0] = src.fMap[0][2];
dst->columns[3][0] = src.fMap[0][3];

dst->columns[0][1] = src.fMap[1][0];
dst->columns[1][1] = src.fMap[1][1];
dst->columns[2][1] = src.fMap[1][2];
dst->columns[3][1] = src.fMap[1][3];

dst->columns[0][2] = src.fMap[2][0];
dst->columns[1][2] = src.fMap[2][1];
dst->columns[2][2] = src.fMap[2][2];
dst->columns[3][2] = src.fMap[2][3];

dst->columns[0][3] = src.fMap[3][0];
dst->columns[1][3] = src.fMap[3][1];
dst->columns[2][3] = src.fMap[3][2];
dst->columns[3][3] = src.fMap[3][3];
} else {
memcpy(dst, &src.fMap, sizeof(matrix_float4x4));
}
memcpy(dst, &src.fMap, sizeof(matrix_float4x4));
}

return dst;
Expand Down Expand Up @@ -957,13 +933,13 @@ void plMetalDevice::SetWorldToCameraMatrix(const hsMatrix44& src)
hsMatrix2SIMD(inv, &fMatrixC2W);
}

void plMetalDevice::SetLocalToWorldMatrix(const hsMatrix44& src, bool swapOrder)
void plMetalDevice::SetLocalToWorldMatrix(const hsMatrix44& src)
{
hsMatrix44 inv;
src.GetInverse(&inv);

hsMatrix2SIMD(src, &fMatrixL2W, swapOrder);
hsMatrix2SIMD(inv, &fMatrixW2L, swapOrder);
hsMatrix2SIMD(src, &fMatrixL2W);
hsMatrix2SIMD(inv, &fMatrixW2L);
}

void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable)
Expand Down
5 changes: 3 additions & 2 deletions Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ class plCubicEnvironmap;
class plLayerInterface;
class plMetalPipelineState;

matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst, bool swapOrder = true);
//NOTE: Results of this will be row major
matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst);

class plMetalDevice
{
Expand Down Expand Up @@ -149,7 +150,7 @@ class plMetalDevice

void SetProjectionMatrix(const hsMatrix44& src);
void SetWorldToCameraMatrix(const hsMatrix44& src);
void SetLocalToWorldMatrix(const hsMatrix44& src, bool swapOrder = true);
void SetLocalToWorldMatrix(const hsMatrix44& src);

void PopulateTexture(plMetalDevice::TextureRef *tRef, plMipmap *img, uint slice);
uint ConfigureAllowedLevels(plMetalDevice::TextureRef *tRef, plMipmap *mipmap);
Expand Down
11 changes: 3 additions & 8 deletions Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2554,15 +2554,10 @@ void plMetalPipeline::IDrawPlate(plPlate* plate)
fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fNoZReadOrWriteStencilState);
fState.fCurrentDepthStencilState = fDevice.fNoZReadOrWriteStencilState;

//column major layout
simd_float4x4 projMat = matrix_identity_float4x4;
//projMat.columns[2][3] = 1.0f;
//projMat.columns[3][1] = -0.5f;
projMat.columns[3][2] = 0.0f;
projMat.columns[1][1] = 1.0f;

/// Set up the transform directly
fDevice.SetLocalToWorldMatrix(plate->GetTransform(), false);
fDevice.SetLocalToWorldMatrix(plate->GetTransform());

IPushPiggyBacks(material);

Expand Down Expand Up @@ -4440,8 +4435,8 @@ void plMetalPipeline::IBlendVertBuffer(plSpan* span, hsMatrix44* matrixPalette,
hsMatrix2SIMD(matrixPalette[indices & 0xFF], &simdMatrix);
if (weights[j]) {
//Note: This bit is different than GL/DirectX. It's using Accelerate so this is also accelerated on ARM through NEON or maybe even the Neural Engine.
destPt_buf += weights[j] * simd_mul(simdMatrix, *(simd_float4 *)pt_buf);
destNorm_buf += weights[j] * simd_mul(simdMatrix, *(simd_float4 *)vec_buf);
destPt_buf += simd_mul(*(simd_float4 *)pt_buf, simdMatrix) * weights[j];
destNorm_buf += simd_mul(*(simd_float4 *)vec_buf, simdMatrix) * weights[j];
}
//ISkinVertexSSE41(matrixPalette[indices & 0xFF], weights[j], pt_buf, destPt_buf, vec_buf, destNorm_buf);
indices >>= 8;
Expand Down

0 comments on commit 614e2b1

Please sign in to comment.