From ac460eb42ae2a6bf3e64dc36a860b6d23109e4db Mon Sep 17 00:00:00 2001
From: gxw
Date: Mon, 18 Mar 2024 15:53:10 +0800
Subject: [PATCH 1/5] loongarch: Fixed i{c/z}amin LSX opt

---
 kernel/loongarch64/icamin_lsx.S | 118 +++++++++++++++++++++++++++-----
 1 file changed, 101 insertions(+), 17 deletions(-)

diff --git a/kernel/loongarch64/icamin_lsx.S b/kernel/loongarch64/icamin_lsx.S
index a08cd33c59..982a41fe25 100644
--- a/kernel/loongarch64/icamin_lsx.S
+++ b/kernel/loongarch64/icamin_lsx.S
@@ -70,18 +70,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	LD	a1, X, 1 * SIZE
 	FABS	a0, a0
 	FABS	a1, a1
-	ADD	s1, a1, a0
-	vreplvei.w	VM0, VM0, 0
+	ADD	s1, a1, a0	// Initialization value
 	vxor.v	VI3, VI3, VI3	// 0
 #ifdef DOUBLE
 	li.d	I, -1
 	vreplgr2vr.d	VI4, I
 	vffint.d.l	VI4, VI4	// -1
-	bne	INCX, TEMP, .L20
+	bne	INCX, TEMP, .L20	// incx != 1
+
+	// Init Index
 	addi.d	i0, i0, 1
-	srai.d	I, N, 2
-	bge	$r0, I, .L21
-	slli.d	i0, i0, 1 //2
+	slli.d	i0, i0, 1	// 2
 	vreplgr2vr.d	VINC4, i0
 	addi.d	i0, i0, -3
 	vinsgr2vr.d	VI1, i0, 0	//initialize the index value for vectorization
@@ -91,14 +90,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	vinsgr2vr.d	VI0, i0, 0	//1
 	addi.d	i0, i0, 1
 	vinsgr2vr.d	VI0, i0, 1	//2
+
+	srai.d	I, N, 2
+	bge	$r0, I, .L21
+
+	// Init VM0
+	vld	VX0, X, 0 * SIZE
+	vld	VX1, X, 2 * SIZE
+	vpickev.d	x1, VX1, VX0
+	vpickod.d	x2, VX1, VX0
+	vfmul.d	x3, VI4, x1
+	vfmul.d	x4, VI4, x2
+	vfcmp.clt.d	VT0, x1, VI3
+	vfcmp.clt.d	VINC8, x2, VI3
+	vbitsel.v	x1, x1, x3, VT0
+	vbitsel.v	x2, x2, x4, VINC8
+	vfadd.d	VM0, x1, x2
 #else
 	li.w	I, -1
 	vreplgr2vr.w	VI4, I
 	vffint.s.w	VI4, VI4	// -1
-	bne	INCX, TEMP, .L20
+	bne	INCX, TEMP, .L20	// incx != 1
+
+	// Init Index
 	addi.w	i0, i0, 1
-	srai.d	I, N, 2
-	bge	$r0, I, .L21
 	slli.w	i0, i0, 2 //4
 	vreplgr2vr.w	VINC4, i0
 	addi.w	i0, i0, -7
@@ -117,6 +132,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	vinsgr2vr.w	VI0, i0, 2	//3
 	addi.w	i0, i0, 1
 	vinsgr2vr.w	VI0, i0, 3	//4
+
+	srai.d	I, N, 2
+	bge	$r0, I, .L21
+
+	// Init VM0
+	vld	VX0, X, 0 * SIZE
+	vld	VX1, X, 4 * SIZE
+	vpickev.w	x1, VX1, VX0
+	vpickod.w	x2, VX1, VX0
+	vfmul.s	x3, VI4, x1
+	vfmul.s	x4, VI4, x2
+	vfcmp.clt.s	VT0, x1, VI3
+	vfcmp.clt.s	VINC8, x2, VI3
+	vbitsel.v	x1, x1, x3, VT0
+	vbitsel.v	x2, x2, x4, VINC8
+	vfadd.s	VM0, x1, x2
 #endif
 	.align	3
 
@@ -139,6 +170,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	vfcmp.ceq.d	VT0, x3, VM0
 	vbitsel.v	VM0, x3, VM0, VT0
 	vbitsel.v	VI0, VI1, VI0, VT0
+
 	vld	VX0, X, 4 * SIZE
 	vadd.d	VI1, VI1, VINC4
 	vld	VX1, X, 6 * SIZE
@@ -206,9 +238,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 .L20: // INCX!=1
 #ifdef DOUBLE
 	addi.d	i0, i0, 1
-	srai.d	I, N, 2
-	bge	$r0, I, .L21
-	slli.d	i0, i0, 1 //2
+	// Init index
+	slli.d	i0, i0, 1 //2
 	vreplgr2vr.d	VINC4, i0
 	addi.d	i0, i0, -3
 	vinsgr2vr.d	VI1, i0, 0	//initialize the index value for vectorization
@@ -218,10 +249,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	vinsgr2vr.d	VI0, i0, 0	//1
 	addi.d	i0, i0, 1
 	vinsgr2vr.d	VI0, i0, 1	//2
+
+	srai.d	I, N, 2
+	bge	$r0, I, .L21	// N < 4
+
+	// Init VM0
+	ld.d	t1, X, 0 * SIZE
+	ld.d	t2, X, 1 * SIZE
+	add.d	i1, X, INCX
+	ld.d	t3, i1, 0 * SIZE
+	ld.d	t4, i1, 1 * SIZE
+	add.d	i1, i1, INCX
+	vinsgr2vr.d	x1, t1, 0
+	vinsgr2vr.d	x2, t2, 0
+	vinsgr2vr.d	x1, t3, 1
+	vinsgr2vr.d	x2, t4, 1
+	vfmul.d	x3, VI4, x1
+	vfmul.d	x4, VI4, x2
+	vfcmp.clt.d	VT0, x1, VI3
+	vfcmp.clt.d	VINC8, x2, VI3
+	vbitsel.v	x1, x1, x3, VT0
+	vbitsel.v	x2, x2, x4, VINC8
+	vfadd.d	VM0, x1, x2
 #else
 	addi.w	i0, i0, 1
-	srai.d	I, N, 2
-	bge	$r0, I, .L21
+
+	// Init index
 	slli.w	i0, i0, 2 //4
 	vreplgr2vr.w	VINC4, i0
 	addi.w	i0, i0, -7
@@ -240,6 +293,38 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	vinsgr2vr.w	VI0, i0, 2	//3
 	addi.w	i0, i0, 1
 	vinsgr2vr.w	VI0, i0, 3	//4
+
+	srai.d	I, N, 2
+	bge	$r0, I, .L21	// N < 4
+
+	// Init VM0
+	ld.w	t1, X, 0 * SIZE
+	ld.w	t2, X, 1 * SIZE
+	add.d	i1, X, INCX
+	ld.w	t3, i1, 0 * SIZE
+	ld.w	t4, i1, 1 * SIZE
+	add.d	i1, i1, INCX
+	vinsgr2vr.w	x1, t1, 0
+	vinsgr2vr.w	x2, t2, 0
+	vinsgr2vr.w	x1, t3, 1
+	vinsgr2vr.w	x2, t4, 1
+	ld.w	t1, i1, 0 * SIZE
+	ld.w	t2, i1, 1 * SIZE
+	add.d	i1, i1, INCX
+	ld.w	t3, i1, 0 * SIZE
+	ld.w	t4, i1, 1 * SIZE
+	add.d	i1, i1, INCX
+	vinsgr2vr.w	x1, t1, 2
+	vinsgr2vr.w	x2, t2, 2
+	vinsgr2vr.w	x1, t3, 3
+	vinsgr2vr.w	x2, t4, 3
+	vfmul.s	x3, VI4, x1
+	vfmul.s	x4, VI4, x2
+	vfcmp.clt.s	VT0, x1, VI3
+	vfcmp.clt.s	VINC8, x2, VI3
+	vbitsel.v	x1, x1, x3, VT0
+	vbitsel.v	x2, x2, x4, VINC8
+	vfadd.s	VM0, x1, x2
 #endif
 	.align	3
 
@@ -300,8 +385,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	vinsgr2vr.w	x2, t2, 2
 	vinsgr2vr.w	x1, t3, 3
 	vinsgr2vr.w	x2, t4, 3
-	vpickev.w	x1, VX1, VX0
-	vpickod.w	x2, VX1, VX0
 #endif
 	addi.d	I, I, -1
 	VFMUL	x3, VI4, x1
@@ -358,12 +441,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #ifdef DOUBLE
 	vfmina.d	VM0, x1, x2
 	vfcmp.ceq.d	VT0, x1, VM0
+	vbitsel.v	VI0, VI2, VI1, VT0
 #else
 	fcmp.ceq.d	$fcc0, $f15, $f10
 	bceqz	$fcc0, .L27
 	vfcmp.clt.s	VT0, VI2, VI0
-#endif
 	vbitsel.v	VI0, VI0, VI2, VT0
+#endif
 	.align	3
 
 .L27:

From bbf82cb624d7a03d230e2411a40cfa326ca9c806 Mon Sep 17 00:00:00 2001
From: gxw
Date: Mon, 18 Mar 2024 17:51:42 +0800
Subject: [PATCH 2/5] loongarch: Fixed {s/d}axpby LSX opt

---
 kernel/loongarch64/axpby_lsx.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/loongarch64/axpby_lsx.S b/kernel/loongarch64/axpby_lsx.S
index e50d4cdcc1..dae34fec9e 100644
--- a/kernel/loongarch64/axpby_lsx.S
+++ b/kernel/loongarch64/axpby_lsx.S
@@ -990,6 +990,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
 	add.d	YY, YY, INCY
 	blt	$r0, I, .L222
+	move	Y, YY
 	b	.L997
 	.align	3
 

From ad13e04669baa3d1e0569c81cc90716325ef6e3a Mon Sep 17 00:00:00 2001
From: gxw
Date: Tue, 19 Mar 2024 09:18:44 +0800
Subject: [PATCH 3/5] loongarch: Fixed {s/d/sc/dz}amin LSX opt

---
 kernel/loongarch64/amin_lsx.S  | 12 ++++++------
 kernel/loongarch64/camin_lsx.S | 11 +++++++----
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/kernel/loongarch64/amin_lsx.S b/kernel/loongarch64/amin_lsx.S
index 47701b6e4b..690444ca7c 100644
--- a/kernel/loongarch64/amin_lsx.S
+++ b/kernel/loongarch64/amin_lsx.S
@@ -146,7 +146,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	add.d	X, X, INCX
 	vinsgr2vr.d	VX1, t3, 0
 	vinsgr2vr.d	VX1, t4, 1
-	vfmaxa.d	VM1, VX0, VX1
+	vfmina.d	VM1, VX0, VX1
 	ld.d	t1, X, 0 * SIZE
 	add.d	X, X, INCX
 	ld.d	t2, X, 0 * SIZE
@@ -159,9 +159,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	add.d	X, X, INCX
 	vinsgr2vr.d	VX1, t3, 0
 	vinsgr2vr.d	VX1, t4, 1
-	vfmaxa.d	VM2, VX0, VX1
-	vfmaxa.d	VM1, VM1, VM2
-	vfmaxa.d	VM0, VM0, VM1
+	vfmina.d	VM2, VX0, VX1
+	vfmina.d	VM1, VM1, VM2
+	vfmina.d	VM0, VM0, VM1
 #else
 	ld.w	t1, X, 0
 	add.d	X, X, INCX
@@ -187,8 +187,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	vinsgr2vr.w	VX1, t2, 1
 	vinsgr2vr.w	VX1, t3, 2
 	vinsgr2vr.w	VX1, t4, 3
-	vfmaxa.s	VM1, VX0, VX1
-	vfmaxa.s	VM0, VM0, VM1
+	vfmina.s	VM1, VX0, VX1
+	vfmina.s	VM0, VM0, VM1
 #endif
 	addi.d	I, I, -1
 	blt	$r0, I, .L21
diff --git a/kernel/loongarch64/camin_lsx.S b/kernel/loongarch64/camin_lsx.S
index ff666ea8fb..2fd78a2339 100644
--- a/kernel/loongarch64/camin_lsx.S
+++ b/kernel/loongarch64/camin_lsx.S
@@ -186,7 +186,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	FABS	t4, t4
 	ADD	t1, t1, t2
 	ADD	t3, t3, t4
-	FMIN	s1, t1, t3
+	FMIN	s2, t1, t3
 	LD	t1, X, 0 * SIZE
 	LD	t2, X, 1 * SIZE
 	add.d	X, X, INCX
@@ -214,13 +214,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	ADD	t1, t1, t2
 	ADD	t3, t3, t4
 	FMIN	s4, t1, t3
+
+	FMIN	s1, s1, s2
+	FMIN	s3, s3, s4
+	FMIN	a0, a0, s3
+	FMIN	a0, a0, s1
 	blt	$r0, I, .L21
 	.align	3
 
 .L22:
-	FMIN	s1, s1, s2
-	FMIN	s3, s3, s4
-	FMIN	s1, s1, s3
+	MOV	s1, a0
 	.align	3
 
 .L23: //N<8

From b5eb9d6bacdd1ed0b13b91ed56d9adc96d7ee26e Mon Sep 17 00:00:00 2001
From: gxw
Date: Tue, 19 Mar 2024 09:56:11 +0800
Subject: [PATCH 4/5] loongarch: Fixed {sc/dz}amax LSX opt

---
 kernel/loongarch64/camax_lsx.S | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/kernel/loongarch64/camax_lsx.S b/kernel/loongarch64/camax_lsx.S
index cf46cb0160..12922ecd87 100644
--- a/kernel/loongarch64/camax_lsx.S
+++ b/kernel/loongarch64/camax_lsx.S
@@ -177,7 +177,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	FABS	t4, t4
 	ADD	t1, t1, t2
 	ADD	t3, t3, t4
-	FMAX	s1, t1, t3
+	FMAX	s2, t1, t3
 	LD	t1, X, 0 * SIZE
 	LD	t2, X, 1 * SIZE
 	add.d	X, X, INCX
@@ -205,13 +205,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	ADD	t1, t1, t2
 	ADD	t3, t3, t4
 	FMAX	s4, t1, t3
+
+	FMAX	s1, s1, s2
+	FMAX	s3, s3, s4
+	FMAX	a0, a0, s3
+	FMAX	a0, a0, s1
 	blt	$r0, I, .L21
 	.align	3
 
 .L22:
-	FMAX	s1, s1, s2
-	FMAX	s3, s3, s4
-	FMAX	s1, s1, s3
+	MOV	s1, a0
 	.align	3
 
 .L23: //N<8

From 50869f6ca8d0e5cc93f03cfcec8066a766e1cf56 Mon Sep 17 00:00:00 2001
From: gxw
Date: Tue, 19 Mar 2024 10:08:11 +0800
Subject: [PATCH 5/5] loongarch: Fixed zrot LSX opt

---
 kernel/loongarch64/crot_lsx.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/loongarch64/crot_lsx.S b/kernel/loongarch64/crot_lsx.S
index 126257edca..af8f13b778 100644
--- a/kernel/loongarch64/crot_lsx.S
+++ b/kernel/loongarch64/crot_lsx.S
@@ -82,6 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	vreplgr2vr.d	VXC, t1
 	vreplgr2vr.d	VXS, t2
 	vreplgr2vr.d	VXZ, t3
+	srai.d	I, N, 1
 #else
 	vreplgr2vr.w	VXC, t1
 	vreplgr2vr.w	VXS, t2
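
Note on PATCH 1/5: the "Init VM0" blocks compute |re| + |im| without an
absolute-value instruction. VI4 holds a -1.0 splat, so vfmul produces the
negated lanes, vfcmp.clt marks the lanes that are negative, and vbitsel picks
the negated value for exactly those lanes; this is also why each init block
must write x3/x4 with vfmul before vbitsel selects from them. A minimal C
sketch of the same trick (illustrative only, not part of the patch; the
function name is hypothetical):

    #include <stdio.h>

    /* |v| computed the way the LSX init blocks do it: multiply by -1
       (vfmul against the VI4 splat) and select the product wherever
       v < 0 (vfcmp.clt + vbitsel). */
    static float abs_via_select(float v)
    {
        float neg = -1.0f * v;        /* vfmul.s  x3, VI4, x1    */
        return (v < 0.0f) ? neg : v;  /* vfcmp.clt.s + vbitsel.v */
    }

    int main(void)
    {
        printf("%g %g\n", abs_via_select(-2.5f), abs_via_select(3.0f));
        return 0; /* prints: 2.5 3 */
    }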
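Note on PATCHES 3/5 and 4/5: both fix the same defect in the INCX != 1 loop of
the complex amin/amax kernels. The loop produces four partial results s1..s4
per iteration, but the old code reduced them only once, after the loop, so
every iteration except the last was discarded; the fix folds the partials into
a running accumulator (a0) on every iteration, and .L22 just copies a0 into s1.
A scalar C sketch of the corrected pattern (illustrative only, not part of the
patch; names are hypothetical):

    #include <math.h>
    #include <stddef.h>

    /* Running |re| + |im| minimum over n complex floats with stride incx;
       n is assumed to be a multiple of 4 to mirror the unrolled loop. */
    static float camin_loop(const float *x, size_t n, ptrdiff_t incx, float acc)
    {
        for (size_t i = 0; i < n; i += 4) {
            float s1 = fabsf(x[0]) + fabsf(x[1]); x += 2 * incx;
            float s2 = fabsf(x[0]) + fabsf(x[1]); x += 2 * incx;
            float s3 = fabsf(x[0]) + fabsf(x[1]); x += 2 * incx;
            float s4 = fabsf(x[0]) + fabsf(x[1]); x += 2 * incx;
            /* Fold into the accumulator every iteration (FMIN a0, a0, ...);
               reducing s1..s4 only after the loop would keep just the last
               iteration's values. */
            acc = fminf(acc, fminf(fminf(s1, s2), fminf(s3, s4)));
        }
        return acc;
    }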