Merge pull request OpenMathLib#4566 from XiWeiGu/fix_loongarch_lsx
LoongArch: Fixed LSX opt
martin-frbg authored Mar 19, 2024
2 parents 56d114b + 50869f6 commit b4a1153
Showing 6 changed files with 121 additions and 31 deletions.
12 changes: 6 additions & 6 deletions kernel/loongarch64/amin_lsx.S
@@ -146,7 +146,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add.d X, X, INCX
vinsgr2vr.d VX1, t3, 0
vinsgr2vr.d VX1, t4, 1
- vfmaxa.d VM1, VX0, VX1
+ vfmina.d VM1, VX0, VX1
ld.d t1, X, 0 * SIZE
add.d X, X, INCX
ld.d t2, X, 0 * SIZE
@@ -159,9 +159,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add.d X, X, INCX
vinsgr2vr.d VX1, t3, 0
vinsgr2vr.d VX1, t4, 1
- vfmaxa.d VM2, VX0, VX1
- vfmaxa.d VM1, VM1, VM2
- vfmaxa.d VM0, VM0, VM1
+ vfmina.d VM2, VX0, VX1
+ vfmina.d VM1, VM1, VM2
+ vfmina.d VM0, VM0, VM1
#else
ld.w t1, X, 0
add.d X, X, INCX
@@ -187,8 +187,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vinsgr2vr.w VX1, t2, 1
vinsgr2vr.w VX1, t3, 2
vinsgr2vr.w VX1, t4, 3
- vfmaxa.s VM1, VX0, VX1
- vfmaxa.s VM0, VM0, VM1
+ vfmina.s VM1, VX0, VX1
+ vfmina.s VM0, VM0, VM1
#endif
addi.d I, I, -1
blt $r0, I, .L21
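For reference, AMIN returns the smallest absolute value of a strided vector, so the reduction must use vfmina (elementwise minimum of absolute values) rather than vfmaxa, which evidently survived a copy-paste from the AMAX kernel and inverted the result. A minimal C sketch of the scalar semantics the kernel vectorizes (names and signature are illustrative, not the OpenBLAS interface):

#include <math.h>
#include <stddef.h>

/* Scalar reference for damin: minimum |x[i]| over n strided elements.
 * vfmina.d computes this min-of-absolute-values per vector lane. */
static double amin_ref(size_t n, const double *x, ptrdiff_t incx)
{
    double m = fabs(x[0]);                     /* running minimum, seeded from x[0] */
    for (size_t i = 1; i < n; i++) {
        double v = fabs(x[i * (size_t)incx]);  /* |x[i]|, strided access */
        if (v < m)                             /* keep the smaller magnitude */
            m = v;
    }
    return m;
}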
1 change: 1 addition & 0 deletions kernel/loongarch64/axpby_lsx.S
@@ -990,6 +990,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
add.d YY, YY, INCY
blt $r0, I, .L222
+ move Y, YY
b .L997
.align 3

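The one-line fix is pointer bookkeeping: the strided body advances a separate write cursor YY, and the code reached through .L997 expects Y to point at the next unprocessed element, so YY has to be copied back before the branch. A hedged C sketch of that hand-off (hypothetical helper, not the kernel's real interface):

#include <stddef.h>

/* Sketch of the axpby (y := alpha*x + beta*y) cursor hand-off: the main
 * loop advances yy (YY in the assembly); y must be resynchronized before
 * the scalar tail runs, which is what the added `move Y, YY` does. */
static void axpby_ref(size_t body, size_t tail, double alpha, const double *x,
                      ptrdiff_t incx, double beta, double *y, ptrdiff_t incy)
{
    double *yy = y;                                     /* YY: write cursor */
    for (size_t i = 0; i < body; i++) {                 /* main loop (.L222) */
        *yy = alpha * x[i * (size_t)incx] + beta * *yy;
        yy += incy;                                     /* add.d YY, YY, INCY */
    }
    y = yy;                                             /* move Y, YY */
    for (size_t i = 0; i < tail; i++) {                 /* tail (.L997) */
        *y = alpha * x[(body + i) * (size_t)incx] + beta * *y;
        y += incy;
    }
}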
11 changes: 7 additions & 4 deletions kernel/loongarch64/camax_lsx.S
@@ -177,7 +177,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FABS t4, t4
ADD t1, t1, t2
ADD t3, t3, t4
- FMAX s1, t1, t3
+ FMAX s2, t1, t3
LD t1, X, 0 * SIZE
LD t2, X, 1 * SIZE
add.d X, X, INCX
@@ -205,13 +205,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ADD t1, t1, t2
ADD t3, t3, t4
FMAX s4, t1, t3
+
+ FMAX s1, s1, s2
+ FMAX s3, s3, s4
+ FMAX a0, a0, s3
+ FMAX a0, a0, s1
blt $r0, I, .L21
.align 3

.L22:
- FMAX s1, s1, s2
- FMAX s3, s3, s4
- FMAX s1, s1, s3
+ MOV s1, a0
.align 3

.L23: //N<8
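CAMAX reduces max(|Re(x_i)| + |Im(x_i)|) over the vector. In the unrolled scalar path the second partial maximum was written into s1, clobbering the first, and the partials were only folded together after the loop; the fix keeps s1..s4 distinct and folds them into the running result a0 on every iteration. A rough C model of the corrected reduction, collapsed to one element per step (illustrative, not the kernel interface):

#include <math.h>
#include <stddef.h>

/* Scalar reference for scamax: maximum of |Re| + |Im| over n complex
 * floats stored as interleaved re/im pairs. */
static float camax_ref(size_t n, const float *x, ptrdiff_t incx)
{
    float a0 = fabsf(x[0]) + fabsf(x[1]);           /* seeded from element 0 */
    for (size_t i = 1; i < n; i++) {
        const float *p = x + 2 * (size_t)incx * i;  /* 2 floats per complex */
        float t = fabsf(p[0]) + fabsf(p[1]);        /* FABS + ADD */
        if (t > a0)                                 /* FMAX a0, a0, t */
            a0 = t;
    }
    return a0;
}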
11 changes: 7 additions & 4 deletions kernel/loongarch64/camin_lsx.S
@@ -186,7 +186,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FABS t4, t4
ADD t1, t1, t2
ADD t3, t3, t4
- FMIN s1, t1, t3
+ FMIN s2, t1, t3
LD t1, X, 0 * SIZE
LD t2, X, 1 * SIZE
add.d X, X, INCX
@@ -214,13 +214,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ADD t1, t1, t2
ADD t3, t3, t4
FMIN s4, t1, t3
+
+ FMIN s1, s1, s2
+ FMIN s3, s3, s4
+ FMIN a0, a0, s3
+ FMIN a0, a0, s1
blt $r0, I, .L21
.align 3

.L22:
- FMIN s1, s1, s2
- FMIN s3, s3, s4
- FMIN s1, s1, s3
+ MOV s1, a0
.align 3

.L23: //N<8
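camin_lsx.S receives the mirror-image repair: the same clobbered partial (s1) and the same missing per-iteration fold, now done with FMIN so the running minimum in a0 survives the unrolled loop. The CAMAX sketch above applies with the comparison reversed.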
1 change: 1 addition & 0 deletions kernel/loongarch64/crot_lsx.S
@@ -82,6 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vreplgr2vr.d VXC, t1
vreplgr2vr.d VXS, t2
vreplgr2vr.d VXZ, t3
+ srai.d I, N, 1
#else
vreplgr2vr.w VXC, t1
vreplgr2vr.w VXS, t2
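The added srai.d restores the vector-loop trip count I = N >> 1 in the double-precision branch, which processes two complex doubles per iteration. For orientation, a hedged C sketch of the rotation the kernel applies (zdrot-style, real c and s, interleaved re/im, unit stride for brevity; names illustrative):

#include <stddef.h>

/* Reference semantics of the complex plane rotation:
 *   x[i] := c*x[i] + s*y[i]
 *   y[i] := c*y[i] - s*x[i]   (using the original x[i]) */
static void crot_ref(size_t n, double *x, double *y, double c, double s)
{
    for (size_t i = 0; i < 2 * n; i += 2) {   /* two doubles per complex */
        double xr = x[i], xi = x[i + 1];
        double yr = y[i], yi = y[i + 1];
        x[i]     = c * xr + s * yr;
        x[i + 1] = c * xi + s * yi;
        y[i]     = c * yr - s * xr;
        y[i + 1] = c * yi - s * xi;
    }
}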
116 changes: 99 additions & 17 deletions kernel/loongarch64/icamin_lsx.S
@@ -70,18 +70,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
LD a1, X, 1 * SIZE
FABS a0, a0
FABS a1, a1
- ADD s1, a1, a0
- vreplvei.w VM0, VM0, 0
+ ADD s1, a1, a0 // Initialization value
vxor.v VI3, VI3, VI3 // 0
#ifdef DOUBLE
li.d I, -1
vreplgr2vr.d VI4, I
vffint.d.l VI4, VI4 // -1
- bne INCX, TEMP, .L20
+ bne INCX, TEMP, .L20 // incx != 1
+
+ // Init Index
addi.d i0, i0, 1
- srai.d I, N, 2
- bge $r0, I, .L21
- slli.d i0, i0, 1 //2
+ slli.d i0, i0, 1 // 2
vreplgr2vr.d VINC4, i0
addi.d i0, i0, -3
vinsgr2vr.d VI1, i0, 0 //initialize the index value for vectorization
@@ -91,14 +90,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vinsgr2vr.d VI0, i0, 0 //1
addi.d i0, i0, 1
vinsgr2vr.d VI0, i0, 1 //2
+
+ srai.d I, N, 2
+ bge $r0, I, .L21
+
+ // Init VM0
+ vld VX0, X, 0 * SIZE
+ vld VX1, X, 2 * SIZE
+ vpickev.d x1, VX1, VX0
+ vpickod.d x2, VX1, VX0
+ vfmul.d x3, VI4, x1
+ vfmul.d x4, VI4, x2
+ vfcmp.clt.d VT0, x1, VI3
+ vfcmp.clt.d VINC8, x2, VI3
+ vbitsel.v x1, x1, x3, VT0
+ vbitsel.v x2, x2, x4, VINC8
+ vfadd.d VM0, x1, x2
#else
li.w I, -1
vreplgr2vr.w VI4, I
vffint.s.w VI4, VI4 // -1
- bne INCX, TEMP, .L20
+ bne INCX, TEMP, .L20 // incx != 1
+
+ // Init Index
addi.w i0, i0, 1
- srai.d I, N, 2
- bge $r0, I, .L21
slli.w i0, i0, 2 //4
vreplgr2vr.w VINC4, i0
addi.w i0, i0, -7
@@ -117,6 +132,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vinsgr2vr.w VI0, i0, 2 //3
addi.w i0, i0, 1
vinsgr2vr.w VI0, i0, 3 //4
+
+ srai.d I, N, 2
+ bge $r0, I, .L21
+
+ // Init VM0
+ vld VX0, X, 0 * SIZE
+ vld VX1, X, 4 * SIZE
+ vpickev.w x1, VX1, VX0
+ vpickod.w x2, VX1, VX0
+ vfmul.s x3, VI4, x1
+ vfmul.s x4, VI4, x2
+ vfcmp.clt.s VT0, x1, VI3
+ vfcmp.clt.s VINC8, x2, VI3
+ vbitsel.v x1, x1, x3, VT0
+ vbitsel.v x2, x2, x4, VINC8
+ vfadd.s VM0, x1, x2
#endif
.align 3

@@ -139,6 +170,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vfcmp.ceq.d VT0, x3, VM0
vbitsel.v VM0, x3, VM0, VT0
vbitsel.v VI0, VI1, VI0, VT0
+
vld VX0, X, 4 * SIZE
vadd.d VI1, VI1, VINC4
vld VX1, X, 6 * SIZE
@@ -206,9 +238,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.L20: // INCX!=1
#ifdef DOUBLE
addi.d i0, i0, 1
- srai.d I, N, 2
- bge $r0, I, .L21
- slli.d i0, i0, 1 //2
+ // Init index
+ slli.d i0, i0, 1 //2
vreplgr2vr.d VINC4, i0
addi.d i0, i0, -3
vinsgr2vr.d VI1, i0, 0 //initialize the index value for vectorization
@@ -218,10 +249,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vinsgr2vr.d VI0, i0, 0 //1
addi.d i0, i0, 1
vinsgr2vr.d VI0, i0, 1 //2
+
+ srai.d I, N, 2
+ bge $r0, I, .L21 // N < 4
+
+ // Init VM0
+ ld.d t1, X, 0 * SIZE
+ ld.d t2, X, 1 * SIZE
+ add.d i1, X, INCX
+ ld.d t3, i1, 0 * SIZE
+ ld.d t4, i1, 1 * SIZE
+ add.d i1, i1, INCX
+ vinsgr2vr.d x1, t1, 0
+ vinsgr2vr.d x2, t2, 0
+ vinsgr2vr.d x1, t3, 1
+ vinsgr2vr.d x2, t4, 1
+ vfmul.d x3, VI4, x1
+ vfmul.d x4, VI4, x2
+ vfcmp.clt.d VT0, x1, VI3
+ vfcmp.clt.d VINC8, x2, VI3
+ vbitsel.v x1, x1, x3, VT0
+ vbitsel.v x2, x2, x4, VINC8
+ vfadd.d VM0, x1, x2
#else
addi.w i0, i0, 1
- srai.d I, N, 2
- bge $r0, I, .L21
+
+ // Init index
slli.w i0, i0, 2 //4
vreplgr2vr.w VINC4, i0
addi.w i0, i0, -7
@@ -240,6 +293,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vinsgr2vr.w VI0, i0, 2 //3
addi.w i0, i0, 1
vinsgr2vr.w VI0, i0, 3 //4
+
+ srai.d I, N, 2
+ bge $r0, I, .L21 // N < 4
+
+ // Init VM0
+ ld.w t1, X, 0 * SIZE
+ ld.w t2, X, 1 * SIZE
+ add.d i1, X, INCX
+ ld.w t3, i1, 0 * SIZE
+ ld.w t4, i1, 1 * SIZE
+ add.d i1, i1, INCX
+ vinsgr2vr.w x1, t1, 0
+ vinsgr2vr.w x2, t2, 0
+ vinsgr2vr.w x1, t3, 1
+ vinsgr2vr.w x2, t4, 1
+ ld.w t1, i1, 0 * SIZE
+ ld.w t2, i1, 1 * SIZE
+ add.d i1, i1, INCX
+ ld.w t3, i1, 0 * SIZE
+ ld.w t4, i1, 1 * SIZE
+ add.d i1, i1, INCX
+ vinsgr2vr.w x1, t1, 2
+ vinsgr2vr.w x2, t2, 2
+ vinsgr2vr.w x1, t3, 3
+ vinsgr2vr.w x2, t4, 3
+ vfcmp.clt.s VT0, x1, VI3
+ vfcmp.clt.s VINC8, x2, VI3
+ vbitsel.v x1, x1, x3, VT0
+ vbitsel.v x2, x2, x4, VINC8
+ vfadd.s VM0, x1, x2
#endif
.align 3

@@ -300,8 +383,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vinsgr2vr.w x2, t2, 2
vinsgr2vr.w x1, t3, 3
vinsgr2vr.w x2, t4, 3
- vpickev.w x1, VX1, VX0
- vpickod.w x2, VX1, VX0
#endif
addi.d I, I, -1
VFMUL x3, VI4, x1
@@ -358,12 +439,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef DOUBLE
vfmina.d VM0, x1, x2
vfcmp.ceq.d VT0, x1, VM0
vbitsel.v VI0, VI2, VI1, VT0
#else
fcmp.ceq.d $fcc0, $f15, $f10
bceqz $fcc0, .L27
vfcmp.clt.s VT0, VI2, VI0
- #endif
+ vbitsel.v VI0, VI0, VI2, VT0
+ #endif
.align 3

.L27:
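Most of the icamin rework seeds VM0 from the first actual vector of data (the new "Init VM0" blocks in both the contiguous and strided paths) instead of whatever the register happened to hold, and moves the N < 4 early exit after the index setup. The magnitude used is the BLAS complex "absolute value" |Re| + |Im|, computed branch-free in the kernel: multiply by -1 and select the product wherever the lane is negative. A compact C reference for the routine as a whole (hedged, illustrative names; 1-based result as BLAS requires):

#include <math.h>
#include <stddef.h>

/* Scalar reference for icamin: 1-based index of the first element whose
 * |Re| + |Im| is smallest, or 0 for an empty/invalid vector. Seeding m
 * from element 0 mirrors the kernel's new "Init VM0" blocks. */
static size_t icamin_ref(size_t n, const float *x, ptrdiff_t incx)
{
    if (n == 0 || incx <= 0)
        return 0;
    float m = fabsf(x[0]) + fabsf(x[1]);         /* seed from element 0 */
    size_t best = 1;
    for (size_t i = 1; i < n; i++) {
        const float *p = x + 2 * (size_t)incx * i;
        float t = fabsf(p[0]) + fabsf(p[1]);
        if (t < m) {                             /* strict <: first minimum wins */
            m = t;
            best = i + 1;
        }
    }
    return best;
}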
