Skip to content

Commit

Permalink
s390: Add LEN_LOAD/LEN_STORE support.
Browse files Browse the repository at this point in the history
This patch adds LEN_LOAD/LEN_STORE support for z13 and newer.
It defines a bias value of -1 and implements the LEN_LOAD and LEN_STORE
optabs.

Add vll/vstl testcases adapted from Power.

Also change expectations for SLP testcases with more than one rgroup.

gcc/ChangeLog:

	* config/s390/predicates.md (vll_bias_operand): Add -1 bias.
	* config/s390/s390.cc (s390_option_override_internal): Make
	partial vector usage the default from z13 on.
	* config/s390/vector.md (len_load_v16qi): Add.
	(len_store_v16qi): Add.

gcc/testsuite/ChangeLog:

	* gcc.target/s390/s390.exp: Add partial subdirectory.
	* gcc.target/s390/vector/vec-nopeel-2.c: Change test
	expectation.
	* lib/target-supports.exp: Add s390.
	* gcc.target/s390/vector/partial/s390-vec-length-1.h: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-2.h: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-3.h: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-7.h: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-epil-1.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-epil-2.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-epil-3.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-epil-7.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-epil-run-1.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-epil-run-2.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-epil-run-3.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-epil-run-7.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-full-1.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-full-2.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-full-3.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-full-7.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-full-run-1.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-full-run-2.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-full-run-3.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-full-run-7.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-run-1.h: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-run-2.h: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-run-3.h: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-run-7.h: New test.
	* gcc.target/s390/vector/partial/s390-vec-length-small.c: New test.
	* gcc.target/s390/vector/partial/s390-vec-length.h: New test.
  • Loading branch information
rdapp-ibm committed Mar 2, 2023
1 parent 61407e0 commit da19e37
Show file tree
Hide file tree
Showing 32 changed files with 450 additions and 3 deletions.
8 changes: 8 additions & 0 deletions gcc/config/s390/predicates.md
Original file line number Diff line number Diff line change
Expand Up @@ -606,3 +606,11 @@
(and (match_code "reg")
(match_test "reload_completed || reload_in_progress")
(match_test "register_operand (op, GET_MODE (op))"))))

; Bias value for LEN_LOAD and LEN_STORE. The bias will be added to the
; length (in bytes for s390) to be loaded. vll/vstl expect the index of the
; highest byte to load/store while LEN_LOAD/LEN_STORE use the actual length
; in bytes, so the only bias accepted here is -1. This implies that we
; cannot load a length of 0.
(define_predicate "vll_bias_operand"
(and (match_code "const_int")
(match_test "op == CONSTM1_RTX (QImode)")))
9 changes: 8 additions & 1 deletion gcc/config/s390/s390.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15857,6 +15857,14 @@ s390_option_override_internal (struct gcc_options *opts,

/* Use the alternative scheduling-pressure algorithm by default. */
SET_OPTION_IF_UNSET (opts, opts_set, param_sched_pressure_algorithm, 2);

/* Allow simple vector masking using vll/vstl for epilogues. */
if (TARGET_Z13)
SET_OPTION_IF_UNSET (opts, opts_set, param_vect_partial_vector_usage, 1);
else
SET_OPTION_IF_UNSET (opts, opts_set, param_vect_partial_vector_usage, 0);

/* Do not vectorize loops with a low trip count for now. */
SET_OPTION_IF_UNSET (opts, opts_set, param_min_vect_loop_bound, 2);

/* Set the default alignment. */
Expand Down Expand Up @@ -17825,7 +17833,6 @@ s390_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST s390_vectorize_vec_perm_const


struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-s390.h"
35 changes: 35 additions & 0 deletions gcc/config/s390/vector.md
Original file line number Diff line number Diff line change
Expand Up @@ -2947,6 +2947,41 @@
""
[(set_attr "op_type" "*,VRX,VRX")])

;
; Implement len_load/len_store optabs with vll/vstl.
; Expand the len_load optab for V16QI by emitting gen_vllv16qi.
;   operand 0: destination vector register
;   operand 1: source memory
;   operand 2: length operand, a QImode register
;   operand 3: bias constant; it only has to satisfy vll_bias_operand and
;              is not referenced by the expansion code below
; Only enabled for TARGET_VX && TARGET_64BIT.
(define_expand "len_load_v16qi"
[(match_operand:V16QI 0 "register_operand")
(match_operand:V16QI 1 "memory_operand")
(match_operand:QI 2 "register_operand")
(match_operand:QI 3 "vll_bias_operand")
]
"TARGET_VX && TARGET_64BIT"
{
/* Re-type the memory operand as BLKmode at offset 0.
   NOTE(review): presumably because fewer than 16 bytes may be accessed;
   confirm against the vllv16qi pattern.  */
rtx mem = adjust_address (operands[1], BLKmode, 0);

/* The length is handed to gen_vllv16qi in a fresh SImode pseudo, so
   zero-extend the QImode operand into it first.  */
rtx len = gen_reg_rtx (SImode);
emit_move_insn (len, gen_rtx_ZERO_EXTEND (SImode, operands[2]));
emit_insn (gen_vllv16qi (operands[0], len, mem));
DONE;
})

; Expand the len_store optab for V16QI by emitting gen_vstlv16qi.
;   operand 0: destination memory
;   operand 1: source vector register
;   operand 2: length operand, a QImode register
;   operand 3: bias constant; it only has to satisfy vll_bias_operand and
;              is not referenced by the expansion code below
; Only enabled for TARGET_VX && TARGET_64BIT.
(define_expand "len_store_v16qi"
[(match_operand:V16QI 0 "memory_operand")
(match_operand:V16QI 1 "register_operand")
(match_operand:QI 2 "register_operand")
(match_operand:QI 3 "vll_bias_operand")
]
"TARGET_VX && TARGET_64BIT"
{
/* Re-type the memory operand as BLKmode at offset 0.
   NOTE(review): presumably because fewer than 16 bytes may be accessed;
   confirm against the vstlv16qi pattern.  */
rtx mem = adjust_address (operands[0], BLKmode, 0);

/* The length is handed to gen_vstlv16qi in a fresh SImode pseudo, so
   zero-extend the QImode operand into it first.  */
rtx len = gen_reg_rtx (SImode);
emit_move_insn (len, gen_rtx_ZERO_EXTEND (SImode, operands[2]));
emit_insn (gen_vstlv16qi (operands[1], len, mem));
DONE;
});;


; reduc_smin
; reduc_smax
; reduc_umin
Expand Down
3 changes: 3 additions & 0 deletions gcc/testsuite/gcc.target/s390/s390.exp
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,9 @@ dg-runtest [lsort [prune [glob -nocomplain $srcdir/$subdir/*.{c,S}] \
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*vector*/*.{c,S}]] \
"" $DEFAULT_CFLAGS

# Also run the .c and .S tests located in the "partial" subdirectory of
# each *vector* directory, with the default C flags.
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*vector*/partial/*.{c,S}]] \
"" $DEFAULT_CFLAGS

gfortran-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*vector*/*.F90]] \
"" $DEFAULT_FFLAGS

Expand Down
18 changes: 18 additions & 0 deletions gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-1.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#include "s390-vec-length.h"

/* Test the case where the loop iteration count is a compile-time
   constant.  */

#define N 127

/* For the given TYPE, declare the external arrays a_TYPE, b_TYPE and c_TYPE
   with N elements each and define testTYPE (), which stores the
   element-wise sum of a_TYPE and b_TYPE into c_TYPE for all N elements.  */
#define test(TYPE) \
extern TYPE a_##TYPE[N]; \
extern TYPE b_##TYPE[N]; \
extern TYPE c_##TYPE[N]; \
void __attribute__ ((noinline, noclone)) test##TYPE () \
{ \
unsigned int i = 0; \
for (i = 0; i < N; i++) \
c_##TYPE[i] = a_##TYPE[i] + b_##TYPE[i]; \
}

/* Instantiate the test for every type TEST_ALL enumerates.
   NOTE(review): TEST_ALL is not defined here; presumably it comes from
   s390-vec-length.h.  */
TEST_ALL (test)
18 changes: 18 additions & 0 deletions gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-2.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#include "s390-vec-length.h"

/* Test the case where the loop iteration count is not known at compile
   time: it is passed in as the parameter n.  */

/* Number of elements in each array.  */
#define N 255

/* For the given TYPE, declare the external arrays a_TYPE, b_TYPE and c_TYPE
   with N elements each and define testTYPE (n), which stores the
   element-wise sum of a_TYPE and b_TYPE into c_TYPE for the first n
   elements.
   NOTE(review): n is not checked against N here; callers presumably pass
   n <= N.  */
#define test(TYPE) \
extern TYPE a_##TYPE[N]; \
extern TYPE b_##TYPE[N]; \
extern TYPE c_##TYPE[N]; \
void __attribute__ ((noinline, noclone)) test##TYPE (unsigned int n) \
{ \
unsigned int i = 0; \
for (i = 0; i < n; i++) \
c_##TYPE[i] = a_##TYPE[i] + b_##TYPE[i]; \
}

/* Instantiate the test for every type TEST_ALL enumerates.
   NOTE(review): TEST_ALL is not defined here; presumably it comes from
   s390-vec-length.h.  */
TEST_ALL (test)
31 changes: 31 additions & 0 deletions gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-3.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#include "s390-vec-length.h"

/* Test the case where the loop iteration count is less than VF.  Each
   type gets its own iteration count N_<type> below.  */

/* For char. */
#define N_uint8_t 15
#define N_int8_t 15
/* For short. */
#define N_uint16_t 6
#define N_int16_t 6
/* For int/float. */
#define N_uint32_t 3
#define N_int32_t 3
#define N_float 3
/* For long/double. */
#define N_uint64_t 1
#define N_int64_t 1
#define N_double 1

/* For the given TYPE, declare the external arrays a_TYPE, b_TYPE and c_TYPE
   with N_TYPE elements each and define testTYPE (), which stores the
   element-wise sum of a_TYPE and b_TYPE into c_TYPE for all N_TYPE
   elements.  */
#define test(TYPE) \
extern TYPE a_##TYPE[N_##TYPE]; \
extern TYPE b_##TYPE[N_##TYPE]; \
extern TYPE c_##TYPE[N_##TYPE]; \
void __attribute__ ((noinline, noclone)) test##TYPE () \
{ \
unsigned int i = 0; \
for (i = 0; i < N_##TYPE; i++) \
c_##TYPE[i] = a_##TYPE[i] + b_##TYPE[i]; \
}

/* Instantiate the test for every type TEST_ALL enumerates.
   NOTE(review): TEST_ALL is not defined here; presumably it comes from
   s390-vec-length.h.  */
TEST_ALL (test)
17 changes: 17 additions & 0 deletions gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-7.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include "s390-vec-length.h"

/* Test a store-only loop that covers the index range [START, END) of a
   16-byte aligned array, i.e. it does not begin at the first element.  */

/* Number of elements in each array.  */
#define N 64
/* First index written by the loop.  */
#define START 1
/* One past the last index written by the loop.  */
#define END 59

/* For the given TYPE, define the 16-byte aligned array x_TYPE with N
   elements and the function test_npeel_TYPE (), which writes the values
   0, 1, 2, ... (converted to TYPE) to x_TYPE[START] .. x_TYPE[END - 1].  */
#define test(TYPE) \
TYPE x_##TYPE[N] __attribute__((aligned(16))); \
void __attribute__((noinline, noclone)) test_npeel_##TYPE() { \
TYPE v = 0; \
for (unsigned int i = START; i < END; i++) { \
x_##TYPE[i] = v; \
v += 1; \
} \
}

/* Instantiate the test for every type TEST_ALL enumerates.
   NOTE(review): TEST_ALL is not defined here; presumably it comes from
   s390-vec-length.h.  */
TEST_ALL (test)
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/* { dg-do compile { target { lp64 && s390_vx } } } */
/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */

/* { dg-additional-options "--param=vect-partial-vector-usage=1 --param=min-vect-loop-bound=0" } */

/* Test that we only vectorize the epilogue with vector load/store with length,
the main body still uses normal vector load/store. */

#include "s390-vec-length-1.h"

/* { dg-final { scan-assembler-times {\mvll\M} 14 } } */
/* { dg-final { scan-assembler-times {\mvstl\M} 7 } } */

Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/* { dg-do compile { target { lp64 && s390_vx } } } */
/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */

/* { dg-additional-options "--param=vect-partial-vector-usage=1 --param=min-vect-loop-bound=0" } */

/* Test that we only vectorize the epilogue with vector load/store with length,
the main body still uses normal vector load/store. */

#include "s390-vec-length-2.h"

/* { dg-final { scan-assembler-times {\mvll\M} 20 } } */
/* { dg-final { scan-assembler-times {\mvstl\M} 10 } } */

Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/* { dg-do compile { target { lp64 && s390_vx } } } */
/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */

/* { dg-additional-options "--param=vect-partial-vector-usage=1 --param=min-vect-loop-bound=0" } */

/* Test that loops with fewer iterations than VF are vectorized using only
vector load/store with length; no normal vector load/store is expected. */

#include "s390-vec-length-3.h"

/* { dg-final { scan-assembler-not {\mvl\M} } } */
/* { dg-final { scan-assembler-not {\mvst\M} } } */
/* 64bit types get completely unrolled, so only check the others. */
/* { dg-final { scan-assembler-times {\mvll\M} 14 } } */
/* { dg-final { scan-assembler-times {\mvstl\M} 7 } } */

Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/* { dg-do compile { target { lp64 && s390_vx } } } */
/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -ffast-math -fno-trapping-math" } */

/* { dg-additional-options "--param=vect-partial-vector-usage=1 --param=min-vect-loop-bound=0" } */

/* Test that we only vectorize the epilogue with vector load/store with length,
the main body still uses normal vector load/store. */

#include "s390-vec-length-7.h"

/* { dg-final { scan-assembler-times {\mvstl\M} 4 } } */
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/* { dg-do run { target { lp64 && s390_vx } } } */
/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-trapping-math" } */

/* { dg-additional-options "--param=vect-partial-vector-usage=1" } */

#include "s390-vec-length-run-1.h"

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/* { dg-do run { target { lp64 && s390_vx } } } */
/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-trapping-math" } */

/* { dg-additional-options "--param=vect-partial-vector-usage=1" } */

#include "s390-vec-length-run-2.h"

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/* { dg-do run { target { lp64 && s390_vx } } } */
/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-trapping-math" } */

/* { dg-additional-options "--param=vect-partial-vector-usage=1" } */

#include "s390-vec-length-run-3.h"

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/* { dg-do run { target { lp64 && s390_vx } } } */
/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-trapping-math" } */

/* { dg-additional-options "--param=vect-partial-vector-usage=1" } */

#include "s390-vec-length-run-7.h"

Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/* { dg-do compile { target { lp64 && s390_vx } } } */
/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */

/* { dg-additional-options "--param=vect-partial-vector-usage=2" } */

/* Test that the loop body uses vector load/store with length,
there should not be any epilogues. */

#include "s390-vec-length-1.h"

/* { dg-final { scan-assembler-times {\mvll\M} 20 } } */
/* { dg-final { scan-assembler-times {\mvstl\M} 10 } } */
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/* { dg-do compile { target { lp64 && s390_vx } } } */
/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */

/* { dg-additional-options "--param=vect-partial-vector-usage=2" } */

/* Test that the loop body uses vector load/store with length,
there should not be any epilogues. */

#include "s390-vec-length-2.h"

/* { dg-final { scan-assembler-times {\mvll\M} 20 } } */
/* { dg-final { scan-assembler-times {\mvstl\M} 10 } } */
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/* { dg-do compile { target { lp64 && s390_vx } } } */
/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */

/* { dg-additional-options "--param=vect-partial-vector-usage=2 --param=min-vect-loop-bound=0" } */

/* Test that the loop body uses vector load/store with length,
there should not be any epilogues. */

#include "s390-vec-length-3.h"

/* 64bit types get completely unrolled, so only check the others. */
/* { dg-final { scan-assembler-times {\mvll\M} 14 } } */
/* { dg-final { scan-assembler-times {\mvstl\M} 7 } } */
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/* { dg-do compile { target { lp64 && s390_vx } } } */
/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */

/* { dg-additional-options "--param=vect-partial-vector-usage=2" } */

/* Test that the loop body uses vector load/store with length,
there should not be any epilogues. */

#include "s390-vec-length-7.h"

/* Each type should have one vstl but we do not currently vectorize the
float and double variants and the [u]int64_t ones which do not require
partial vectors. */
/* { dg-final { scan-assembler-times {\mvstl\M} 6 } } */
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/* { dg-do run { target { lp64 && s390_vx } } } */
/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-trapping-math" } */

/* { dg-additional-options "--param=vect-partial-vector-usage=2" } */

#include "s390-vec-length-run-1.h"

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/* { dg-do run { target { lp64 && s390_vx } } } */
/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-trapping-math" } */

/* { dg-additional-options "--param=vect-partial-vector-usage=2" } */

#include "s390-vec-length-run-2.h"

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/* { dg-do run { target { lp64 && s390_vx } } } */
/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-trapping-math" } */

/* { dg-additional-options "--param=vect-partial-vector-usage=2" } */

#include "s390-vec-length-run-3.h"

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/* { dg-do run { target { lp64 && s390_vx } } } */
/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-trapping-math" } */

/* { dg-additional-options "--param=vect-partial-vector-usage=2" } */

#include "s390-vec-length-run-7.h"

Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/* Runtime driver for the functions instantiated by s390-vec-length-1.h:
   defines the arrays, fills the inputs, calls each testTYPE () and checks
   the result.  */
#include "s390-vec-length-1.h"

/* Define the arrays a_TYPE, b_TYPE and c_TYPE (N elements each) that the
   included header only declares as extern.  */
#define decl(TYPE) \
TYPE a_##TYPE[N]; \
TYPE b_##TYPE[N]; \
TYPE c_##TYPE[N];

/* Fill a_TYPE[i] with i * 2 + 1 and b_TYPE[i] with i % 2 - 2, call
   testTYPE (), then recompute the expected sum for every index and abort
   on the first mismatch with c_TYPE[i].  The expected operands are first
   stored into TYPE variables so that they go through the same conversion
   to TYPE as the array initialization above.  */
#define run(TYPE) \
{ \
unsigned int i = 0; \
for (i = 0; i < N; i++) \
{ \
a_##TYPE[i] = i * 2 + 1; \
b_##TYPE[i] = i % 2 - 2; \
} \
test##TYPE (); \
for (i = 0; i < N; i++) \
{ \
TYPE a1 = i * 2 + 1; \
TYPE b1 = i % 2 - 2; \
TYPE exp_c = a1 + b1; \
if (c_##TYPE[i] != exp_c) \
__builtin_abort (); \
} \
}

/* Emit the array definitions for every type TEST_ALL enumerates.  */
TEST_ALL (decl)

/* Run the check block for every type TEST_ALL enumerates; returns 0 when no
   check aborted.  */
int
main (void)
{
TEST_ALL (run)
return 0;
}
Loading

0 comments on commit da19e37

Please sign in to comment.