Skip to content

Commit

Permalink
This patch adds support to vectorize sum of absolute differences (SA…
Browse files Browse the repository at this point in the history
…D_EXPR)

using SVE.

Given this input code:

int
sum_abs (uint8_t *restrict x, uint8_t *restrict y, int n)
{
  int sum = 0;

  for (int i = 0; i < n; i++)
    {
      sum += __builtin_abs (x[i] - y[i]);
    }

  return sum;
}

The resulting SVE code is:

0000000000000000 <sum_abs>:
   0:	7100005f 	cmp	w2, #0x0
   4:	5400026d 	b.le	50 <sum_abs+0x50>
   8:	d2800003 	mov	x3, #0x0                   	// #0
   c:	93407c42 	sxtw	x2, w2
  10:	2538c002 	mov	z2.b, #0
  14:	25221fe0 	whilelo	p0.b, xzr, x2
  18:	2538c023 	mov	z3.b, #1
  1c:	2518e3e1 	ptrue	p1.b
  20:	a4034000 	ld1b	{z0.b}, p0/z, [x0, x3]
  24:	a4034021 	ld1b	{z1.b}, p0/z, [x1, x3]
  28:	0430e3e3 	incb	x3
  2c:	0520c021 	sel	z1.b, p0, z1.b, z0.b
  30:	25221c60 	whilelo	p0.b, x3, x2
  34:	040d0420 	uabd	z0.b, p1/m, z0.b, z1.b
  38:	44830402 	udot	z2.s, z0.b, z3.b
  3c:	54ffff21 	b.ne	20 <sum_abs+0x20>  // b.any
  40:	2598e3e0 	ptrue	p0.s
  44:	04812042 	uaddv	d2, p0, z2.s
  48:	1e260040 	fmov	w0, s2
  4c:	d65f03c0 	ret
  50:	1e2703e2 	fmov	s2, wzr
  54:	1e260040 	fmov	w0, s2
  58:	d65f03c0 	ret

Notice how udot is used inside a fully masked loop.


gcc/Changelog:

2019-05-07  Alejandro Martinez  <[email protected]>

	* config/aarch64/aarch64-sve.md (<su>abd<mode>_3): New define_expand.
	(aarch64_<su>abd<mode>_3): Likewise.
	(*aarch64_<su>abd<mode>_3): New define_insn.
	(<sur>sad<vsi2qi>): New define_expand.
	* config/aarch64/iterators.md: Added MAX_OPP attribute.
	* tree-vect-loop.c (use_mask_by_cond_expr_p): Add SAD_EXPR.
	(build_vect_cond_expr): Likewise.

gcc/testsuite/Changelog:
 
2019-05-07  Alejandro Martinez  <[email protected]>

	* gcc.target/aarch64/sve/sad_1.c: New test for sum of absolute
	differences.



git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@270975 138bc75d-0d04-0410-961f-82ee72b054a4
  • Loading branch information
alejandro committed May 7, 2019
1 parent b16ca97 commit 2cbc1ad
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 0 deletions.
10 changes: 10 additions & 0 deletions gcc/ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
2019-05-07 Alejandro Martinez <[email protected]>

* config/aarch64/aarch64-sve.md (<su>abd<mode>_3): New define_expand.
(aarch64_<su>abd<mode>_3): Likewise.
(*aarch64_<su>abd<mode>_3): New define_insn.
(<sur>sad<vsi2qi>): New define_expand.
* config/aarch64/iterators.md: Added MAX_OPP attribute.
* tree-vect-loop.c (use_mask_by_cond_expr_p): Add SAD_EXPR.
(build_vect_cond_expr): Likewise.

2019-05-07 Uroš Bizjak <[email protected]>

* cfgexpand.c (asm_clobber_reg_is_valid): Reject
Expand Down
61 changes: 61 additions & 0 deletions gcc/config/aarch64/aarch64-sve.md
Original file line number Diff line number Diff line change
Expand Up @@ -3148,3 +3148,64 @@
movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
[(set_attr "movprfx" "*,yes")]
)

;; Helper expander for aarch64_<su>abd<mode>_3 to save the callers
;; the hassle of constructing the other arm of the MINUS.
;; Operand 0 is the destination and operands 1 and 2 are the two inputs.
;; The expander forces the CONSTM1_RTX constant of mode <VPRED> into a
;; register to act as the predicate, builds the <MAX_OPP> rtx of the two
;; inputs (SMIN for smax, UMIN for umax) as the other arm of the MINUS,
;; and then emits aarch64_<su>abd<mode>_3 with those operands.
(define_expand "<su>abd<mode>_3"
[(use (match_operand:SVE_I 0 "register_operand"))
(USMAX:SVE_I (match_operand:SVE_I 1 "register_operand")
(match_operand:SVE_I 2 "register_operand"))]
"TARGET_SVE"
{
rtx pred = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
rtx other_arm = gen_rtx_<MAX_OPP> (<MODE>mode, operands[1], operands[2]);
emit_insn (gen_aarch64_<su>abd<mode>_3 (operands[0], pred, operands[1],
operands[2], other_arm));
DONE;
}
)

;; Predicated integer absolute difference.
;; Operand 0 is the destination, operand 1 is the governing predicate and
;; operands 2 and 3 are the two inputs.  The difference is described as
;; MAX (op2, op3) - MIN (op2, op3): operand 4 must satisfy
;; aarch64_<max_opp> and be applied to the same two inputs (match_dup 2
;; and match_dup 3).
;; The first alternative ties operand 2 to the destination.  The second
;; accepts any input register, marks the destination as earlyclobber and
;; uses MOVPRFX to copy operand 2 into it before the [SU]ABD.
(define_insn "aarch64_<su>abd<mode>_3"
[(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(minus:SVE_I
(USMAX:SVE_I
(match_operand:SVE_I 2 "register_operand" "0, w")
(match_operand:SVE_I 3 "register_operand" "w, w"))
(match_operator 4 "aarch64_<max_opp>"
[(match_dup 2)
(match_dup 3)]))]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
"@
<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
[(set_attr "movprfx" "*,yes")]
)

;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
;; operands 1 and 2.  The sequence also has to perform a widening reduction of
;; the difference into a vector and accumulate that into operand 3 before
;; copying that into the result operand 0.
;; Perform that with a sequence of:
;;   MOV      ones.b, #1
;;   [SU]ABD  diff.b, p0/m, op1.b, op2.b
;;   MOVPRFX  op0, op3      // If necessary
;;   UDOT     op0.s, diff.b, ones.b
;; The sequence above is shown for byte inputs with a 32-bit accumulator;
;; halfword inputs with a 64-bit accumulator use .h and .d elements instead.
;; UDOT is used for both the signed and unsigned variants, since the
;; absolute difference has already been formed by [SU]ABD and is multiplied
;; by a vector of ones.

(define_expand "<sur>sad<vsi2qi>"
[(use (match_operand:SVE_SDI 0 "register_operand"))
(unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
(use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
(use (match_operand:SVE_SDI 3 "register_operand"))]
"TARGET_SVE"
{
rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
rtx diff = gen_reg_rtx (<VSI2QI>mode);
emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
DONE;
}
)
3 changes: 3 additions & 0 deletions gcc/config/aarch64/iterators.md
Original file line number Diff line number Diff line change
Expand Up @@ -1060,6 +1060,9 @@
;; Map smax to smin and umax to umin.
(define_code_attr max_opp [(smax "smin") (umax "umin")])

;; Upper-case form of max_opp: map smax to SMIN and umax to UMIN, for use
;; where the rtx code name is spelled in capitals (e.g. gen_rtx_<MAX_OPP>).
(define_code_attr MAX_OPP [(smax "SMIN") (umax "UMIN")])

;; The number of subvectors in an SVE_STRUCT.
(define_mode_attr vector_count [(VNx32QI "2") (VNx16HI "2")
(VNx8SI "2") (VNx4DI "2")
Expand Down
5 changes: 5 additions & 0 deletions gcc/testsuite/ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
2019-05-07 Alejandro Martinez <[email protected]>

* gcc.target/aarch64/sve/sad_1.c: New test for sum of absolute
differences.

2019-05-07 Uroš Bizjak <[email protected]>

* gcc.target/i386/asm-7.c: New test.
Expand Down
28 changes: 28 additions & 0 deletions gcc/testsuite/gcc.target/aarch64/sve/sad_1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */

#include <stdint.h>

/* Define a noinline, noclone function sum_abs_TYPE1_TYPE2 that walks the
   first N elements of the TYPE2 arrays X and Y, accumulates
   __builtin_abs (x[i] - y[i]) into a TYPE1 sum and returns that sum.
   X and Y are restrict-qualified.  */
#define DEF_SAD(TYPE1, TYPE2) \
TYPE1 __attribute__ ((noinline, noclone)) \
sum_abs_##TYPE1##_##TYPE2 (TYPE2 *restrict x, TYPE2 *restrict y, int n) \
{ \
TYPE1 sum = 0; \
for (int i = 0; i < n; i++) \
{ \
sum += __builtin_abs (x[i] - y[i]); \
} \
return sum; \
}

/* 8-bit inputs with a 32-bit sum and 16-bit inputs with a 64-bit sum,
   each in an unsigned and a signed flavour.  */
DEF_SAD(int32_t, uint8_t)
DEF_SAD(int32_t, int8_t)
DEF_SAD(int64_t, uint16_t)
DEF_SAD(int64_t, int16_t)

/* Expect exactly one predicated UABD and one predicated SABD per element
   size, and two UDOTs per accumulator size: both the unsigned and the
   signed function of each size are expected to accumulate with UDOT.  */
/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
/* { dg-final { scan-assembler-times {\tudot\tz[0-9]+\.s, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tudot\tz[0-9]+\.d, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
12 changes: 12 additions & 0 deletions gcc/tree-vect-loop.c
Original file line number Diff line number Diff line change
Expand Up @@ -5973,6 +5973,7 @@ use_mask_by_cond_expr_p (enum tree_code code, internal_fn cond_fn,
switch (code)
{
case DOT_PROD_EXPR:
case SAD_EXPR:
return true;

default:
Expand Down Expand Up @@ -6002,6 +6003,17 @@ build_vect_cond_expr (enum tree_code code, tree vop[3], tree mask,
break;
}

case SAD_EXPR:
{
tree vectype = TREE_TYPE (vop[1]);
tree masked_op1 = make_temp_ssa_name (vectype, NULL, "masked_op1");
gassign *select = gimple_build_assign (masked_op1, VEC_COND_EXPR,
mask, vop[1], vop[0]);
gsi_insert_before (gsi, select, GSI_SAME_STMT);
vop[1] = masked_op1;
break;
}

default:
gcc_unreachable ();
}
Expand Down

0 comments on commit 2cbc1ad

Please sign in to comment.