forked from chemwolf6922/CMSIS_NN_Fast
-
Notifications
You must be signed in to change notification settings - Fork 0
/
avg_pool_q7_HWC_opt.c
67 lines (63 loc) · 1.82 KB
/
avg_pool_q7_HWC_opt.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#include "nn_functions.h"
/**
 * @brief Fast Q7 average pooling with a fixed 2x2 kernel and stride 2
 * @param[in]  im_in      Pointer to the input tensor, HWC layout
 *                        (dim_im_in x dim_im_in x ch_im_in)
 * @param[in]  dim_im_in  Input tensor dimension (width and height)
 * @param[in]  ch_im_in   Number of input channels
 * @param[out] im_out     Pointer to the output tensor, HWC layout
 *                        ((dim_im_in / 2) x (dim_im_in / 2) x ch_im_in)
 *
 * @details
 * Uses the SIMD signed halving add (__SHADD8) to average four channels of a
 * 2x2 window at a time. Channels left over when ch_im_in is not a multiple
 * of 4 are handled by a scalar loop that uses the same arithmetic.
 *
 * Each output value is computed as (((a + b) >> 1) + ((c + d) >> 1)) >> 1,
 * i.e. two cascaded halving steps. Because every step rounds down, the result
 * may be one LSB smaller than the exact floor((a + b + c + d) / 4).
 *
 * Constraints:
 * 1. Square input.
 * 2. Kernel size is 2, stride is 2, no padding.
 * 3. If dim_im_in is odd, the last input row and column are ignored.
 *
 * NOTE(review): the 32-bit accesses are only word aligned when im_in and
 * im_out are word aligned and ch_im_in is a multiple of 4; otherwise the
 * target is presumably expected to tolerate unaligned word accesses - confirm
 * for the core in use.
 */
void avg_pool_q7_HWC_opt(q7_t* im_in,
                         const uint16_t dim_im_in,
                         const uint16_t ch_im_in,
                         q7_t* im_out)
{
    /* Strides are computed in 32-bit unsigned arithmetic so the product of
     * two uint16_t values cannot overflow a promoted signed int. */
    const uint32_t pixel_stride = ch_im_in;
    const uint32_t row_stride = (uint32_t)ch_im_in * dim_im_in;
    const uint32_t out_dim = (uint32_t)dim_im_in >> 1;

    q7_t *row_base = im_in; /* top-left pixel of the current input row pair */
    q7_t *pDst = im_out;

    for (uint32_t row = 0; row < out_dim; row++)
    {
        q7_t *window = row_base; /* top-left pixel of the current 2x2 window */

        for (uint32_t col = 0; col < out_dim; col++)
        {
            /* The four pixels of the window, re-derived for every window so
             * that no drift can accumulate across columns or rows. */
            q7_t *pSrc1_1 = window;
            q7_t *pSrc1_2 = window + pixel_stride;
            q7_t *pSrc2_1 = window + row_stride;
            q7_t *pSrc2_2 = window + row_stride + pixel_stride;

            /* Four channels per iteration using packed 8-bit halving adds. */
            uint32_t ch_cnt = pixel_stride >> 2;
            while (ch_cnt)
            {
                uint32_t in1, in2, out;
                in1 = *__SIMD32(pSrc1_1)++;
                in2 = *__SIMD32(pSrc1_2)++;
                out = __SHADD8(in1, in2);
                in1 = *__SIMD32(pSrc2_1)++;
                in2 = *__SIMD32(pSrc2_2)++;
                in1 = __SHADD8(in1, in2);
                out = __SHADD8(in1, out);
                *__SIMD32(pDst)++ = out;
                ch_cnt--;
            }

            /* Remaining 0..3 channels. The arithmetic mirrors the SIMD path;
             * it relies on >> of a negative value being an arithmetic shift,
             * which is what Arm compilers implement. */
            ch_cnt = pixel_stride & 3u;
            while (ch_cnt)
            {
                int32_t top = ((int32_t)*pSrc1_1++ + (int32_t)*pSrc1_2++) >> 1;
                int32_t bottom = ((int32_t)*pSrc2_1++ + (int32_t)*pSrc2_2++) >> 1;
                *pDst++ = (q7_t)((top + bottom) >> 1);
                ch_cnt--;
            }

            /* Stride 2: step over both pixels of the window just consumed. */
            window += 2u * pixel_stride;
        }

        /* Stride 2: step over both input rows just consumed. */
        row_base += 2u * row_stride;
    }
}