-
Notifications
You must be signed in to change notification settings - Fork 57
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Adds arm_convolve_wrapper function for 4bit weights - Adds arm_convolve_1x1 normal and fast variant for 4bit weights - Adds arm_nn_mat_mult_kernel_s4_s16 for multiply 4bit weights with 16bit input - Adds mat_mult_nt_t_s4 function for dsp and scalar - Adds scalar and dsp implementation for arm_convolve with 4bit weights - Adds Unit tests for 4bit weight convolutions Change-Id: Idea55432fdab2db05a033889d7c39dd0ea69f8ad Signed-off-by: Ryan O'Shea <[email protected]>
- Loading branch information
Showing
231 changed files
with
10,857 additions
and
54 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
/* | ||
* SPDX-FileCopyrightText: Copyright 2022-2023 Arm Limited and/or its affiliates <[email protected]> | ||
* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the License); you may | ||
* not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT | ||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
/* ---------------------------------------------------------------------- | ||
* Project: CMSIS NN Library | ||
* Title: arm_convolve_1x1_s4.c | ||
* Description: Generic s4 version of 1x1 convolution | ||
* | ||
* $Date: 01 November 2023 | ||
* $Revision: V.1.0.0 | ||
* | ||
* Target : Arm(R) M-Profile Architecture | ||
* | ||
* -------------------------------------------------------------------- */ | ||
|
||
#include "arm_nnfunctions.h" | ||
#include "arm_nnsupportfunctions.h" | ||
|
||
/** | ||
* @ingroup Public | ||
*/ | ||
|
||
/** | ||
* @addtogroup NNConv | ||
* @{ | ||
*/ | ||
|
||
/* | ||
* A more generic version of s4 1x1 convolution intended for non-unity strides. This is slower | ||
* than the _fast() version if used for unity stride values. | ||
* | ||
* Refer header file for details. | ||
* | ||
*/ | ||
arm_cmsis_nn_status arm_convolve_1x1_s4(const cmsis_nn_context *ctx, | ||
const cmsis_nn_conv_params *conv_params, | ||
const cmsis_nn_per_channel_quant_params *quant_params, | ||
const cmsis_nn_dims *input_dims, | ||
const int8_t *input_data, | ||
const cmsis_nn_dims *filter_dims, | ||
const int8_t *filter_data, | ||
const cmsis_nn_dims *bias_dims, | ||
const int32_t *bias_data, | ||
const cmsis_nn_dims *output_dims, | ||
int8_t *output_data) | ||
{ | ||
(void)ctx; | ||
(void)filter_dims; | ||
(void)bias_dims; | ||
if (conv_params->padding.w != 0 || conv_params->padding.h != 0) | ||
{ | ||
return ARM_CMSIS_NN_ARG_ERROR; | ||
} | ||
|
||
const int32_t lhs_rows = output_dims->w; | ||
const int32_t rhs_rows = output_dims->c; | ||
const int32_t rhs_cols = input_dims->c; | ||
const int32_t stride_w = conv_params->stride.w; | ||
const int32_t input_inc = input_dims->w * conv_params->stride.h * rhs_cols; | ||
const int32_t output_inc = output_dims->w * rhs_rows; | ||
const int32_t output_h = output_dims->h; | ||
const int32_t batch = input_dims->n; | ||
const int8_t *input_data_ref = input_data; | ||
|
||
for (int i_batch = 0; i_batch < batch; i_batch++) | ||
{ | ||
input_data = input_data_ref + (i_batch * rhs_cols * input_dims->w * input_dims->h); | ||
for (int i_output_h = 0; i_output_h < output_h; i_output_h++) | ||
{ | ||
// Process one input row | ||
arm_cmsis_nn_status result = arm_nn_mat_mult_nt_t_s4(input_data, | ||
filter_data, | ||
bias_data, | ||
output_data, | ||
quant_params->multiplier, | ||
quant_params->shift, | ||
lhs_rows, | ||
rhs_rows, | ||
rhs_cols, | ||
conv_params->input_offset, | ||
conv_params->output_offset, | ||
conv_params->activation.min, | ||
conv_params->activation.max, | ||
rhs_cols * stride_w); | ||
if (result != ARM_CMSIS_NN_SUCCESS) | ||
{ | ||
return result; | ||
} | ||
input_data += input_inc; | ||
output_data += output_inc; | ||
} | ||
} | ||
|
||
/* Return to application */ | ||
return ARM_CMSIS_NN_SUCCESS; | ||
} | ||
|
||
/** | ||
* @} end of NNConv group | ||
*/ |
Oops, something went wrong.