arm_convolve_1_x_n_s8.c 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. /*
  2. * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
  3. *
  4. * SPDX-License-Identifier: Apache-2.0
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the License); you may
  7. * not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  14. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. /* ----------------------------------------------------------------------
  19. * Project: CMSIS NN Library
  20. * Title: arm_convolve_1_x_n_s8.c
  21. * Description: s8 version of 1xN convolution using symmetric quantization.
  22. *
  23. * $Date: 20 February 2024
  24. * $Revision: V.3.5.1
  25. *
  26. * Target : Arm(R) M-Profile Architecture
  27. *
  28. * -------------------------------------------------------------------- */
  29. #include "arm_nnfunctions.h"
  30. #include "arm_nnsupportfunctions.h"
  31. /**
  32. * @ingroup Public
  33. */
  34. /**
  35. * @addtogroup NNConv
  36. * @{
  37. */
  38. /*
  39. * 1xN s8 convolution function.
  40. *
  41. * Refer to the header file for details.
  42. *
  43. */
  44. arm_cmsis_nn_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx,
  45. const cmsis_nn_conv_params *conv_params,
  46. const cmsis_nn_per_channel_quant_params *quant_params,
  47. const cmsis_nn_dims *input_dims,
  48. const int8_t *input_data,
  49. const cmsis_nn_dims *filter_dims,
  50. const int8_t *filter_data,
  51. const cmsis_nn_dims *bias_dims,
  52. const int32_t *bias_data,
  53. const cmsis_nn_dims *output_dims,
  54. int8_t *output_data)
  55. {
  56. arm_cmsis_nn_status status = ARM_CMSIS_NN_SUCCESS;
  57. int32_t buffer_size = arm_convolve_1_x_n_s8_get_buffer_size(conv_params, input_dims, filter_dims, output_dims);
  58. /* The wrapper API is the ultimate reference for argument check */
  59. if ((input_dims->h != 1) || conv_params->dilation.w != 1 || (buffer_size != 0 && ctx->buf == NULL) ||
  60. conv_params->stride.w == 0 || (conv_params->stride.w * input_dims->c % 4 != 0))
  61. {
  62. status = ARM_CMSIS_NN_ARG_ERROR;
  63. goto out;
  64. }
  65. #if defined(ARM_MATH_MVEI)
  66. (void)bias_dims;
  67. const uint16_t input_x = input_dims->w;
  68. const uint16_t kernel_x = filter_dims->w;
  69. const uint16_t output_x = output_dims->w;
  70. const uint16_t output_ch = output_dims->c;
  71. const uint16_t input_ch = input_dims->c;
  72. const uint16_t pad_x = conv_params->padding.w;
  73. const uint16_t stride_x = conv_params->stride.w;
  74. // Total pad for dilation of 1
  75. const int32_t total_pad = ((output_x - 1) * stride_x + kernel_x - input_x);
  76. const int32_t asym_pad = total_pad % 2;
  77. if (pad_x * 2 + asym_pad != total_pad)
  78. {
  79. return ARM_CMSIS_NN_FAILURE;
  80. }
  81. const int32_t right_pad_num = pad_x + asym_pad != 0 ? MAX(1, (pad_x + asym_pad + stride_x - 1) / stride_x) : 0;
  82. const int32_t left_pad_num = pad_x != 0 ? MAX(1, (pad_x + stride_x - 1) / stride_x) : 0;
  83. const int32_t no_pad_num = MAX(output_x - (right_pad_num + left_pad_num), 0);
  84. if (right_pad_num + no_pad_num + left_pad_num != output_x)
  85. {
  86. return arm_convolve_s8(ctx,
  87. conv_params,
  88. quant_params,
  89. input_dims,
  90. input_data,
  91. filter_dims,
  92. filter_data,
  93. bias_dims,
  94. bias_data,
  95. output_dims,
  96. output_data);
  97. }
  98. for (int i_batch = 0; i_batch < input_dims->n; i_batch++)
  99. {
  100. // Handle left padded sections
  101. int32_t lhs_rows = left_pad_num;
  102. const int32_t rhs_cols = kernel_x * input_dims->c;
  103. const int32_t rhs_rows = output_dims->c;
  104. const int32_t lhs_offset = input_ch * stride_x;
  105. int32_t out_idx = 0;
  106. for (int i = 0; i < lhs_rows; i++)
  107. {
  108. const int32_t est_input_x_idx = stride_x * i - pad_x;
  109. const int32_t ker_begin_idx = -est_input_x_idx;
  110. const int32_t actual_kernel_len = kernel_x - ker_begin_idx;
  111. status = arm_nn_mat_mul_core_1x_s8(actual_kernel_len * input_ch,
  112. ker_begin_idx * input_ch,
  113. input_data,
  114. filter_data + (ker_begin_idx * input_ch),
  115. output_ch,
  116. conv_params,
  117. quant_params,
  118. bias_data,
  119. output_data);
  120. output_data += output_ch;
  121. }
  122. out_idx += lhs_rows;
  123. int32_t input_start = stride_x * lhs_rows - pad_x;
  124. if (input_start < 0)
  125. {
  126. return ARM_CMSIS_NN_FAILURE;
  127. }
  128. /* Non padded elements */
  129. input_start *= input_ch;
  130. lhs_rows = no_pad_num;
  131. arm_nn_mat_mult_nt_t_s8(input_data + input_start,
  132. filter_data,
  133. bias_data,
  134. output_data,
  135. quant_params->multiplier,
  136. quant_params->shift,
  137. lhs_rows,
  138. rhs_rows,
  139. rhs_cols,
  140. conv_params->input_offset,
  141. conv_params->output_offset,
  142. conv_params->activation.min,
  143. conv_params->activation.max,
  144. rhs_rows,
  145. lhs_offset);
  146. output_data += lhs_rows * rhs_rows;
  147. /* Right padded elements */
  148. out_idx += lhs_rows;
  149. lhs_rows = output_x - out_idx;
  150. if (lhs_rows < 0)
  151. {
  152. return ARM_CMSIS_NN_FAILURE;
  153. }
  154. for (int i = out_idx; i < output_x; i++)
  155. {
  156. const int32_t est_input_x_idx = stride_x * i - pad_x;
  157. const int32_t ker_end_idx = MIN(kernel_x, input_x - est_input_x_idx);
  158. status = arm_nn_mat_mul_core_1x_s8(ker_end_idx * input_ch,
  159. (kernel_x - ker_end_idx) * input_ch,
  160. input_data + est_input_x_idx * input_ch,
  161. filter_data,
  162. output_ch,
  163. conv_params,
  164. quant_params,
  165. bias_data,
  166. output_data);
  167. output_data += output_ch;
  168. }
  169. /* Advance to the next batch */
  170. input_data += (input_x * input_ch);
  171. }
  172. #else
  173. status = arm_convolve_s8(ctx,
  174. conv_params,
  175. quant_params,
  176. input_dims,
  177. input_data,
  178. filter_dims,
  179. filter_data,
  180. bias_dims,
  181. bias_data,
  182. output_dims,
  183. output_data);
  184. #endif
  185. out:
  186. /* Return to application */
  187. return status;
  188. }
  189. /**
  190. * @} end of NNConv group
  191. */