Просмотр исходного кода

CMSIS-NN: revert intrinsics for vdup(MVE)

The new _x intrinsic (vdupq_x_n_s16) is not available for vdup in ARMC6.14.
Reverting back to the old one (vdupq_m_n_s16 with an uninitialized inactive vector).

Change-Id: I9b10efa82543bee96c5bc45c2e2a9e0ec07f3a88
Felix Johnny 5 лет назад
Родитель
Commit
cf92cdcfe8
1 измененных файлов с 4 добавлено и 4 удалено
  1. 4 4
      CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c

+ 4 - 4
CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c

@@ -21,8 +21,8 @@
  * Title:        arm_nn_mat_mult_s8.c
  * Title:        arm_nn_mat_mult_s8.c
  * Description:  General Matrix-multiplication function
  * Description:  General Matrix-multiplication function
  *
  *
- * $Date:        March 1, 2020
- * $Revision:    V.2.0.0
+ * $Date:        March 5, 2020
+ * $Revision:    V.2.0.1
  *
  *
  * Target Processor:  Cortex-M cores
  * Target Processor:  Cortex-M cores
  * -------------------------------------------------------------------- */
  * -------------------------------------------------------------------- */
@@ -74,7 +74,7 @@ q7_t *arm_nn_mat_mult_s8(const q7_t *input_row,
             for (int i_row_loop = 0; i_row_loop < row_loop_cnt; i_row_loop++)
             for (int i_row_loop = 0; i_row_loop < row_loop_cnt; i_row_loop++)
             {
             {
                 mve_pred16_t p = vctp16q(row_len_tmp);
                 mve_pred16_t p = vctp16q(row_len_tmp);
-                const int16x8_t offset = vdupq_x_n_s16(col_offset, p);
+                const int16x8_t offset = vdupq_m_n_s16(vuninitializedq_s16(), col_offset, p);
                 row_len_tmp -= 8;
                 row_len_tmp -= 8;
 
 
                 int16x8_t r0 = vldrbq_z_s16(ip_r0, p);
                 int16x8_t r0 = vldrbq_z_s16(ip_r0, p);
@@ -130,7 +130,7 @@ q7_t *arm_nn_mat_mult_s8(const q7_t *input_row,
                 for (int i_row_loop = 0; i_row_loop < row_loop_cnt; i_row_loop++)
                 for (int i_row_loop = 0; i_row_loop < row_loop_cnt; i_row_loop++)
                 {
                 {
                     const mve_pred16_t p = vctp16q(row_len_tmp);
                     const mve_pred16_t p = vctp16q(row_len_tmp);
-                    const int16x8_t offset = vdupq_x_n_s16(col_offset, p);
+                    const int16x8_t offset = vdupq_m_n_s16(vuninitializedq_s16(), col_offset, p);
                     row_len_tmp -= 8;
                     row_len_tmp -= 8;
 
 
                     int16x8_t r0 = vldrbq_z_s16(ip_r0, p);
                     int16x8_t r0 = vldrbq_z_s16(ip_r0, p);