Преглед на файлове

CMSIS_NN: Remove DSP extension optimization for avg pool

The existing optimization (partial x and y) for the DSP extension
was found not to be bit-exact. As a first step, the optimization
is removed; it will be replaced by a new optimization in a second
step.

Change-Id: Ibd96fd9342a8008fd1c87753672639003076fd0e
Felix Johnny преди 6 години
родител
ревизия
ffd84e819b
променени са 3 файла, в които са добавени 10 реда и са изтрити 140 реда
  1. 3 6
      CMSIS/NN/Include/arm_nnfunctions.h
  2. 1 1
      CMSIS/NN/README.md
  3. 6 133
      CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c

+ 3 - 6
CMSIS/NN/Include/arm_nnfunctions.h

@@ -21,8 +21,8 @@
  * Title:        arm_nnfunctions.h
  * Description:  Public header file for CMSIS NN Library
  *
- * $Date:        February 27, 2020
- * $Revision:    V.1.0.4
+ * $Date:        March 4, 2020
+ * $Revision:    V.1.1.4
  *
  * Target Processor:  Cortex-M cores
  * -------------------------------------------------------------------- */
@@ -979,7 +979,7 @@ extern    "C"
                                        const int32_t dilation_x,
                                        const int32_t dilation_y,
                                        int16_t *buffer_a);
-    
+
 /**
    * @brief Optimized s8 depthwise convolution function with constraint that in_channel equals out_channel
    * @param[in]       input      pointer to input tensor. Range: int8, format: [H,W,in_ch]
@@ -1640,10 +1640,7 @@ extern    "C"
    *                                     <code>ARM_MATH_SUCCESS</code> - Successful operation
    *                                     <code>ARM_MATH_ARGUMENT_ERROR</code> - Implementation not available
    *
-   * @note This pooling function is input-destructive. Input data is undefined after calling this function.
-   *
    * @details
-   *    - The pooling function is implemented as split x-pooling then y-pooling.
    *    - Supported Framework: TensorFlow Lite
    *
    */

+ 1 - 1
CMSIS/NN/README.md

@@ -29,7 +29,7 @@ Group | API | Base Operator | Input Constraints | Additional memory required for
 |[Fully Connected](https://arm-software.github.io/CMSIS_5/NN/html/group__FC.html)||||| |  | |
 || arm_fully_connected_s8() |FULLY CONNECTED & <br/> MAT MUL  | None | 0 | Yes | Yes | |
 |[Pooling](https://arm-software.github.io/CMSIS_5/NN/html/group__Pooling.html)||||| |  ||
-|| arm_avgpool_s8() | AVERAGE POOL | None | input_ch * output_x * 2 | Yes| Yes| Best case is when channels are multiple of 4 or <br/> at the least >= 4 |
+|| arm_avgpool_s8() | AVERAGE POOL | None | None | No| Yes| Best case is when channels are multiple of 4 or <br/> at the least >= 4 |
 || arm_maxpool_s8() | MAX POOL | None | None | No| No|  |
 || arm_maxpool_s8_opt() | MAX POOL | None | input_ch * output_x * 2 | Yes|Yes| Best case is when channels are multiple of 4 or <br/> at the least >= 4 |
 |[Softmax](https://arm-software.github.io/CMSIS_5/NN/html/group__Softmax.html)||||| |  ||

+ 6 - 133
CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c

@@ -21,47 +21,16 @@
  * Title:        arm_avgpool_s8.c
  * Description:  Pooling function implementations
  *
- * $Date:        February 27, 2020
- * $Revision:    V.1.0.1
+ * $Date:        March 4, 2020
+ * $Revision:    V.1.1.1
  *
- * Target Processor:  Cortex-M and Cortex-A cores
+ * Target Processor:  Cortex-M CPUs
  *
  * -------------------------------------------------------------------- */
 
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 
-
-#if defined (ARM_MATH_DSP) && !defined (ARM_MATH_MVEI)
-
-static void buffer_scale_back_q15_to_q7(q15_t * buffer, q7_t * target, uint16_t length, uint16_t scale)
-{
-    int       i;
-
-    for (i = 0; i < length; i++)
-    {
-
-        target[i] = (q7_t) (buffer[i] / scale);
-    }
-}
-
-static void buffer_scale_back_q15_to_q7_and_clamp(q15_t * buffer, q7_t * target, uint16_t length, uint16_t count,const int act_min,
-  const int act_max)
-{
-    int       i;
-    int sum;
-
-    for (i = 0; i < length; i++)
-    {
-        sum = buffer[i] > 0 ? (buffer[i] + count / 2) / count : (buffer[i] - count / 2) / count;
-
-        sum = MAX(sum, act_min);
-        sum = MIN(sum, act_max);
-
-        target[i] = (q7_t) (sum);
-    }
-}
-#endif
 /**
  *  @ingroup groupNN
  */
@@ -78,8 +47,7 @@ static void buffer_scale_back_q15_to_q7_and_clamp(q15_t * buffer, q7_t * target,
    *
    */
 
-#if defined (ARM_MATH_MVEI)
-
+#if defined(ARM_MATH_MVEI)
 
 arm_status arm_avgpool_s8(const int dim_src_height,
                           const int dim_src_width,
@@ -260,97 +228,6 @@ arm_status arm_avgpool_s8(const int dim_src_height,
                           int8_t *dst)
 {
 
-#if defined (ARM_MATH_DSP)
-
-    /* Run the following code for Cortex-M4 and Cortex-M7 */
-
-    q15_t    *buffer = (q15_t *) bufferA;
-    int16_t   i_x, i_y;
-    int16_t   count = 0;
-
-    /* first does the pooling along x axis */
-    for (i_y = 0; i_y < dim_src_height; i_y++)
-    {
-
-        for (i_x = 0; i_x < dim_dst_width; i_x++)
-        {
-            /* for each output sample */
-            q7_t     *target = src + (i_y * dim_src_width + i_x) * ch_src;
-            q7_t     *win_start;
-            q7_t     *win_stop;
-            if (i_x * stride_width - padding_width < 0)
-            {
-                win_start = target;
-            } else
-            {
-                win_start = src + (i_y * dim_src_width + i_x * stride_width - padding_width) * ch_src;
-            }
-
-            if (i_x * stride_width - padding_width + dim_kernel_width >= dim_src_width)
-            {
-                win_stop = src + (i_y * dim_src_width + dim_src_width) * ch_src;
-            } else
-            {
-                win_stop = src + (i_y * dim_src_width + i_x * stride_width - padding_width + dim_kernel_width) * ch_src;
-            }
-            /* first step is to copy over initial data */
-            arm_q7_to_q15_no_shift(win_start, buffer, ch_src);
-            count = 1;
-
-            /* start the average operation from the second part */
-            win_start += ch_src;
-            for (; win_start < win_stop; win_start += ch_src)
-            {
-                arm_nn_accumulate_q7_to_q15(buffer, win_start, ch_src);
-                count++;
-            }
-            buffer_scale_back_q15_to_q7(buffer, target, ch_src, count);
-        }
-    }
-
-
-    /* then does the pooling along y axis */
-    for (i_y = 0; i_y < dim_dst_height; i_y++)
-    {
-        /* for each output row */
-        q7_t     *target = dst + i_y * dim_dst_width * ch_src;
-        q7_t     *row_start;
-        q7_t     *row_end;
-        /* setting the starting row */
-        if (i_y * stride_height - padding_height < 0)
-        {
-            row_start = src;
-        } else
-        {
-            row_start = src + (i_y * stride_height - padding_height) * dim_src_width * ch_src;
-        }
-        /* setting the stopping row */
-        if (i_y * stride_height - padding_height + dim_kernel_height >= dim_src_height)
-        {
-            row_end = src + dim_src_height * dim_src_width * ch_src;
-        } else
-        {
-            row_end = src + (i_y * stride_height - padding_height + dim_kernel_height) * dim_src_width * ch_src;
-        }
-
-        /* copy over the first row */
-        arm_q7_to_q15_no_shift(row_start, buffer, dim_dst_width * ch_src);
-        count = 1;
-
-        /* move over to next row */
-        row_start += ch_src * dim_src_width;
-
-        for (; row_start < row_end; row_start += dim_src_width * ch_src)
-        {
-            arm_nn_accumulate_q7_to_q15(buffer, row_start, dim_dst_width * ch_src);
-
-            count++;
-        }
-        buffer_scale_back_q15_to_q7_and_clamp(buffer, target, dim_dst_width * ch_src, count,act_min,act_max);
-    }
-
-#else
-
 /* Reference C code adapted from CMSIS-NN arm_avepool_q7_HWC.
  */
     (void)bufferA;
@@ -384,22 +261,18 @@ arm_status arm_avgpool_s8(const int dim_src_height,
             }
         }
     }
-#endif
+
     return ARM_MATH_SUCCESS;
 }
 
-#endif /* ARM_MATH_HELIUM */
+#endif /* ARM_MATH_MVEI */
 
 int32_t arm_avgpool_s8_get_buffer_size(const int dim_dst_width,
                                        const int ch_src)
 {
-#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
-    return (ch_src * dim_dst_width) * sizeof(int16_t);
-#else
     (void)dim_dst_width;
     (void)ch_src;
     return 0;
-#endif
 }
 /**
  * @} end of Pooling group