преди 6 години · ffd84e819b
--- a/CMSIS/NN/Include/arm_nnfunctions.h
+++ b/CMSIS/NN/Include/arm_nnfunctions.h
@@ -21,8 +21,8 @@
 
				  * Title:        arm_nnfunctions.h
			
 
				  * Description:  Public header file for CMSIS NN Library
			
 
				  *
			
 
				- * $Date:        February 27, 2020
			
 
				- * $Revision:    V.1.0.4
			
 
				+ * $Date:        March 4, 2020
			
 
				+ * $Revision:    V.1.1.4
			
 
				  *
			
 
				  * Target Processor:  Cortex-M cores
			
 
				  * -------------------------------------------------------------------- */
			
@@ -979,7 +979,7 @@ extern    "C"
 
				                                        const int32_t dilation_x,
			
 
				                                        const int32_t dilation_y,
			
 
				                                        int16_t *buffer_a);
			
 
				-    
			
 
				+
			
 
				 /**
			
 
				    * @brief Optimized s8 depthwise convolution function with constraint that in_channel equals out_channel
			
 
				    * @param[in]       input      pointer to input tensor. Range: int8, format: [H,W,in_ch]
			
@@ -1640,10 +1640,7 @@ extern    "C"
 
				    *                                     <code>ARM_MATH_SUCCESS</code> - Successful operation
			
 
				    *                                     <code>ARM_MATH_ARGUMENT_ERROR</code> - Implementation not available
			
 
				    *
			
 
				-   * @note This pooling function is input-destructive. Input data is undefined after calling this function.
			
 
				-   *
			
 
				    * @details
			
 
				-   *    - The pooling function is implemented as split x-pooling then y-pooling.
			
 
				    *    - Supported Framework: TensorFlow Lite
			
 
				    *
			
 
				    */
			
--- a/CMSIS/NN/README.md
+++ b/CMSIS/NN/README.md
@@ -29,7 +29,7 @@ Group | API | Base Operator | Input Constraints | Additional memory required for
 
				 |[Fully Connected](https://arm-software.github.io/CMSIS_5/NN/html/group__FC.html)||||| |  | |
			
 
				 || arm_fully_connected_s8() |FULLY CONNECTED & <br/> MAT MUL  | None | 0 | Yes | Yes | |
			
 
				 |[Pooling](https://arm-software.github.io/CMSIS_5/NN/html/group__Pooling.html)||||| |  ||
			
 
				-|| arm_avgpool_s8() | AVERAGE POOL | None | input_ch * output_x * 2 | Yes| Yes| Best case case is when channels are multiple of 4 or <br/> at the least >= 4 |
			
 
				+|| arm_avgpool_s8() | AVERAGE POOL | None | None | No| Yes| Best case case is when channels are multiple of 4 or <br/> at the least >= 4 |
			
 
				 || arm_maxpool_s8() | MAX POOL | None | None | No| No|  |
			
 
				 || arm_maxpool_s8_opt() | MAX POOL | None | input_ch * output_x * 2 | Yes|Yes| Best case case is when channels are multiple of 4 or <br/> at the least >= 4 |
			
 
				 |[Softmax](https://arm-software.github.io/CMSIS_5/NN/html/group__Softmax.html)||||| |  ||
			
--- a/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c
+++ b/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c
@@ -21,47 +21,16 @@
 
				  * Title:        arm_avgpool_s8.c
			
 
				  * Description:  Pooling function implementations
			
 
				  *
			
 
				- * $Date:        February 27, 2020
			
 
				- * $Revision:    V.1.0.1
			
 
				+ * $Date:        March 4,2020
			
 
				+ * $Revision:    V.1.1.1
			
 
				  *
			
 
				- * Target Processor:  Cortex-M and Cortex-A cores
			
 
				+ * Target Processor:  Cortex-M CPUs
			
 
				  *
			
 
				  * -------------------------------------------------------------------- */
			
 
				 
			
 
				 #include "arm_math.h"
			
 
				 #include "arm_nnfunctions.h"
			
 
				 
			
 
				-
			
 
				-#if defined (ARM_MATH_DSP) && !defined (ARM_MATH_MVEI)
			
 
				-
			
 
				-static void buffer_scale_back_q15_to_q7(q15_t * buffer, q7_t * target, uint16_t length, uint16_t scale)
			
 
				-{
			
 
				-    int       i;
			
 
				-
			
 
				-    for (i = 0; i < length; i++)
			
 
				-    {
			
 
				-
			
 
				-        target[i] = (q7_t) (buffer[i] / scale);
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-static void buffer_scale_back_q15_to_q7_and_clamp(q15_t * buffer, q7_t * target, uint16_t length, uint16_t count,const int act_min,
			
 
				-  const int act_max)
			
 
				-{
			
 
				-    int       i;
			
 
				-    int sum;
			
 
				-
			
 
				-    for (i = 0; i < length; i++)
			
 
				-    {
			
 
				-        sum = buffer[i] > 0 ? (buffer[i] + count / 2) / count : (buffer[i] - count / 2) / count;
			
 
				-
			
 
				-        sum = MAX(sum, act_min);
			
 
				-        sum = MIN(sum, act_max);
			
 
				-
			
 
				-        target[i] = (q7_t) (sum);
			
 
				-    }
			
 
				-}
			
 
				-#endif
			
 
				 /**
			
 
				  *  @ingroup groupNN
			
 
				  */
			
@@ -78,8 +47,7 @@ static void buffer_scale_back_q15_to_q7_and_clamp(q15_t * buffer, q7_t * target,
 
				    *
			
 
				    */
			
 
				 
			
 
				-#if defined (ARM_MATH_MVEI)
			
 
				-
			
 
				+#if defined(ARM_MATH_MVEI)
			
 
				 
			
 
				 arm_status arm_avgpool_s8(const int dim_src_height,
			
 
				                           const int dim_src_width,
			
@@ -260,97 +228,6 @@ arm_status arm_avgpool_s8(const int dim_src_height,
 
				                           int8_t *dst)
			
 
				 {
			
 
				 
			
 
				-#if defined (ARM_MATH_DSP)
			
 
				-
			
 
				-    /* Run the following code for Cortex-M4 and Cortex-M7 */
			
 
				-
			
 
				-    q15_t    *buffer = (q15_t *) bufferA;
			
 
				-    int16_t   i_x, i_y;
			
 
				-    int16_t   count = 0;
			
 
				-
			
 
				-    /* first does the pooling along x axis */
			
 
				-    for (i_y = 0; i_y < dim_src_height; i_y++)
			
 
				-    {
			
 
				-
			
 
				-        for (i_x = 0; i_x < dim_dst_width; i_x++)
			
 
				-        {
			
 
				-            /* for each output sample */
			
 
				-            q7_t     *target = src + (i_y * dim_src_width + i_x) * ch_src;
			
 
				-            q7_t     *win_start;
			
 
				-            q7_t     *win_stop;
			
 
				-            if (i_x * stride_width - padding_width < 0)
			
 
				-            {
			
 
				-                win_start = target;
			
 
				-            } else
			
 
				-            {
			
 
				-                win_start = src + (i_y * dim_src_width + i_x * stride_width - padding_width) * ch_src;
			
 
				-            }
			
 
				-
			
 
				-            if (i_x * stride_width - padding_width + dim_kernel_width >= dim_src_width)
			
 
				-            {
			
 
				-                win_stop = src + (i_y * dim_src_width + dim_src_width) * ch_src;
			
 
				-            } else
			
 
				-            {
			
 
				-                win_stop = src + (i_y * dim_src_width + i_x * stride_width - padding_width + dim_kernel_width) * ch_src;
			
 
				-            }
			
 
				-            /* first step is to copy over initial data */
			
 
				-            arm_q7_to_q15_no_shift(win_start, buffer, ch_src);
			
 
				-            count = 1;
			
 
				-
			
 
				-            /* start the average operation from the second part */
			
 
				-            win_start += ch_src;
			
 
				-            for (; win_start < win_stop; win_start += ch_src)
			
 
				-            {
			
 
				-                arm_nn_accumulate_q7_to_q15(buffer, win_start, ch_src);
			
 
				-                count++;
			
 
				-            }
			
 
				-            buffer_scale_back_q15_to_q7(buffer, target, ch_src, count);
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-
			
 
				-    /* then does the pooling along y axis */
			
 
				-    for (i_y = 0; i_y < dim_dst_height; i_y++)
			
 
				-    {
			
 
				-        /* for each output row */
			
 
				-        q7_t     *target = dst + i_y * dim_dst_width * ch_src;
			
 
				-        q7_t     *row_start;
			
 
				-        q7_t     *row_end;
			
 
				-        /* setting the starting row */
			
 
				-        if (i_y * stride_height - padding_height < 0)
			
 
				-        {
			
 
				-            row_start = src;
			
 
				-        } else
			
 
				-        {
			
 
				-            row_start = src + (i_y * stride_height - padding_height) * dim_src_width * ch_src;
			
 
				-        }
			
 
				-        /* setting the stopping row */
			
 
				-        if (i_y * stride_height - padding_height + dim_kernel_height >= dim_src_height)
			
 
				-        {
			
 
				-            row_end = src + dim_src_height * dim_src_width * ch_src;
			
 
				-        } else
			
 
				-        {
			
 
				-            row_end = src + (i_y * stride_height - padding_height + dim_kernel_height) * dim_src_width * ch_src;
			
 
				-        }
			
 
				-
			
 
				-        /* copy over the first row */
			
 
				-        arm_q7_to_q15_no_shift(row_start, buffer, dim_dst_width * ch_src);
			
 
				-        count = 1;
			
 
				-
			
 
				-        /* move over to next row */
			
 
				-        row_start += ch_src * dim_src_width;
			
 
				-
			
 
				-        for (; row_start < row_end; row_start += dim_src_width * ch_src)
			
 
				-        {
			
 
				-            arm_nn_accumulate_q7_to_q15(buffer, row_start, dim_dst_width * ch_src);
			
 
				-
			
 
				-            count++;
			
 
				-        }
			
 
				-        buffer_scale_back_q15_to_q7_and_clamp(buffer, target, dim_dst_width * ch_src, count,act_min,act_max);
			
 
				-    }
			
 
				-
			
 
				-#else
			
 
				-
			
 
				 /* Reference C code adapted from CMSIS-NN arm_avepool_q7_HWC.
			
 
				  */
			
 
				     (void)bufferA;
			
@@ -384,22 +261,18 @@ arm_status arm_avgpool_s8(const int dim_src_height,
 
				             }
			
 
				         }
			
 
				     }
			
 
				-#endif
			
 
				+
			
 
				     return ARM_MATH_SUCCESS;
			
 
				 }
			
 
				 
			
 
				-#endif /* ARM_MATH_HELIUM */
			
 
				+#endif /* ARM_MATH_MVEI */
			
 
				 
			
 
				 int32_t arm_avgpool_s8_get_buffer_size(const int dim_dst_width,
			
 
				                                        const int ch_src)
			
 
				 {
			
 
				-#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
			
 
				-    return (ch_src * dim_dst_width) * sizeof(int16_t);
			
 
				-#else
			
 
				     (void)dim_dst_width;
			
 
				     (void)ch_src;
			
 
				     return 0;
			
 
				-#endif
			
 
				 }
			
 
				 /**
			
 
				  * @} end of Pooling group