6 years ago · 084ca21287
--- a/CMSIS/NN/Include/arm_nnfunctions.h
+++ b/CMSIS/NN/Include/arm_nnfunctions.h
@@ -1,5 +1,5 @@
 
				 /*
			
 
				- * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved.
			
 
				+ * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
			
 
				  *
			
 
				  * SPDX-License-Identifier: Apache-2.0
			
 
				  *
			
@@ -21,8 +21,8 @@
 
				  * Title:        arm_nnfunctions.h
			
 
				  * Description:  Public header file for CMSIS NN Library
			
 
				  *
			
 
				- * $Date:        25 November 2019
			
 
				- * $Revision:    V.1.0.0
			
 
				+ * $Date:        January 20, 2020
			
 
				+ * $Revision:    V.1.0.1
			
 
				  *
			
 
				  * Target Processor:  Cortex-M cores
			
 
				  * -------------------------------------------------------------------- */
			
@@ -518,7 +518,6 @@ extern    "C"
 
				    *   - Supported framework : TensorFlow Lite Micro
			
 
				    *   - The following constraints on the arguments apply
			
 
				    *      -# input_ch is a multiple of 4
			
 
				-   *      -# output_ch is a multiple of 2
			
 
				    *      -# padding equals 0
			
 
				    *      -# Stride equals 1
			
 
				    *      -# kernel dimension is 1x1 (Not provided in the argument list)
			
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
@@ -21,8 +21,8 @@
 
				  * Title:        arm_convolve_1x1_s8_fast.c
			
 
				  * Description:  Fast q7 version of 1x1 convolution (non-square shape)
			
 
				  *
			
 
				- * $Date:        January 15, 2020
			
 
				- * $Revision:    V.1.0.1
			
 
				+ * $Date:        January 20, 2020
			
 
				+ * $Revision:    V.1.0.2
			
 
				  *
			
 
				  * Target Processor:  Cortex-M cores
			
 
				  *
			
@@ -72,7 +72,7 @@ arm_status arm_convolve_1x1_s8_fast(const q7_t *input,
 
				                                     const uint16_t output_y,
			
 
				                                     q15_t *buffer_a)
			
 
				 {
			
 
				-    if (input_ch % 4 != 0 || output_ch % 2 != 0 ||
			
 
				+    if (input_ch % 4 != 0 ||
			
 
				         pad_x != 0 || pad_y != 0 ||
			
 
				         stride_x != 1 || stride_y != 1)
			
 
				     {
			
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16_reordered.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16_reordered.c
@@ -1,5 +1,5 @@
 
				 /*
			
 
				- * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved.
			
 
				+ * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
			
 
				  *
			
 
				  * SPDX-License-Identifier: Apache-2.0
			
 
				  *
			
@@ -21,8 +21,8 @@
 
				  * Title:        arm_nn_mat_mult_kernel_s8_s16_reordered.c
			
 
				  * Description:  Matrix-multiplication function for convolution with reordered columns
			
 
				  *
			
 
				- * $Date:        August 2019
			
 
				- * $Revision:    V.1.0.0
			
 
				+ * $Date:        January 20, 2020
			
 
				+ * $Revision:    V.1.0.1
			
 
				  *
			
 
				  * Target Processor:  Cortex-M cores
			
 
				  * -------------------------------------------------------------------- */
			
@@ -134,6 +134,51 @@ q7_t *arm_nn_mat_mult_kernel_s8_s16_reordered(const q7_t *input_a,
 
				         ip_a0 += num_col_a;
			
 
				         row_count--;
			
 
				     }
			
 
				+
			
 
				+    if (output_ch & 1)
			
 
				+    {
			
 
				+        /* setup pointers for B */
			
 
				+        const q15_t *ip_b0 = input_b;
			
 
				+        const q15_t *ip_b1 = ip_b0 + num_col_a;
			
 
				+
			
 
				+        /* Init accumulator with bias for channel N + 1 */
			
 
				+        q31_t ch_0_out_0 = *bias;
			
 
				+        q31_t ch_0_out_1 = ch_0_out_0;
			
 
				+
			
 
				+        int32_t col_count = num_col_a / 4;
			
 
				+        while (col_count)
			
 
				+        {
			
 
				+            q31_t a01, a02;
			
 
				+            q31_t b0 = arm_nn_read_q15x2_ia(&ip_b0);
			
 
				+            q31_t b1 = arm_nn_read_q15x2_ia(&ip_b1);
			
 
				+
			
 
				+            ip_a0 = read_and_pad_reordered(ip_a0, &a01, &a02);
			
 
				+
			
 
				+            ch_0_out_0 = __SMLAD(a01, b0, ch_0_out_0);
			
 
				+            ch_0_out_1 = __SMLAD(a01, b1, ch_0_out_1);
			
 
				+
			
 
				+            b0 = arm_nn_read_q15x2_ia(&ip_b0);
			
 
				+            b1 = arm_nn_read_q15x2_ia(&ip_b1);
			
 
				+
			
 
				+            ch_0_out_0 = __SMLAD(a02, b0, ch_0_out_0);
			
 
				+            ch_0_out_1 = __SMLAD(a02, b1, ch_0_out_1);
			
 
				+
			
 
				+            col_count--;
			
 
				+        } /* while over col_count */
			
 
				+
			
 
				+        ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift);
			
 
				+        ch_0_out_0 += out_offset;
			
 
				+        ch_0_out_0 = MAX(ch_0_out_0, activation_min);
			
 
				+        ch_0_out_0 = MIN(ch_0_out_0, activation_max);
			
 
				+        *out_0++ = (q7_t)ch_0_out_0;
			
 
				+
			
 
				+        ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift);
			
 
				+        ch_0_out_1 += out_offset;
			
 
				+        ch_0_out_1 = MAX(ch_0_out_1, activation_min);
			
 
				+        ch_0_out_1 = MIN(ch_0_out_1, activation_max);
			
 
				+        *out_1++ = (q7_t)ch_0_out_1;
			
 
				+    }
			
 
				+
			
 
				     out_0 += output_ch;
			
 
				 
			
 
				     /* return the new output pointer with offset */