@@ -38,18 +38,30 @@
 * performance and minimize the memory footprint of neural networks on Cortex-M processor cores.
 *
 * The library is divided into a number of functions each covering a specific category:
- * - Neural Network Convolution Functions
- * - Neural Network Activation Functions
+ * - Convolution Functions
+ * - Activation Functions
 * - Fully-connected Layer Functions
- * - Neural Network Pooling Functions
+ * - Pooling Functions
 * - Softmax Functions
- * - Neural Network Support Functions
+ * - Basic math Functions
+ * - Concatenation Functions
 *
 * The library has separate functions for operating on different weight and activation data
- * types including 8-bit integers (q7_t) and 16-bit integers (q15_t). The descrition of the
- * kernels are included in the function description. The implementation details are also
+ * types including 8-bit integers (q7_t) and 16-bit integers (q15_t). The description of the
+ * kernels is included in the function documentation. The implementation details are also
 * described in this paper [1].
 *
+ * Function Classification
+ * --------
+ * The functions can be classified into two segments:
+ * - Legacy functions supporting Arm's internal symmetric quantization (8 bits).
+ * - Functions that support the TensorFlow Lite framework with symmetric quantization (8 bits).
+ *
+ * The legacy functions can be identified by their _q7 or _q15 suffix; no new development is done for them. The article in [2] describes in detail
+ * how to run a network using the legacy functions.
+ *
+ * The functions that support the TensorFlow Lite framework are identified by the _s8 suffix and can be invoked from TensorFlow Lite Micro. These functions are bit exact to
+ * TensorFlow Lite. Refer to TensorFlow's documentation in [3] on how to run a TensorFlow Lite model using optimized CMSIS-NN kernels.
+ *
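+ * As an illustration of the naming convention (argument lists omitted):
+ * \code{.c}
+ * arm_fully_connected_q7(...);   // legacy kernel, Arm internal quantization
+ * arm_fully_connected_s8(...);   // kernel that is bit exact to TensorFlow Lite
+ * \endcode
+ *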
 * Block Diagram
 * --------
 * \image html CMSIS-NN-OVERVIEW.PNG
@@ -62,15 +74,20 @@
 * Pre-processor Macros
 * ------------
 *
- * Each library project have differant pre-processor macros.
+ * Each library project has different pre-processor macros.
 *
 * - ARM_MATH_DSP:
 *
- * Define macro ARM_MATH_DSP, If the silicon supports DSP instructions.
+ * Define the macro ARM_MATH_DSP if the silicon supports DSP instructions.
 *
+ * - ARM_MATH_MVEI:
+ *
+ * Define the macro ARM_MATH_MVEI if the silicon supports the M-Profile Vector Extension.
+ *
 * - ARM_MATH_BIG_ENDIAN:
 *
- * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. By default library builds for little endian targets.
+ * Define the macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. This is supported only for the legacy functions, i.e., functions targeted at
+ * TensorFlow Lite do not support big endianness. By default the library builds for little endian targets.
 *
 * - ARM_NN_TRUNCATE:
 *
@@ -79,14 +96,20 @@
 * Copyright Notice
 * ------------
 *
- * Copyright (C) 2010-2018 Arm Limited. All rights reserved.
+ * Copyright (C) 2010-2019 Arm Limited. All rights reserved.
 *
 * [1] CMSIS-NN: Efficient Neural Network Kernels for Arm Cortex-M CPUs https://arxiv.org/abs/1801.06601
+ *
+ * [2] Converting a Neural Network for Arm Cortex-M with CMSIS-NN
+ * https://developer.arm.com/solutions/machine-learning-on-arm/developer-material/how-to-guides/converting-a-neural-network-for-arm-cortex-m-with-cmsis-nn/single-page
+ * [3] https://www.tensorflow.org/lite/microcontrollers/library
+ *
 */

 /**
 * @defgroup groupNN Neural Network Functions
- * These functions perform basic operations for neural network layers.
+ * A collection of functions to perform basic operations for neural network layers. Functions with a _s8 suffix support
+ * the TensorFlow Lite framework.
 */

 #ifndef _ARM_NNFUNCTIONS_H
@@ -105,9 +128,9 @@ extern "C"
 #endif

 /**
- * @defgroup NNConv Neural Network Convolution Functions
+ * @defgroup NNConv Convolution Functions
 *
- * Perform convolution layer
+ * Collection of convolution and depthwise convolution functions and their variants.
 *
 * The convolution is implemented in 2 steps: im2col and GEMM
 *
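+ * A much simplified sketch of the two steps for one output pixel (conceptual only,
+ * not the library's optimized implementation; unit stride, no padding, and
+ * requantize() stands in for the final shift-and-saturate step):
+ * \code{.c}
+ * // im2col: gather the K x K x C_in input patch under output pixel (x, y)
+ * for (int ky = 0; ky < K; ky++)
+ *     for (int kx = 0; kx < K; kx++)
+ *         for (int c = 0; c < C_in; c++)
+ *             col[(ky * K + kx) * C_in + c] = in[((y + ky) * W_in + (x + kx)) * C_in + c];
+ * // GEMM: every output channel is a dot product of its weights with that column
+ * for (int m = 0; m < C_out; m++) {
+ *     int32_t acc = bias[m];
+ *     for (int i = 0; i < K * K * C_in; i++)
+ *         acc += wt[m * K * K * C_in + i] * col[i];
+ *     out[(y * W_out + x) * C_out + m] = requantize(acc);
+ * }
+ * \endcode
+ *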
@@ -920,7 +943,7 @@ int32_t arm_depthwise_conv_s8_opt_get_buffer_size(const uint16_t input_ch,
 /**
 * @defgroup FC Fully-connected Layer Functions
 *
- * Perform fully-connected layer
+ * Collection of fully-connected and matrix multiplication functions.
 *
 * Fully-connected layer is basically a matrix-vector multiplication
 * with bias. The matrix is the weights and the input/output vectors
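+ *
+ * Conceptually (a plain sketch, ignoring the quantization and shift parameters):
+ * \code{.c}
+ * // out = W * in + bias; W has num_of_rows rows and dim_vec columns
+ * for (int r = 0; r < num_of_rows; r++) {
+ *     int32_t acc = bias[r];
+ *     for (int c = 0; c < dim_vec; c++)
+ *         acc += weights[r * dim_vec + c] * input[c];
+ *     output[r] = (q7_t)acc; // the real kernels shift and saturate the accumulator
+ * }
+ * \endcode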
@@ -1279,7 +1302,7 @@ extern "C"
 /**
 * @defgroup BasicMath Basic math functions
 *
- * Perform element wise add and multiplication operations.
+ * Element wise add and multiplication functions.
 *
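+ * For instance, the elementwise add conceptually performs the following (simplified;
+ * the s8 kernels additionally apply per-tensor offsets and requantization):
+ * \code{.c}
+ * for (int i = 0; i < block_size; i++) {
+ *     int32_t sum = input_1[i] + input_2[i];
+ *     if (sum < out_activation_min) sum = out_activation_min;
+ *     if (sum > out_activation_max) sum = out_activation_max;
+ *     output[i] = (int8_t)sum;
+ * }
+ * \endcode
+ *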
 */

@@ -1349,7 +1372,7 @@ extern "C"
                              const int32_t out_activation_max,
                              const uint32_t block_size);
 /**
- * @defgroup Acti Neural Network Activation Functions
+ * @defgroup Acti Activation Functions
 *
 * Perform activation layers, including ReLU (Rectified Linear Unit),
 * sigmoid and tanh
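+ *
+ * A minimal usage sketch for the in-place legacy ReLU (a 1024-element q7_t
+ * activation buffer is assumed):
+ * \code{.c}
+ * extern q7_t buf[1024];  // a layer's pre-activation values
+ * arm_relu_q7(buf, 1024); // negative values are clamped to zero, in place
+ * \endcode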
@@ -1415,7 +1438,7 @@ extern "C"
                            arm_nn_activation_type type);

 /**
- * @defgroup Pooling Neural Network Pooling Functions
+ * @defgroup Pooling Pooling Functions
 *
 * Perform pooling functions, including max pooling and average pooling
 *
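+ * For example, 2x2 max pooling with stride 2 conceptually reduces to (single
+ * channel, no padding):
+ * \code{.c}
+ * for (int y = 0; y < H / 2; y++) {
+ *     for (int x = 0; x < W / 2; x++) {
+ *         q7_t m = in[2 * y * W + 2 * x];
+ *         if (in[2 * y * W + 2 * x + 1] > m)       m = in[2 * y * W + 2 * x + 1];
+ *         if (in[(2 * y + 1) * W + 2 * x] > m)     m = in[(2 * y + 1) * W + 2 * x];
+ *         if (in[(2 * y + 1) * W + 2 * x + 1] > m) m = in[(2 * y + 1) * W + 2 * x + 1];
+ *         out[y * (W / 2) + x] = m;
+ *     }
+ * }
+ * \endcode
+ *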
@@ -1612,7 +1635,7 @@ extern "C"
 /**
 * @defgroup Softmax Softmax Functions
 *
- * EXP(2) based softmax function
+ * EXP(2) based softmax functions, i.e. the exponentials are computed in base 2 (2^x rather than e^x).
 *
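+ * Conceptually (a floating point sketch; the actual kernels work in fixed point):
+ * \code{.c}
+ * // y_i = 2^(x_i - max) / sum_j 2^(x_j - max)
+ * float max = x[0], sum = 0.0f;
+ * for (int i = 1; i < n; i++) if (x[i] > max) max = x[i];
+ * for (int i = 0; i < n; i++) sum += exp2f(x[i] - max);
+ * for (int i = 0; i < n; i++) y[i] = exp2f(x[i] - max) / sum;
+ * \endcode
+ *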
 */

@@ -1787,6 +1810,11 @@ void arm_softmax_u8(const uint8_t *input,
                    int8_t *output,
                    const uint32_t total_size);

+/**
+ * @defgroup Concatenation Concatenation Functions
+ *
+ * Collection of functions to concatenate tensors along a given axis.
+ *
+ */
+
 /**
 * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the X axis
 *        This function should be called for each input tensor to concatenate. The argument offset_x
@@ -1817,7 +1845,6 @@ void arm_softmax_u8(const uint8_t *input,
 * @param[in]  output_x Width of output tensor
 * @param[in]  offset_x The offset (in number of elements) on the X axis to start concatenating the input tensor
- *                      It is user responsibility to provide the correct value
- * @return None
+ *                      It is the user's responsibility to provide the correct value
 *
 * <b> Input constraints</b>
 * offset_x is less than output_x
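+ *
+ * For example, concatenating two tensors along X (a usage sketch; the argument
+ * order follows the parameter list above and should be verified against the
+ * actual declaration):
+ * \code{.c}
+ * // 3x4x2x1 and 5x4x2x1 inputs -> 8x4x2x1 output
+ * arm_concatenation_s8_x(in1, 3, 4, 2, 1, out, 8, 0); // fills x = [0, 3)
+ * arm_concatenation_s8_x(in2, 5, 4, 2, 1, out, 8, 3); // fills x = [3, 8)
+ * \endcode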
@@ -1862,7 +1889,6 @@ void arm_softmax_u8(const uint8_t *input,
 * @param[in]  output_y Height of output tensor
 * @param[in]  offset_y The offset on the Y axis to start concatenating the input tensor
- *                      It is user responsibility to provide the correct value
- * @return None
+ *                      It is the user's responsibility to provide the correct value
 *
 * <b> Input constraints</b>
 * offset_y is less than output_y
@@ -1907,7 +1933,6 @@ void arm_softmax_u8(const uint8_t *input,
 * @param[in]  output_z Channels in output tensor
 * @param[in]  offset_z The offset on the Z axis to start concatenating the input tensor
- *                      It is user responsibility to provide the correct value
- * @return None
+ *                      It is the user's responsibility to provide the correct value
 *
 * <b> Input constraints</b>
 * offset_z is less than output_z
@@ -1951,7 +1976,6 @@ void arm_softmax_u8(const uint8_t *input,
 * @param[out] output   Pointer to output tensor
 * @param[in]  offset_w The offset on the W axis to start concatenating the input tensor
- *                      It is user responsibility to provide the correct value
- * @return None
+ *                      It is the user's responsibility to provide the correct value
 *
 */
 void arm_concatenation_s8_w(const int8_t *input,