
CMSIS-NN: Documentation update

1. ARM_MATH_MVEI is explained.
2. Link to TensorFlow documentation is provided.
3. Doxygen warning fixes

Change-Id: I09a796b606a27c7b0c8cf66c9fc38c907548fed7
Felix Johnny 6 years ago
parent
commit
8366f81a3f

+ 43 - 19
CMSIS/NN/Include/arm_nnfunctions.h

@@ -38,18 +38,30 @@
    * performance and minimize the memory footprint of neural networks on Cortex-M processor cores.
    *
    * The library is divided into a number of functions each covering a specific category:
-   * - Neural Network Convolution Functions
-   * - Neural Network Activation Functions
+   * - Convolution Functions
+   * - Activation Functions
    * - Fully-connected Layer Functions
-   * - Neural Network Pooling Functions
+   * - Pooling Functions
    * - Softmax Functions
-   * - Neural Network Support Functions
+   * - Basic math Functions
    *
    * The library has separate functions for operating on different weight and activation data
   * types including 8-bit integers (q7_t) and 16-bit integers (q15_t). The description of the
   * kernels is included in the function description. The implementation details are also
    * described in this paper [1].
    *
+   * Function Classification
+   * --------
+   * The functions can be classified into two segments:
+   * - Legacy functions supporting ARM's internal symmetric quantization (8 bits).
+   * - Functions that support the TensorFlow Lite framework with symmetric quantization (8 bits).
+   *
+   * The legacy functions can be identified by their _q7 or _q15 suffix; no new development is done on them. The article in [2] describes in detail
+   * how to run a network using the legacy functions.
+   *
+   * The functions supporting the TensorFlow Lite framework are identified by the _s8 suffix and can be invoked from TFL micro. The functions are bit exact to
+   * TensorFlow Lite. Refer to TensorFlow's documentation in [3] on how to run a TensorFlow Lite model using optimized CMSIS-NN kernels.
+   *
    * Block Diagram
    * --------
    * \image html CMSIS-NN-OVERVIEW.PNG
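
As a minimal sketch of the naming convention described above (the two legacy calls use real arm_nnfunctions.h prototypes; the _s8 kernels are only named, since their argument lists are longer):

#include "arm_nnfunctions.h"

/* Legacy kernels keep the _q7/_q15 suffix; TensorFlow Lite compatible
 * kernels use the _s8 suffix. */
void activation_suffix_example(q7_t *buf_q7, q15_t *buf_q15, uint16_t size)
{
    arm_relu_q7(buf_q7, size);    /* legacy, 8-bit (q7_t) data   */
    arm_relu_q15(buf_q15, size);  /* legacy, 16-bit (q15_t) data */

    /* TensorFlow Lite compatible counterparts carry an _s8 suffix, e.g.
     * arm_relu6_s8(), arm_elementwise_add_s8(), arm_convolve_s8(). */
}
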
@@ -62,15 +74,20 @@
    * Pre-processor Macros
    * ------------
    *
-   * Each library project have differant pre-processor macros.
+   * Each library project has different pre-processor macros.
    *
    * - ARM_MATH_DSP:
    *
   * Define macro ARM_MATH_DSP if the silicon supports DSP instructions.
    *
+   * - ARM_MATH_MVEI:
+   *
+   * Define macro ARM_MATH_MVEI if the silicon supports the M-Profile Vector Extension.
+   *
    * - ARM_MATH_BIG_ENDIAN:
    *
-   * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. By default library builds for little endian targets.
+   * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. This is supported only for the legacy functions, i.e., functions targeted at
+   * TensorFlow Lite do not support big endianness. By default the library builds for little endian targets.
    *
    * - ARM_NN_TRUNCATE:
    *
@@ -79,14 +96,20 @@
    * Copyright Notice
    * ------------
    *
-   * Copyright (C) 2010-2018 Arm Limited. All rights reserved.
+   * Copyright (C) 2010-2019 Arm Limited. All rights reserved.
    *
    * [1] CMSIS-NN: Efficient Neural Network Kernels for Arm Cortex-M CPUs https://arxiv.org/abs/1801.06601
+   *
+   * [2] Converting a Neural Network for Arm Cortex-M with CMSIS-NN
+   *     https://developer.arm.com/solutions/machine-learning-on-arm/developer-material/how-to-guides/converting-a-neural-network-for-arm-cortex-m-with-cmsis-nn/single-page
+   * [3] https://www.tensorflow.org/lite/microcontrollers/library
+   *
    */
 
 /**
  * @defgroup groupNN Neural Network Functions
- * These functions perform basic operations for neural network layers.
+ * A collection of functions to perform basic operations for neural network layers. Functions with an _s8 suffix support
+ * the TensorFlow Lite framework.
  */
 
 #ifndef _ARM_NNFUNCTIONS_H
@@ -105,9 +128,9 @@ extern    "C"
 #endif
 
 /**
- * @defgroup NNConv Neural Network Convolution Functions
+ * @defgroup NNConv Convolution Functions
  *
- * Perform convolution layer
+ * Collection of convolution and depthwise convolution functions and their variants.
  *
  * The convolution is implemented in 2 steps: im2col and GEMM
  *
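
A conceptual, simplified sketch of the im2col + GEMM split mentioned above (plain C, unit stride, no padding, requantization omitted; every name is illustrative rather than the kernel's actual parameter list):

#include <stdint.h>

static void conv_im2col_gemm_sketch(const int8_t *input, int input_x, int input_ch,
                                    const int8_t *weights, const int32_t *bias,
                                    int kernel_x, int kernel_y,
                                    int8_t *output, int output_x, int output_y,
                                    int output_ch, int32_t *col_buf)
{
    const int col_len = kernel_x * kernel_y * input_ch;

    for (int oy = 0; oy < output_y; oy++)
    {
        for (int ox = 0; ox < output_x; ox++)
        {
            /* im2col: copy the receptive field of one output pixel into a column */
            int idx = 0;
            for (int ky = 0; ky < kernel_y; ky++)
                for (int kx = 0; kx < kernel_x; kx++)
                    for (int c = 0; c < input_ch; c++)
                        col_buf[idx++] = input[((oy + ky) * input_x + (ox + kx)) * input_ch + c];

            /* GEMM: the (output_ch x col_len) weight matrix times that column */
            for (int oc = 0; oc < output_ch; oc++)
            {
                int32_t acc = bias[oc];
                for (int i = 0; i < col_len; i++)
                    acc += (int32_t)weights[oc * col_len + i] * col_buf[i];
                /* the real kernels requantize acc back to int8 here */
                output[(oy * output_x + ox) * output_ch + oc] = (int8_t)acc;
            }
        }
    }
}
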
@@ -920,7 +943,7 @@ int32_t arm_depthwise_conv_s8_opt_get_buffer_size(const uint16_t input_ch,
 /**
  * @defgroup FC Fully-connected Layer Functions
  *
- * Perform fully-connected layer
+ * Collection of fully-connected and matrix multiplication functions.
  *
  * Fully-connected layer is basically a matrix-vector multiplication
  * with bias. The matrix is the weights and the input/output vectors
@@ -1279,7 +1302,7 @@ extern    "C"
 /**
  * @defgroup BasicMath Basic math functions
  *
- * Perform element wise add and multiplication operations.
+ * Element wise add and multiplication functions.
  *
  */
 
@@ -1349,7 +1372,7 @@ extern    "C"
                                     const int32_t out_activation_max,
                                     const uint32_t block_size);
 /**
- * @defgroup Acti Neural Network Activation Functions
+ * @defgroup Acti Activation Functions
  *
  * Perform activation layers, including ReLU (Rectified Linear Unit),
  * sigmoid and tanh
@@ -1415,7 +1438,7 @@ extern    "C"
                                             arm_nn_activation_type type);
 
 /**
- * @defgroup Pooling Neural Network Pooling Functions
+ * @defgroup Pooling Pooling Functions
  *
  * Perform pooling functions, including max pooling and average pooling
  *
@@ -1612,7 +1635,7 @@ extern    "C"
 /**
  * @defgroup Softmax Softmax Functions
  *
- * EXP(2) based softmax function
+ * EXP(2) based softmax functions.
  *
  */
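
Since exp(x) = 2^(x * log2(e)), an EXP(2) based implementation can fold log2(e) into the input scaling and only ever compute powers of two. A floating-point sketch of the idea (the actual kernels work in fixed point):

#include <math.h>

static void softmax_exp2_sketch(const float *input, float *output, int length)
{
    /* subtract the maximum for numerical stability */
    float max_val = input[0];
    for (int i = 1; i < length; i++)
        if (input[i] > max_val)
            max_val = input[i];

    float sum = 0.0f;
    for (int i = 0; i < length; i++)
    {
        output[i] = exp2f((input[i] - max_val) * 1.44269504f); /* log2(e) */
        sum += output[i];
    }

    for (int i = 0; i < length; i++)
        output[i] /= sum;
}
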
 
@@ -1787,6 +1810,11 @@ void arm_softmax_u8(const uint8_t *input,
                         int8_t *output,
                         const uint32_t total_size);
 
+/**
+ * @defgroup Concatenation Concatenation Functions
+ *
+ */
+
   /**
    * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the X axis
    *        This function should be called for each input tensor to concatenate. The argument offset_x
@@ -1817,7 +1845,6 @@ void arm_softmax_u8(const uint8_t *input,
    * @param[in]  output_x Width of output tensor
    * @param[in]  offset_x The offset (in number of elements) on the X axis to start concatenating the input tensor
   *                      It is the user's responsibility to provide the correct value
-   * @return     None
    *
    * <b> Input constraints</b>
    * offset_x is less than output_x
@@ -1862,7 +1889,6 @@ void arm_softmax_u8(const uint8_t *input,
    * @param[in]  output_y Height of output tensor
    * @param[in]  offset_y The offset on the Y axis to start concatenating the input tensor
   *                      It is the user's responsibility to provide the correct value
-   * @return     None
    *
    * <b> Input constraints</b>
    * offset_y is less than output_y
@@ -1907,7 +1933,6 @@ void arm_softmax_u8(const uint8_t *input,
    * @param[in]  output_z Channels in output tensor
    * @param[in]  offset_z The offset on the Z axis to start concatenating the input tensor
   *                      It is the user's responsibility to provide the correct value
-   * @return     None
    *
    * <b> Input constraints</b>
    * offset_z is less than output_z
@@ -1951,7 +1976,6 @@ void arm_softmax_u8(const uint8_t *input,
    * @param[out] output   Pointer to output tensor
    * @param[in]  offset_w The offset on the W axis to start concatenating the input tensor
   *                      It is the user's responsibility to provide the correct value
-   * @return     None
    *
    */
     void arm_concatenation_s8_w(const int8_t *input,
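
A usage sketch for the X-axis variant documented above, assuming the argument order (input, input_x, input_y, input_z, input_w, output, output_x, offset_x); the tensor dimensions are invented for illustration and the exact prototype should be taken from arm_nnfunctions.h:

#include "arm_nnfunctions.h"

/* Concatenate a 2x3x4 tensor and a 5x3x4 tensor (x, y, z; w = 1) along X,
 * producing a 7x3x4 output. One call per input tensor, each passing the X
 * offset at which that tensor starts in the output. */
static int8_t tensor_a[2 * 3 * 4];
static int8_t tensor_b[5 * 3 * 4];
static int8_t out_tensor[7 * 3 * 4];

void concat_x_example(void)
{
    arm_concatenation_s8_x(tensor_a, 2, 3, 4, 1, out_tensor, 7, 0); /* offset_x = 0 */
    arm_concatenation_s8_x(tensor_b, 5, 3, 4, 1, out_tensor, 7, 2); /* offset_x = 2 */
}
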

+ 1 - 1
CMSIS/NN/Include/arm_nnsupportfunctions.h

@@ -179,7 +179,7 @@ void arm_nn_accumulate_q7_to_q15(q15_t *dst, const q7_t *src, uint32_t block_siz
  * @param[in]    activation_max   maximum value to clamp the output to. Range : int8
  * @param[in]    kernel_size   number of elements in one column.
  * @param[in]    output_bias per output channel bias. Range : int32
- * @param[out]   out_0       pointer to output
+ * @param[out]   out         pointer to output
  * @return     The function returns one of the two
  *              1. The incremented output pointer for a successful operation or
  *              2. NULL if implementation is not available.

+ 10 - 9
CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c

@@ -35,6 +35,16 @@
 #include "arm_helium_utils.h"
 #endif
 
+#if defined(ARM_MATH_MVEI)
+#define SAT_INPUT_VECT(__INPUT_V, __MULT, __SHIFT)               \
+  __INPUT_V = arm_mve_sat_doubling_high_mult(__INPUT_V, __MULT); \
+  __INPUT_V = arm_mve_divide_by_power_of_two(__INPUT_V, -__SHIFT);
+#endif
+
+#define SAT_INPUT(__INPUT, __MULT, __SHIFT)                 \
+  __INPUT = arm_nn_sat_doubling_high_mult(__INPUT, __MULT); \
+  __INPUT = arm_nn_divide_by_power_of_two(__INPUT, -__SHIFT);
+
 /**
  *  @ingroup groupNN
  */
@@ -53,15 +63,6 @@
 
 /* Note: __SHIFT is expected to be <=0 */
 
-#if defined(ARM_MATH_MVEI)
-#define SAT_INPUT_VECT(__INPUT_V, __MULT, __SHIFT)               \
-  __INPUT_V = arm_sat_doubling_high_mult_mve(__INPUT_V, __MULT); \
-  __INPUT_V = arm_divide_by_power_of_two_mve(__INPUT_V, -__SHIFT);
-#endif
-
-#define SAT_INPUT(__INPUT, __MULT, __SHIFT)                 \
-  __INPUT = arm_nn_sat_doubling_high_mult(__INPUT, __MULT); \
-  __INPUT = arm_nn_divide_by_power_of_two(__INPUT, -__SHIFT);
 
 arm_status
 arm_elementwise_add_s8(const int8_t *input_1_vect,
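
As a sketch of what one SAT_INPUT application amounts to for a single 32-bit accumulator (only the two arm_nn_* helpers below come from arm_nnsupportfunctions.h; every other name is illustrative):

#include "arm_nnsupportfunctions.h"

/* Requantize a widened sum back towards int8: saturating doubling-high
 * multiply, then rounding divide by a power of two. Note that shift is
 * expected to be <= 0, matching the macro's contract. */
static q7_t requantize_sum_sketch(int32_t acc, int32_t mult, int32_t shift,
                                  int32_t out_offset, int32_t act_min, int32_t act_max)
{
    acc = arm_nn_sat_doubling_high_mult(acc, mult);    /* first SAT_INPUT step  */
    acc = arm_nn_divide_by_power_of_two(acc, -shift);  /* second SAT_INPUT step */
    acc += out_offset;                                 /* add output zero point */
    if (acc < act_min) { acc = act_min; }              /* clamp to the          */
    if (acc > act_max) { acc = act_max; }              /* activation range      */
    return (q7_t)acc;
}
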

+ 3 - 3
CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c

@@ -30,6 +30,9 @@
 
 #include "arm_nnfunctions.h"
 
+#define DIM_KER_X (1U)
+#define DIM_KER_Y (1U)
+
 /**
  *  @ingroup groupNN
  */
@@ -46,9 +49,6 @@
    *
    */
 
-#define DIM_KER_X (1U)
-#define DIM_KER_Y (1U)
-
 arm_status arm_convolve_1x1_s8_fast(const q7_t *input,
                                     const uint16_t input_x,
                                     const uint16_t input_y,

+ 4 - 0
CMSIS/NN/Source/ReshapeFunctions/arm_reshape_s8.c

@@ -52,3 +52,7 @@ void arm_reshape_s8(const int8_t *input,
 {
     memcpy(output, input, total_size);
 }
+
+/**
+ * @} end of Reshape group
+ */