فهرست منبع

Add LSTM support (#9)

Non-SIMD implementation of LSTM as per
Keras version 2.9.0 API is added along
with unit tests.

Co-authored-by: Hannes Mogensen
Co-authored-by: Måns Nilsson <mans.nilsson@arm.com>
Co-authored-by: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>
Måns Nilsson 3 سال پیش
والد
کامیت
9c30d1949b
100فایلهای تغییر یافته به همراه1903 افزوده شده و 55 حذف شده
  1. 9 5
      ARM.CMSIS-NN.pdsc
  2. 6 4
      CMakeLists.txt
  3. 3 2
      Include/arm_nn_tables.h
  4. 115 2
      Include/arm_nn_types.h
  5. 93 17
      Include/arm_nnfunctions.h
  6. 168 2
      Include/arm_nnsupportfunctions.h
  7. 4 4
      Source/ActivationFunctions/CMakeLists.txt
  8. 119 0
      Source/ActivationFunctions/arm_nn_activation_s16.c
  9. 75 0
      Source/BasicMathFunctions/arm_elementwise_mul_s16_s8.c
  10. 11 16
      Source/CMakeLists.txt
  11. 20 0
      Source/LSTMFunctions/CMakeLists.txt
  12. 184 0
      Source/LSTMFunctions/arm_lstm_unidirectional_s8_s16.c
  13. 2 1
      Source/NNSupportFunctions/CMakeLists.txt
  14. 99 0
      Source/NNSupportFunctions/arm_nn_lstm_calculate_gate_s8_s16.c
  15. 156 0
      Source/NNSupportFunctions/arm_nn_lstm_step_s8_s16.c
  16. 67 0
      Source/NNSupportFunctions/arm_nn_lstm_update_cell_state_s16.c
  17. 88 0
      Source/NNSupportFunctions/arm_nn_lstm_update_output_s8_s16.c
  18. 79 0
      Source/NNSupportFunctions/arm_nn_vec_mat_mul_result_acc_s8.c
  19. 24 2
      Source/NNSupportFunctions/arm_nntables.c
  20. 1 0
      Tests/UnitTest/CMakeLists.txt
  21. 2 0
      Tests/UnitTest/PregeneratedData/lstm_1/bias.txt
  22. 11 0
      Tests/UnitTest/PregeneratedData/lstm_1/input.txt
  23. 6 0
      Tests/UnitTest/PregeneratedData/lstm_1/kernel.txt
  24. 4 0
      Tests/UnitTest/PregeneratedData/lstm_1/kernel_hidden.txt
  25. 4 0
      Tests/UnitTest/PregeneratedData/lstm_1/kernel_recurrent.txt
  26. 2 0
      Tests/UnitTest/PregeneratedData/lstm_2/bias.txt
  27. 19 0
      Tests/UnitTest/PregeneratedData/lstm_2/input.txt
  28. 7 0
      Tests/UnitTest/PregeneratedData/lstm_2/kernel.txt
  29. 6 0
      Tests/UnitTest/PregeneratedData/lstm_2/kernel_hidden.txt
  30. 6 0
      Tests/UnitTest/PregeneratedData/lstm_2/kernel_recurrent.txt
  31. 2 0
      Tests/UnitTest/PregeneratedData/lstm_one_time_step/bias.txt
  32. 3 0
      Tests/UnitTest/PregeneratedData/lstm_one_time_step/input.txt
  33. 5 0
      Tests/UnitTest/PregeneratedData/lstm_one_time_step/kernel.txt
  34. 5 0
      Tests/UnitTest/PregeneratedData/lstm_one_time_step/kernel_hidden.txt
  35. 1 0
      Tests/UnitTest/README.md
  36. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/cell_gate_bias_data.h
  37. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/cell_norm_coeff_data.h
  38. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/cell_state_data.h
  39. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/cell_to_forget_data.h
  40. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/cell_to_input_data.h
  41. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/cell_to_output_data.h
  42. 33 0
      Tests/UnitTest/TestCases/TestData/lstm_1/config_data.h
  43. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/forget_gate_bias_data.h
  44. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/forget_norm_coeff_data.h
  45. 8 0
      Tests/UnitTest/TestCases/TestData/lstm_1/input_data.h
  46. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/input_gate_bias_data.h
  47. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/input_norm_coeff_data.h
  48. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/input_to_cell_eff_bias_data.h
  49. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/input_to_cell_w_data.h
  50. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/input_to_forget_eff_bias_data.h
  51. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/input_to_forget_w_data.h
  52. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/input_to_input_eff_bias_data.h
  53. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/input_to_input_w_data.h
  54. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/input_to_output_eff_bias_data.h
  55. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/input_to_output_w_data.h
  56. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/output_gate_bias_data.h
  57. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/output_norm_coeff_data.h
  58. 7 0
      Tests/UnitTest/TestCases/TestData/lstm_1/output_ref_data.h
  59. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/output_state_data.h
  60. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/projection_bias_data.h
  61. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/projection_weights_data.h
  62. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/recurrent_input_to_cell_w_data.h
  63. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/recurrent_input_to_forget_w_data.h
  64. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/recurrent_input_to_input_w_data.h
  65. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/recurrent_input_to_output_w_data.h
  66. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/recurrent_to_cell_eff_bias_data.h
  67. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/recurrent_to_forget_eff_bias_data.h
  68. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/recurrent_to_input_eff_bias_data.h
  69. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_1/recurrent_to_output_eff_bias_data.h
  70. 36 0
      Tests/UnitTest/TestCases/TestData/lstm_1/test_data.h
  71. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_2/cell_gate_bias_data.h
  72. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_2/cell_norm_coeff_data.h
  73. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_2/cell_state_data.h
  74. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_2/cell_to_forget_data.h
  75. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_2/cell_to_input_data.h
  76. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_2/cell_to_output_data.h
  77. 33 0
      Tests/UnitTest/TestCases/TestData/lstm_2/config_data.h
  78. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_2/forget_gate_bias_data.h
  79. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_2/forget_norm_coeff_data.h
  80. 11 0
      Tests/UnitTest/TestCases/TestData/lstm_2/input_data.h
  81. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_2/input_gate_bias_data.h
  82. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_2/input_norm_coeff_data.h
  83. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_2/input_to_cell_eff_bias_data.h
  84. 7 0
      Tests/UnitTest/TestCases/TestData/lstm_2/input_to_cell_w_data.h
  85. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_2/input_to_forget_eff_bias_data.h
  86. 7 0
      Tests/UnitTest/TestCases/TestData/lstm_2/input_to_forget_w_data.h
  87. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_2/input_to_input_eff_bias_data.h
  88. 8 0
      Tests/UnitTest/TestCases/TestData/lstm_2/input_to_input_w_data.h
  89. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_2/input_to_output_eff_bias_data.h
  90. 8 0
      Tests/UnitTest/TestCases/TestData/lstm_2/input_to_output_w_data.h
  91. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_2/output_gate_bias_data.h
  92. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_2/output_norm_coeff_data.h
  93. 11 0
      Tests/UnitTest/TestCases/TestData/lstm_2/output_ref_data.h
  94. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_2/output_state_data.h
  95. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_2/projection_bias_data.h
  96. 6 0
      Tests/UnitTest/TestCases/TestData/lstm_2/projection_weights_data.h
  97. 7 0
      Tests/UnitTest/TestCases/TestData/lstm_2/recurrent_input_to_cell_w_data.h
  98. 7 0
      Tests/UnitTest/TestCases/TestData/lstm_2/recurrent_input_to_forget_w_data.h
  99. 7 0
      Tests/UnitTest/TestCases/TestData/lstm_2/recurrent_input_to_input_w_data.h
  100. 7 0
      Tests/UnitTest/TestCases/TestData/lstm_2/recurrent_input_to_output_w_data.h

+ 9 - 5
ARM.CMSIS-NN.pdsc

@@ -32,6 +32,7 @@
         <file category="header" name="Include/arm_nnfunctions.h"/>
         <file category="header" name="Include/arm_nnsupportfunctions.h"/>
         <file category="header" name="Include/arm_nn_tables.h"/>
+        <file category="header" name="Include/arm_nn_math_types.h"/>
 
         <file category="source" name="Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c"/>
         <file category="source" name="Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c"/>
@@ -65,19 +66,16 @@
         <file category="source" name="Source/BasicMathFunctions/arm_elementwise_mul_s16.c"/>
         <file category="source" name="Source/BasicMathFunctions/arm_elementwise_add_s8.c"/>
         <file category="source" name="Source/BasicMathFunctions/arm_elementwise_add_s16.c"/>
+        <file category="source" name="Source/BasicMathFunctions/arm_elementwise_mul_s16_s8.c"/>
         <file category="source" name="Source/ActivationFunctions/arm_relu6_s8.c"/>
         <file category="source" name="Source/ActivationFunctions/arm_relu_q15.c"/>
         <file category="source" name="Source/ActivationFunctions/arm_relu_q7.c"/>
-        <!-- file category="source" name="Source/ActivationFunctions/arm_nn_activations_q15.c"/ -->
-        <!-- file category="source" name="Source/ActivationFunctions/arm_nn_activations_q7.c"/ -->
+        <file category="source" name="Source/ActivationFunctions/arm_nn_activation_s16.c"/>
         <file category="source" name="Source/ReshapeFunctions/arm_reshape_s8.c"/>
-        <!-- file category="source" name="Source/NNSupportFunctions/arm_nn_mult_q7.c"/ -->
-        <!-- file category="source" name="Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c"/ -->
         <file category="source" name="Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c"/>
         <file category="source" name="Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s16.c"/>
         <file category="source" name="Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_svdf_s8.c"/>
         <file category="source" name="Source/NNSupportFunctions/arm_q7_to_q15_with_offset.c"/>
-        <!-- file category="source" name="Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c"/ -->
         <file category="source" name="Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c"/>
         <file category="source" name="Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s16.c"/>
         <file category="source" name="Source/NNSupportFunctions/arm_nn_mat_mul_kernel_s16.c"/>
@@ -86,8 +84,14 @@
         <file category="source" name="Source/NNSupportFunctions/arm_nntables.c"/>
         <file category="source" name="Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c"/>
         <file category="source" name="Source/NNSupportFunctions/arm_nn_mat_mul_core_1x_s8.c"/>
+        <file category="source" name="Source/NNSupportFunctions/arm_nn_lstm_update_output_s8_s16.c"/>
+        <file category="source" name="Source/NNSupportFunctions/arm_nn_lstm_step_s8_s16.c"/>
+        <file category="source" name="Source/NNSupportFunctions/arm_nn_lstm_update_cell_state_s16.c"/>
+        <file category="source" name="Source/NNSupportFunctions/arm_nn_lstm_calculate_gate_s8_s16.c"/>
+        <file category="source" name="Source/NNSupportFunctions/arm_nn_vec_mat_mul_result_acc_s8.c"/>
         <file category="source" name="Source/FullyConnectedFunctions/arm_fully_connected_s8.c"/>
         <file category="source" name="Source/FullyConnectedFunctions/arm_fully_connected_s16.c"/>
+        <file category="source" name="Source/LSTMFunctions/arm_lstm_unidirectional_s8_s16.c"/>
         <file category="source" name="Source/SoftmaxFunctions/arm_softmax_s8.c"/>
         <file category="source" name="Source/SoftmaxFunctions/arm_nn_softmax_common_s8.c"/>
         <file category="source" name="Source/SoftmaxFunctions/arm_softmax_s8_s16.c"/>

+ 6 - 4
CMakeLists.txt

@@ -26,8 +26,10 @@ if (${CMSIS_PATH} STREQUAL "</path/to/CMSIS>")
   message(FATAL_ERROR "CMSIS_PATH not set. Did you provide -DCMSIS_PATH=<path/to/CMSIS>?")
 endif()
 
-option(BUILD_CMSIS_NN_FUNCTIONS "Build CMSIS-NN Source." ON)
+add_library(cmsis-nn STATIC)
 
-if(BUILD_CMSIS_NN_FUNCTIONS)
-    add_subdirectory(Source)
-endif()
+target_compile_options(cmsis-nn PRIVATE -Ofast)
+
+target_include_directories(cmsis-nn PUBLIC "Include")
+
+add_subdirectory(Source)

+ 3 - 2
Include/arm_nn_tables.h

@@ -21,8 +21,8 @@
  * Title:        arm_nn_tables.h
  * Description:  Extern declaration for NN tables
  *
- * $Date:        30. September 2022
- * $Revision:    V.2.0.0
+ * $Date:        28 October 2022
+ * $Revision:    V.2.1.0
  *
  * Target Processor:  Cortex-M cores
  * -------------------------------------------------------------------- */
@@ -37,5 +37,6 @@
  *
  */
 
+extern const uint16_t sigmoid_table_uint16[256];
 
 #endif /*  ARM_NN_TABLES_H */

+ 115 - 2
Include/arm_nn_types.h

@@ -22,8 +22,8 @@
  * Description:  Public header file to contain the CMSIS-NN structs for the
  *               TensorFlowLite micro compliant functions
  *
- * $Date:        22. Februari 2022
- * $Revision:    V.2.1.0
+ * $Date:        4 November 2022
+ * $Revision:    V.2.2.0
  *
  * Target Processor:  Cortex-M cores
  * -------------------------------------------------------------------- */
@@ -33,6 +33,13 @@
 
 #include <stdint.h>
 
+/** Enum for specifying activation function types */
+typedef enum
+{
+    ARM_SIGMOID = 0, /**< Sigmoid activation function */
+    ARM_TANH = 1,    /**< Tanh activation function */
+} arm_nn_activation_type;
+
 /** CMSIS-NN object to contain the width and height of a tile */
 typedef struct
 {
@@ -57,6 +64,15 @@ typedef struct
     int32_t c; /**< Input channels */
 } cmsis_nn_dims;
 
+/** CMSIS-NN object to contain LSTM specific input parameters related to dimensions */
+typedef struct
+{
+    int32_t max_time;
+    int32_t num_inputs;
+    int32_t num_batches;
+    int32_t num_outputs;
+} cmsis_nn_lstm_dims;
+
 /** CMSIS-NN object for the per-channel quantization parameters */
 typedef struct
 {
@@ -134,4 +150,101 @@ typedef struct
     const int16_t *one_by_one_lut;
 } cmsis_nn_softmax_lut_s16;
 
+/** LSTM guard parameters */
+typedef struct
+{
+    int32_t input_variance;
+    int32_t forget_variance;
+    int32_t cell_variance;
+    int32_t output_variance;
+} cmsis_nn_lstm_guard_params;
+
+/** LSTM scratch buffer container */
+typedef struct
+{
+    int16_t *input_gate;
+    int16_t *forget_gate;
+    int16_t *cell_gate;
+    int16_t *output_gate;
+    int8_t *scratch;
+} cmsis_nn_lstm_context;
+
+/** Quantized clip value for cell and projection of LSTM input. Zero value means no clipping. */
+typedef struct
+{
+    int16_t cell;
+    int8_t projection;
+} cmsis_nn_lstm_clip_params;
+
+/** CMSIS-NN object for quantization parameters */
+typedef struct
+{
+    int32_t multiplier; /**< Multiplier value */
+    int32_t shift;      /**< Shift value */
+} cmsis_nn_scaling;
+
+/** CMSIS-NN norm layer coefficients */
+typedef struct
+{
+    int16_t *input_weight;
+    int16_t *forget_weight;
+    int16_t *cell_weight;
+    int16_t *output_weight;
+} cmsis_nn_layer_norm;
+
+/** Parameters for integer LSTM, as defined in TFLM */
+typedef struct
+{
+    int32_t time_major; /**< Nonzero (true) if first row of data is timestamps for input */
+    cmsis_nn_scaling input_to_input_scaling;
+    cmsis_nn_scaling input_to_forget_scaling;
+    cmsis_nn_scaling input_to_cell_scaling;
+    cmsis_nn_scaling input_to_output_scaling;
+    cmsis_nn_scaling recurrent_to_input_scaling;
+    cmsis_nn_scaling recurrent_to_forget_scaling;
+    cmsis_nn_scaling recurrent_to_cell_scaling;
+    cmsis_nn_scaling recurrent_to_output_scaling;
+    cmsis_nn_scaling cell_to_input_scaling;
+    cmsis_nn_scaling cell_to_forget_scaling;
+    cmsis_nn_scaling cell_to_output_scaling;
+    cmsis_nn_scaling projection_scaling;
+    cmsis_nn_scaling hidden_scaling;
+    cmsis_nn_scaling layer_norm_input_scaling;  /**< layer normalization for input layer */
+    cmsis_nn_scaling layer_norm_forget_scaling; /**< layer normalization for forget gate */
+    cmsis_nn_scaling layer_norm_cell_scaling;   /**< layer normalization for cell */
+    cmsis_nn_scaling layer_norm_output_scaling; /**< layer normalization for output layer */
+
+    int32_t cell_state_shift;
+    int32_t hidden_offset;
+    int32_t output_state_offset;
+
+    cmsis_nn_lstm_clip_params clip;
+    cmsis_nn_lstm_guard_params guard;
+    cmsis_nn_layer_norm layer_norm;
+
+    /* Effective bias is precalculated as bias + zero_point * weight.
+    Only applicable when input/output are s8 and weights are s16 */
+    const int32_t *i2i_effective_bias; /**< input to input effective bias */
+    const int32_t *i2f_effective_bias; /**< input to forget gate effective bias */
+    const int32_t *i2c_effective_bias; /**< input to cell effective bias */
+    const int32_t *i2o_effective_bias; /**< input to output effective bias */
+
+    const int32_t *r2i_effective_bias; /**< recurrent gate to input effective bias */
+    const int32_t *r2f_effective_bias; /**< recurrent gate to forget gate effective bias */
+    const int32_t *r2c_effective_bias; /**< recurrent gate to cell effective bias */
+    const int32_t *r2o_effective_bias; /**< recurrent gate to output effective bias */
+
+    const int32_t *projection_effective_bias;
+
+    /* Not precalculated bias */
+    const int32_t *input_gate_bias;
+    const int32_t *forget_gate_bias;
+    const int32_t *cell_gate_bias;
+    const int32_t *output_gate_bias;
+
+    /* Activation min and max */
+    cmsis_nn_activation activation;
+
+} cmsis_nn_lstm_params;
+
 #endif // _ARM_NN_TYPES_H

+ 93 - 17
Include/arm_nnfunctions.h

@@ -21,8 +21,8 @@
  * Title:        arm_nnfunctions.h
  * Description:  Public header file for CMSIS NN Library
  *
- * $Date:        26 October 2022
- * $Revision:    V.11.1.0
+ * $Date:        7 November 2022
+ * $Revision:    V.11.2.0
  *
  * Target Processor:  Arm Cortex-M Processors
  * -------------------------------------------------------------------- */
@@ -121,18 +121,6 @@
 extern "C" {
 #endif
 
-/**
- * @brief Struct for specifying activation function types
- *
- */
-typedef enum
-{
-    ARM_SIGMOID = 0,
-    /**< Sigmoid activation function */
-    ARM_TANH = 1,
-    /**< Tanh activation function */
-} arm_nn_activation_type;
-
 /**
  * @defgroup NNConv Convolution Functions
  *
@@ -1176,7 +1164,6 @@ arm_cmsis_nn_status arm_elementwise_mul_s16(const int16_t *input_1_vect,
  * @param[in,out]   data        pointer to input
  * @param[in]       size        number of elements
  */
-
 void arm_relu_q7(int8_t *data, uint16_t size);
 
 /**
@@ -1184,7 +1171,6 @@ void arm_relu_q7(int8_t *data, uint16_t size);
  * @param[in,out]   data        pointer to input
  * @param[in]       size        number of elements
  */
-
 void arm_relu6_s8(int8_t *data, uint16_t size);
 
 /**
@@ -1192,9 +1178,33 @@ void arm_relu6_s8(int8_t *data, uint16_t size);
  * @param[in,out]   data        pointer to input
  * @param[in]       size        number of elements
  */
-
 void arm_relu_q15(int16_t *data, uint16_t size);
 
+/**
+ * @brief s16 neural network activation function using direct table look-up
+ * @param[in]       input        pointer to input data
+ * @param[out]      output      pointer to output
+ * @param[in]       size        number of elements
+ * @param[in]       left_shift  bit-width of the integer part, assumed to be smaller than 3
+ * @param[in]       type        type of activation functions
+ *
+ * @details Supported framework: TensorFlow Lite for Microcontrollers.
+ * This activation function must be bit precise congruent with the corresponding TFLM tanh and sigmoid activation
+ * functions
+ */
+void arm_nn_activation_s16(const int16_t *input,
+                           int16_t *output,
+                           const uint16_t size,
+                           const uint16_t left_shift,
+                           const arm_nn_activation_type type);
+
+/**
+ * @defgroup Pooling Pooling Functions
+ *
+ * Perform max and average pooling operations
+ *
+ */
+
 /**
  * @brief s8 average pooling function.
  *
@@ -1754,6 +1764,72 @@ arm_cmsis_nn_status arm_svdf_state_s16_s8(const cmsis_nn_context *input_ctx,
                                           const cmsis_nn_dims *output_dims,
                                           int8_t *output_data);
 
+/**
+ * @defgroup LSTM LSTM Layer Functions
+ *
+ */
+
+/**
+ * @brief LSTM unidirectional function with 8 bit input and output and 16 bit gate output
+ * Peephole connections, projection, clipping, combined input/forget gate and layer normalization are not supported.
+ *
+ * @param[in]   scratch_buffers                 Struct containing scratch buffers
+ * @param[in]   input_data                      Pointer to input data
+ * @param[in]   lstm_dims                       LSTM input parameters related to dimensions
+ * @param[in]   input_to_input_weights          Input to input weights
+ * @param[in]   input_to_forget_weights         Input to forget weights
+ * @param[in]   input_to_cell_weights           Input to cell weights
+ * @param[in]   input_to_output_weights         Input to output weights
+ * @param[in]   recurrent_to_input_weights      Recurrent to input weights
+ * @param[in]   recurrent_to_forget_weights     Recurrent to forget weights
+ * @param[in]   recurrent_to_cell_weights       Recurrent to cell weights
+ * @param[in]   recurrent_to_output_weights     Recurrent to output weights
+ * @param[in]   cell_to_input_weights           Cell to input weights. Not used.
+ * @param[in]   cell_to_forget_weights          Cell to forget weights. Not used.
+ * @param[in]   cell_to_output_weights          Cell to output weights. Not used.
+ * @param[in]   projection_weights              Projection weights. Not used.
+ * @param[in]   lstm                            LSTM parameters. See struct declaration
+ * @param[in]   output_state                    Pointer to (recurrent) output state
+ * @param[in]   cell_state                      Pointer to cell state
+ * @param[in]   output_data                     Pointer to output state
+ *
+ * @note Following assumptions are done based on LSTM functionality as supported by
+ *       Keras version 2.9.0 at the time of development. As stated here,
+ *       https://github.com/tensorflow/community/blob/master/rfcs/20180920-unify-rnn-interface.md
+ *       Keras's LSTMCell is equivalent to TensorFlow's BasicLSTMCell,
+ *       which does not support peephole, clipping or projection.
+ *       Layer normalization and combined input/forget gate are not supported either.
+ *
+ *       1 Input to input weight cannot be nullptr. Otherwise nullptr for combined input/forget gate.
+ *       2 Cell weights are not used and should be nullptr. Otherwise needed for peephole connections.
+ *       3 Projection weight is not used and should be nullptr. Otherwise needed for projection.
+ *
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ * @details
+ *    1. Supported framework: TensorFlow Lite micro
+ *
+ */
+arm_cmsis_nn_status arm_lstm_unidirectional_s16_s8(cmsis_nn_lstm_context *scratch_buffers,
+                                                   const int8_t *input_data,
+                                                   const cmsis_nn_lstm_dims *lstm_dims,
+                                                   const int8_t *input_to_input_weights,
+                                                   const int8_t *input_to_forget_weights,
+                                                   const int8_t *input_to_cell_weights,
+                                                   const int8_t *input_to_output_weights,
+                                                   const int8_t *recurrent_to_input_weights,
+                                                   const int8_t *recurrent_to_forget_weights,
+                                                   const int8_t *recurrent_to_cell_weights,
+                                                   const int8_t *recurrent_to_output_weights,
+                                                   const int16_t *cell_to_input_weights,
+                                                   const int16_t *cell_to_forget_weights,
+                                                   const int16_t *cell_to_output_weights,
+                                                   const int8_t *projection_weights,
+                                                   const cmsis_nn_lstm_params *lstm,
+                                                   int8_t *output_state,
+                                                   int16_t *cell_state,
+                                                   int8_t *output_data);
+
 #ifdef __cplusplus
 }
 #endif

+ 168 - 2
Include/arm_nnsupportfunctions.h

@@ -21,8 +21,8 @@
  * Title:        arm_nnsupportfunctions.h
  * Description:  Public header file of support functions for CMSIS NN Library
  *
- * $Date:        03 November 2022
- * $Revision:    V.13.0.0
+ * $Date:        07 November 2022
+ * $Revision:    V.13.1.0
  *
  * Target Processor:  Arm Cortex-M CPUs
  * -------------------------------------------------------------------- */
@@ -1139,6 +1139,172 @@ __STATIC_FORCEINLINE void arm_nn_write_q15x2_ia(int16_t **dest_q15, int32_t src_
     *dest_q15 += 2;
 }
 
+// Support functions for LSTM
+/**
+ * @brief Update LSTM function for an iteration step
+ *
+ * param[in]    input                           Input data
+ * param[in]    input_to_input_weight           Input to input gate weights
+ * param[in]    input_to_forget_weight          Input to forget gate weights
+ * param[in]    input_to_cell_weight            Input to cell gate weights
+ * param[in]    input_to_output_weight          Input to output weights
+ * param[in]    recurrent_to_input_weight       Recurrent signal to input weights
+ * param[in]    recurrent_to_forget_weight      Recurrent signal to forget gate weights
+ * param[in]    recurrent_to_cell_weight        Recurrent signal to cell gate weights
+ * param[in]    recurrent_to_output_weight      Recurrent signal to output weights
+ * param[in]    lstm                            LSTM parameters
+ * param[in]    n_batch                         Batch size
+ * param[in]    n_cell                          Cell size
+ * param[in]    n_input                         Input size
+ * param[in]    n_output                        Output size
+ * param[out]   output_state                    Output state
+ * param[out]   cell_state                      Internal state
+ * param[out]   output                          Output signal
+ * param[in] *scratch_buffers                   Struct containing scratch buffers
+ */
+arm_cmsis_nn_status arm_nn_lstm_step_s8_s16(const int8_t *input,
+                                            const int8_t *input_to_input_weight,
+                                            const int8_t *input_to_forget_weight,
+                                            const int8_t *input_to_cell_weight,
+                                            const int8_t *input_to_output_weight,
+                                            const int8_t *recurrent_to_input_weight,
+                                            const int8_t *recurrent_to_forget_weight,
+                                            const int8_t *recurrent_to_cell_weight,
+                                            const int8_t *recurrent_to_output_weight,
+                                            const cmsis_nn_lstm_params *lstm,
+                                            const int n_batch,
+                                            const int n_cell,
+                                            const int n_input,
+                                            const int n_output,
+                                            int8_t *output_state,
+                                            int16_t *cell_state,
+                                            int8_t *output,
+                                            cmsis_nn_lstm_context *scratch_buffers);
+
+/**
+ * @brief         Updates a LSTM gate for an iteration step of LSTM function, int8x8_16 version.
+ *
+ * param[in]    input                           Input data
+ * param[in]    input_to_gate_weights           Input to gate weights
+ * param[in]    input_to_gate_bias              Input to gate bias
+ * param[in]    input_to_gate_scaling           Input to gate scaling
+ * param[in]    activation                      Activation min and max values
+ * param[in]    output_state                    Output state
+ * param[in]    recurrent_to_gate_weights       Recurrent to gate weights
+ * param[in]    recurrent_to_gate_bias          Recurrent to gate bias
+ * param[in]    recurrent_to_gate_scaling       Recurrent to gate scaling
+ * param[in]    n_batch                         Batch size
+ * param[in]    n_input                         Input size
+ * param[in]    n_output                        Output size
+ * param[in]    activation_type                 Activation type (sigmoid or tanh)
+ * param[in]    n_cell                          Cell size
+ */
+void arm_nn_lstm_calculate_gate_s8_s16(const int8_t *input,
+                                       const int8_t *input_to_gate_weights,
+                                       const int32_t *input_to_gate_bias,
+                                       const cmsis_nn_scaling input_to_gate_scaling,
+                                       const int8_t *output_state,
+                                       const int8_t *recurrent_to_gate_weights,
+                                       const int32_t *recurrent_to_gate_bias,
+                                       const cmsis_nn_scaling recurrent_to_gate_scaling,
+                                       const int32_t n_batch,
+                                       const int32_t n_input,
+                                       const int32_t n_output,
+                                       const int32_t n_cell,
+                                       const arm_nn_activation_type activation_type,
+                                       int16_t *gate);
+
+/**
+ * @brief       Update cell state for a single LSTM iteration step, int8x8_16 version.
+ * @param[in]   n_block             total number of cells for all batches
+ * @param[in]   cell_state_scale    Scaling factor of cell state
+ * @param[in]   cell_state          Input/output vector, size n_batch*n_cell
+ * @param[in]   input_gate          Input vector of size n_block
+ * @param[in]   forget_gate         Input/scratch vector of size n_block, always modified
+ * @param[in]   cell_gate           Input vector of size n_block
+ */
+void arm_nn_lstm_update_cell_state_s16(const int32_t n_block,
+                                       const int32_t cell_state_scale,
+                                       int16_t *cell_state,
+                                       const int16_t *input_gate,
+                                       const int16_t *forget_gate,
+                                       const int16_t *cell_gate);
+
+/**
+ * @brief       Calculate the output state tensor of an LSTM step, s8 input/output and s16 weight version.
+ *
+ * @param[in]       n_batch                     The number of distinct vectors in each array
+ * @param[in]       n_cell                      Number of cells
+ * @param[in]       n_output                    Number of outputs
+ * @param[in,out]   cell_state                  Cell state, size n_batch*n_cell
+ * @param[in]       cell_state_scale            Scaling of cell_state
+ * @param[in]       output_gate                 Output gate
+ * @param[in]       hidden_scale                Effective scaling of cell_state .* output_gate
+ * @param[in]       hidden_offset               Zero point for cell_state .* output_gate
+ * @param[out]      output_state                Output state
+ * @param[in]       scratch0                    Scratch buffer
+ * @param[in]       scratch1                    Scratch buffer
+ */
+void arm_nn_lstm_update_output_s8_s16(const int n_batch,
+                                      const int n_cell,
+                                      const int n_output,
+                                      int16_t *cell_state,
+                                      const int32_t cell_state_scale,
+                                      const int16_t *output_gate,
+                                      const cmsis_nn_scaling hidden_scale,
+                                      const int32_t hidden_offset,
+                                      int8_t *output_state,
+                                      int16_t *scratch0,
+                                      int8_t *scratch1);
+
+/**
+ * @brief The result of the multiplication is accumulated to the passed result buffer.
+ * Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch dimension composed by input vectors independent
+ * from each other).
+ *
+ * @param[in]   lhs          Batched vector
+ * @param[in]   rhs          Weights input
+ * @param[in]   bias         Bias vector
+ * @param[out]  dst          Output
+ * @param[in]   dst_offset   Output offset
+ * @param[in]   multiplier   Multiplier for quantization
+ * @param[in]   shift        Shift for quantization
+ * @param[in]   rhs_cols     Input size (for each batch)
+ * @param[in]   rhs_rows     Output size (for each batch)
+ * @param[in]   batch        Batch size
+ */
+void arm_nn_vec_mat_mul_result_acc_s8(const int8_t *lhs,
+                                      const int8_t *rhs,
+                                      const int32_t *bias,
+                                      int16_t *dst,
+                                      const int32_t dst_offset,
+                                      const int32_t multiplier,
+                                      const int32_t shift,
+                                      const int32_t rhs_cols,
+                                      const int32_t rhs_rows,
+                                      const int32_t batch);
+
+/**
+ * @brief s16 elementwise multiplication with s8 output
+ * @param[in]       input_1_vect        pointer to input vector 1
+ * @param[in]       input_2_vect        pointer to input vector 2
+ * @param[in,out]   output              pointer to output vector
+ * @param[in]       out_offset          output offset
+ * @param[in]       out_mult            output multiplier
+ * @param[in]       out_shift           output shift
+ * @param[in]       block_size          number of samples
+ * @return          The function returns ARM_CMSIS_NN_SUCCESS
+ *
+ * @details   Supported framework: TensorFlow Lite micro
+ */
+arm_cmsis_nn_status arm_elementwise_mul_s16_s8(const int16_t *input_1_vect,
+                                               const int16_t *input_2_vect,
+                                               int8_t *output,
+                                               const int32_t out_offset,
+                                               const int32_t out_mult,
+                                               const int32_t out_shift,
+                                               const int32_t block_size);
+
 #ifdef __cplusplus
 }
 #endif

+ 4 - 4
Source/ActivationFunctions/CMakeLists.txt

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2021 Arm Limited.
+# SPDX-FileCopyrightText: Copyright 2019-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -15,6 +15,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-
-file(GLOB SRC "./*_s8.c")
-target_sources(cmsis-nn PRIVATE ${SRC})
+file(GLOB SRC "./*_s8*.c")
+file(GLOB SRC_S16 "./*_s16*.c")
+target_sources(cmsis-nn PRIVATE ${SRC} ${SRC_S16})

+ 119 - 0
Source/ActivationFunctions/arm_nn_activation_s16.c

@@ -0,0 +1,119 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2010-2020, 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_nn_activation_s16.c
+ * Description:  Q15 neural network activation function using direct table look-up
+ *
+ * $Date:        8 September 2022
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nn_tables.h"
+#include "arm_nnfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup Acti
+ * @{
+ */
+
+/*
+ * @brief s16 sigmoid/tanh activation, linearly interpolated between adjacent entries of sigmoid_table_uint16
+ *
+ * @note  Refer header file for details. Any type other than ARM_SIGMOID is processed as ARM_TANH.
+ *
+ */
+
+void arm_nn_activation_s16(const int16_t *input,
+                           int16_t *output,
+                           const uint16_t size,
+                           const uint16_t left_shift,
+                           const arm_nn_activation_type type)
+{
+    const int is_sigmoid = (type == ARM_SIGMOID);
+
+    // The low 9 (sigmoid) or 8 (tanh) bits of |x| are the interpolation weight,
+    // the bits above them select the table entry.
+    uint32_t frac_bits;
+    uint32_t saturated;
+    if (is_sigmoid)
+    {
+        frac_bits = 9;
+        saturated = 0x7FFF << 10;
+    }
+    else
+    {
+        frac_bits = 8;
+        saturated = 0xFFFF << 8;
+    }
+    const uint32_t frac_mask = is_sigmoid ? 0x1ff : 0x0ff;
+
+    // One sigmoid table serves both activations because
+    // tanh(x) = 2*sigmoid(2*x) - 1
+    const int32_t scale = ((int32_t)3) << left_shift;
+
+    for (int idx = 0; idx < size; ++idx)
+    {
+        const int32_t scaled = input[idx] * scale;
+        const uint32_t magnitude = scaled > 0 ? scaled : -scaled;
+        const uint32_t lut_idx = magnitude >> frac_bits;
+
+        // Beyond the last table interval the result is pinned to the saturation value.
+        uint32_t acc = saturated;
+        if (lut_idx < 255)
+        {
+            const uint32_t lo = sigmoid_table_uint16[lut_idx];
+            const uint32_t hi = sigmoid_table_uint16[lut_idx + 1];
+            const uint32_t frac = magnitude & frac_mask;
+            acc = (lo << frac_bits) + frac * (hi - lo);
+        }
+
+        // Mirror the result for negative inputs, add the rounding term and drop the extra bits.
+        // The arithmetic is deliberately unsigned; only the low 16 bits survive the final cast.
+        if (is_sigmoid)
+        {
+            acc = (scaled >= 0) ? (acc + (1 << 9)) : ((1 << 25) - acc + (1 << 9) - 1);
+            acc >>= 10;
+        }
+        else
+        {
+            acc = (scaled >= 0) ? (acc - (1 << 23)) + (1 << 7) : ((-acc + (1 << 23)) + (1 << 7) - 1);
+            acc >>= 8;
+        }
+        output[idx] = (int16_t)acc;
+    }
+}
+
+/**
+ * @} end of Acti group
+ */

+ 75 - 0
Source/BasicMathFunctions/arm_elementwise_mul_s16_s8.c

@@ -0,0 +1,75 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_elementwise_mul_s16_s8.c
+ * Description:  Elementwise multiplication of 16 bit input with 8 bit output
+ *
+ * $Date:        8 September 2022
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupSupport
+ */
+
+/**
+ * @addtogroup BasicMath
+ * @{
+ */
+
+/*
+ * Multiplies two s16 vectors element by element, requantizes each product
+ * and stores it as s8.
+ *
+ * Refer header file for details.
+ *
+ */
+arm_cmsis_nn_status arm_elementwise_mul_s16_s8(const int16_t *input_1_vect,
+                                               const int16_t *input_2_vect,
+                                               int8_t *output,
+                                               const int32_t out_offset,
+                                               const int32_t out_mult,
+                                               const int32_t out_shift,
+                                               const int32_t block_size)
+{
+    for (int32_t idx = 0; idx < block_size; idx++)
+    {
+        const int32_t product = input_1_vect[idx] * input_2_vect[idx];
+
+        // Rescale with the output multiplier/shift, add the output offset,
+        // then clamp between NN_Q7_MIN and NN_Q7_MAX before narrowing.
+        int32_t scaled = arm_nn_requantize(product, out_mult, out_shift) + out_offset;
+        scaled = CLAMP(scaled, NN_Q7_MAX, NN_Q7_MIN);
+
+        output[idx] = (int8_t)scaled;
+    }
+
+    return ARM_CMSIS_NN_SUCCESS;
+}
+/**
+ * @} end of BasicMath group
+ */

+ 11 - 16
Source/CMakeLists.txt

@@ -16,6 +16,8 @@
 # limitations under the License.
 #
 
+SET(ROOT ${CMSIS_PATH})
+
 # Select which parts of the CMSIS-DSP must be compiled.
 # There are some dependencies between the parts but they are not tracked
 # by this cmake. So, enabling some functions may require to enable some
@@ -29,26 +31,15 @@ option(SOFTMAX              "Softmax"               ON)
 option(BASICMATHSNN         "Basic Maths for NN"    ON)
 option(RESHAPE              "Reshape"               ON)
 option(SVDF                 "SVDF"                  ON)
+option(LSTM                 "LSTM"                  ON)
 
-# When OFF it is the default behavior : all tables are included.
+# Always needed if any other module above is on.
 option(NNSUPPORT            "NN Support"            ON)
 
+list(APPEND CMAKE_MODULE_PATH Source)
 
-###########################
-#
-# CMSIS NN
-#
-###########################
-
-list(APPEND CMAKE_MODULE_PATH ${NN}/Source)
-
-add_library(cmsis-nn STATIC)
-
-target_compile_options(cmsis-nn PRIVATE -Ofast)
-
-### Includes
-target_include_directories(cmsis-nn PUBLIC "../Include")
-target_include_directories(cmsis-nn PUBLIC "${CMSIS_PATH}/CMSIS/Core/Include")
+# There is a dependency to CMSIS-Core.
+target_include_directories(cmsis-nn PUBLIC "${ROOT}/CMSIS/Core/Include")
 
 if (BASICMATHSNN)
   add_subdirectory(BasicMathFunctions)
@@ -82,6 +73,10 @@ if (SVDF)
   add_subdirectory(SVDFunctions)
 endif()
 
+if (LSTM)
+  add_subdirectory(LSTMFunctions)
+endif()
+
 if (RESHAPE)
   add_subdirectory(ReshapeFunctions)
 endif()

+ 20 - 0
Source/LSTMFunctions/CMakeLists.txt

@@ -0,0 +1,20 @@
+#
+# SPDX-FileCopyrightText: Copyright 2019-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+file(GLOB SRC "./*_s16.c")
+target_sources(cmsis-nn PRIVATE ${SRC})

+ 184 - 0
Source/LSTMFunctions/arm_lstm_unidirectional_s8_s16.c

@@ -0,0 +1,184 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_lstm_unidirectional_s16_s8.c
+ * Description:  S8 LSTM function with S16 gate output
+ *
+ * $Date:        4 November 2022
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M processors
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+#include "arm_nnsupportfunctions.h"
+
+/**
+ * @ingroup Public
+ */
+
+/**
+ * @addtogroup LSTM
+ * @{
+ */
+
+/*
+ * S8 LSTM function for TensorFlow Lite with S16 gate output
+ *
+ * Refer to header file for details.
+ *
+ */
+
+#include "arm_nnfunctions.h"
+#include "arm_nnsupportfunctions.h"
+
+/*
+ * Unidirectional LSTM over a complete sequence: int8 input, weights and output,
+ * int16 cell state. One call to arm_nn_lstm_step_s8_s16 is made per time step.
+ *
+ * Refer header file for details.
+ *
+ */
+arm_cmsis_nn_status arm_lstm_unidirectional_s16_s8(cmsis_nn_lstm_context *scratch_buffers,
+                                                   const int8_t *input_data,
+                                                   const cmsis_nn_lstm_dims *lstm_dims,
+                                                   const int8_t *in_to_in_weights,
+                                                   const int8_t *in_to_forget_weights,
+                                                   const int8_t *in_to_cell_weights,
+                                                   const int8_t *in_to_out_weights,
+                                                   const int8_t *recurrent_to_in_weights,
+                                                   const int8_t *recurrent_to_forget_weights,
+                                                   const int8_t *recurrent_to_cell_weights,
+                                                   const int8_t *recurrent_to_out_weights,
+                                                   const int16_t *cell_to_in_weights,
+                                                   const int16_t *cell_to_forget_weights,
+                                                   const int16_t *cell_to_out_weights,
+                                                   const int8_t *projection_weights,
+                                                   const cmsis_nn_lstm_params *lstm,
+                                                   int8_t *output_state,
+                                                   int16_t *cell_state,
+                                                   int8_t *output_data)
+{
+    // The cell_to_* weights are not used by this implementation.
+    (void)cell_to_in_weights;
+    (void)cell_to_forget_weights;
+    (void)cell_to_out_weights;
+
+    const int32_t num_batch = lstm_dims->num_batches;
+    const int32_t num_input = lstm_dims->num_inputs;
+    const int32_t max_time = lstm_dims->max_time;
+    const int32_t num_output = lstm_dims->num_outputs;
+
+    // With projection absent (enforced below) the cell count equals the output count.
+    const int32_t num_cell = num_output;
+
+    // Projection is rejected rather than silently ignored.
+    if (projection_weights != NULL)
+    {
+        return ARM_CMSIS_NN_ARG_ERROR;
+    }
+
+    // All eight input-to-gate and recurrent-to-gate effective biases are mandatory.
+    if (lstm->i2f_effective_bias == NULL || lstm->i2c_effective_bias == NULL || lstm->i2o_effective_bias == NULL ||
+        lstm->r2f_effective_bias == NULL || lstm->r2c_effective_bias == NULL || lstm->r2o_effective_bias == NULL ||
+        lstm->i2i_effective_bias == NULL || lstm->r2i_effective_bias == NULL)
+    {
+        return ARM_CMSIS_NN_ARG_ERROR;
+    }
+
+    if (lstm->time_major)
+    {
+        // Time-major: every step handles all batches at once, so consecutive steps are
+        // num_batch * num_input (input) and num_batch * num_output (output) elements apart.
+        const int32_t in_stride = num_batch * num_input;
+        const int32_t out_stride = num_batch * num_output;
+        const int8_t *step_in = input_data;
+        int8_t *step_out = output_data;
+
+        for (int t = 0; t < max_time; t++, step_in += in_stride, step_out += out_stride)
+        {
+            const arm_cmsis_nn_status status = arm_nn_lstm_step_s8_s16(step_in,
+                                                                       in_to_in_weights,
+                                                                       in_to_forget_weights,
+                                                                       in_to_cell_weights,
+                                                                       in_to_out_weights,
+                                                                       recurrent_to_in_weights,
+                                                                       recurrent_to_forget_weights,
+                                                                       recurrent_to_cell_weights,
+                                                                       recurrent_to_out_weights,
+                                                                       lstm,
+                                                                       num_batch,
+                                                                       num_cell,
+                                                                       num_input,
+                                                                       num_output,
+                                                                       output_state,
+                                                                       cell_state,
+                                                                       step_out,
+                                                                       scratch_buffers);
+            if (status != ARM_CMSIS_NN_SUCCESS)
+            {
+                return status;
+            }
+        }
+
+        return ARM_CMSIS_NN_SUCCESS;
+    }
+
+    // Batch-major: each batch is run through its whole sequence with a batch size of one.
+    // Input and output slices are visited in storage order, so two running pointers suffice.
+    const int8_t *seq_in = input_data;
+    int8_t *seq_out = output_data;
+
+    for (int b = 0; b < num_batch; b++)
+    {
+        // Each batch owns its slice of the persistent output and cell state.
+        int8_t *const batch_output_state = output_state + b * num_output;
+        int16_t *const batch_cell_state = cell_state + b * num_cell;
+
+        for (int t = 0; t < max_time; t++, seq_in += num_input, seq_out += num_output)
+        {
+            const arm_cmsis_nn_status status = arm_nn_lstm_step_s8_s16(seq_in,
+                                                                       in_to_in_weights,
+                                                                       in_to_forget_weights,
+                                                                       in_to_cell_weights,
+                                                                       in_to_out_weights,
+                                                                       recurrent_to_in_weights,
+                                                                       recurrent_to_forget_weights,
+                                                                       recurrent_to_cell_weights,
+                                                                       recurrent_to_out_weights,
+                                                                       lstm,
+                                                                       /*num_batch=*/1,
+                                                                       num_cell,
+                                                                       num_input,
+                                                                       num_output,
+                                                                       batch_output_state,
+                                                                       batch_cell_state,
+                                                                       seq_out,
+                                                                       scratch_buffers);
+            if (status != ARM_CMSIS_NN_SUCCESS)
+            {
+                return status;
+            }
+        }
+    }
+
+    return ARM_CMSIS_NN_SUCCESS;
+}
+
+/**
+ * @} end of LSTM group
+ */

+ 2 - 1
Source/NNSupportFunctions/CMakeLists.txt

@@ -18,4 +18,5 @@
 
 file(GLOB SRC "./*_s8*.c")
 file(GLOB SRC_S16 "./*_s16*.c")
-target_sources(cmsis-nn PRIVATE ${SRC} ${SRC_S16} arm_q7_to_q15_with_offset.c)
+target_sources(cmsis-nn PRIVATE ${SRC} ${SRC_S16} arm_nntables.c
+                                                  arm_q7_to_q15_with_offset.c)

+ 99 - 0
Source/NNSupportFunctions/arm_nn_lstm_calculate_gate_s8_s16.c

@@ -0,0 +1,99 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_nn_lstm_calculate_gate_s8_s16.c
+ * Description:  Update single gate for an incremental step of LSTM function.
+ *
+ * $Date:        8 September 2022
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nn_tables.h"
+#include "arm_nnfunctions.h"
+#include "arm_nnsupportfunctions.h"
+
+/**
+ * @ingroup groupSupport
+ */
+
+/**
+ * @defgroup supportLSTM LSTM
+ *
+ * Support functions for LSTM
+ *
+ */
+
+/**
+ * @addtogroup supportLSTM
+ * @{
+ */
+
+/*
+ * Calculates a single LSTM gate, int8x8_16 version: the input and the recurrent
+ * contribution are both written through the same gate buffer, which is then
+ * passed through the activation in place.
+ * Refer to header file for details
+ */
+void arm_nn_lstm_calculate_gate_s8_s16(const int8_t *input,
+                                       const int8_t *input_to_gate_weights,
+                                       const int32_t *input_to_gate_bias,
+                                       const cmsis_nn_scaling input_to_gate_scaling,
+                                       const int8_t *output_state,
+                                       const int8_t *recurrent_to_gate_weights,
+                                       const int32_t *recurrent_to_gate_bias,
+                                       const cmsis_nn_scaling recurrent_to_gate,
+                                       const int32_t n_batch,
+                                       const int32_t n_input,
+                                       const int32_t n_output,
+                                       const int32_t n_cell,
+                                       const arm_nn_activation_type activation_type,
+                                       int16_t *gate)
+{
+    const int32_t gate_len = n_batch * n_cell;
+    const int32_t no_dst_offset = 0;
+
+    // Clear the gate first; both products below target this same buffer
+    // (presumably accumulating into it, as the helper's name suggests).
+    memset(gate, 0, gate_len * sizeof(int16_t));
+
+    // Contribution of the current input.
+    arm_nn_vec_mat_mul_result_acc_s8(input,
+                                     input_to_gate_weights,
+                                     input_to_gate_bias,
+                                     gate,
+                                     no_dst_offset,
+                                     input_to_gate_scaling.multiplier,
+                                     input_to_gate_scaling.shift,
+                                     n_input,
+                                     n_cell,
+                                     n_batch);
+
+    // Contribution of the previous output state.
+    arm_nn_vec_mat_mul_result_acc_s8(output_state,
+                                     recurrent_to_gate_weights,
+                                     recurrent_to_gate_bias,
+                                     gate,
+                                     no_dst_offset,
+                                     recurrent_to_gate.multiplier,
+                                     recurrent_to_gate.shift,
+                                     n_output,
+                                     n_cell,
+                                     n_batch);
+
+    // Activation is applied in place with a left shift of zero.
+    arm_nn_activation_s16(gate, gate, gate_len, 0, activation_type);
+}
+/**
+ * @} end of supportLSTM group
+ */

+ 156 - 0
Source/NNSupportFunctions/arm_nn_lstm_step_s8_s16.c

@@ -0,0 +1,156 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_nn_lstm_step_s8_s16.c
+ * Description:  Update LSTM function for a single iteration step.
+ *
+ * $Date:        8 September 2022
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+#include "arm_nnsupportfunctions.h"
+/**
+ * @ingroup groupSupport
+ */
+
+/**
+ * @addtogroup supportLSTM
+ * @{
+ */
+
+/*
+ * Runs one LSTM time step: s8 input, weights and output state, s16 gates and cell state.
+ * Refer to header file for details.
+ */
+arm_cmsis_nn_status arm_nn_lstm_step_s8_s16(const int8_t *input,
+                                            const int8_t *input_to_input_weight,
+                                            const int8_t *input_to_forget_weight,
+                                            const int8_t *input_to_cell_weight,
+                                            const int8_t *input_to_output_weight,
+                                            const int8_t *recurrent_to_input_weight,
+                                            const int8_t *recurrent_to_forget_weight,
+                                            const int8_t *recurrent_to_cell_weight,
+                                            const int8_t *recurrent_to_output_weight,
+                                            const cmsis_nn_lstm_params *lstm,
+                                            const int n_batch,
+                                            const int n_cell,
+                                            const int n_input,
+                                            const int n_output,
+                                            int8_t *output_state,
+                                            int16_t *cell_state,
+                                            int8_t *output,
+                                            cmsis_nn_lstm_context *scratch_buffers)
+{
+    // Gate buffers supplied by the caller through the scratch context.
+    int16_t *const input_gate = scratch_buffers->input_gate;
+    int16_t *const forget_gate = scratch_buffers->forget_gate;
+    int16_t *const cell_gate = scratch_buffers->cell_gate;
+    int16_t *const output_gate = scratch_buffers->output_gate;
+
+    // Input gate (sigmoid)
+    arm_nn_lstm_calculate_gate_s8_s16(input,
+                                      input_to_input_weight,
+                                      lstm->i2i_effective_bias,
+                                      lstm->input_to_input_scaling,
+                                      output_state,
+                                      recurrent_to_input_weight,
+                                      lstm->r2i_effective_bias,
+                                      lstm->recurrent_to_input_scaling,
+                                      n_batch,
+                                      n_input,
+                                      n_output,
+                                      n_cell,
+                                      ARM_SIGMOID,
+                                      input_gate);
+
+    // Forget gate (sigmoid)
+    arm_nn_lstm_calculate_gate_s8_s16(input,
+                                      input_to_forget_weight,
+                                      lstm->i2f_effective_bias,
+                                      lstm->input_to_forget_scaling,
+                                      output_state,
+                                      recurrent_to_forget_weight,
+                                      lstm->r2f_effective_bias,
+                                      lstm->recurrent_to_forget_scaling,
+                                      n_batch,
+                                      n_input,
+                                      n_output,
+                                      n_cell,
+                                      ARM_SIGMOID,
+                                      forget_gate);
+
+    // Cell update gate (tanh)
+    arm_nn_lstm_calculate_gate_s8_s16(input,
+                                      input_to_cell_weight,
+                                      lstm->i2c_effective_bias,
+                                      lstm->input_to_cell_scaling,
+                                      output_state,
+                                      recurrent_to_cell_weight,
+                                      lstm->r2c_effective_bias,
+                                      lstm->recurrent_to_cell_scaling,
+                                      n_batch,
+                                      n_input,
+                                      n_output,
+                                      n_cell,
+                                      ARM_TANH,
+                                      cell_gate);
+
+    // Fold the three gates into the persistent cell state, one element per batch/cell pair.
+    const int32_t n_block = n_batch * n_cell;
+    arm_nn_lstm_update_cell_state_s16(n_block, lstm->cell_state_shift, cell_state, input_gate, forget_gate, cell_gate);
+
+    // Output gate (sigmoid); still fed with the previous output state.
+    arm_nn_lstm_calculate_gate_s8_s16(input,
+                                      input_to_output_weight,
+                                      lstm->i2o_effective_bias,
+                                      lstm->input_to_output_scaling,
+                                      output_state,
+                                      recurrent_to_output_weight,
+                                      lstm->r2o_effective_bias,
+                                      lstm->recurrent_to_output_scaling,
+                                      n_batch,
+                                      n_input,
+                                      n_output,
+                                      n_cell,
+                                      ARM_SIGMOID,
+                                      output_gate);
+
+    // Produce the new output state. The input gate buffer is handed over together with
+    // the scratch buffer (presumably as working memory - see the header for the contract).
+    arm_nn_lstm_update_output_s8_s16(n_batch,
+                                     n_cell,
+                                     n_output,
+                                     cell_state,
+                                     lstm->cell_state_shift,
+                                     output_gate,
+                                     lstm->hidden_scaling,
+                                     lstm->hidden_offset,
+                                     output_state,
+                                     scratch_buffers->input_gate,
+                                     scratch_buffers->scratch);
+
+    // The step's output is a copy of the updated output state.
+    arm_memcpy_s8(output, output_state, n_batch * n_output * sizeof(int8_t));
+
+    return ARM_CMSIS_NN_SUCCESS;
+}
+/**
+ * @} end of supportLSTM group
+ */

+ 67 - 0
Source/NNSupportFunctions/arm_nn_lstm_update_cell_state_s16.c

@@ -0,0 +1,67 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_nn_lstm_update_cell_state_s16.c
+ * Description:  Update cell state for an incremental step of LSTM function.
+ *
+ * $Date:        8 September 2022
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnsupportfunctions.h"
+/**
+ * @ingroup groupSupport
+ */
+
+/**
+ * @addtogroup supportLSTM
+ * @{
+ */
+
+/*
+ * Update cell state for a single LSTM iteration step, int8x8_16 version:
+ * cell_state = clamp((cell_state * forget_gate) >> 15 + (input_gate * cell_gate) >> (30 + cell_state_scale)),
+ * where ">>" stands for arm_nn_divide_by_power_of_two.
+ *
+ * Refer to header file for more details
+ */
+void arm_nn_lstm_update_cell_state_s16(const int32_t n_block,
+                                       const int32_t cell_state_scale,
+                                       int16_t *cell_state,
+                                       const int16_t *input_gate,
+                                       const int16_t *forget_gate,
+                                       const int16_t *cell_gate)
+{
+    const int32_t update_shift = 30 + cell_state_scale;
+
+    for (int idx = 0; idx < n_block; ++idx)
+    {
+        // Part of the old state kept by the forget gate.
+        const int32_t retained = arm_nn_divide_by_power_of_two(cell_state[idx] * forget_gate[idx], 15);
+
+        // New candidate admitted by the input gate.
+        const int32_t admitted = arm_nn_divide_by_power_of_two(input_gate[idx] * cell_gate[idx], update_shift);
+
+        cell_state[idx] = CLAMP(retained + admitted, NN_Q15_MAX, NN_Q15_MIN);
+    }
+}
+/**
+ * @} end of supportLSTM group
+ */

+ 88 - 0
Source/NNSupportFunctions/arm_nn_lstm_update_output_s8_s16.c

@@ -0,0 +1,88 @@
+
+/*
+ * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_nn_lstm_update_output_s8_s16.c
+ * Description:  Update output gate for an incremental step of LSTM function.
+ *
+ * $Date:        8 September 2022
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+#include "arm_nnsupportfunctions.h"
+
+/**
+ * @ingroup groupSupport
+ */
+
+/**
+ * @addtogroup supportLSTM
+ * @{
+ */
+
+/*
+ * Compute the s8 output state of one LSTM step: tanh is applied to the s16
+ * cell state and the result is multiplied element-wise with the output gate.
+ * Refer to header files for details
+ */
+void arm_nn_lstm_update_output_s8_s16(const int n_batch,
+                                      const int n_cell,
+                                      const int n_output,
+                                      int16_t *cell_state,
+                                      const int32_t cell_state_scale,
+                                      const int16_t *output_gate,
+                                      const cmsis_nn_scaling hidden_scaling,
+                                      const int32_t hidden_offset,
+                                      int8_t *output_state,
+                                      int16_t *cell_gate_scratch,
+                                      int8_t *scratch)
+{
+    const int32_t n_elements = n_batch * n_cell;
+
+    int32_t left_shift = (15 + cell_state_scale) - 3;
+    if (left_shift < 0)
+    {
+        /* A negative left shift is applied as an in-place right shift of the
+         * cell state, after which no further shift is requested from tanh. */
+        const int32_t right_shift = -left_shift;
+        int32_t idx = 0;
+        while (idx < n_elements)
+        {
+            cell_state[idx] >>= right_shift;
+            idx++;
+        }
+        left_shift = 0;
+    }
+    arm_nn_activation_s16(cell_state, cell_gate_scratch, n_elements, left_shift, ARM_TANH);
+
+    /* When cell and output sizes match, the product is written straight to
+     * the output state; otherwise it goes through the scratch buffer. */
+    const int same_size = (n_cell == n_output);
+    int8_t *product_dst = same_size ? output_state : scratch;
+
+    arm_elementwise_mul_s16_s8(output_gate,
+                               cell_gate_scratch,
+                               product_dst,
+                               hidden_offset,
+                               hidden_scaling.multiplier,
+                               hidden_scaling.shift,
+                               n_elements);
+
+    if (!same_size)
+    {
+        arm_memcpy_s8(output_state, product_dst, n_batch * n_output * sizeof(int8_t));
+    }
+}
+/**
+ * @} end of supportLSTM group
+ */

+ 79 - 0
Source/NNSupportFunctions/arm_nn_vec_mat_mul_result_acc_s8.c

@@ -0,0 +1,79 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_nn_vec_mat_mul_result_acc_s8.c
+ * Description:  Multiplies a matrix by a vector and accumulate with output.
+ *
+ * $Date:        8 September 2022
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnsupportfunctions.h"
+
+/**
+ * @ingroup groupSupport
+ */
+
+/**
+ * @addtogroup supportLSTM
+ * @{
+ */
+
+/*
+ *  For every batch vector in lhs and every row of rhs: form bias + dot
+ *  product, requantize, add output_offset, accumulate onto the existing
+ *  dst value and store the sum clamped to [NN_Q15_MIN, NN_Q15_MAX].
+ *
+ *  Refer to header file for details.
+ */
+void arm_nn_vec_mat_mul_result_acc_s8(const int8_t *lhs,
+                                      const int8_t *rhs,
+                                      const int32_t *bias,
+                                      int16_t *dst,
+                                      const int32_t output_offset,
+                                      const int32_t multiplier,
+                                      const int32_t shift,
+                                      const int32_t rhs_cols,
+                                      const int32_t rhs_rows,
+                                      const int32_t batch)
+{
+    int16_t *out = dst;
+
+    for (int32_t b = 0; b < batch; b++)
+    {
+        /* The same matrix is reused for every batch, so restart at its first row. */
+        const int8_t *weights = rhs;
+
+        for (int32_t row = 0; row < rhs_rows; row++)
+        {
+            const int8_t *vec = lhs + b * rhs_cols;
+            int32_t sum = bias[row];
+
+            for (int32_t col = 0; col < rhs_cols; col++)
+            {
+                sum += vec[col] * (*weights);
+                ++weights;
+            }
+
+            const int32_t requantized = arm_nn_requantize(sum, multiplier, shift);
+
+            /* Accumulate onto the value already present in dst, then saturate. */
+            int32_t total = requantized + output_offset + *out;
+            total = CLAMP(total, NN_Q15_MAX, NN_Q15_MIN);
+
+            *out = (int16_t)total;
+            ++out;
+        }
+    }
+}
+
+/**
+ * @} end of supportLSTM group
+ */

+ 24 - 2
Source/NNSupportFunctions/arm_nntables.c

@@ -21,8 +21,8 @@
  * Title:        arm_nntables.c
  * Description:  Converts the elements of the Q7 vector to Q15 vector without left-shift
  *
- * $Date:        30 September 2022
- * $Revision:    V.2.0.0
+ * $Date:        28 October 2022
+ * $Revision:    V.2.1.0
  *
  * Target Processor:  Cortex-M cores
  *
@@ -38,3 +38,25 @@
  *
  */
 
+// Look-up table of sigmoid(i/24) for i = 0..255, stored as unsigned 0.16 fixed point (256 entries).
+// The same table serves both sigmoid and tanh, since
+// tanh(x) = 2*sigmoid(2*x) - 1.
+// Both functions are symmetric, so only non-negative inputs are tabulated;
+// the table is indexed with the absolute value of the input.
+const uint16_t sigmoid_table_uint16[256] = {
+    32768, 33451, 34133, 34813, 35493, 36169, 36843, 37513, 38180, 38841, 39498, 40149, 40794, 41432, 42064, 42688,
+    43304, 43912, 44511, 45102, 45683, 46255, 46817, 47369, 47911, 48443, 48964, 49475, 49975, 50464, 50942, 51409,
+    51865, 52311, 52745, 53169, 53581, 53983, 54374, 54755, 55125, 55485, 55834, 56174, 56503, 56823, 57133, 57433,
+    57724, 58007, 58280, 58544, 58800, 59048, 59288, 59519, 59743, 59959, 60168, 60370, 60565, 60753, 60935, 61110,
+    61279, 61441, 61599, 61750, 61896, 62036, 62172, 62302, 62428, 62549, 62666, 62778, 62886, 62990, 63090, 63186,
+    63279, 63368, 63454, 63536, 63615, 63691, 63765, 63835, 63903, 63968, 64030, 64090, 64148, 64204, 64257, 64308,
+    64357, 64405, 64450, 64494, 64536, 64576, 64614, 64652, 64687, 64721, 64754, 64786, 64816, 64845, 64873, 64900,
+    64926, 64950, 64974, 64997, 65019, 65039, 65060, 65079, 65097, 65115, 65132, 65149, 65164, 65179, 65194, 65208,
+    65221, 65234, 65246, 65258, 65269, 65280, 65291, 65301, 65310, 65319, 65328, 65337, 65345, 65352, 65360, 65367,
+    65374, 65381, 65387, 65393, 65399, 65404, 65410, 65415, 65420, 65425, 65429, 65433, 65438, 65442, 65445, 65449,
+    65453, 65456, 65459, 65462, 65465, 65468, 65471, 65474, 65476, 65479, 65481, 65483, 65485, 65488, 65489, 65491,
+    65493, 65495, 65497, 65498, 65500, 65501, 65503, 65504, 65505, 65507, 65508, 65509, 65510, 65511, 65512, 65513,
+    65514, 65515, 65516, 65517, 65517, 65518, 65519, 65520, 65520, 65521, 65522, 65522, 65523, 65523, 65524, 65524,
+    65525, 65525, 65526, 65526, 65526, 65527, 65527, 65528, 65528, 65528, 65529, 65529, 65529, 65529, 65530, 65530,
+    65530, 65530, 65531, 65531, 65531, 65531, 65531, 65532, 65532, 65532, 65532, 65532, 65532, 65533, 65533, 65533,
+    65533, 65533, 65533, 65533, 65533, 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65535};

+ 1 - 0
Tests/UnitTest/CMakeLists.txt

@@ -83,6 +83,7 @@ add_subdirectory(TestCases/test_arm_elementwise_mul_s16)
 add_subdirectory(TestCases/test_arm_elementwise_mul_s8)
 add_subdirectory(TestCases/test_arm_fully_connected_s16)
 add_subdirectory(TestCases/test_arm_fully_connected_s8)
+add_subdirectory(TestCases/test_arm_lstm_unidirectional_s16_s8)
 add_subdirectory(TestCases/test_arm_max_pool_s16)
 add_subdirectory(TestCases/test_arm_max_pool_s8)
 add_subdirectory(TestCases/test_arm_softmax_s16)

+ 2 - 0
Tests/UnitTest/PregeneratedData/lstm_1/bias.txt

@@ -0,0 +1,2 @@
+# 12
+-2.941763401031494141e-01,-4.574780464172363281e-01,6.231174468994140625e-01,3.350811004638671875e-01,-6.914455890655517578e-01,-9.450943470001220703e-01,-6.410059928894042969e-01,-7.997453808784484863e-01,-9.903407096862792969e-01,-5.439190864562988281e-01,-8.339915275573730469e-01,-9.280035495758056641e-01

+ 11 - 0
Tests/UnitTest/PregeneratedData/lstm_1/input.txt

@@ -0,0 +1,11 @@
+# 1,10,5
+-1.070000000000000000e+02,1.200000000000000000e+01,1.090000000000000000e+02,1.140000000000000000e+02,-7.400000000000000000e+01
+-1.900000000000000000e+01,6.600000000000000000e+01,-1.300000000000000000e+01,3.300000000000000000e+01,-8.600000000000000000e+01
+-1.010000000000000000e+02,1.030000000000000000e+02,-3.500000000000000000e+01,9.400000000000000000e+01,5.000000000000000000e+01
+1.000000000000000000e+02,8.000000000000000000e+01,3.200000000000000000e+01,-3.000000000000000000e+00,-1.180000000000000000e+02
+8.700000000000000000e+01,8.700000000000000000e+01,1.150000000000000000e+02,1.130000000000000000e+02,7.900000000000000000e+01
+7.300000000000000000e+01,3.000000000000000000e+01,-3.300000000000000000e+01,1.180000000000000000e+02,-8.600000000000000000e+01
+-1.190000000000000000e+02,-1.010000000000000000e+02,1.700000000000000000e+01,9.200000000000000000e+01,-7.100000000000000000e+01
+-1.000000000000000000e+00,-7.000000000000000000e+00,-9.100000000000000000e+01,-4.000000000000000000e+01,-1.400000000000000000e+01
+2.000000000000000000e+00,-1.050000000000000000e+02,8.500000000000000000e+01,2.000000000000000000e+00,-1.270000000000000000e+02
+-9.500000000000000000e+01,-2.900000000000000000e+01,1.080000000000000000e+02,1.300000000000000000e+01,2.100000000000000000e+01

+ 6 - 0
Tests/UnitTest/PregeneratedData/lstm_1/kernel.txt

@@ -0,0 +1,6 @@
+# 5,12
+-2.916452884674072266e-01,-9.699029326438903809e-01,2.766107954084873199e-02,-9.544720649719238281e-01,1.968124061822891235e-01,-3.822360038757324219e-01,-8.586256504058837891e-01,-9.332587718963623047e-01,4.119925498962402344e-01,6.044430732727050781e-01,2.970669269561767578e-01,-6.419694423675537109e-01
+-6.444039344787597656e-01,1.679508537054061890e-01,-4.187808036804199219e-01,-6.229739189147949219e-01,7.253146171569824219e-01,-9.623918533325195312e-01,-7.083365321159362793e-01,9.855685830116271973e-01,-8.267076015472412109e-01,-7.529933452606201172e-01,-3.265476226806640625e-01,-3.899813070893287659e-02
+-1.616172790527343750e-01,1.812674999237060547e-01,7.406637668609619141e-01,-2.739210128784179688e-01,-9.473168849945068359e-01,-1.252675056457519531e-01,7.770959734916687012e-01,-9.471528530120849609e-01,2.439949661493301392e-01,-1.397728919982910156e-01,-4.851088523864746094e-01,-4.171228408813476562e-01
+-9.176707267761230469e-02,-8.886914253234863281e-01,-6.644551753997802734e-01,8.271861076354980469e-02,5.160858631134033203e-01,5.279765129089355469e-01,-6.577796936035156250e-01,9.046753048896789551e-01,8.145649433135986328e-01,3.036432266235351562e-01,-2.850685119628906250e-01,-6.255328655242919922e-01
+-7.064499855041503906e-01,-2.062224969267845154e-02,-3.111209869384765625e-01,4.930980503559112549e-01,3.248074054718017578e-01,-4.202377498149871826e-01,-9.698390960693359375e-01,6.776201725006103516e-01,-3.915650844573974609e-01,3.221957683563232422e-01,1.799547672271728516e-01,-8.932898640632629395e-01

+ 4 - 0
Tests/UnitTest/PregeneratedData/lstm_1/kernel_hidden.txt

@@ -0,0 +1,4 @@
+# 3,12
+2.792534828186035156e-01,4.466640651226043701e-01,2.380489930510520935e-02,-3.270049095153808594e-01,-5.090649127960205078e-01,-3.902249336242675781e-01,5.557198524475097656e-01,7.088861465454101562e-01,-9.389566630125045776e-02,8.679037094116210938e-01,-3.193378448486328125e-01,4.721031188964843750e-01
+6.799073815345764160e-01,-1.027562618255615234e-01,-5.306129455566406250e-01,-3.784561157226562500e-01,-1.009175777435302734e-01,5.216462612152099609e-01,-3.758955299854278564e-01,-4.065167903900146484e-01,2.584350109100341797e-01,-1.556182000786066055e-02,-2.207176536321640015e-01,6.993966102600097656e-01
+-1.582279205322265625e-01,-9.879434108734130859e-01,3.595776259899139404e-01,-9.881803989410400391e-01,-5.250124931335449219e-01,5.356051921844482422e-01,2.748196125030517578e-01,-1.663946919143199921e-02,-6.769521832466125488e-01,4.591784477233886719e-01,7.282969355583190918e-01,-5.461788177490234375e-01

+ 4 - 0
Tests/UnitTest/PregeneratedData/lstm_1/kernel_recurrent.txt

@@ -0,0 +1,4 @@
+# 3,12
+-4.555177688598632812e-01,8.347518444061279297e-01,9.198055267333984375e-01,-2.391040474176406860e-01,-4.206430912017822266e-01,2.693762779235839844e-01,9.947328567504882812e-01,-3.824450671672821045e-01,3.603575229644775391e-01,-5.715112686157226562e-01,8.953621387481689453e-01,-1.841506958007812500e-01
+-8.593931198120117188e-01,1.502501964569091797e-01,7.702538967132568359e-01,8.099102973937988281e-02,-8.316326141357421875e-01,4.504671096801757812e-01,3.064627647399902344e-01,5.014181137084960938e-02,-1.694524139165878296e-01,-3.883800506591796875e-01,1.556826010346412659e-02,-4.678952693939208984e-01
+-9.732427000999450684e-01,3.301250934600830078e-01,6.551964282989501953e-01,9.694361686706542969e-02,-7.298562526702880859e-01,1.438834667205810547e-01,2.228982448577880859e-01,4.513302147388458252e-01,1.124048233032226562e-01,-2.678887844085693359e-01,-9.313118457794189453e-01,-5.661685466766357422e-01

+ 2 - 0
Tests/UnitTest/PregeneratedData/lstm_2/bias.txt

@@ -0,0 +1,2 @@
+# 20
+-4.073690995573997498e-02,4.896166324615478516e-01,-2.516083717346191406e-01,2.894654273986816406e-01,5.880310535430908203e-01,6.304214000701904297e-01,-9.660606384277343750e-01,6.768398284912109375e-01,-4.158082008361816406e-01,-4.098701849579811096e-02,-7.632277011871337891e-01,4.389555454254150391e-01,-7.727472782135009766e-01,-9.665346145629882812e-02,5.560185909271240234e-01,-7.438349723815917969e-02,-1.670682430267333984e-01,-2.150807380676269531e-01,8.131778240203857422e-01,9.899935722351074219e-01

+ 19 - 0
Tests/UnitTest/PregeneratedData/lstm_2/input.txt

@@ -0,0 +1,19 @@
+# 2,9,6
+-2.300000000000000000e+01,4.800000000000000000e+01,7.000000000000000000e+01,3.600000000000000000e+01,-1.240000000000000000e+02,8.600000000000000000e+01
+9.200000000000000000e+01,1.500000000000000000e+01,1.050000000000000000e+02,-4.600000000000000000e+01,8.700000000000000000e+01,-4.500000000000000000e+01
+1.080000000000000000e+02,-4.400000000000000000e+01,-4.000000000000000000e+01,2.000000000000000000e+00,-1.170000000000000000e+02,-7.500000000000000000e+01
+1.140000000000000000e+02,6.600000000000000000e+01,5.200000000000000000e+01,-9.900000000000000000e+01,4.600000000000000000e+01,1.130000000000000000e+02
+-8.300000000000000000e+01,1.110000000000000000e+02,3.000000000000000000e+01,1.800000000000000000e+01,6.700000000000000000e+01,-6.000000000000000000e+01
+1.040000000000000000e+02,-3.300000000000000000e+01,-7.000000000000000000e+01,2.000000000000000000e+01,3.200000000000000000e+01,6.700000000000000000e+01
+-8.500000000000000000e+01,-1.130000000000000000e+02,3.800000000000000000e+01,-1.200000000000000000e+02,3.100000000000000000e+01,8.700000000000000000e+01
+-4.400000000000000000e+01,-7.500000000000000000e+01,-8.700000000000000000e+01,7.000000000000000000e+01,7.600000000000000000e+01,4.200000000000000000e+01
+-1.170000000000000000e+02,-2.600000000000000000e+01,-6.200000000000000000e+01,-8.500000000000000000e+01,-1.300000000000000000e+01,-5.900000000000000000e+01
+9.700000000000000000e+01,2.000000000000000000e+01,6.700000000000000000e+01,9.500000000000000000e+01,-2.400000000000000000e+01,-6.000000000000000000e+00
+3.600000000000000000e+01,9.200000000000000000e+01,-9.700000000000000000e+01,-1.040000000000000000e+02,-9.800000000000000000e+01,-9.000000000000000000e+01
+1.040000000000000000e+02,4.700000000000000000e+01,-7.800000000000000000e+01,4.400000000000000000e+01,-1.120000000000000000e+02,-1.280000000000000000e+02
+1.260000000000000000e+02,1.500000000000000000e+01,1.240000000000000000e+02,2.300000000000000000e+01,-5.700000000000000000e+01,9.400000000000000000e+01
+1.170000000000000000e+02,-2.400000000000000000e+01,-8.100000000000000000e+01,1.120000000000000000e+02,4.000000000000000000e+00,2.600000000000000000e+01
+7.400000000000000000e+01,-8.100000000000000000e+01,3.100000000000000000e+01,1.100000000000000000e+01,-6.700000000000000000e+01,2.200000000000000000e+01
+1.170000000000000000e+02,1.160000000000000000e+02,-6.100000000000000000e+01,-5.400000000000000000e+01,7.000000000000000000e+01,-6.700000000000000000e+01
+-1.050000000000000000e+02,-3.000000000000000000e+01,3.300000000000000000e+01,2.600000000000000000e+01,1.240000000000000000e+02,2.700000000000000000e+01
+8.000000000000000000e+01,3.800000000000000000e+01,5.300000000000000000e+01,-1.240000000000000000e+02,5.200000000000000000e+01,-7.500000000000000000e+01

+ 7 - 0
Tests/UnitTest/PregeneratedData/lstm_2/kernel.txt

@@ -0,0 +1,7 @@
+# 6,20
+1.661818027496337891e-01,9.119001030921936035e-01,-8.396573066711425781e-01,-4.082736670970916748e-01,-8.051974177360534668e-01,1.538197994232177734e-01,-2.796692848205566406e-01,-5.684330463409423828e-01,8.638834953308105469e-02,-4.020528793334960938e-01,9.316275119781494141e-01,9.718518257141113281e-01,3.058922290802001953e-01,1.386353969573974609e-01,-6.852118968963623047e-01,-7.412499785423278809e-01,1.290516853332519531e-01,8.461329340934753418e-01,-9.457409381866455078e-01,1.918647140264511108e-01
+3.214113712310791016e-01,8.230186253786087036e-02,-5.836930274963378906e-01,9.783399105072021484e-01,-9.070117473602294922e-01,-2.421600818634033203e-01,5.940148830413818359e-01,-9.982938170433044434e-01,6.900560855865478516e-01,-9.213759899139404297e-01,1.870691776275634766e-01,9.970019459724426270e-01,-2.106444835662841797e-01,-7.973902225494384766e-01,3.761019706726074219e-01,-2.322929054498672485e-01,-8.410064578056335449e-01,-4.597587585449218750e-01,1.648206710815429688e-01,4.185912907123565674e-01
+-2.892267704010009766e-01,-3.134472370147705078e-01,3.245136737823486328e-01,-6.645143032073974609e-01,-9.121944904327392578e-01,-1.649403572082519531e-01,-4.392454922199249268e-01,-1.859473995864391327e-02,-6.158461570739746094e-01,-7.098824977874755859e-01,1.638605594635009766e-01,6.096541881561279297e-01,8.457213044166564941e-01,9.932565689086914062e-01,7.461952567100524902e-01,-3.014514446258544922e-01,-3.341236114501953125e-01,-1.660623550415039062e-01,-8.260729312896728516e-01,4.321007728576660156e-01
+-2.661883831024169922e-01,-2.182326465845108032e-01,-6.626846790313720703e-01,-9.762055873870849609e-01,-4.106507301330566406e-01,3.384494781494140625e-01,-1.598067283630371094e-01,-2.928872108459472656e-01,2.896375656127929688e-01,4.500701725482940674e-01,5.471484661102294922e-01,1.816320046782493591e-02,-2.348546981811523438e-01,5.214400291442871094e-01,7.919896245002746582e-01,-5.971026420593261719e-01,5.980230122804641724e-02,1.138322427868843079e-01,-5.156948566436767578e-01,6.360859870910644531e-01
+-8.599696159362792969e-01,9.930508136749267578e-01,-2.618448734283447266e-01,-2.905933856964111328e-01,1.348562240600585938e-01,1.700708866119384766e-01,-2.909522056579589844e-01,7.749868184328079224e-02,3.142352104187011719e-01,5.073850154876708984e-01,-3.408097848296165466e-02,-7.089710235595703125e-02,-4.358427524566650391e-01,5.866591930389404297e-01,-2.428631782531738281e-01,2.643582820892333984e-01,7.681429386138916016e-01,7.340732216835021973e-01,-5.979654788970947266e-01,-4.130139052867889404e-01
+-4.977273941040039062e-01,7.946271896362304688e-01,-3.794755935668945312e-01,-6.274055689573287964e-02,-2.951300144195556641e-01,-8.102863430976867676e-01,-7.995247840881347656e-02,3.966068848967552185e-02,-4.599206447601318359e-01,-6.383695602416992188e-01,9.888617396354675293e-01,7.553772926330566406e-01,-6.219182014465332031e-01,-7.646887302398681641e-01,-3.963985741138458252e-01,7.885161042213439941e-01,-3.340220078825950623e-02,3.328690528869628906e-01,-6.361782550811767578e-01,-9.234074950218200684e-01

+ 6 - 0
Tests/UnitTest/PregeneratedData/lstm_2/kernel_hidden.txt

@@ -0,0 +1,6 @@
+# 5,20
+-6.593630313873291016e-01,7.962141036987304688e-01,-6.928687095642089844e-01,2.368633747100830078e-01,9.172329306602478027e-01,8.302115648984909058e-02,5.752239227294921875e-01,-5.422427654266357422e-01,-8.374829888343811035e-01,3.132534027099609375e-01,-4.460000991821289062e-02,-1.313650608062744141e-01,8.669328689575195312e-01,6.950631141662597656e-01,6.920335292816162109e-01,-5.339722633361816406e-01,2.857351303100585938e-01,-3.116369247436523438e-01,7.440114021301269531e-02,5.543093681335449219e-01
+-4.398488998413085938e-02,3.665857315063476562e-01,2.662532329559326172e-01,7.422210182994604111e-03,8.082109093666076660e-01,3.157811164855957031e-01,-2.045240402221679688e-01,5.655047893524169922e-01,-7.395322322845458984e-01,-2.316243648529052734e-01,-7.021722197532653809e-01,6.882755756378173828e-01,4.072818756103515625e-01,4.023594856262207031e-01,4.706928730010986328e-01,-7.424597740173339844e-01,4.580695629119873047e-01,1.514892578125000000e-01,2.731442078948020935e-02,-8.225095272064208984e-01
+-6.087112426757812500e-01,6.876971721649169922e-01,-3.765513896942138672e-01,8.218061327934265137e-01,-5.461645126342773438e-02,2.927019596099853516e-01,-2.612423896789550781e-01,7.448296546936035156e-01,-8.326454162597656250e-01,7.329246997833251953e-01,2.162051014602184296e-02,-4.375789165496826172e-01,9.595411419868469238e-01,-6.142425537109375000e-01,-2.020692825317382812e-01,-9.496016502380371094e-01,8.684090971946716309e-01,-4.978873729705810547e-01,-9.166486263275146484e-01,-1.953840255737304688e-01
+-6.525969505310058594e-01,-6.695442199707031250e-01,8.382368087768554688e-02,-5.144407749176025391e-01,6.147224903106689453e-01,4.265814125537872314e-01,1.738669872283935547e-01,-1.175904273986816406e-01,8.597006201744079590e-01,-6.183502674102783203e-01,7.486710548400878906e-01,-9.582381844520568848e-01,-2.762377262115478516e-01,-2.989645004272460938e-01,4.916837215423583984e-01,5.918009281158447266e-01,2.975859642028808594e-01,-6.299729924649000168e-03,8.487107753753662109e-01,8.484196662902832031e-01
+-1.832137256860733032e-01,6.187088489532470703e-01,2.927470207214355469e-01,5.159492492675781250e-01,-7.133135795593261719e-01,-1.038196086883544922e-01,9.995863437652587891e-01,-8.320715427398681641e-01,6.377758979797363281e-01,1.752114146947860718e-01,3.491413593292236328e-01,-9.219565391540527344e-01,-2.389414459466934204e-01,9.399983882904052734e-01,3.133680820465087891e-01,-1.237440016120672226e-02,4.344639480113983154e-01,-9.983017444610595703e-01,-9.564023017883300781e-01,-1.894140057265758514e-02

+ 6 - 0
Tests/UnitTest/PregeneratedData/lstm_2/kernel_recurrent.txt

@@ -0,0 +1,6 @@
+# 5,20
+3.675816059112548828e-01,1.841671466827392578e-01,9.055058956146240234e-01,2.789447307586669922e-01,-4.911629855632781982e-01,6.841435432434082031e-01,-9.543693065643310547e-01,6.852719783782958984e-01,1.397290229797363281e-01,3.301973342895507812e-01,2.661414146423339844e-01,5.227684974670410156e-01,6.406128406524658203e-01,3.448357582092285156e-01,-3.084473609924316406e-01,3.465147316455841064e-01,-9.628999233245849609e-01,3.838341236114501953e-01,8.030827045440673828e-01,-1.078104972839355469e-02
+-9.833471775054931641e-01,-7.928318977355957031e-01,3.054619999602437019e-03,-2.797343730926513672e-01,-5.603556632995605469e-01,5.798821449279785156e-01,4.464201927185058594e-01,-8.416101336479187012e-01,-9.428064823150634766e-01,-2.229821532964706421e-01,-5.533781051635742188e-01,2.436683177947998047e-01,-7.640190124511718750e-01,1.291048526763916016e-01,-6.187627315521240234e-01,-2.841777801513671875e-01,8.331672549247741699e-01,-2.548739910125732422e-01,2.430558018386363983e-02,-1.856040954589843750e-02
+-2.767109870910644531e-01,-3.407449722290039062e-01,9.766950607299804688e-01,6.322631835937500000e-01,-6.569015979766845703e-01,2.179689407348632812e-01,-9.365463256835937500e-02,6.370887756347656250e-01,-1.733698844909667969e-01,3.276026248931884766e-01,7.476601600646972656e-01,-8.426282405853271484e-01,-4.014342129230499268e-01,9.461041092872619629e-01,-4.216067492961883545e-01,7.819738388061523438e-01,6.151978969573974609e-01,-1.572966575622558594e-01,3.754651546478271484e-01,-2.931649684906005859e-01
+5.126814842224121094e-01,-3.229615688323974609e-01,7.197425365447998047e-01,7.189798355102539062e-02,4.945569336414337158e-01,4.000475704669952393e-01,-5.259580612182617188e-01,3.446824252605438232e-01,7.433981895446777344e-01,-8.316636085510253906e-02,3.540642261505126953e-01,3.703892529010772705e-01,3.330652713775634766e-01,5.685889720916748047e-01,-6.256792545318603516e-01,1.998927593231201172e-01,-5.592110101133584976e-03,-5.488224029541015625e-01,-2.866299152374267578e-01,-1.411297321319580078e-01
+-1.257724761962890625e-01,4.159312546253204346e-01,-1.773474216461181641e-01,-7.051283717155456543e-01,-7.603826522827148438e-01,7.301757335662841797e-01,8.852329850196838379e-01,5.686295032501220703e-01,2.966997623443603516e-01,4.452195167541503906e-01,-8.296184539794921875e-01,5.707855224609375000e-01,3.211612701416015625e-01,-1.187782287597656250e-01,-6.306185722351074219e-01,6.443455219268798828e-01,-4.014253988862037659e-02,8.803474903106689453e-01,-3.846187591552734375e-01,8.534922599792480469e-01

+ 2 - 0
Tests/UnitTest/PregeneratedData/lstm_one_time_step/bias.txt

@@ -0,0 +1,2 @@
+# 16
+3.216696158051490784e-02,-8.216559886932373047e-01,-5.227050781250000000e-01,-2.881567478179931641e-01,-8.673424720764160156e-01,9.971568584442138672e-01,8.090770244598388672e-01,-4.975128173828125000e-02,1.665811538696289062e-01,-8.605656623840332031e-01,-4.847116470336914062e-01,8.471274375915527344e-01,-3.702373504638671875e-01,-1.846590042114257812e-01,-8.703420162200927734e-01,-8.328770995140075684e-01

+ 3 - 0
Tests/UnitTest/PregeneratedData/lstm_one_time_step/input.txt

@@ -0,0 +1,3 @@
+# 2,1,4
+-1.500000000000000000e+01,-6.200000000000000000e+01,1.060000000000000000e+02,3.600000000000000000e+01
+1.180000000000000000e+02,-4.500000000000000000e+01,-1.280000000000000000e+02,7.900000000000000000e+01

+ 5 - 0
Tests/UnitTest/PregeneratedData/lstm_one_time_step/kernel.txt

@@ -0,0 +1,5 @@
+# 4,16
+2.900106906890869141e-01,7.755599021911621094e-01,7.922054082155227661e-02,8.526008129119873047e-01,3.210427761077880859e-01,-1.802136749029159546e-01,7.328817844390869141e-01,8.415055274963378906e-02,-8.790833353996276855e-01,9.195221066474914551e-01,-4.563508033752441406e-01,-6.262743473052978516e-01,7.267713546752929688e-01,2.206329954788088799e-03,4.193100929260253906e-01,5.192861557006835938e-01
+-9.055540561676025391e-01,5.466990470886230469e-01,5.292341709136962891e-01,-1.976730069145560265e-03,7.725787162780761719e-02,1.860723346471786499e-01,6.046307086944580078e-01,5.227515697479248047e-01,1.132385730743408203e-01,6.383595466613769531e-01,-6.287109851837158203e-01,6.404249668121337891e-01,-1.653344631195068359e-01,7.536861896514892578e-01,2.584457397460937500e-01,-2.736773490905761719e-01
+-9.627959728240966797e-01,-7.743599824607372284e-03,3.372740745544433594e-01,-5.636198520660400391e-01,9.686639308929443359e-01,-6.265358924865722656e-01,7.797274589538574219e-01,4.143118858337402344e-02,-4.970877170562744141e-01,6.396889686584472656e-02,-2.111480236053466797e-01,-9.152874946594238281e-01,-1.851112842559814453e-01,-8.866858482360839844e-01,-6.840083003044128418e-01,3.396921157836914062e-01
+-7.482888698577880859e-01,-1.191082000732421875e-01,1.599609851837158203e-01,-8.524290919303894043e-01,-7.780900001525878906e-01,-6.197381019592285156e-02,8.995342254638671875e-02,-7.510848045349121094e-01,5.749554634094238281e-01,7.654082179069519043e-01,4.247298538684844971e-01,-9.222404956817626953e-01,5.924930572509765625e-01,-4.563517570495605469e-01,4.454617500305175781e-01,8.115255832672119141e-01

+ 5 - 0
Tests/UnitTest/PregeneratedData/lstm_one_time_step/kernel_hidden.txt

@@ -0,0 +1,5 @@
+# 4,16
+-8.448026180267333984e-01,7.455093860626220703e-01,9.982140064239501953e-01,-5.447411537170410156e-01,-4.016518294811248779e-01,9.282579421997070312e-01,5.494186878204345703e-01,9.086000919342041016e-01,-9.351596832275390625e-01,2.694845013320446014e-02,-5.857713222503662109e-01,2.620928287506103516e-01,-4.740834534168243408e-01,1.456742286682128906e-01,-2.340421527624130249e-01,-4.935574159026145935e-02
+6.547029018402099609e-01,9.464161396026611328e-01,-8.134319782257080078e-01,-2.273547649383544922e-01,-3.333241939544677734e-01,8.911027908325195312e-01,-2.118239402770996094e-01,1.433076858520507812e-01,8.577368259429931641e-01,3.631171882152557373e-01,1.393716335296630859e-01,-3.205575942993164062e-01,9.487107396125793457e-01,-4.531400501728057861e-01,-6.236875057220458984e-01,-2.659442424774169922e-01
+1.637687683105468750e-01,-1.230232715606689453e-01,-9.694294929504394531e-01,-9.102644920349121094e-01,5.397343635559082031e-01,-7.163450121879577637e-01,-3.907885551452636719e-01,3.160523995757102966e-02,7.672070059925317764e-03,-9.941599369049072266e-01,-1.272349357604980469e-01,-3.388483524322509766e-01,9.227545857429504395e-01,-1.442611217498779297e-01,9.868490695953369141e-01,3.379237651824951172e-01
+2.686395645141601562e-01,5.435261726379394531e-01,3.255803585052490234e-01,-7.326985001564025879e-01,2.032494544982910156e-01,8.947310447692871094e-01,9.395681023597717285e-01,-3.781902790069580078e-01,9.825201034545898438e-01,-8.196819424629211426e-01,8.586456775665283203e-01,1.708818040788173676e-02,-6.075851917266845703e-01,-6.350450515747070312e-01,-4.700508117675781250e-01,-7.988870143890380859e-01

+ 1 - 0
Tests/UnitTest/README.md

@@ -118,6 +118,7 @@ The script use a concept of test data sets, i.e. it need a test set data name as
 When adding a new test data set, new c files should be added or existing c files should be updated to use the new data set. See overview of the folders on how/where to add new c files.
 
 As it is now, when adding a new test data set, you would first have to go and edit the script to configure the parameters as you want.
+A new test data set (a unit test) can be added by just running the script with a new name for the test data set. TODO
 Once you are happy with the new test data set, it should be added in the load_all_testdatasets() function.
 
 ## Overview of the Folders

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/cell_gate_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_1_cell_gate_bias[3] = {-21063, -26279, -32542};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/cell_norm_coeff_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t *lstm_1_cell_norm_coeff = NULL;

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/cell_state_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+int16_t lstm_1_cell_state[3] = {0, 0, 0};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/cell_to_forget_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int16_t *lstm_1_cell_to_forget = NULL;

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/cell_to_input_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int16_t *lstm_1_cell_to_input = NULL;

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/cell_to_output_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int16_t *lstm_1_cell_to_output = NULL;

+ 33 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/config_data.h

@@ -0,0 +1,33 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#define LSTM_1_BUFFER_SIZE 3
+#define LSTM_1_INPUT_BATCHES 1
+#define LSTM_1_DST_SIZE 30
+#define LSTM_1_TIME_STEPS 10
+#define LSTM_1_NUMBER_UNITS 3
+#define LSTM_1_NUMBER_INPUTS 5
+#define LSTM_1_TIME_MAJOR 1
+#define LSTM_1_IN_ACTIVATION_MIN -32768
+#define LSTM_1_IN_ACTIVATION_MAX 32767
+#define LSTM_1_IN_TO_INPUT_MULTIPLIER 2107483520
+#define LSTM_1_IN_TO_INPUT_SHIFT -3
+#define LSTM_1_IN_TO_FORGET_MULTIPLIER 2091162880
+#define LSTM_1_IN_TO_FORGET_SHIFT -3
+#define LSTM_1_IN_TO_CELL_MULTIPLIER 2141523200
+#define LSTM_1_IN_TO_CELL_SHIFT -3
+#define LSTM_1_IN_TO_OUTPUT_MULTIPLIER 1941012480
+#define LSTM_1_IN_TO_OUTPUT_SHIFT -3
+#define LSTM_1_RECURRENT_TO_INPUT_MULTIPLIER 2049062656
+#define LSTM_1_RECURRENT_TO_INPUT_SHIFT -7
+#define LSTM_1_RECURRENT_TO_FORGET_MULTIPLIER 2049554304
+#define LSTM_1_RECURRENT_TO_FORGET_SHIFT -7
+#define LSTM_1_RECURRENT_TO_CELL_MULTIPLIER 1470278784
+#define LSTM_1_RECURRENT_TO_CELL_SHIFT -7
+#define LSTM_1_RECURRENT_TO_OUTPUT_MULTIPLIER 1800092160
+#define LSTM_1_RECURRENT_TO_OUTPUT_SHIFT -7
+#define LSTM_1_HIDDEN_MULTIPLIER 1120502447
+#define LSTM_1_HIDDEN_SHIFT -17
+#define LSTM_1_HIDDEN_OFFSET 127
+#define LSTM_1_OUTPUT_STATE_OFFSET 127
+#define LSTM_1_CELL_STATE_SHIFT -16

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/forget_gate_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_1_forget_gate_bias[3] = {11276, -23268, -31803};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/forget_norm_coeff_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t *lstm_1_forget_norm_coeff = NULL;

+ 8 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/input_data.h

@@ -0,0 +1,8 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_1_input[50] = {-107, 12, 109,  114, -74, -19, 66,  -13,  33, -86, -101, 103, -35, 94,   50,   100, 80,
+                                 32,   -3, -118, 87,  87,  115, 113, 79,   73, 30,  -33,  118, -86, -119, -101, 17,  92,
+                                 -71,  -1, -7,   -91, -40, -14, 2,   -105, 85, 2,   -127, -95, -29, 108,  13,   21};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/input_gate_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_1_input_gate_bias[3] = {-9823, -15275, 20806};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/input_norm_coeff_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t *lstm_1_input_norm_coeff = NULL;

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/input_to_cell_eff_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_1_input_to_cell_eff_bias[3] = {-60999, -14887, -28446};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/input_to_cell_w_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_1_input_to_cell_w[15] = {-111, -91, 100, -85, -125, -120, 127, -122, 117, 87, 53, -107, 31, 105, -50};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/input_to_forget_eff_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_1_input_to_forget_eff_bias[3] = {-10228, -9444, -54715};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/input_to_forget_w_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_1_input_to_forget_w[15] = {-126, -82, -36, 11, 65, 26, 96, -125, 68, 43, -50, -127, -17, 70, -55};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/input_to_input_eff_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_1_input_to_input_eff_bias[3] = {-41567, -40875, 10310};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/input_to_input_w_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_1_input_to_input_w[15] = {-38, -84, -21, -12, -93, -127, 22, 24, -116, -3, 4, -55, 97, -87, -41};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/input_to_output_eff_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_1_input_to_output_eff_bias[3] = {-13575, -41499, -81259};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/input_to_output_w_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_1_input_to_output_w[15] = {86, -107, -20, 43, 46, 42, -46, -69, -41, 26, -91, -6, -59, -89, -127};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/output_gate_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_1_output_gate_bias[3] = {-19719, -30235, -33643};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/output_norm_coeff_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t *lstm_1_output_norm_coeff = NULL;

+ 7 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/output_ref_data.h

@@ -0,0 +1,7 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_1_output_ref[30] = {-128, 92, 10, -128, 80, -62, -108, 127, 3,   -128, 4,   -36, -106, 92, 80,
+                                      -128, 87, 65, -128, 50, 36,  -128, 25,  -81, -128, -67, 47,  -128, 1,  -28};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/output_state_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+int8_t lstm_1_output_state[3] = {127, 127, 127};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/projection_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t *lstm_1_projection_bias = NULL;

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/projection_weights_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t *lstm_1_projection_weights = NULL;

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/recurrent_input_to_cell_w_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_1_recurrent_input_to_cell_w[9] = {100, -67, 49, 127, -73, -3, -17, 46, -121};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/recurrent_input_to_forget_w_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_1_recurrent_input_to_forget_w[9] = {-42, -49, -127, -65, -13, -67, -50, 67, 69};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/recurrent_input_to_input_w_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_1_recurrent_input_to_input_w[9] = {36, 87, -20, 57, -13, -127, 3, -68, 46};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/recurrent_input_to_output_w_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_1_recurrent_input_to_output_w[9] = {127, -2, 67, -47, -32, 107, 69, 102, -80};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/recurrent_to_cell_eff_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_1_recurrent_to_cell_eff_bias[3] = {-10414, -6477, 11684};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/recurrent_to_forget_eff_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_1_recurrent_to_forget_eff_bias[3] = {27686, 18415, -10922};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/recurrent_to_input_eff_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_1_recurrent_to_input_eff_bias[3] = {-13081, 10541, 2413};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/recurrent_to_output_eff_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_1_recurrent_to_output_eff_bias[3] = {-24384, -3556, -11557};

+ 36 - 0
Tests/UnitTest/TestCases/TestData/lstm_1/test_data.h

@@ -0,0 +1,36 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#include "cell_gate_bias_data.h"
+#include "cell_norm_coeff_data.h"
+#include "cell_state_data.h"
+#include "cell_to_forget_data.h"
+#include "cell_to_input_data.h"
+#include "cell_to_output_data.h"
+#include "config_data.h"
+#include "forget_gate_bias_data.h"
+#include "forget_norm_coeff_data.h"
+#include "input_data.h"
+#include "input_gate_bias_data.h"
+#include "input_norm_coeff_data.h"
+#include "input_to_cell_eff_bias_data.h"
+#include "input_to_cell_w_data.h"
+#include "input_to_forget_eff_bias_data.h"
+#include "input_to_forget_w_data.h"
+#include "input_to_input_eff_bias_data.h"
+#include "input_to_input_w_data.h"
+#include "input_to_output_eff_bias_data.h"
+#include "input_to_output_w_data.h"
+#include "output_gate_bias_data.h"
+#include "output_norm_coeff_data.h"
+#include "output_ref_data.h"
+#include "output_state_data.h"
+#include "projection_bias_data.h"
+#include "projection_weights_data.h"
+#include "recurrent_input_to_cell_w_data.h"
+#include "recurrent_input_to_forget_w_data.h"
+#include "recurrent_input_to_input_w_data.h"
+#include "recurrent_input_to_output_w_data.h"
+#include "recurrent_to_cell_eff_bias_data.h"
+#include "recurrent_to_forget_eff_bias_data.h"
+#include "recurrent_to_input_eff_bias_data.h"
+#include "recurrent_to_output_eff_bias_data.h"

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/cell_gate_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_2_cell_gate_bias[5] = {-24791, 14258, -25101, -3140, 18061};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/cell_norm_coeff_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t *lstm_2_cell_norm_coeff = NULL;

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/cell_state_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+int16_t lstm_2_cell_state[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/cell_to_forget_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int16_t *lstm_2_cell_to_forget = NULL;

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/cell_to_input_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int16_t *lstm_2_cell_to_input = NULL;

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/cell_to_output_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int16_t *lstm_2_cell_to_output = NULL;

+ 33 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/config_data.h

@@ -0,0 +1,33 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#define LSTM_2_BUFFER_SIZE 10
+#define LSTM_2_INPUT_BATCHES 2
+#define LSTM_2_DST_SIZE 90
+#define LSTM_2_TIME_STEPS 9
+#define LSTM_2_NUMBER_UNITS 5
+#define LSTM_2_NUMBER_INPUTS 6
+#define LSTM_2_TIME_MAJOR 0
+#define LSTM_2_IN_ACTIVATION_MIN -32768
+#define LSTM_2_IN_ACTIVATION_MAX 32767
+#define LSTM_2_IN_TO_INPUT_MULTIPLIER 1078890624
+#define LSTM_2_IN_TO_INPUT_SHIFT -2
+#define LSTM_2_IN_TO_FORGET_MULTIPLIER 1084586752
+#define LSTM_2_IN_TO_FORGET_SHIFT -2
+#define LSTM_2_IN_TO_CELL_MULTIPLIER 1083183360
+#define LSTM_2_IN_TO_CELL_SHIFT -2
+#define LSTM_2_IN_TO_OUTPUT_MULTIPLIER 2054982400
+#define LSTM_2_IN_TO_OUTPUT_SHIFT -3
+#define LSTM_2_RECURRENT_TO_INPUT_MULTIPLIER 1656148352
+#define LSTM_2_RECURRENT_TO_INPUT_SHIFT -4
+#define LSTM_2_RECURRENT_TO_FORGET_MULTIPLIER 1804845056
+#define LSTM_2_RECURRENT_TO_FORGET_SHIFT -4
+#define LSTM_2_RECURRENT_TO_CELL_MULTIPLIER 1732539648
+#define LSTM_2_RECURRENT_TO_CELL_SHIFT -4
+#define LSTM_2_RECURRENT_TO_OUTPUT_MULTIPLIER 1802525568
+#define LSTM_2_RECURRENT_TO_OUTPUT_SHIFT -4
+#define LSTM_2_HIDDEN_MULTIPLIER 1287112152
+#define LSTM_2_HIDDEN_SHIFT -20
+#define LSTM_2_HIDDEN_OFFSET -94
+#define LSTM_2_OUTPUT_STATE_OFFSET -94
+#define LSTM_2_CELL_STATE_SHIFT -14

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/forget_gate_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_2_forget_gate_bias[5] = {20451, -31339, 21957, -13489, -1330};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/forget_norm_coeff_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t *lstm_2_forget_norm_coeff = NULL;

+ 11 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/input_data.h

@@ -0,0 +1,11 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_2_input[108] = {
+    -23, 48,  70,  36,  -124, 86,   92,  15,  105,  -46, 87,  -45, 108, -44, -40, 2,    -117, -75,  114, 66,   52,  -99,
+    46,  113, -83, 111, 30,   18,   67,  -60, 104,  -33, -70, 20,  32,  67,  -85, -113, 38,   -120, 31,  87,   -44, -75,
+    -87, 70,  76,  42,  -117, -26,  -62, -85, -13,  -59, 97,  20,  67,  95,  -24, -6,   36,   92,   -97, -104, -98, -90,
+    104, 47,  -78, 44,  -112, -128, 126, 15,  124,  23,  -57, 94,  117, -24, -81, 112,  4,    26,   74,  -81,  31,  11,
+    -67, 22,  117, 116, -61,  -54,  70,  -67, -105, -30, 33,  26,  124, 27,  80,  38,   53,   -124, 52,  -75};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/input_gate_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_2_input_gate_bias[5] = {-1328, 15967, -8205, 9440, 19177};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/input_norm_coeff_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t *lstm_2_input_norm_coeff = NULL;

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/input_to_cell_eff_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_2_input_to_cell_eff_bias[5] = {20777, 67762, -30861, 7996, 27789};

+ 7 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/input_to_cell_w_data.h

@@ -0,0 +1,7 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_2_input_to_cell_w[30] = {119, 24,  21,  70, -4,   126, 124, 127, 78,  2,   -9, 96, 39,  -27, 108,
+                                           -30, -56, -79, 18, -102, 127, 66,  75,  -97, -87, 48, 95, 101, -31, -50};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/input_to_forget_eff_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_2_input_to_forget_eff_bias[5] = {11491, -41963, -6587, -8497, -29106};

+ 7 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/input_to_forget_w_data.h

@@ -0,0 +1,7 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_2_input_to_forget_w[30] = {20,  -31, -21, 43, 22, -103, -36, 76, -56, -20, -37,  -10, -72, -127, -2,
+                                             -37, 10,  5,   11, 88, -78,  37,  40, -59, -51, -117, -90, 57,  65,   -81};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/input_to_input_eff_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_2_input_to_input_eff_bias[5] = {-24752, 52959, -47501, -13856, -33303};

+ 8 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/input_to_input_w_data.h

@@ -0,0 +1,8 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_2_input_to_input_w[30] = {21,  41,   -37,  -34, -110, -64,  117,  11,  -40, -28,
+                                            127, 102,  -107, -75, 42,   -85,  -33,  -49, -52, 125,
+                                            -85, -125, -37,  -8,  -103, -116, -117, -53, 17,  -38};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/input_to_output_eff_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_2_input_to_output_eff_bias[5] = {-16627, -10073, 16827, -29754, 39788};

+ 8 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/input_to_output_w_data.h

@@ -0,0 +1,8 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_2_input_to_output_w[30] = {-100, -31, -40, -80, 35,  106, 17, -113, -45,  8,
+                                             103,  -4,  114, -62, -22, 15,  99, 45,   -127, 22,
+                                             -111, -69, -80, -85, 26,  56,  58, 85,   -55,  -124};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/output_gate_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t lstm_2_output_gate_bias[5] = {-2547, -5721, -7365, 27846, 33900};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/output_norm_coeff_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t *lstm_2_output_norm_coeff = NULL;

+ 11 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/output_ref_data.h

@@ -0,0 +1,11 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_2_output_ref[90] = {-30, 18,  -128, -102, -24, -61, 123, -128, -66,  -20, -62,  99,  -128, -51, 20,
+                                      -36, 114, -128, -82,  -56, -85, 83,  -128, -35,  39,  -45,  127, -128, -75, -31,
+                                      -73, 127, -128, -53,  7,   -65, 127, -128, -59,  46,  -124, 98,  -128, -24, 80,
+                                      -54, 70,  -123, -78,  -34, -79, 57,  -128, -128, 9,   -69,  69,  -126, -63, 10,
+                                      -46, 81,  -125, -80,  -43, -50, 127, -128, -75,  -6,  -50,  122, -128, -66, 8,
+                                      -60, 124, -128, -61,  -19, -74, 127, -128, -51,  46,  -75,  123, -128, -40, -8};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/output_state_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+int8_t lstm_2_output_state[10] = {-94, -94, -94, -94, -94, -94, -94, -94, -94, -94};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/projection_bias_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int32_t *lstm_2_projection_bias = NULL;

+ 6 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/projection_weights_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t *lstm_2_projection_weights = NULL;

+ 7 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/recurrent_input_to_cell_w_data.h

@@ -0,0 +1,7 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_2_recurrent_input_to_cell_w[25] = {-6,  -93, 3,  99, 46,  -17, 91,  -58, -127, -122, 115, 54, 127,
+                                                     -37, -32, 92, 53, -81, -40, 124, 92,  62,   -27,  65,  41};

+ 7 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/recurrent_input_to_forget_w_data.h

@@ -0,0 +1,7 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_2_recurrent_input_to_forget_w[25] = {11,  40,   37,   54,  -13,  73,  -26, -33, 22,  127, -69, 72, 95,
+                                                       -15, -106, -106, -94, -106, 109, 81,  40,  -29, 93,  -79, 22};

+ 7 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/recurrent_input_to_input_w_data.h

@@ -0,0 +1,7 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_2_recurrent_input_to_input_w[25] = {-91, -6, -84, -90, -25, 110, 51, 95,  -93, 86, -96, 37, -52,
+                                                      12,  41, 33,  1,   114, -71, 71, 127, 112, -8, 85,  -99};

+ 7 - 0
Tests/UnitTest/TestCases/TestData/lstm_2/recurrent_input_to_output_w_data.h

@@ -0,0 +1,7 @@
+// Generated by generate_test_data.py using tensorflow version 2.9.1 (Keras version 2.9.0).
+// Interpreter from tflite_runtime version 2.11.0 and revision 0.6.0-135805-g25bfb32e73a.
+#pragma once
+#include <stdint.h>
+
+const int8_t lstm_2_recurrent_input_to_output_w[25] = {
+    -68, -94, -121, 75, -2, 36, 58, 110, 38, 55, -40, 19, -63, -1, -127, 9, 3, -117, 108, -122, 71, -105, -25, 108, -2};

برخی فایل ها در این مقایسه diff نمایش داده نمی شوند زیرا تعداد فایل ها بسیار زیاد است