3 жил өмнө · 856cf46ba3
--- a/ARM.CMSIS.pdsc
+++ b/ARM.CMSIS.pdsc
@@ -23,6 +23,7 @@
 
				        - Support for int16 average and max pooling for reference implementation
			
 
				        - Support for elementwise add and mul int16 scalar version
			
 
				        - Support for softmax int16 scalar version
			
 
				+       - Support for SVDF with 8 bit state tensor
			
 
				       CMSIS-RTOS2:
			
 
				         - RTX 5.5.4 (see revision history for details)
			
 
				     </release>
			
@@ -2242,6 +2243,7 @@ and 8-bit Java bytecodes in Jazelle state.
 
				         <file category="source" name="CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_y.c"/>
			
 
				         <file category="source" name="CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_z.c"/>
			
 
				         <file category="source" name="CMSIS/NN/Source/SVDFunctions/arm_svdf_s8.c"/>
			
 
				+        <file category="source" name="CMSIS/NN/Source/SVDFunctions/arm_svdf_state_s16_s8.c"/>
			
 
				         <file category="source" name="CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s8.c"/>
			
 
				         <file category="source" name="CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s16.c"/>
			
 
				         <file category="source" name="CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c"/>
			
--- a/CMSIS/NN/Include/arm_nnfunctions.h
+++ b/CMSIS/NN/Include/arm_nnfunctions.h
@@ -21,8 +21,8 @@
 
				  * Title:        arm_nnfunctions.h
			
 
				  * Description:  Public header file for CMSIS NN Library
			
 
				  *
			
 
				- * $Date:        7 April 2022
			
 
				- * $Revision:    V.8.1.2
			
 
				+ * $Date:        19 April 2022
			
 
				+ * $Revision:    V.9.0.0
			
 
				  *
			
 
				  * Target Processor:  Cortex-M CPUs
			
 
				  * -------------------------------------------------------------------- */
			
@@ -2430,7 +2430,7 @@ void arm_concatenation_s8_w(const int8_t *input,
 
				  */
			
 
				 
			
 
				 /**
			
 
				- * @brief s8 SVDF function
			
 
				+ * @brief s8 SVDF function with 8 bit state tensor and 8 bit time weights
			
 
				  *
			
 
				  * @param[in]   input_ctx Temporary scratch buffer
			
 
				  * @param[in]   output_ctx Temporary output scratch buffer
			
@@ -2467,16 +2467,64 @@ arm_status arm_svdf_s8(const cmsis_nn_context *input_ctx,
 
				                        const cmsis_nn_dims *input_dims,
			
 
				                        const q7_t *input_data,
			
 
				                        const cmsis_nn_dims *state_dims,
			
 
				-                       q15_t *state_data,
			
 
				+                       q7_t *state_data,
			
 
				                        const cmsis_nn_dims *weights_feature_dims,
			
 
				                        const q7_t *weights_feature_data,
			
 
				                        const cmsis_nn_dims *weights_time_dims,
			
 
				-                       const q15_t *weights_time_data,
			
 
				+                       const q7_t *weights_time_data,
			
 
				                        const cmsis_nn_dims *bias_dims,
			
 
				                        const q31_t *bias_data,
			
 
				                        const cmsis_nn_dims *output_dims,
			
 
				                        q7_t *output_data);
			
 
				 
			
 
				+/**
			
 
				+ * @brief s8 SVDF function with 16 bit state tensor and 16 bit time weights
			
 
				+ *
			
 
				+ * @param[in]   input_ctx Temporary scratch buffer
			
 
				+ * @param[in]   output_ctx Temporary output scratch buffer
			
 
				+ * @param[in]   svdf_params SVDF Parameters
			
 
				+ *              Range of svdf_params->input_offset  : [-128, 127]
			
 
				+ *              Range of svdf_params->output_offset  : [-128, 127]
			
 
				+ * @param[in]   input_quant_params Input quantization parameters
			
 
				+ * @param[in]   output_quant_params Output quantization parameters
			
 
				+ * @param[in]   input_dims Input tensor dimensions
			
 
				+ * @param[in]   input_data Pointer to input tensor
			
 
				+ * @param[in]   state_dims State tensor dimensions
			
 
				+ * @param[in,out] state_data Pointer to state tensor. It is shifted and updated in place by this function
			
 
				+ * @param[in]   weights_feature_dims Weights (feature) tensor dimensions
			
 
				+ * @param[in]   weights_feature_data Pointer to the weights (feature) tensor
			
 
				+ * @param[in]   weights_time_dims Weights (time) tensor dimensions
			
 
				+ * @param[in]   weights_time_data Pointer to the weights (time) tensor
			
 
				+ * @param[in]   bias_dims Bias tensor dimensions
			
 
				+ * @param[in]   bias_data Pointer to bias tensor
			
 
				+ * @param[in]   output_dims Output tensor dimensions
			
 
				+ * @param[out]  output_data Pointer to the output tensor
			
 
				+ *
			
 
				+ * @return     The function returns <code>ARM_MATH_SUCCESS</code> on success, <code>ARM_MATH_ARGUMENT_ERROR</code> if a scratch buffer is NULL
			
 
				+ *
			
 
				+ * @details
			
 
				+ *    1. Supported framework: TensorFlow Lite micro
			
 
				+ *    2. q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs.
			
 
				+ *
			
 
				+ */
			
 
				+arm_status arm_svdf_state_s16_s8(const cmsis_nn_context *input_ctx,
			
 
				+                                 const cmsis_nn_context *output_ctx,
			
 
				+                                 const cmsis_nn_svdf_params *svdf_params,
			
 
				+                                 const cmsis_nn_per_tensor_quant_params *input_quant_params,
			
 
				+                                 const cmsis_nn_per_tensor_quant_params *output_quant_params,
			
 
				+                                 const cmsis_nn_dims *input_dims,
			
 
				+                                 const q7_t *input_data,
			
 
				+                                 const cmsis_nn_dims *state_dims,
			
 
				+                                 q15_t *state_data,
			
 
				+                                 const cmsis_nn_dims *weights_feature_dims,
			
 
				+                                 const q7_t *weights_feature_data,
			
 
				+                                 const cmsis_nn_dims *weights_time_dims,
			
 
				+                                 const q15_t *weights_time_data,
			
 
				+                                 const cmsis_nn_dims *bias_dims,
			
 
				+                                 const q31_t *bias_data,
			
 
				+                                 const cmsis_nn_dims *output_dims,
			
 
				+                                 q7_t *output_data);
			
 
				+
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
--- a/CMSIS/NN/Include/arm_nnsupportfunctions.h
+++ b/CMSIS/NN/Include/arm_nnsupportfunctions.h
@@ -21,9 +21,8 @@
 
				  * Title:        arm_nnsupportfunctions.h
			
 
				  * Description:  Public header file of support functions for CMSIS NN Library
			
 
				  *
			
 
				-
			
 
				- * $Date:        16. March 2022
			
 
				- * $Revision:    V.6.2.1
			
 
				+ * $Date:        16 March 2022
			
 
				+ * $Revision:    V.7.0.0
			
 
				  *
			
 
				  * Target Processor:  Cortex-M CPUs
			
 
				  * -------------------------------------------------------------------- */
			
@@ -388,6 +387,8 @@ arm_status arm_nn_mat_mult_nt_t_s8(const q7_t *lhs,
 
				  * @param[in]      rhs_rows        Number of rows in the right-hand side input matrix
			
 
				  * @param[in]      activation_min  Minimum value to clamp the output to. Range: int8
			
 
				  * @param[in]      activation_max  Maximum value to clamp the output to. Range: int8
			
 
				+ * @param[in]      address_offset  Memory position offset for dst. First output is stored at 'dst', the
			
 
				+ *                                 second at 'dst + address_offset' and so on. Default value is typically 1.
			
 
				  *
			
 
				  * @return         The function returns <code>ARM_MATH_SUCCESS</code>
			
 
				  *
			
@@ -404,7 +405,8 @@ arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
 
				                                     const int32_t rhs_cols,
			
 
				                                     const int32_t rhs_rows,
			
 
				                                     const int32_t activation_min,
			
 
				-                                    const int32_t activation_max);
			
 
				+                                    const int32_t activation_max,
			
 
				+                                    const int32_t address_offset);
			
 
				 
			
 
				 /**
			
 
				  * @brief s16 Vector by Matrix (transposed) multiplication
			
--- a/CMSIS/NN/README.md
+++ b/CMSIS/NN/README.md
@@ -53,6 +53,7 @@ Group | API | Base Operator | Input Constraints | Additional memory required for
 
				 ||arm_softmax_u8()| SOFTMAX | None | None | No | No | Bit exact to TFLu |
			
 
				 |[SVDF](https://arm-software.github.io/CMSIS_5/NN/html/group__SVDF.html)||||| |  ||
			
 
				 ||arm_svdf_s8()| SVDF | None | None | Yes | Yes | Bit exact to TFLu |
			
 
				+||arm_svdf_state_s16_s8()| SVDF | None | None | Yes | Yes | Bit exact to TFLu |
			
 
				 |[Misc](https://arm-software.github.io/CMSIS_5/NN/html/group__groupNN.html)||||| |  ||
			
 
				 ||arm_reshape_s8()| SOFTMAX | None | None | No | No | |
			
 
				 ||arm_elementwise_add_s8()| ELEMENTWISE ADD | None | None | Yes| Yes| Reshape is not done in this function <br/> Only minor improvements are expected |
			
--- a/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s8.c
+++ b/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s8.c
@@ -1,5 +1,5 @@
 
				 /*
			
 
				- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
			
 
				+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
			
 
				  *
			
 
				  * SPDX-License-Identifier: Apache-2.0
			
 
				  *
			
@@ -21,8 +21,8 @@
 
				  * Title:        arm_fully_connected_s8
			
 
				  * Description:  Fully connected function compatible with TF Lite.
			
 
				  *
			
 
				- * $Date:        19. March 2021
			
 
				- * $Revision:    V.3.0.0
			
 
				+ * $Date:        8 April 2022
			
 
				+ * $Revision:    V.3.1.0
			
 
				  *
			
 
				  * Target Processor:  Cortex-M and Cortex-A cores
			
 
				  *
			
@@ -79,7 +79,8 @@ arm_status arm_fully_connected_s8(const cmsis_nn_context *ctx,
 
				                                  filter_dims->n, /* col_dim or accum_depth */
			
 
				                                  output_dims->c, /* row_dim or output_depth */
			
 
				                                  fc_params->activation.min,
			
 
				-                                 fc_params->activation.max);
			
 
				+                                 fc_params->activation.max,
			
 
				+                                 1L);
			
 
				         input += filter_dims->n;
			
 
				         output += output_dims->c;
			
 
				         batch_cnt--;
			
--- a/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c
+++ b/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c
@@ -1,5 +1,5 @@
 
				 /*
			
 
				- * Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
			
 
				+ * Copyright (C) 2020-2022 Arm Limited or its affiliates.
			
 
				  *
			
 
				  * SPDX-License-Identifier: Apache-2.0
			
 
				  *
			
@@ -21,8 +21,8 @@
 
				  * Title:        arm_nn_vec_mat_mult_t_s8
			
 
				  * Description:  s8 vector by matrix (transposed) multiplication
			
 
				  *
			
 
				- * $Date:        19. August 2021
			
 
				- * $Revision:    V.2.5.2
			
 
				+ * $Date:        16 March 2022
			
 
				+ * $Revision:    V.3.0.0
			
 
				  *
			
 
				  * Target Processor:  Cortex-M
			
 
				  *
			
@@ -57,11 +57,13 @@ arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
 
				                                     const int32_t rhs_cols,
			
 
				                                     const int32_t rhs_rows,
			
 
				                                     const int32_t activation_min,
			
 
				-                                    const int32_t activation_max)
			
 
				+                                    const int32_t activation_max,
			
 
				+                                    const int32_t address_offset)
			
 
				 {
			
 
				     (void)rhs_offset;
			
 
				 #if defined(ARM_MATH_MVEI)
			
 
				     const int32_t row_loop_cnt = rhs_rows / 3;
			
 
				+    const uint32x4_t address_offset_array = {0, address_offset, address_offset * 2, address_offset * 3};
			
 
				 
			
 
				     for (int i_row_loop_cnt = 0; i_row_loop_cnt < row_loop_cnt; i_row_loop_cnt++)
			
 
				     {
			
@@ -123,8 +125,9 @@ arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
 
				         acc = vaddq_s32(acc, vdupq_n_s32(dst_offset));
			
 
				         acc = vmaxq_s32(acc, vdupq_n_s32(activation_min));
			
 
				         acc = vminq_s32(acc, vdupq_n_s32(activation_max));
			
 
				-        vstrbq_p_s32(dst, acc, p);
			
 
				-        dst += 3;
			
 
				+
			
 
				+        vstrbq_scatter_offset_s32(dst, address_offset_array, acc);
			
 
				+        dst += 3 * address_offset;
			
 
				     }
			
 
				 
			
 
				     const int loop_cnt = rhs_rows % 3;
			
@@ -165,14 +168,12 @@ arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
 
				         // Clamp the result
			
 
				         acc_0 = MAX(acc_0, activation_min);
			
 
				         *dst = MIN(acc_0, activation_max);
			
 
				-        dst++;
			
 
				+        dst += address_offset;
			
 
				     }
			
 
				 
			
 
				 #elif defined(ARM_MATH_DSP)
			
 
				     const int32_t row_loop_cnt = rhs_rows / 2;
			
 
				-
			
 
				     const int16_t lhs_offset_s16 = (int16_t)lhs_offset;
			
 
				-
			
 
				     const uint32_t lhs_offset_s16x2 = __PKHBT(lhs_offset_s16, lhs_offset_s16, 16);
			
 
				 
			
 
				     for (int32_t i = 0; i < row_loop_cnt; i++)
			
@@ -235,9 +236,9 @@ arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
 
				         acc_0 = MIN(acc_0, activation_max);
			
 
				         acc_1 = MAX(acc_1, activation_min);
			
 
				         acc_1 = MIN(acc_1, activation_max);
			
 
				-
			
 
				-        *dst++ = (q7_t)acc_0;
			
 
				-        *dst++ = (q7_t)acc_1;
			
 
				+        *dst = (int8_t)acc_0;
			
 
				+        *(dst + address_offset) = (int8_t)acc_1;
			
 
				+        dst += 2 * address_offset;
			
 
				     }
			
 
				 
			
 
				     if (rhs_rows & 0x1)
			
@@ -281,8 +282,8 @@ arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
 
				         // Clamp the result
			
 
				         acc_0 = MAX(acc_0, activation_min);
			
 
				         acc_0 = MIN(acc_0, activation_max);
			
 
				-
			
 
				-        *dst++ = (q7_t)acc_0;
			
 
				+        *dst = (int8_t)acc_0;
			
 
				+        dst += address_offset;
			
 
				     }
			
 
				 
			
 
				 #else
			
@@ -339,9 +340,10 @@ arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
 
				         res02 = MAX(res02, activation_min);
			
 
				         res02 = MIN(res02, activation_max);
			
 
				 
			
 
				-        *dst++ = (q7_t)res00;
			
 
				-        *dst++ = (q7_t)res01;
			
 
				-        *dst++ = (q7_t)res02;
			
 
				+        *dst = (q7_t)res00;
			
 
				+        *(dst + address_offset) = (q7_t)res01;
			
 
				+        *(dst + 2 * address_offset) = (q7_t)res02;
			
 
				+        dst += 3 * address_offset;
			
 
				 
			
 
				         rhs += 3 * rhs_cols;
			
 
				     }
			
@@ -380,7 +382,8 @@ arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
 
				         res00 = MAX(res00, activation_min);
			
 
				         res00 = MIN(res00, activation_max);
			
 
				 
			
 
				-        *dst++ = (q7_t)res00;
			
 
				+        *dst = (int8_t)res00;
			
 
				+        dst += address_offset;
			
 
				         rhs += rhs_cols;
			
 
				     }
			
 
				 #endif
			
--- a/CMSIS/NN/Source/SVDFunctions/arm_svdf_s8.c
+++ b/CMSIS/NN/Source/SVDFunctions/arm_svdf_s8.c
@@ -1,5 +1,5 @@
 
				 /*
			
 
				- * Copyright (C) 2010-2021 Arm Limited or its affiliates.
			
 
				+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
			
 
				  *
			
 
				  * SPDX-License-Identifier: Apache-2.0
			
 
				  *
			
@@ -21,8 +21,8 @@
 
				  * Title:        arm_svdf_s8.c
			
 
				  * Description:  S8 basic SVDF layer function
			
 
				  *
			
 
				- * $Date:        17. August 2021
			
 
				- * $Revision:    V.1.5.1
			
 
				+ * $Date:        16 March 2022
			
 
				+ * $Revision:    V.3.0.0
			
 
				  *
			
 
				  * Target Processor:  Cortex-M processors
			
 
				  *
			
@@ -41,7 +41,7 @@
 
				  */
			
 
				 
			
 
				 /*
			
 
				- * S8 SVDF layer function for TensorFlow Lite
			
 
				+ * S8 SVDF layer function for TensorFlow Lite with 8 bit state tensor
			
 
				  *
			
 
				  * Refer to header file for details.
			
 
				  *
			
@@ -55,11 +55,11 @@ arm_status arm_svdf_s8(const cmsis_nn_context *input_ctx,
 
				                        const cmsis_nn_dims *input_dims,
			
 
				                        const q7_t *input_data,
			
 
				                        const cmsis_nn_dims *state_dims,
			
 
				-                       q15_t *state_data,
			
 
				+                       q7_t *state_data,
			
 
				                        const cmsis_nn_dims *weights_feature_dims,
			
 
				                        const q7_t *weights_feature_data,
			
 
				                        const cmsis_nn_dims *weights_time_dims,
			
 
				-                       const q15_t *weights_time_data,
			
 
				+                       const q7_t *weights_time_data,
			
 
				                        const cmsis_nn_dims *bias_dims,
			
 
				                        const q31_t *bias_data,
			
 
				                        const cmsis_nn_dims *output_dims,
			
@@ -99,28 +99,30 @@ arm_status arm_svdf_s8(const cmsis_nn_context *input_ctx,
 
				     }
			
 
				     q31_t *buffer_b = (q31_t *)output_ctx->buf;
			
 
				 
			
 
				-    memmove((q15_t *)state_data,
			
 
				-            (q15_t *)state_data + 1,
			
 
				-            (size_t)((input_batches * feature_batches * time_batches - 1) * (int32_t)sizeof(int16_t)));
			
 
				+    memmove((int8_t *)state_data,
			
 
				+            (int8_t *)state_data + 1,
			
 
				+            (size_t)((input_batches * feature_batches * time_batches - 1) * (int32_t)sizeof(int8_t)));
			
 
				 
			
 
				     for (int i_batch = 0; i_batch < input_batches; i_batch++)
			
 
				     {
			
 
				-        q15_t *res_ptr = state_data + (time_batches * i_batch * feature_batches) + (time_batches - 1);
			
 
				+        q7_t *res_ptr = state_data + (time_batches * i_batch * feature_batches) + (time_batches - 1);
			
 
				         const q7_t *weight = weights_feature_data;
			
 
				         const q7_t *input = input_data + i_batch * input_height;
			
 
				 
			
 
				-        arm_status res = arm_nn_vec_mat_mult_t_svdf_s8(input,
			
 
				-                                                       weight,
			
 
				-                                                       res_ptr,
			
 
				-                                                       -zp_in,
			
 
				-                                                       0,
			
 
				-                                                       time_batches,
			
 
				-                                                       multiplier_in,
			
 
				-                                                       shift_in,
			
 
				-                                                       input_height,
			
 
				-                                                       feature_batches,
			
 
				-                                                       in_activation_min,
			
 
				-                                                       in_activation_max);
			
 
				+        arm_status res = arm_nn_vec_mat_mult_t_s8(input,
			
 
				+                                                  weight,
			
 
				+                                                  NULL,
			
 
				+                                                  res_ptr,
			
 
				+                                                  -zp_in,
			
 
				+                                                  0,
			
 
				+                                                  0,
			
 
				+                                                  multiplier_in,
			
 
				+                                                  shift_in,
			
 
				+                                                  input_height,
			
 
				+                                                  feature_batches,
			
 
				+                                                  in_activation_min,
			
 
				+                                                  in_activation_max,
			
 
				+                                                  time_batches);
			
 
				 
			
 
				         if (res != ARM_MATH_SUCCESS)
			
 
				         {
			
@@ -130,10 +132,10 @@ arm_status arm_svdf_s8(const cmsis_nn_context *input_ctx,
 
				 
			
 
				     {
			
 
				         q31_t *ptr_a = buffer_a;
			
 
				-        const q15_t *v2 = state_data;
			
 
				+        const int8_t *v2 = state_data;
			
 
				         for (int i_batch = 0; i_batch < input_batches; i_batch++)
			
 
				         {
			
 
				-            const q15_t *v1 = weights_time_data;
			
 
				+            const int8_t *v1 = weights_time_data;
			
 
				 
			
 
				             for (int i_feature_batch = 0; i_feature_batch < feature_batches; i_feature_batch++)
			
 
				             {
			
@@ -145,8 +147,12 @@ arm_status arm_svdf_s8(const cmsis_nn_context *input_ctx,
 
				                 for (int i = 0; i < block_count; i++)
			
 
				                 {
			
 
				                     j += 2;
			
 
				-                    q31_t r1 = arm_nn_read_q15x2_ia(&v1);
			
 
				-                    q31_t r2 = arm_nn_read_q15x2_ia(&v2);
			
 
				+
			
 
				+                    q31_t r1 = arm_nn_read_q7x4_ia(&v1);
			
 
				+                    r1 = __SXTB16(r1);
			
 
				+
			
 
				+                    q31_t r2 = arm_nn_read_q7x4_ia(&v2);
			
 
				+                    r2 = __SXTB16(r2);
			
 
				 
			
 
				                     sum = __SMLAD(r1, r2, sum);
			
 
				                 }
			
--- a/CMSIS/NN/Source/SVDFunctions/arm_svdf_state_s16_s8.c
+++ b/CMSIS/NN/Source/SVDFunctions/arm_svdf_state_s16_s8.c
@@ -0,0 +1,263 @@
 
				+/*
			
 
				+ * Copyright (C) 2022 Arm Limited or its affiliates.
			
 
				+ *
			
 
				+ * SPDX-License-Identifier: Apache-2.0
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the License); you may
			
 
				+ * not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ * www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
			
 
				+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+
			
 
				+/* ----------------------------------------------------------------------
			
 
				+ * Project:      CMSIS NN Library
			
 
				+ * Title:        arm_svdf_state_s16_s8.c
			
 
				+ * Description:  S8 basic SVDF layer function with 16 bit state tensor
			
 
				+ *
			
 
				+ * $Date:        8 April 2022
			
 
				+ * $Revision:    V.1.0.0
			
 
				+ *
			
 
				+ * Target Processor:  Cortex-M processors
			
 
				+ *
			
 
				+ * -------------------------------------------------------------------- */
			
 
				+
			
 
				+#include "arm_nnfunctions.h"
			
 
				+#include "arm_nnsupportfunctions.h"
			
 
				+
			
 
				+/**
			
 
				+ * @ingroup groupNN
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+ * @addtogroup SVDF
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * S8 SVDF layer function for TensorFlow Lite with 16 bit state tensor
			
 
				+ *
			
 
				+ * Refer to header file for details.
			
 
				+ *
			
 
				+ */
			
 
				+
			
 
				+arm_status arm_svdf_state_s16_s8(const cmsis_nn_context *input_ctx,
			
 
				+                                 const cmsis_nn_context *output_ctx,
			
 
				+                                 const cmsis_nn_svdf_params *svdf_params,
			
 
				+                                 const cmsis_nn_per_tensor_quant_params *input_quant_params,
			
 
				+                                 const cmsis_nn_per_tensor_quant_params *output_quant_params,
			
 
				+                                 const cmsis_nn_dims *input_dims,
			
 
				+                                 const q7_t *input_data,
			
 
				+                                 const cmsis_nn_dims *state_dims,
			
 
				+                                 q15_t *state_data,
			
 
				+                                 const cmsis_nn_dims *weights_feature_dims,
			
 
				+                                 const q7_t *weights_feature_data,
			
 
				+                                 const cmsis_nn_dims *weights_time_dims,
			
 
				+                                 const q15_t *weights_time_data,
			
 
				+                                 const cmsis_nn_dims *bias_dims,
			
 
				+                                 const q31_t *bias_data,
			
 
				+                                 const cmsis_nn_dims *output_dims,
			
 
				+                                 q7_t *output_data)
			
 
				+{
			
 
				+    (void)bias_dims;
			
 
				+    (void)state_dims;
			
 
				+    (void)output_dims;
			
 
				+
			
 
				+    const q31_t multiplier_in = input_quant_params->multiplier;
			
 
				+    const q31_t shift_in = input_quant_params->shift;
			
 
				+    const q31_t multiplier_out = output_quant_params->multiplier;
			
 
				+    const q31_t shift_2 = output_quant_params->shift;
			
 
				+    const int32_t zp_in = svdf_params->input_offset;
			
 
				+    const int32_t zp_out = svdf_params->output_offset;
			
 
				+    const int32_t in_activation_min = svdf_params->input_activation.min;
			
 
				+    const int32_t in_activation_max = svdf_params->input_activation.max;
			
 
				+    const int32_t out_activation_min = svdf_params->output_activation.min;
			
 
				+    const int32_t out_activation_max = svdf_params->output_activation.max;
			
 
				+    const int16_t rank = svdf_params->rank;
			
 
				+
			
 
				+    const int32_t input_batches = input_dims->n;
			
 
				+    const int32_t input_height = input_dims->h;
			
 
				+    const int32_t feature_batches = weights_feature_dims->n;
			
 
				+    const int32_t time_batches = weights_time_dims->h;
			
 
				+    const int32_t unit_count = feature_batches / rank;
			
 
				+
			
 
				+    if (input_ctx->buf == NULL)
			
 
				+    {
			
 
				+        return ARM_MATH_ARGUMENT_ERROR;
			
 
				+    }
			
 
				+    q31_t *buffer_a = (q31_t *)input_ctx->buf;
			
 
				+
			
 
				+    if (output_ctx->buf == NULL)
			
 
				+    {
			
 
				+        return ARM_MATH_ARGUMENT_ERROR;
			
 
				+    }
			
 
				+    q31_t *buffer_b = (q31_t *)output_ctx->buf;
			
 
				+
			
 
				+    memmove((q15_t *)state_data,
			
 
				+            (q15_t *)state_data + 1,
			
 
				+            (size_t)((input_batches * feature_batches * time_batches - 1) * (int32_t)sizeof(int16_t)));
			
 
				+
			
 
				+    for (int i_batch = 0; i_batch < input_batches; i_batch++)
			
 
				+    {
			
 
				+        q15_t *res_ptr = state_data + (time_batches * i_batch * feature_batches) + (time_batches - 1);
			
 
				+        const q7_t *weight = weights_feature_data;
			
 
				+        const q7_t *input = input_data + i_batch * input_height;
			
 
				+
			
 
				+        arm_status res = arm_nn_vec_mat_mult_t_svdf_s8(input,
			
 
				+                                                       weight,
			
 
				+                                                       res_ptr,
			
 
				+                                                       -zp_in,
			
 
				+                                                       0,
			
 
				+                                                       time_batches,
			
 
				+                                                       multiplier_in,
			
 
				+                                                       shift_in,
			
 
				+                                                       input_height,
			
 
				+                                                       feature_batches,
			
 
				+                                                       in_activation_min,
			
 
				+                                                       in_activation_max);
			
 
				+
			
 
				+        if (res != ARM_MATH_SUCCESS)
			
 
				+        {
			
 
				+            return res;
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    {
			
 
				+        q31_t *ptr_a = buffer_a;
			
 
				+        const q15_t *v2 = state_data;
			
 
				+        for (int i_batch = 0; i_batch < input_batches; i_batch++)
			
 
				+        {
			
 
				+            const q15_t *v1 = weights_time_data;
			
 
				+
			
 
				+            for (int i_feature_batch = 0; i_feature_batch < feature_batches; i_feature_batch++)
			
 
				+            {
			
 
				+                *ptr_a = 0;
			
 
				+                int32_t sum = 0;
			
 
				+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
			
 
				+                int j = 0;
			
 
				+                int32_t block_count = time_batches >> 1;
			
 
				+                for (int i = 0; i < block_count; i++)
			
 
				+                {
			
 
				+                    j += 2;
			
 
				+                    q31_t r1 = arm_nn_read_q15x2_ia(&v1);
			
 
				+                    q31_t r2 = arm_nn_read_q15x2_ia(&v2);
			
 
				+
			
 
				+                    sum = __SMLAD(r1, r2, sum);
			
 
				+                }
			
 
				+
			
 
				+                // Process the remaining data
			
 
				+                for (; j < time_batches; j++)
			
 
				+                {
			
 
				+                    sum += *v1 * *v2;
			
 
				+                    v1++;
			
 
				+                    v2++;
			
 
				+                }
			
 
				+#else
			
 
				+                for (int j = 0; j < time_batches; j++)
			
 
				+                {
			
 
				+                    sum += *v1 * *v2;
			
 
				+                    v1++;
			
 
				+                    v2++;
			
 
				+                }
			
 
				+#endif
			
 
				+
			
 
				+                *ptr_a = sum;
			
 
				+                ptr_a++;
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    if (bias_data)
			
 
				+    {
			
 
				+        if (unit_count == feature_batches)
			
 
				+        {
			
 
				+            for (int i = 0; i < input_batches; i++)
			
 
				+            {
			
 
				+                q31_t *output_temp = buffer_b + i * feature_batches;
			
 
				+                const q31_t *ptr_a = buffer_a + i * feature_batches;
			
 
				+
			
 
				+                const int32_t *bi = bias_data;
			
 
				+                for (int j = 0; j < feature_batches; j++)
			
 
				+                {
			
 
				+                    output_temp[j] = ptr_a[j] + bi[j];
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+        else
			
 
				+        {
			
 
				+            for (int i_batch = 0; i_batch < input_batches; i_batch++)
			
 
				+            {
			
 
				+                q31_t *output_data_temp = buffer_b + i_batch * unit_count;
			
 
				+                q31_t *ptr_a = buffer_a + i_batch * feature_batches;
			
 
				+
			
 
				+                for (int i = 0; i < unit_count; i++)
			
 
				+                {
			
 
				+                    int32_t sum = bias_data[i];
			
 
				+                    for (int j = 0; j < rank; j++)
			
 
				+                    {
			
 
				+                        sum += *ptr_a;
			
 
				+                        ptr_a++;
			
 
				+                    }
			
 
				+                    output_data_temp[i] = sum;
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    else
			
 
				+    {
			
 
				+        for (int i_batch = 0; i_batch < input_batches; i_batch++)
			
 
				+        {
			
 
				+            q31_t *output_data_temp = buffer_b + i_batch * unit_count;
			
 
				+            q31_t *ptr_a = buffer_a + i_batch * feature_batches;
			
 
				+
			
 
				+            for (int i = 0; i < unit_count; i++)
			
 
				+            {
			
 
				+                int32_t sum = 0;
			
 
				+                for (int j = 0; j < rank; j++)
			
 
				+                {
			
 
				+                    sum += *ptr_a;
			
 
				+                    ptr_a++;
			
 
				+                }
			
 
				+                output_data_temp[i] = sum;
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+#if defined(ARM_MATH_MVEI)
			
 
				+    int32_t num_elements = input_batches * unit_count;
			
 
				+    const int32_t loop_count = (num_elements + 3) / 4;
			
 
				+    for (int i_op = 0; i_op < loop_count; i_op++)
			
 
				+    {
			
 
				+        mve_pred16_t p = vctp32q((uint32_t)num_elements);
			
 
				+        int32x4_t op = vldrwq_z_s32(buffer_b, p);
			
 
				+        op = arm_requantize_mve(op, multiplier_out, shift_2);
			
 
				+        op = vaddq_n_s32(op, zp_out);
			
 
				+        const int32x4_t min_vec = vdupq_n_s32((int8_t)out_activation_min);
			
 
				+        const int32x4_t max_vec = vdupq_n_s32((int8_t)out_activation_max);
			
 
				+        op = vmaxq_s32(op, min_vec);
			
 
				+        op = vminq_s32(op, max_vec);
			
 
				+        vstrbq_p_s32(output_data, op, p);
			
 
				+        output_data += 4;
			
 
				+        buffer_b += 4;
			
 
				+        num_elements -= 4;
			
 
				+    }
			
 
				+#else
			
 
				+    for (int i = 0; i < input_batches * unit_count; i++)
			
 
				+    {
			
 
				+        output_data[i] = (q7_t)CLAMP(
			
 
				+            arm_nn_requantize(buffer_b[i], multiplier_out, shift_2) + zp_out, out_activation_max, out_activation_min);
			
 
				+    }
			
 
				+#endif
			
 
				+
			
 
				+    return (ARM_MATH_SUCCESS);
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * @} end of SVDF group
			
 
				+ */
			
--- a/CMSIS/NN/Tests/UnitTest/CMakeLists.txt
+++ b/CMSIS/NN/Tests/UnitTest/CMakeLists.txt
@@ -87,7 +87,7 @@ add_subdirectory(TestCases/test_arm_max_pool_s8)
 
				 add_subdirectory(TestCases/test_arm_softmax_s8)
			
 
				 add_subdirectory(TestCases/test_arm_softmax_s8_s16)
			
 
				 add_subdirectory(TestCases/test_arm_softmax_s16)
			
 
				-add_subdirectory(TestCases/test_arm_svdf_s8)
			
 
				+add_subdirectory(TestCases/test_arm_svdf_state_s16_s8)
			
 
				 
			
 
				 set(MAKE_CMD "python3")
			
 
				 set(MAKE_CMD_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/unittest_targets.py")
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_svdf_state_s16_s8/CMakeLists.txt
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_svdf_state_s16_s8/CMakeLists.txt
@@ -1,5 +1,5 @@
 
				 #
			
 
				-# Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
			
 
				+# Copyright (C) 2010-2022 Arm Limited or its affiliates.
			
 
				 #
			
 
				 # SPDX-License-Identifier: Apache-2.0
			
 
				 #
			
@@ -16,8 +16,8 @@
 
				 # limitations under the License.
			
 
				 #
			
 
				 
			
 
				-add_cmsis_nn_unit_test_executable(test_arm_svdf_s8)
			
 
				+add_cmsis_nn_unit_test_executable(test_arm_svdf_state_s16_s8)
			
 
				 
			
 
				-target_sources(test_arm_svdf_s8 PRIVATE
			
 
				-    Unity/unity_test_arm_svdf_s8.c
			
 
				-    Unity/TestRunner/unity_test_arm_svdf_s8_runner.c)
			
 
				+target_sources(test_arm_svdf_state_s16_s8 PRIVATE
			
 
				+    Unity/unity_test_arm_svdf_state_s16_s8.c
			
 
				+    Unity/TestRunner/unity_test_arm_svdf_state_s16_s8_runner.c)
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_svdf_state_s16_s8/Unity/unity_test_arm_svdf_state_s16_s8.c
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_svdf_state_s16_s8/Unity/unity_test_arm_svdf_state_s16_s8.c
@@ -1,5 +1,5 @@
 
				 /*
			
 
				- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
			
 
				+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
			
 
				  *
			
 
				  * SPDX-License-Identifier: Apache-2.0
			
 
				  *
			
@@ -22,7 +22,7 @@
 
				 #include <stdlib.h>
			
 
				 #include <string.h>
			
 
				 
			
 
				-#include "../test_arm_svdf_s8.c"
			
 
				+#include "../test_arm_svdf_state_s16_s8.c"
			
 
				 #include "unity.h"
			
 
				 
			
 
				 #ifdef USING_FVP_CORSTONE_300
			
@@ -44,10 +44,10 @@ void setUp(void)
 
				  */
			
 
				 void tearDown(void) {}
			
 
				 
			
 
				-void test_svdf_arm_svdf_s8(void) { svdf_arm_svdf_s8(); }
			
 
				+void test_svdf_arm_state_s16_s8(void) { svdf_arm_svdf_state_s16_s8(); }
			
 
				 
			
 
				-void test_svdf_1_arm_svdf_s8(void) { svdf_1_arm_svdf_s8(); }
			
 
				+void test_svdf_1_arm_state_s16_s8(void) { svdf_1_arm_svdf_state_s16_s8(); }
			
 
				 
			
 
				-void test_svdf_2_arm_svdf_s8(void) { svdf_2_arm_svdf_s8(); }
			
 
				+void test_svdf_2_arm_state_s16_s8(void) { svdf_2_arm_svdf_state_s16_s8(); }
			
 
				 
			
 
				-void test_svdf_3_arm_svdf_s8(void) { svdf_3_arm_svdf_s8(); }
			
 
				+void test_svdf_3_arm_state_s16_s8(void) { svdf_3_arm_svdf_state_s16_s8(); }
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_svdf_state_s16_s8/test_arm_svdf_state_s16_s8.c
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_svdf_state_s16_s8/test_arm_svdf_state_s16_s8.c
@@ -1,5 +1,5 @@
 
				 /*
			
 
				- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
			
 
				+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
			
 
				  *
			
 
				  * SPDX-License-Identifier: Apache-2.0
			
 
				  *
			
@@ -41,7 +41,7 @@ static bool check_null_bias(const int32_t *bias, int32_t size)
 
				     return null_bias;
			
 
				 }
			
 
				 
			
 
				-void svdf_arm_svdf_s8(void)
			
 
				+void svdf_arm_svdf_state_s16_s8(void)
			
 
				 {
			
 
				     const arm_status expected = ARM_MATH_SUCCESS;
			
 
				     cmsis_nn_context input_ctx;
			
@@ -99,23 +99,23 @@ void svdf_arm_svdf_s8(void)
 
				         for (int j = 0; j < number_inputs; j++)
			
 
				         {
			
 
				             memcpy(input_data, svdf_input_sequence + j * input_round_size, input_round_size);
			
 
				-            arm_status result = arm_svdf_s8(&input_ctx,
			
 
				-                                            &output_ctx,
			
 
				-                                            &svdf_params,
			
 
				-                                            &input_quant_params,
			
 
				-                                            &output_quant_params,
			
 
				-                                            &input_dims,
			
 
				-                                            input_data,
			
 
				-                                            &state_dims,
			
 
				-                                            state_data,
			
 
				-                                            &weights_feature_dims,
			
 
				-                                            weights_feature_data,
			
 
				-                                            &weights_time_dims,
			
 
				-                                            weights_time_data,
			
 
				-                                            &bias_dims,
			
 
				-                                            null_bias == true ? NULL : svdf_biases,
			
 
				-                                            &output_dims,
			
 
				-                                            output_data);
			
 
				+            arm_status result = arm_svdf_state_s16_s8(&input_ctx,
			
 
				+                                                      &output_ctx,
			
 
				+                                                      &svdf_params,
			
 
				+                                                      &input_quant_params,
			
 
				+                                                      &output_quant_params,
			
 
				+                                                      &input_dims,
			
 
				+                                                      input_data,
			
 
				+                                                      &state_dims,
			
 
				+                                                      state_data,
			
 
				+                                                      &weights_feature_dims,
			
 
				+                                                      weights_feature_data,
			
 
				+                                                      &weights_time_dims,
			
 
				+                                                      weights_time_data,
			
 
				+                                                      &bias_dims,
			
 
				+                                                      null_bias == true ? NULL : svdf_biases,
			
 
				+                                                      &output_dims,
			
 
				+                                                      output_data);
			
 
				             TEST_ASSERT_EQUAL(expected, result);
			
 
				         }
			
 
				 
			
@@ -127,7 +127,7 @@ void svdf_arm_svdf_s8(void)
 
				     free(output_ctx.buf);
			
 
				 }
			
 
				 
			
 
				-void svdf_1_arm_svdf_s8(void)
			
 
				+void svdf_1_arm_svdf_state_s16_s8(void)
			
 
				 {
			
 
				     const arm_status expected = ARM_MATH_SUCCESS;
			
 
				     cmsis_nn_context input_ctx;
			
@@ -184,23 +184,23 @@ void svdf_1_arm_svdf_s8(void)
 
				         for (int j = 0; j < number_inputs; j++)
			
 
				         {
			
 
				             memcpy(input_data, svdf_1_input_sequence + j * input_round_size, input_round_size);
			
 
				-            arm_status result = arm_svdf_s8(&input_ctx,
			
 
				-                                            &output_ctx,
			
 
				-                                            &svdf_1_params,
			
 
				-                                            &input_quant_params,
			
 
				-                                            &output_quant_params,
			
 
				-                                            &input_dims,
			
 
				-                                            input_data,
			
 
				-                                            &state_dims,
			
 
				-                                            state_data,
			
 
				-                                            &weights_feature_dims,
			
 
				-                                            weights_feature_data,
			
 
				-                                            &weights_time_dims,
			
 
				-                                            weights_time_data,
			
 
				-                                            &bias_dims,
			
 
				-                                            null_bias == true ? NULL : svdf_1_biases,
			
 
				-                                            &output_dims,
			
 
				-                                            output_data);
			
 
				+            arm_status result = arm_svdf_state_s16_s8(&input_ctx,
			
 
				+                                                      &output_ctx,
			
 
				+                                                      &svdf_1_params,
			
 
				+                                                      &input_quant_params,
			
 
				+                                                      &output_quant_params,
			
 
				+                                                      &input_dims,
			
 
				+                                                      input_data,
			
 
				+                                                      &state_dims,
			
 
				+                                                      state_data,
			
 
				+                                                      &weights_feature_dims,
			
 
				+                                                      weights_feature_data,
			
 
				+                                                      &weights_time_dims,
			
 
				+                                                      weights_time_data,
			
 
				+                                                      &bias_dims,
			
 
				+                                                      null_bias == true ? NULL : svdf_1_biases,
			
 
				+                                                      &output_dims,
			
 
				+                                                      output_data);
			
 
				             TEST_ASSERT_EQUAL(expected, result);
			
 
				         }
			
 
				 
			
@@ -212,7 +212,7 @@ void svdf_1_arm_svdf_s8(void)
 
				     free(output_ctx.buf);
			
 
				 }
			
 
				 
			
 
				-void svdf_2_arm_svdf_s8(void)
			
 
				+void svdf_2_arm_svdf_state_s16_s8(void)
			
 
				 {
			
 
				     const arm_status expected = ARM_MATH_SUCCESS;
			
 
				     cmsis_nn_context input_ctx;
			
@@ -269,23 +269,23 @@ void svdf_2_arm_svdf_s8(void)
 
				         for (int j = 0; j < number_inputs; j++)
			
 
				         {
			
 
				             memcpy(input_data, svdf_2_input_sequence + j * input_round_size, input_round_size);
			
 
				-            arm_status result = arm_svdf_s8(&input_ctx,
			
 
				-                                            &output_ctx,
			
 
				-                                            &svdf_2_params,
			
 
				-                                            &input_quant_params,
			
 
				-                                            &output_quant_params,
			
 
				-                                            &input_dims,
			
 
				-                                            input_data,
			
 
				-                                            &state_dims,
			
 
				-                                            state_data,
			
 
				-                                            &weights_feature_dims,
			
 
				-                                            weights_feature_data,
			
 
				-                                            &weights_time_dims,
			
 
				-                                            weights_time_data,
			
 
				-                                            &bias_dims,
			
 
				-                                            null_bias == true ? NULL : svdf_2_biases,
			
 
				-                                            &output_dims,
			
 
				-                                            output_data);
			
 
				+            arm_status result = arm_svdf_state_s16_s8(&input_ctx,
			
 
				+                                                      &output_ctx,
			
 
				+                                                      &svdf_2_params,
			
 
				+                                                      &input_quant_params,
			
 
				+                                                      &output_quant_params,
			
 
				+                                                      &input_dims,
			
 
				+                                                      input_data,
			
 
				+                                                      &state_dims,
			
 
				+                                                      state_data,
			
 
				+                                                      &weights_feature_dims,
			
 
				+                                                      weights_feature_data,
			
 
				+                                                      &weights_time_dims,
			
 
				+                                                      weights_time_data,
			
 
				+                                                      &bias_dims,
			
 
				+                                                      null_bias == true ? NULL : svdf_2_biases,
			
 
				+                                                      &output_dims,
			
 
				+                                                      output_data);
			
 
				             TEST_ASSERT_EQUAL(expected, result);
			
 
				         }
			
 
				 
			
@@ -297,7 +297,7 @@ void svdf_2_arm_svdf_s8(void)
 
				     free(output_ctx.buf);
			
 
				 }
			
 
				 
			
 
				-void svdf_3_arm_svdf_s8(void)
			
 
				+void svdf_3_arm_svdf_state_s16_s8(void)
			
 
				 {
			
 
				     const arm_status expected = ARM_MATH_SUCCESS;
			
 
				     cmsis_nn_context input_ctx;
			
@@ -354,23 +354,23 @@ void svdf_3_arm_svdf_s8(void)
 
				         for (int j = 0; j < number_inputs; j++)
			
 
				         {
			
 
				             memcpy(input_data, svdf_3_input_sequence + j * input_round_size, input_round_size);
			
 
				-            arm_status result = arm_svdf_s8(&input_ctx,
			
 
				-                                            &output_ctx,
			
 
				-                                            &svdf_3_params,
			
 
				-                                            &input_quant_params,
			
 
				-                                            &output_quant_params,
			
 
				-                                            &input_dims,
			
 
				-                                            input_data,
			
 
				-                                            &state_dims,
			
 
				-                                            state_data,
			
 
				-                                            &weights_feature_dims,
			
 
				-                                            weights_feature_data,
			
 
				-                                            &weights_time_dims,
			
 
				-                                            weights_time_data,
			
 
				-                                            &bias_dims,
			
 
				-                                            null_bias == true ? NULL : svdf_3_biases,
			
 
				-                                            &output_dims,
			
 
				-                                            output_data);
			
 
				+            arm_status result = arm_svdf_state_s16_s8(&input_ctx,
			
 
				+                                                      &output_ctx,
			
 
				+                                                      &svdf_3_params,
			
 
				+                                                      &input_quant_params,
			
 
				+                                                      &output_quant_params,
			
 
				+                                                      &input_dims,
			
 
				+                                                      input_data,
			
 
				+                                                      &state_dims,
			
 
				+                                                      state_data,
			
 
				+                                                      &weights_feature_dims,
			
 
				+                                                      weights_feature_data,
			
 
				+                                                      &weights_time_dims,
			
 
				+                                                      weights_time_data,
			
 
				+                                                      &bias_dims,
			
 
				+                                                      null_bias == true ? NULL : svdf_3_biases,
			
 
				+                                                      &output_dims,
			
 
				+                                                      output_data);
			
 
				             TEST_ASSERT_EQUAL(expected, result);
			
 
				         }